author     Ben Murdoch <benm@google.com>  2010-05-11 18:35:50 +0100
committer  Ben Murdoch <benm@google.com>  2010-05-14 10:23:05 +0100
commit     21939df44de1705786c545cd1bf519d47250322d (patch)
tree       ef56c310f5c0cdc379c2abb2e212308a3281ce20 /WebKitTools/Scripts/webkitpy/thirdparty
parent     4ff1d8891d520763f17675827154340c7c740f90 (diff)
Merge Webkit at r58956: Initial merge by Git.
Change-Id: I1d9fb60ea2c3f2ddc04c17a871acdb39353be228
Diffstat (limited to 'WebKitTools/Scripts/webkitpy/thirdparty')
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/__init__.py  8
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README  2
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py  3401
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py  438
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py  1560
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py  140
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py  522
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py  1080
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py  1707
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py  28
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py  60
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py  249
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py  103
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py  232
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py  631
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py  758
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py  185
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py  676
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py  161
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py  388
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py  436
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py  390
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py  87
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py  527
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py  241
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py  16
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py  6
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py  73
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py  40
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py  55
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py  352
-rwxr-xr-x  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py  1254
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url  1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py  1
-rwxr-xr-x  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py  1702
44 files changed, 17805 insertions, 4 deletions
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py
index f1e5334..c052f00 100644
--- a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py
@@ -24,6 +24,9 @@
"""Autoinstalls third-party code required by WebKit."""
+from __future__ import with_statement
+
+import codecs
import os
from webkitpy.common.system.autoinstall import AutoInstaller
@@ -88,10 +91,7 @@ installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/pyth
readme_path = os.path.join(autoinstalled_dir, "README")
if not os.path.exists(readme_path):
- file = open(readme_path, "w")
- try:
+ with codecs.open(readme_path, "w", "ascii") as file:
file.write("This directory is auto-generated by WebKit and is "
"safe to delete.\nIt contains needed third-party Python "
"packages automatically downloaded from the web.")
- finally:
- file.close()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url
new file mode 100644
index 0000000..4186aee
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url
@@ -0,0 +1 @@
+http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip
\ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url
new file mode 100644
index 0000000..0fb1ef6
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url
@@ -0,0 +1 @@
+http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b
\ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README
new file mode 100644
index 0000000..1d68cf3
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README
@@ -0,0 +1,2 @@
+This directory is auto-generated by WebKit and is safe to delete.
+It contains needed third-party Python packages automatically downloaded from the web.
\ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py
new file mode 100644
index 0000000..c1e4c6d
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py
@@ -0,0 +1 @@
+# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url
new file mode 100644
index 0000000..c723abf
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url
@@ -0,0 +1 @@
+http://pypi.python.org/packages/source/C/ClientForm/ClientForm-0.2.10.zip
\ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py
new file mode 100644
index 0000000..a622de7
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py
@@ -0,0 +1,3401 @@
+"""HTML form handling for web clients.
+
+ClientForm is a Python module for handling HTML forms on the client
+side, useful for parsing HTML forms, filling them in and returning the
+completed forms to the server. It has developed from a port of Gisle
+Aas' Perl module HTML::Form, from the libwww-perl library, but the
+interface is not the same.
+
+The most useful docstring is the one for HTMLForm.
+
+RFC 1866: HTML 2.0
+RFC 1867: Form-based File Upload in HTML
+RFC 2388: Returning Values from Forms: multipart/form-data
+HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
+HTML 4.01 Specification, W3C Recommendation 24 December 1999
+
+
+Copyright 2002-2007 John J. Lee <jjl@pobox.com>
+Copyright 2005 Gary Poster
+Copyright 2005 Zope Corporation
+Copyright 1998-2000 Gisle Aas.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+# XXX
+# Remove parser testing hack
+# safeUrl()-ize action
+# Switch to unicode throughout (would be 0.3.x)
+# See Wichert Akkerman's 2004-01-22 message to c.l.py.
+# Add charset parameter to Content-type headers? How to find value??
+# Add some more functional tests
+# Especially single and multiple file upload on the internet.
+# Does file upload work when name is missing? Sourceforge tracker form
+# doesn't like it. Check standards, and test with Apache. Test
+# binary upload with Apache.
+# mailto submission & enctype text/plain
+# I'm not going to fix this unless somebody tells me what real servers
+# that want this encoding actually expect: If enctype is
+# application/x-www-form-urlencoded and there's a FILE control present.
+# Strictly, it should be 'name=data' (see HTML 4.01 spec., section
+# 17.13.2), but I send "name=" ATM. What about multiple file upload??
+
+# Would be nice, but I'm not going to do it myself:
+# -------------------------------------------------
+# Maybe a 0.4.x?
+# Replace by_label etc. with moniker / selector concept. Allows, eg.,
+# a choice between selection by value / id / label / element
+# contents. Or choice between matching labels exactly or by
+# substring. Etc.
+# Remove deprecated methods.
+# ...what else?
+# Work on DOMForm.
+# XForms? Don't know if there's a need here.
+
+__all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
+ 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
+ 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
+ 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
+ 'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
+ 'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl',
+ 'RadioControl', 'ScalarControl', 'SelectControl',
+ 'SubmitButtonControl', 'SubmitControl', 'TextControl',
+ 'TextareaControl', 'XHTMLCompatibleFormParser']
+
+try: True
+except NameError:
+ True = 1
+ False = 0
+
+try: bool
+except NameError:
+ def bool(expr):
+ if expr: return True
+ else: return False
+
+try:
+ import logging
+ import inspect
+except ImportError:
+ def debug(msg, *args, **kwds):
+ pass
+else:
+ _logger = logging.getLogger("ClientForm")
+ OPTIMIZATION_HACK = True
+
+ def debug(msg, *args, **kwds):
+ if OPTIMIZATION_HACK:
+ return
+
+ caller_name = inspect.stack()[1][3]
+ extended_msg = '%%s %s' % msg
+ extended_args = (caller_name,)+args
+ debug = _logger.debug(extended_msg, *extended_args, **kwds)
+
+ def _show_debug_messages():
+ global OPTIMIZATION_HACK
+ OPTIMIZATION_HACK = False
+ _logger.setLevel(logging.DEBUG)
+ handler = logging.StreamHandler(sys.stdout)
+ handler.setLevel(logging.DEBUG)
+ _logger.addHandler(handler)
+
+import sys, urllib, urllib2, types, mimetools, copy, urlparse, \
+ htmlentitydefs, re, random
+from cStringIO import StringIO
+
+import sgmllib
+# monkeypatch to fix http://www.python.org/sf/803422 :-(
+sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
+
+# HTMLParser.HTMLParser is recent, so live without it if it's not available
+# (also, sgmllib.SGMLParser is much more tolerant of bad HTML)
+try:
+ import HTMLParser
+except ImportError:
+ HAVE_MODULE_HTMLPARSER = False
+else:
+ HAVE_MODULE_HTMLPARSER = True
+
+try:
+ import warnings
+except ImportError:
+ def deprecation(message, stack_offset=0):
+ pass
+else:
+ def deprecation(message, stack_offset=0):
+ warnings.warn(message, DeprecationWarning, stacklevel=3+stack_offset)
+
+VERSION = "0.2.10"
+
+CHUNK = 1024 # size of chunks fed to parser, in bytes
+
+DEFAULT_ENCODING = "latin-1"
+
+class Missing: pass
+
+_compress_re = re.compile(r"\s+")
+def compress_text(text): return _compress_re.sub(" ", text.strip())
+
+def normalize_line_endings(text):
+ return re.sub(r"(?:(?<!\r)\n)|(?:\r(?!\n))", "\r\n", text)
+
+
+# This version of urlencode is from my Python 1.5.2 back-port of the
+# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
+# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
+def urlencode(query,doseq=False,):
+ """Encode a sequence of two-element tuples or dictionary into a URL query \
+string.
+
+ If any values in the query arg are sequences and doseq is true, each
+ sequence element is converted to a separate parameter.
+
+ If the query arg is a sequence of two-element tuples, the order of the
+ parameters in the output will match the order of parameters in the
+ input.
+ """
+
+ if hasattr(query,"items"):
+ # mapping objects
+ query = query.items()
+ else:
+ # it's a bother at times that strings and string-like objects are
+ # sequences...
+ try:
+ # non-sequence items should not work with len()
+ x = len(query)
+ # non-empty strings will fail this
+ if len(query) and type(query[0]) != types.TupleType:
+ raise TypeError()
+ # zero-length sequences of all types will get here and succeed,
+ # but that's a minor nit - since the original implementation
+ # allowed empty dicts that type of behavior probably should be
+ # preserved for consistency
+ except TypeError:
+ ty,va,tb = sys.exc_info()
+ raise TypeError("not a valid non-string sequence or mapping "
+ "object", tb)
+
+ l = []
+ if not doseq:
+ # preserve old behavior
+ for k, v in query:
+ k = urllib.quote_plus(str(k))
+ v = urllib.quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ for k, v in query:
+ k = urllib.quote_plus(str(k))
+ if type(v) == types.StringType:
+ v = urllib.quote_plus(v)
+ l.append(k + '=' + v)
+ elif type(v) == types.UnicodeType:
+ # is there a reasonable way to convert to ASCII?
+ # encode generates a string, but "replace" or "ignore"
+ # lose information and "strict" can raise UnicodeError
+ v = urllib.quote_plus(v.encode("ASCII","replace"))
+ l.append(k + '=' + v)
+ else:
+ try:
+ # is this a sufficient test for sequence-ness?
+ x = len(v)
+ except TypeError:
+ # not a sequence
+ v = urllib.quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ # loop over the sequence
+ for elt in v:
+ l.append(k + '=' + urllib.quote_plus(str(elt)))
+ return '&'.join(l)
+
+def unescape(data, entities, encoding=DEFAULT_ENCODING):
+ if data is None or "&" not in data:
+ return data
+
+ def replace_entities(match, entities=entities, encoding=encoding):
+ ent = match.group()
+ if ent[1] == "#":
+ return unescape_charref(ent[2:-1], encoding)
+
+ repl = entities.get(ent)
+ if repl is not None:
+ if type(repl) != type(""):
+ try:
+ repl = repl.encode(encoding)
+ except UnicodeError:
+ repl = ent
+ else:
+ repl = ent
+
+ return repl
+
+ return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
+
+def unescape_charref(data, encoding):
+ name, base = data, 10
+ if name.startswith("x"):
+ name, base= name[1:], 16
+ uc = unichr(int(name, base))
+ if encoding is None:
+ return uc
+ else:
+ try:
+ repl = uc.encode(encoding)
+ except UnicodeError:
+ repl = "&#%s;" % data
+ return repl
+
+def get_entitydefs():
+ import htmlentitydefs
+ from codecs import latin_1_decode
+ entitydefs = {}
+ try:
+ htmlentitydefs.name2codepoint
+ except AttributeError:
+ entitydefs = {}
+ for name, char in htmlentitydefs.entitydefs.items():
+ uc = latin_1_decode(char)[0]
+ if uc.startswith("&#") and uc.endswith(";"):
+ uc = unescape_charref(uc[2:-1], None)
+ entitydefs["&%s;" % name] = uc
+ else:
+ for name, codepoint in htmlentitydefs.name2codepoint.items():
+ entitydefs["&%s;" % name] = unichr(codepoint)
+ return entitydefs
+
+
+def issequence(x):
+ try:
+ x[0]
+ except (TypeError, KeyError):
+ return False
+ except IndexError:
+ pass
+ return True
+
+def isstringlike(x):
+ try: x+""
+ except: return False
+ else: return True
+
+
+def choose_boundary():
+ """Return a string usable as a multipart boundary."""
+ # follow IE and firefox
+ nonce = "".join([str(random.randint(0, sys.maxint-1)) for i in 0,1,2])
+ return "-"*27 + nonce
+
+# This cut-n-pasted MimeWriter from standard library is here so can add
+# to HTTP headers rather than message body when appropriate. It also uses
+# \r\n in place of \n. This is a bit nasty.
+class MimeWriter:
+
+ """Generic MIME writer.
+
+ Methods:
+
+ __init__()
+ addheader()
+ flushheaders()
+ startbody()
+ startmultipartbody()
+ nextpart()
+ lastpart()
+
+ A MIME writer is much more primitive than a MIME parser. It
+ doesn't seek around on the output file, and it doesn't use large
+ amounts of buffer space, so you have to write the parts in the
+ order they should occur on the output file. It does buffer the
+ headers you add, allowing you to rearrange their order.
+
+ General usage is:
+
+ f = <open the output file>
+ w = MimeWriter(f)
+ ...call w.addheader(key, value) 0 or more times...
+
+ followed by either:
+
+ f = w.startbody(content_type)
+ ...call f.write(data) for body data...
+
+ or:
+
+ w.startmultipartbody(subtype)
+ for each part:
+ subwriter = w.nextpart()
+ ...use the subwriter's methods to create the subpart...
+ w.lastpart()
+
+ The subwriter is another MimeWriter instance, and should be
+ treated in the same way as the toplevel MimeWriter. This way,
+ writing recursive body parts is easy.
+
+ Warning: don't forget to call lastpart()!
+
+ XXX There should be more state so calls made in the wrong order
+ are detected.
+
+ Some special cases:
+
+ - startbody() just returns the file passed to the constructor;
+ but don't use this knowledge, as it may be changed.
+
+ - startmultipartbody() actually returns a file as well;
+ this can be used to write the initial 'if you can read this your
+ mailer is not MIME-aware' message.
+
+ - If you call flushheaders(), the headers accumulated so far are
+ written out (and forgotten); this is useful if you don't need a
+ body part at all, e.g. for a subpart of type message/rfc822
+ that's (mis)used to store some header-like information.
+
+ - Passing a keyword argument 'prefix=<flag>' to addheader(),
+ start*body() affects where the header is inserted; 0 means
+ append at the end, 1 means insert at the start; default is
+ append for addheader(), but insert for start*body(), which use
+ it to determine where the Content-type header goes.
+
+ """
+
+ def __init__(self, fp, http_hdrs=None):
+ self._http_hdrs = http_hdrs
+ self._fp = fp
+ self._headers = []
+ self._boundary = []
+ self._first_part = True
+
+ def addheader(self, key, value, prefix=0,
+ add_to_http_hdrs=0):
+ """
+ prefix is ignored if add_to_http_hdrs is true.
+ """
+ lines = value.split("\r\n")
+ while lines and not lines[-1]: del lines[-1]
+ while lines and not lines[0]: del lines[0]
+ if add_to_http_hdrs:
+ value = "".join(lines)
+ # 2.2 urllib2 doesn't normalize header case
+ self._http_hdrs.append((key.capitalize(), value))
+ else:
+ for i in range(1, len(lines)):
+ lines[i] = " " + lines[i].strip()
+ value = "\r\n".join(lines) + "\r\n"
+ line = key.title() + ": " + value
+ if prefix:
+ self._headers.insert(0, line)
+ else:
+ self._headers.append(line)
+
+ def flushheaders(self):
+ self._fp.writelines(self._headers)
+ self._headers = []
+
+ def startbody(self, ctype=None, plist=[], prefix=1,
+ add_to_http_hdrs=0, content_type=1):
+ """
+ prefix is ignored if add_to_http_hdrs is true.
+ """
+ if content_type and ctype:
+ for name, value in plist:
+ ctype = ctype + ';\r\n %s=%s' % (name, value)
+ self.addheader("Content-Type", ctype, prefix=prefix,
+ add_to_http_hdrs=add_to_http_hdrs)
+ self.flushheaders()
+ if not add_to_http_hdrs: self._fp.write("\r\n")
+ self._first_part = True
+ return self._fp
+
+ def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
+ add_to_http_hdrs=0, content_type=1):
+ boundary = boundary or choose_boundary()
+ self._boundary.append(boundary)
+ return self.startbody("multipart/" + subtype,
+ [("boundary", boundary)] + plist,
+ prefix=prefix,
+ add_to_http_hdrs=add_to_http_hdrs,
+ content_type=content_type)
+
+ def nextpart(self):
+ boundary = self._boundary[-1]
+ if self._first_part:
+ self._first_part = False
+ else:
+ self._fp.write("\r\n")
+ self._fp.write("--" + boundary + "\r\n")
+ return self.__class__(self._fp)
+
+ def lastpart(self):
+ if self._first_part:
+ self.nextpart()
+ boundary = self._boundary.pop()
+ self._fp.write("\r\n--" + boundary + "--\r\n")
+
+
+class LocateError(ValueError): pass
+class AmbiguityError(LocateError): pass
+class ControlNotFoundError(LocateError): pass
+class ItemNotFoundError(LocateError): pass
+
+class ItemCountError(ValueError): pass
+
+# for backwards compatibility, ParseError derives from exceptions that were
+# raised by versions of ClientForm <= 0.2.5
+if HAVE_MODULE_HTMLPARSER:
+ SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
+ class ParseError(sgmllib.SGMLParseError,
+ HTMLParser.HTMLParseError,
+ ):
+ pass
+else:
+ if hasattr(sgmllib, "SGMLParseError"):
+ SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
+ class ParseError(sgmllib.SGMLParseError):
+ pass
+ else:
+ SGMLLIB_PARSEERROR = RuntimeError
+ class ParseError(RuntimeError):
+ pass
+
+
+class _AbstractFormParser:
+ """forms attribute contains HTMLForm instances on completion."""
+ # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
+ def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
+ if entitydefs is None:
+ entitydefs = get_entitydefs()
+ self._entitydefs = entitydefs
+ self._encoding = encoding
+
+ self.base = None
+ self.forms = []
+ self.labels = []
+ self._current_label = None
+ self._current_form = None
+ self._select = None
+ self._optgroup = None
+ self._option = None
+ self._textarea = None
+
+ # forms[0] will contain all controls that are outside of any form
+ # self._global_form is an alias for self.forms[0]
+ self._global_form = None
+ self.start_form([])
+ self.end_form()
+ self._current_form = self._global_form = self.forms[0]
+
+ def do_base(self, attrs):
+ debug("%s", attrs)
+ for key, value in attrs:
+ if key == "href":
+ self.base = self.unescape_attr_if_required(value)
+
+ def end_body(self):
+ debug("")
+ if self._current_label is not None:
+ self.end_label()
+ if self._current_form is not self._global_form:
+ self.end_form()
+
+ def start_form(self, attrs):
+ debug("%s", attrs)
+ if self._current_form is not self._global_form:
+ raise ParseError("nested FORMs")
+ name = None
+ action = None
+ enctype = "application/x-www-form-urlencoded"
+ method = "GET"
+ d = {}
+ for key, value in attrs:
+ if key == "name":
+ name = self.unescape_attr_if_required(value)
+ elif key == "action":
+ action = self.unescape_attr_if_required(value)
+ elif key == "method":
+ method = self.unescape_attr_if_required(value.upper())
+ elif key == "enctype":
+ enctype = self.unescape_attr_if_required(value.lower())
+ d[key] = self.unescape_attr_if_required(value)
+ controls = []
+ self._current_form = (name, action, method, enctype), d, controls
+
+ def end_form(self):
+ debug("")
+ if self._current_label is not None:
+ self.end_label()
+ if self._current_form is self._global_form:
+ raise ParseError("end of FORM before start")
+ self.forms.append(self._current_form)
+ self._current_form = self._global_form
+
+ def start_select(self, attrs):
+ debug("%s", attrs)
+ if self._select is not None:
+ raise ParseError("nested SELECTs")
+ if self._textarea is not None:
+ raise ParseError("SELECT inside TEXTAREA")
+ d = {}
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+
+ self._select = d
+ self._add_label(d)
+
+ self._append_select_control({"__select": d})
+
+ def end_select(self):
+ debug("")
+ if self._select is None:
+ raise ParseError("end of SELECT before start")
+
+ if self._option is not None:
+ self._end_option()
+
+ self._select = None
+
+ def start_optgroup(self, attrs):
+ debug("%s", attrs)
+ if self._select is None:
+ raise ParseError("OPTGROUP outside of SELECT")
+ d = {}
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+
+ self._optgroup = d
+
+ def end_optgroup(self):
+ debug("")
+ if self._optgroup is None:
+ raise ParseError("end of OPTGROUP before start")
+ self._optgroup = None
+
+ def _start_option(self, attrs):
+ debug("%s", attrs)
+ if self._select is None:
+ raise ParseError("OPTION outside of SELECT")
+ if self._option is not None:
+ self._end_option()
+
+ d = {}
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+
+ self._option = {}
+ self._option.update(d)
+ if (self._optgroup and self._optgroup.has_key("disabled") and
+ not self._option.has_key("disabled")):
+ self._option["disabled"] = None
+
+ def _end_option(self):
+ debug("")
+ if self._option is None:
+ raise ParseError("end of OPTION before start")
+
+ contents = self._option.get("contents", "").strip()
+ self._option["contents"] = contents
+ if not self._option.has_key("value"):
+ self._option["value"] = contents
+ if not self._option.has_key("label"):
+ self._option["label"] = contents
+ # stuff dict of SELECT HTML attrs into a special private key
+ # (gets deleted again later)
+ self._option["__select"] = self._select
+ self._append_select_control(self._option)
+ self._option = None
+
+ def _append_select_control(self, attrs):
+ debug("%s", attrs)
+ controls = self._current_form[2]
+ name = self._select.get("name")
+ controls.append(("select", name, attrs))
+
+ def start_textarea(self, attrs):
+ debug("%s", attrs)
+ if self._textarea is not None:
+ raise ParseError("nested TEXTAREAs")
+ if self._select is not None:
+ raise ParseError("TEXTAREA inside SELECT")
+ d = {}
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+ self._add_label(d)
+
+ self._textarea = d
+
+ def end_textarea(self):
+ debug("")
+ if self._textarea is None:
+ raise ParseError("end of TEXTAREA before start")
+ controls = self._current_form[2]
+ name = self._textarea.get("name")
+ controls.append(("textarea", name, self._textarea))
+ self._textarea = None
+
+ def start_label(self, attrs):
+ debug("%s", attrs)
+ if self._current_label:
+ self.end_label()
+ d = {}
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+ taken = bool(d.get("for")) # empty id is invalid
+ d["__text"] = ""
+ d["__taken"] = taken
+ if taken:
+ self.labels.append(d)
+ self._current_label = d
+
+ def end_label(self):
+ debug("")
+ label = self._current_label
+ if label is None:
+ # something is ugly in the HTML, but we're ignoring it
+ return
+ self._current_label = None
+ # if it is staying around, it is True in all cases
+ del label["__taken"]
+
+ def _add_label(self, d):
+ #debug("%s", d)
+ if self._current_label is not None:
+ if not self._current_label["__taken"]:
+ self._current_label["__taken"] = True
+ d["__label"] = self._current_label
+
+ def handle_data(self, data):
+ debug("%s", data)
+
+ if self._option is not None:
+ # self._option is a dictionary of the OPTION element's HTML
+ # attributes, but it has two special keys, one of which is the
+ # special "contents" key contains text between OPTION tags (the
+ # other is the "__select" key: see the end_option method)
+ map = self._option
+ key = "contents"
+ elif self._textarea is not None:
+ map = self._textarea
+ key = "value"
+ data = normalize_line_endings(data)
+ # not if within option or textarea
+ elif self._current_label is not None:
+ map = self._current_label
+ key = "__text"
+ else:
+ return
+
+ if data and not map.has_key(key):
+ # according to
+ # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break
+ # immediately after start tags or immediately before end tags must
+ # be ignored, but real browsers only ignore a line break after a
+ # start tag, so we'll do that.
+ if data[0:2] == "\r\n":
+ data = data[2:]
+ elif data[0:1] in ["\n", "\r"]:
+ data = data[1:]
+ map[key] = data
+ else:
+ map[key] = map[key] + data
+
+ def do_button(self, attrs):
+ debug("%s", attrs)
+ d = {}
+ d["type"] = "submit" # default
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+ controls = self._current_form[2]
+
+ type = d["type"]
+ name = d.get("name")
+ # we don't want to lose information, so use a type string that
+ # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
+ # e.g. type for BUTTON/RESET is "resetbutton"
+ # (type for INPUT/RESET is "reset")
+ type = type+"button"
+ self._add_label(d)
+ controls.append((type, name, d))
+
+ def do_input(self, attrs):
+ debug("%s", attrs)
+ d = {}
+ d["type"] = "text" # default
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+ controls = self._current_form[2]
+
+ type = d["type"]
+ name = d.get("name")
+ self._add_label(d)
+ controls.append((type, name, d))
+
+ def do_isindex(self, attrs):
+ debug("%s", attrs)
+ d = {}
+ for key, val in attrs:
+ d[key] = self.unescape_attr_if_required(val)
+ controls = self._current_form[2]
+
+ self._add_label(d)
+ # isindex doesn't have type or name HTML attributes
+ controls.append(("isindex", None, d))
+
+ def handle_entityref(self, name):
+ #debug("%s", name)
+ self.handle_data(unescape(
+ '&%s;' % name, self._entitydefs, self._encoding))
+
+ def handle_charref(self, name):
+ #debug("%s", name)
+ self.handle_data(unescape_charref(name, self._encoding))
+
+ def unescape_attr(self, name):
+ #debug("%s", name)
+ return unescape(name, self._entitydefs, self._encoding)
+
+ def unescape_attrs(self, attrs):
+ #debug("%s", attrs)
+ escaped_attrs = {}
+ for key, val in attrs.items():
+ try:
+ val.items
+ except AttributeError:
+ escaped_attrs[key] = self.unescape_attr(val)
+ else:
+ # e.g. "__select" -- yuck!
+ escaped_attrs[key] = self.unescape_attrs(val)
+ return escaped_attrs
+
+ def unknown_entityref(self, ref): self.handle_data("&%s;" % ref)
+ def unknown_charref(self, ref): self.handle_data("&#%s;" % ref)
+
+
+if not HAVE_MODULE_HTMLPARSER:
+ class XHTMLCompatibleFormParser:
+ def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
+ raise ValueError("HTMLParser could not be imported")
+else:
+ class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
+ """Good for XHTML, bad for tolerance of incorrect HTML."""
+ # thanks to Michael Howitz for this!
+ def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
+ HTMLParser.HTMLParser.__init__(self)
+ _AbstractFormParser.__init__(self, entitydefs, encoding)
+
+ def feed(self, data):
+ try:
+ HTMLParser.HTMLParser.feed(self, data)
+ except HTMLParser.HTMLParseError, exc:
+ raise ParseError(exc)
+
+ def start_option(self, attrs):
+ _AbstractFormParser._start_option(self, attrs)
+
+ def end_option(self):
+ _AbstractFormParser._end_option(self)
+
+ def handle_starttag(self, tag, attrs):
+ try:
+ method = getattr(self, "start_" + tag)
+ except AttributeError:
+ try:
+ method = getattr(self, "do_" + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method(attrs)
+ else:
+ method(attrs)
+
+ def handle_endtag(self, tag):
+ try:
+ method = getattr(self, "end_" + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method()
+
+ def unescape(self, name):
+ # Use the entitydefs passed into constructor, not
+ # HTMLParser.HTMLParser's entitydefs.
+ return self.unescape_attr(name)
+
+ def unescape_attr_if_required(self, name):
+ return name # HTMLParser.HTMLParser already did it
+ def unescape_attrs_if_required(self, attrs):
+ return attrs # ditto
+
+ def close(self):
+ HTMLParser.HTMLParser.close(self)
+ self.end_body()
+
+
+class _AbstractSgmllibParser(_AbstractFormParser):
+
+ def do_option(self, attrs):
+ _AbstractFormParser._start_option(self, attrs)
+
+ if sys.version_info[:2] >= (2,5):
+ # we override this attr to decode hex charrefs
+ entity_or_charref = re.compile(
+ '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
+ def convert_entityref(self, name):
+ return unescape("&%s;" % name, self._entitydefs, self._encoding)
+ def convert_charref(self, name):
+ return unescape_charref("%s" % name, self._encoding)
+ def unescape_attr_if_required(self, name):
+ return name # sgmllib already did it
+ def unescape_attrs_if_required(self, attrs):
+ return attrs # ditto
+ else:
+ def unescape_attr_if_required(self, name):
+ return self.unescape_attr(name)
+ def unescape_attrs_if_required(self, attrs):
+ return self.unescape_attrs(attrs)
+
+
+class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
+ """Good for tolerance of incorrect HTML, bad for XHTML."""
+ def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
+ sgmllib.SGMLParser.__init__(self)
+ _AbstractFormParser.__init__(self, entitydefs, encoding)
+
+ def feed(self, data):
+ try:
+ sgmllib.SGMLParser.feed(self, data)
+ except SGMLLIB_PARSEERROR, exc:
+ raise ParseError(exc)
+
+ def close(self):
+ sgmllib.SGMLParser.close(self)
+ self.end_body()
+
+
+# sigh, must support mechanize by allowing dynamic creation of classes based on
+# its bundled copy of BeautifulSoup (which was necessary because of dependency
+# problems)
+
+def _create_bs_classes(bs,
+ icbinbs,
+ ):
+ class _AbstractBSFormParser(_AbstractSgmllibParser):
+ bs_base_class = None
+ def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
+ _AbstractFormParser.__init__(self, entitydefs, encoding)
+ self.bs_base_class.__init__(self)
+ def handle_data(self, data):
+ _AbstractFormParser.handle_data(self, data)
+ self.bs_base_class.handle_data(self, data)
+ def feed(self, data):
+ try:
+ self.bs_base_class.feed(self, data)
+ except SGMLLIB_PARSEERROR, exc:
+ raise ParseError(exc)
+ def close(self):
+ self.bs_base_class.close(self)
+ self.end_body()
+
+ class RobustFormParser(_AbstractBSFormParser, bs):
+ """Tries to be highly tolerant of incorrect HTML."""
+ pass
+ RobustFormParser.bs_base_class = bs
+ class NestingRobustFormParser(_AbstractBSFormParser, icbinbs):
+ """Tries to be highly tolerant of incorrect HTML.
+
+ Different from RobustFormParser in that it more often guesses nesting
+ above missing end tags (see BeautifulSoup docs).
+
+ """
+ pass
+ NestingRobustFormParser.bs_base_class = icbinbs
+
+ return RobustFormParser, NestingRobustFormParser
+
+try:
+ if sys.version_info[:2] < (2, 2):
+ raise ImportError # BeautifulSoup uses generators
+ import BeautifulSoup
+except ImportError:
+ pass
+else:
+ RobustFormParser, NestingRobustFormParser = _create_bs_classes(
+ BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup
+ )
+ __all__ += ['RobustFormParser', 'NestingRobustFormParser']
+
+
+#FormParser = XHTMLCompatibleFormParser # testing hack
+#FormParser = RobustFormParser # testing hack
+
+
+def ParseResponseEx(response,
+ select_default=False,
+ form_parser_class=FormParser,
+ request_class=urllib2.Request,
+ entitydefs=None,
+ encoding=DEFAULT_ENCODING,
+
+ # private
+ _urljoin=urlparse.urljoin,
+ _urlparse=urlparse.urlparse,
+ _urlunparse=urlparse.urlunparse,
+ ):
+ """Identical to ParseResponse, except that:
+
+ 1. The returned list contains an extra item. The first form in the list
+ contains all controls not contained in any FORM element.
+
+ 2. The arguments ignore_errors and backwards_compat have been removed.
+
+ 3. Backwards-compatibility mode (backwards_compat=True) is not available.
+ """
+ return _ParseFileEx(response, response.geturl(),
+ select_default,
+ False,
+ form_parser_class,
+ request_class,
+ entitydefs,
+ False,
+ encoding,
+ _urljoin=_urljoin,
+ _urlparse=_urlparse,
+ _urlunparse=_urlunparse,
+ )
+
+def ParseFileEx(file, base_uri,
+ select_default=False,
+ form_parser_class=FormParser,
+ request_class=urllib2.Request,
+ entitydefs=None,
+ encoding=DEFAULT_ENCODING,
+
+ # private
+ _urljoin=urlparse.urljoin,
+ _urlparse=urlparse.urlparse,
+ _urlunparse=urlparse.urlunparse,
+ ):
+ """Identical to ParseFile, except that:
+
+ 1. The returned list contains an extra item. The first form in the list
+ contains all controls not contained in any FORM element.
+
+ 2. The arguments ignore_errors and backwards_compat have been removed.
+
+ 3. Backwards-compatibility mode (backwards_compat=True) is not available.
+ """
+ return _ParseFileEx(file, base_uri,
+ select_default,
+ False,
+ form_parser_class,
+ request_class,
+ entitydefs,
+ False,
+ encoding,
+ _urljoin=_urljoin,
+ _urlparse=_urlparse,
+ _urlunparse=_urlunparse,
+ )
+
+def ParseResponse(response, *args, **kwds):
+ """Parse HTTP response and return a list of HTMLForm instances.
+
+ The return value of urllib2.urlopen can be conveniently passed to this
+ function as the response parameter.
+
+ ClientForm.ParseError is raised on parse errors.
+
+ response: file-like object (supporting read() method) with a method
+ geturl(), returning the URI of the HTTP response
+ select_default: for multiple-selection SELECT controls and RADIO controls,
+ pick the first item as the default if none are selected in the HTML
+ form_parser_class: class to instantiate and use to pass
+ request_class: class to return from .click() method (default is
+ urllib2.Request)
+ entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
+ definitions (a sensible default is used)
+ encoding: character encoding used for encoding numeric character references
+ when matching link text. ClientForm does not attempt to find the encoding
+ in a META HTTP-EQUIV attribute in the document itself (mechanize, for
+ example, does do that and will pass the correct value to ClientForm using
+ this parameter).
+
+ backwards_compat: boolean that determines whether the returned HTMLForm
+ objects are backwards-compatible with old code. If backwards_compat is
+ true:
+
+ - ClientForm 0.1 code will continue to work as before.
+
+ - Label searches that do not specify a nr (number or count) will always
+ get the first match, even if other controls match. If
+ backwards_compat is False, label searches that have ambiguous results
+ will raise an AmbiguityError.
+
+ - Item label matching is done by strict string comparison rather than
+ substring matching.
+
+ - De-selecting individual list items is allowed even if the Item is
+ disabled.
+
+ The backwards_compat argument will be deprecated in a future release.
+
+ Pass a true value for select_default if you want the behaviour specified by
+ RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
+ RADIO or multiple-selection SELECT control if none were selected in the
+ HTML. Most browsers (including Microsoft Internet Explorer (IE) and
+ Netscape Navigator) instead leave all items unselected in these cases. The
+ W3C HTML 4.0 standard leaves this behaviour undefined in the case of
+ multiple-selection SELECT controls, but insists that at least one RADIO
+ button should be checked at all times, in contradiction to browser
+ behaviour.
+
+ There is a choice of parsers. ClientForm.XHTMLCompatibleFormParser (uses
+ HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
+ sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
+ Note that HTMLParser is only available in Python 2.2 and later. You can
+ pass your own class in here as a hack to work around bad HTML, but at your
+ own risk: there is no well-defined interface.
+
+ """
+ return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:]
+
+def ParseFile(file, base_uri, *args, **kwds):
+ """Parse HTML and return a list of HTMLForm instances.
+
+ ClientForm.ParseError is raised on parse errors.
+
+ file: file-like object (supporting read() method) containing HTML with zero
+ or more forms to be parsed
+ base_uri: the URI of the document (note that the base URI used to submit
+ the form will be that given in the BASE element if present, not that of
+ the document)
+
+ For the other arguments and further details, see ParseResponse.__doc__.
+
+ """
+ return _ParseFileEx(file, base_uri, *args, **kwds)[1:]
+
+def _ParseFileEx(file, base_uri,
+ select_default=False,
+ ignore_errors=False,
+ form_parser_class=FormParser,
+ request_class=urllib2.Request,
+ entitydefs=None,
+ backwards_compat=True,
+ encoding=DEFAULT_ENCODING,
+ _urljoin=urlparse.urljoin,
+ _urlparse=urlparse.urlparse,
+ _urlunparse=urlparse.urlunparse,
+ ):
+ if backwards_compat:
+ deprecation("operating in backwards-compatibility mode", 1)
+ fp = form_parser_class(entitydefs, encoding)
+ while 1:
+ data = file.read(CHUNK)
+ try:
+ fp.feed(data)
+ except ParseError, e:
+ e.base_uri = base_uri
+ raise
+ if len(data) != CHUNK: break
+ fp.close()
+ if fp.base is not None:
+ # HTML BASE element takes precedence over document URI
+ base_uri = fp.base
+ labels = [] # Label(label) for label in fp.labels]
+ id_to_labels = {}
+ for l in fp.labels:
+ label = Label(l)
+ labels.append(label)
+ for_id = l["for"]
+ coll = id_to_labels.get(for_id)
+ if coll is None:
+ id_to_labels[for_id] = [label]
+ else:
+ coll.append(label)
+ forms = []
+ for (name, action, method, enctype), attrs, controls in fp.forms:
+ if action is None:
+ action = base_uri
+ else:
+ action = _urljoin(base_uri, action)
+ # would be nice to make HTMLForm class (form builder) pluggable
+ form = HTMLForm(
+ action, method, enctype, name, attrs, request_class,
+ forms, labels, id_to_labels, backwards_compat)
+ form._urlparse = _urlparse
+ form._urlunparse = _urlunparse
+ for ii in range(len(controls)):
+ type, name, attrs = controls[ii]
+ # index=ii*10 allows ImageControl to return multiple ordered pairs
+ form.new_control(
+ type, name, attrs, select_default=select_default, index=ii*10)
+ forms.append(form)
+ for form in forms:
+ form.fixup()
+ return forms
+
+
+class Label:
+ def __init__(self, attrs):
+ self.id = attrs.get("for")
+ self._text = attrs.get("__text").strip()
+ self._ctext = compress_text(self._text)
+ self.attrs = attrs
+ self._backwards_compat = False # maintained by HTMLForm
+
+ def __getattr__(self, name):
+ if name == "text":
+ if self._backwards_compat:
+ return self._text
+ else:
+ return self._ctext
+ return getattr(Label, name)
+
+ def __setattr__(self, name, value):
+ if name == "text":
+ # don't see any need for this, so make it read-only
+ raise AttributeError("text attribute is read-only")
+ self.__dict__[name] = value
+
+ def __str__(self):
+ return "<Label(id=%r, text=%r)>" % (self.id, self.text)
+
+
+def _get_label(attrs):
+ text = attrs.get("__label")
+ if text is not None:
+ return Label(text)
+ else:
+ return None
+
+class Control:
+ """An HTML form control.
+
+ An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm
+ are accessed using the HTMLForm.find_control method or the
+ HTMLForm.controls attribute.
+
+ Control instances are usually constructed using the ParseFile /
+ ParseResponse functions. If you use those functions, you can ignore the
+ rest of this paragraph. A Control is only properly initialised after the
+ fixup method has been called. In fact, this is only strictly necessary for
+ ListControl instances. This is necessary because ListControls are built up
+ from ListControls each containing only a single item, and their initial
+ value(s) can only be known after the sequence is complete.
+
+ The types and values that are acceptable for assignment to the value
+ attribute are defined by subclasses.
+
+ If the disabled attribute is true, this represents the state typically
+ represented by browsers by 'greying out' a control. If the disabled
+ attribute is true, the Control will raise AttributeError if an attempt is
+ made to change its value. In addition, the control will not be considered
+ 'successful' as defined by the W3C HTML 4 standard -- ie. it will
+ contribute no data to the return value of the HTMLForm.click* methods. To
+ enable a control, set the disabled attribute to a false value.
+
+ If the readonly attribute is true, the Control will raise AttributeError if
+ an attempt is made to change its value. To make a control writable, set
+ the readonly attribute to a false value.
+
+ All controls have the disabled and readonly attributes, not only those that
+ may have the HTML attributes of the same names.
+
+ On assignment to the value attribute, the following exceptions are raised:
+ TypeError, AttributeError (if the value attribute should not be assigned
+ to, because the control is disabled, for example) and ValueError.
+
+ If the name or value attributes are None, or the value is an empty list, or
+ if the control is disabled, the control is not successful.
+
+ Public attributes:
+
+ type: string describing type of control (see the keys of the
+ HTMLForm.type2class dictionary for the allowable values) (readonly)
+ name: name of control (readonly)
+ value: current value of control (subclasses may allow a single value, a
+ sequence of values, or either)
+ disabled: disabled state
+ readonly: readonly state
+ id: value of id HTML attribute
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ """
+ type: string describing type of control (see the keys of the
+ HTMLForm.type2class dictionary for the allowable values)
+ name: control name
+ attrs: HTML attributes of control's HTML element
+
+ """
+ raise NotImplementedError()
+
+ def add_to_form(self, form):
+ self._form = form
+ form.controls.append(self)
+
+ def fixup(self):
+ pass
+
+ def is_of_kind(self, kind):
+ raise NotImplementedError()
+
+ def clear(self):
+ raise NotImplementedError()
+
+ def __getattr__(self, name): raise NotImplementedError()
+ def __setattr__(self, name, value): raise NotImplementedError()
+
+ def pairs(self):
+ """Return list of (key, value) pairs suitable for passing to urlencode.
+ """
+ return [(k, v) for (i, k, v) in self._totally_ordered_pairs()]
+
+ def _totally_ordered_pairs(self):
+ """Return list of (key, value, index) tuples.
+
+ Like pairs, but allows preserving correct ordering even where several
+ controls are involved.
+
+ """
+ raise NotImplementedError()
+
+ def _write_mime_data(self, mw, name, value):
+ """Write data for a subitem of this control to a MimeWriter."""
+ # called by HTMLForm
+ mw2 = mw.nextpart()
+ mw2.addheader("Content-Disposition",
+ 'form-data; name="%s"' % name, 1)
+ f = mw2.startbody(prefix=0)
+ f.write(value)
+
+ def __str__(self):
+ raise NotImplementedError()
+
+ def get_labels(self):
+ """Return all labels (Label instances) for this control.
+
+ If the control was surrounded by a <label> tag, that will be the first
+ label; all other labels, connected by 'for' and 'id', are in the order
+ that appear in the HTML.
+
+ """
+ res = []
+ if self._label:
+ res.append(self._label)
+ if self.id:
+ res.extend(self._form._id_to_labels.get(self.id, ()))
+ return res
+
+
+#---------------------------------------------------
+class ScalarControl(Control):
+ """Control whose value is not restricted to one of a prescribed set.
+
+ Some ScalarControls don't accept any value attribute. Otherwise, takes a
+ single value, which must be string-like.
+
+ Additional read-only public attribute:
+
+ attrs: dictionary mapping the names of original HTML attributes of the
+ control to their values
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ self._index = index
+ self._label = _get_label(attrs)
+ self.__dict__["type"] = type.lower()
+ self.__dict__["name"] = name
+ self._value = attrs.get("value")
+ self.disabled = attrs.has_key("disabled")
+ self.readonly = attrs.has_key("readonly")
+ self.id = attrs.get("id")
+
+ self.attrs = attrs.copy()
+
+ self._clicked = False
+
+ self._urlparse = urlparse.urlparse
+ self._urlunparse = urlparse.urlunparse
+
+ def __getattr__(self, name):
+ if name == "value":
+ return self.__dict__["_value"]
+ else:
+ raise AttributeError("%s instance has no attribute '%s'" %
+ (self.__class__.__name__, name))
+
+ def __setattr__(self, name, value):
+ if name == "value":
+ if not isstringlike(value):
+ raise TypeError("must assign a string")
+ elif self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ elif self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ self.__dict__["_value"] = value
+ elif name in ("name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def _totally_ordered_pairs(self):
+ name = self.name
+ value = self.value
+ if name is None or value is None or self.disabled:
+ return []
+ return [(self._index, name, value)]
+
+ def clear(self):
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ self.__dict__["_value"] = None
+
+ def __str__(self):
+ name = self.name
+ value = self.value
+ if name is None: name = "<None>"
+ if value is None: value = "<None>"
+
+ infos = []
+ if self.disabled: infos.append("disabled")
+ if self.readonly: infos.append("readonly")
+ info = ", ".join(infos)
+ if info: info = " (%s)" % info
+
+ return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
+
+
+#---------------------------------------------------
+class TextControl(ScalarControl):
+ """Textual input control.
+
+ Covers:
+
+ INPUT/TEXT
+ INPUT/PASSWORD
+ INPUT/HIDDEN
+ TEXTAREA
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ ScalarControl.__init__(self, type, name, attrs, index)
+ if self.type == "hidden": self.readonly = True
+ if self._value is None:
+ self._value = ""
+
+ def is_of_kind(self, kind): return kind == "text"
+
+#---------------------------------------------------
+class FileControl(ScalarControl):
+ """File upload with INPUT TYPE=FILE.
+
+ The value attribute of a FileControl is always None. Use add_file instead.
+
+ Additional public method: add_file
+
+ """
+
+ def __init__(self, type, name, attrs, index=None):
+ ScalarControl.__init__(self, type, name, attrs, index)
+ self._value = None
+ self._upload_data = []
+
+ def is_of_kind(self, kind): return kind == "file"
+
+ def clear(self):
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ self._upload_data = []
+
+ def __setattr__(self, name, value):
+ if name in ("value", "name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def add_file(self, file_object, content_type=None, filename=None):
+ if not hasattr(file_object, "read"):
+ raise TypeError("file-like object must have read method")
+ if content_type is not None and not isstringlike(content_type):
+ raise TypeError("content type must be None or string-like")
+ if filename is not None and not isstringlike(filename):
+ raise TypeError("filename must be None or string-like")
+ if content_type is None:
+ content_type = "application/octet-stream"
+ self._upload_data.append((file_object, content_type, filename))
+
+ def _totally_ordered_pairs(self):
+ # XXX should it be successful even if unnamed?
+ if self.name is None or self.disabled:
+ return []
+ return [(self._index, self.name, "")]
+
+ def _write_mime_data(self, mw, _name, _value):
+ # called by HTMLForm
+ # assert _name == self.name and _value == ''
+ if len(self._upload_data) < 2:
+ if len(self._upload_data) == 0:
+ file_object = StringIO()
+ content_type = "application/octet-stream"
+ filename = ""
+ else:
+ file_object, content_type, filename = self._upload_data[0]
+ if filename is None:
+ filename = ""
+ mw2 = mw.nextpart()
+ fn_part = '; filename="%s"' % filename
+ disp = 'form-data; name="%s"%s' % (self.name, fn_part)
+ mw2.addheader("Content-Disposition", disp, prefix=1)
+ fh = mw2.startbody(content_type, prefix=0)
+ fh.write(file_object.read())
+ else:
+ # multiple files
+ mw2 = mw.nextpart()
+ disp = 'form-data; name="%s"' % self.name
+ mw2.addheader("Content-Disposition", disp, prefix=1)
+ fh = mw2.startmultipartbody("mixed", prefix=0)
+ for file_object, content_type, filename in self._upload_data:
+ mw3 = mw2.nextpart()
+ if filename is None:
+ filename = ""
+ fn_part = '; filename="%s"' % filename
+ disp = "file%s" % fn_part
+ mw3.addheader("Content-Disposition", disp, prefix=1)
+ fh2 = mw3.startbody(content_type, prefix=0)
+ fh2.write(file_object.read())
+ mw2.lastpart()
+
+ def __str__(self):
+ name = self.name
+ if name is None: name = "<None>"
+
+ if not self._upload_data:
+ value = "<No files added>"
+ else:
+ value = []
+ for file, ctype, filename in self._upload_data:
+ if filename is None:
+ value.append("<Unnamed file>")
+ else:
+ value.append(filename)
+ value = ", ".join(value)
+
+ info = []
+ if self.disabled: info.append("disabled")
+ if self.readonly: info.append("readonly")
+ info = ", ".join(info)
+ if info: info = " (%s)" % info
+
+ return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
+
+
+#---------------------------------------------------
+class IsindexControl(ScalarControl):
+ """ISINDEX control.
+
+ ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really
+ part of regular HTML forms at all, and predates it. You're only allowed
+ one ISINDEX per HTML document. ISINDEX and regular form submission are
+ mutually exclusive -- either submit a form, or the ISINDEX.
+
+ Having said this, since ISINDEX controls may appear in forms (which is
+ probably bad HTML), ParseFile / ParseResponse will include them in the
+ HTMLForm instances it returns. You can set the ISINDEX's value, as with
+ any other control (but note that ISINDEX controls have no name, so you'll
+ need to use the type argument of set_value!). When you submit the form,
+ the ISINDEX will not be successful (ie., no data will get returned to the
+ server as a result of its presence), unless you click on the ISINDEX
+ control, in which case the ISINDEX gets submitted instead of the form:
+
+ form.set_value("my isindex value", type="isindex")
+ urllib2.urlopen(form.click(type="isindex"))
+
+ ISINDEX elements outside of FORMs are ignored. If you want to submit one
+ by hand, do it like so:
+
+ url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
+ result = urllib2.urlopen(url)
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ ScalarControl.__init__(self, type, name, attrs, index)
+ if self._value is None:
+ self._value = ""
+
+ def is_of_kind(self, kind): return kind in ["text", "clickable"]
+
+ def _totally_ordered_pairs(self):
+ return []
+
+ def _click(self, form, coord, return_type, request_class=urllib2.Request):
+ # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
+ # want "bar+baz".
+ # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
+ # deprecated in 4.01, but it should still say how to submit it).
+ # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
+ parts = self._urlparse(form.action)
+ rest, (query, frag) = parts[:-2], parts[-2:]
+ parts = rest + (urllib.quote_plus(self.value), None)
+ url = self._urlunparse(parts)
+ req_data = url, None, []
+
+ if return_type == "pairs":
+ return []
+ elif return_type == "request_data":
+ return req_data
+ else:
+ return request_class(url)
+
+ def __str__(self):
+ value = self.value
+ if value is None: value = "<None>"
+
+ infos = []
+ if self.disabled: infos.append("disabled")
+ if self.readonly: infos.append("readonly")
+ info = ", ".join(infos)
+ if info: info = " (%s)" % info
+
+ return "<%s(%s)%s>" % (self.__class__.__name__, value, info)
+
+
+#---------------------------------------------------
+class IgnoreControl(ScalarControl):
+ """Control that we're not interested in.
+
+ Covers:
+
+ INPUT/RESET
+ BUTTON/RESET
+ INPUT/BUTTON
+ BUTTON/BUTTON
+
+ These controls are always unsuccessful, in the terminology of HTML 4 (ie.
+ they never require any information to be returned to the server).
+
+ BUTTON/BUTTON is used to generate events for script embedded in HTML.
+
+ The value attribute of IgnoreControl is always None.
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ ScalarControl.__init__(self, type, name, attrs, index)
+ self._value = None
+
+ def is_of_kind(self, kind): return False
+
+ def __setattr__(self, name, value):
+ if name == "value":
+ raise AttributeError(
+ "control '%s' is ignored, hence read-only" % self.name)
+ elif name in ("name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+
+#---------------------------------------------------
+# ListControls
+
+# helpers and subsidiary classes
+
+class Item:
+ def __init__(self, control, attrs, index=None):
+ label = _get_label(attrs)
+ self.__dict__.update({
+ "name": attrs["value"],
+ "_labels": label and [label] or [],
+ "attrs": attrs,
+ "_control": control,
+ "disabled": attrs.has_key("disabled"),
+ "_selected": False,
+ "id": attrs.get("id"),
+ "_index": index,
+ })
+ control.items.append(self)
+
+ def get_labels(self):
+ """Return all labels (Label instances) for this item.
+
+ For items that represent radio buttons or checkboxes, if the item was
+ surrounded by a <label> tag, that will be the first label; all other
+ labels, connected by 'for' and 'id', are in the order they appear in
+ the HTML.
+
+ For items that represent select options, if the option had a label
+ attribute, that will be the first label. If the option has contents
+ (text within the option tags) and it is not the same as the label
+ attribute (if any), that will be a label. There is nothing in the
+ spec to my knowledge that makes an option with an id unable to be the
+ target of a label's for attribute, so those are included, if any, for
+ the sake of consistency and completeness.
+
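+ For example, to collect the label texts of an item (a hypothetical
+ sketch; 'item' is assumed to have been obtained from ListControl.get()):
+
+ labels = [l.text for l in item.get_labels()]  # l.text is normalized
+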
+ """
+ res = []
+ res.extend(self._labels)
+ if self.id:
+ res.extend(self._control._form._id_to_labels.get(self.id, ()))
+ return res
+
+ def __getattr__(self, name):
+ if name=="selected":
+ return self._selected
+ raise AttributeError(name)
+
+ def __setattr__(self, name, value):
+ if name == "selected":
+ self._control._set_selected_state(self, value)
+ elif name == "disabled":
+ self.__dict__["disabled"] = bool(value)
+ else:
+ raise AttributeError(name)
+
+ def __str__(self):
+ res = self.name
+ if self.selected:
+ res = "*" + res
+ if self.disabled:
+ res = "(%s)" % res
+ return res
+
+ def __repr__(self):
+ # XXX appending the attrs without distinguishing them from name and id
+ # is silly
+ attrs = [("name", self.name), ("id", self.id)]+self.attrs.items()
+ return "<%s %s>" % (
+ self.__class__.__name__,
+ " ".join(["%s=%r" % (k, v) for k, v in attrs])
+ )
+
+def disambiguate(items, nr, **kwds):
+ msgs = []
+ for key, value in kwds.items():
+ msgs.append("%s=%r" % (key, value))
+ msg = " ".join(msgs)
+ if not items:
+ raise ItemNotFoundError(msg)
+ if nr is None:
+ if len(items) > 1:
+ raise AmbiguityError(msg)
+ nr = 0
+ if len(items) <= nr:
+ raise ItemNotFoundError(msg)
+ return items[nr]
+
+class ListControl(Control):
+ """Control representing a sequence of items.
+
+ The value attribute of a ListControl represents the successful list items
+ in the control. The successful list items are those that are selected and
+ not disabled.
+
+ ListControl implements both list controls that take a length-1 value
+ (single-selection) and those that take length >1 values
+ (multiple-selection).
+
+ ListControls accept sequence values only. Some controls only accept
+ sequences of length 0 or 1 (RADIO, and single-selection SELECT).
+ In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
+ and multiple-selection SELECTs (those having the "multiple" HTML attribute)
+ accept sequences of any length.
+
+ Note the following mistake:
+
+ control.value = some_value
+ assert control.value == some_value # not necessarily true
+
+ The reason for this is that the value attribute always gives the list items
+ in the order they were listed in the HTML.
+
+ ListControl items can also be referred to by their labels instead of names.
+ Use the label argument to .get(), and the .set_value_by_label(),
+ .get_value_by_label() methods.
+
+ Note that, rather confusingly, though SELECT controls are represented in
+ HTML by SELECT elements (which contain OPTION elements, representing
+ individual list items), CHECKBOXes and RADIOs are not represented by *any*
+ element. Instead, those controls are represented by a collection of INPUT
+ elements. For example, this is a SELECT control, named "control1":
+
+ <select name="control1">
+ <option>foo</option>
+ <option value="1">bar</option>
+ </select>
+
+ and this is a CHECKBOX control, named "control2":
+
+ <input type="checkbox" name="control2" value="foo" id="cbe1">
+ <input type="checkbox" name="control2" value="bar" id="cbe2">
+
+ The id attribute of a CHECKBOX or RADIO ListControl is always that of its
+ first element (for example, "cbe1" above).
+
+
+ Additional read-only public attribute: multiple.
+
+ """
+
+ # ListControls are built up by the parser from their component items by
+ # creating one ListControl per item, consolidating them into a single
+ # master ListControl held by the HTMLForm:
+
+ # -User calls form.new_control(...)
+ # -Form creates Control, and calls control.add_to_form(self).
+ # -Control looks for a Control with the same name and type in the form,
+ # and if it finds one, merges itself with that control by calling
+ # control.merge_control(self). The first Control added to the form, of
+ # a particular name and type, is the only one that survives in the
+ # form.
+ # -Form calls control.fixup for all its controls. ListControls in the
+ # form know they can now safely pick their default values.
+
+ # To create a ListControl without an HTMLForm, use:
+
+ # control.merge_control(new_control)
+
+ # (actually, it's much easier just to use ParseFile)
+
+ _label = None
+
+ def __init__(self, type, name, attrs={}, select_default=False,
+ called_as_base_class=False, index=None):
+ """
+ select_default: for RADIO and multiple-selection SELECT controls, pick
+ the first item as the default if no 'selected' HTML attribute is
+ present
+
+ """
+ if not called_as_base_class:
+ raise NotImplementedError()
+
+ self.__dict__["type"] = type.lower()
+ self.__dict__["name"] = name
+ self._value = attrs.get("value")
+ self.disabled = False
+ self.readonly = False
+ self.id = attrs.get("id")
+ self._closed = False
+
+ # As Controls are merged in with .merge_control(), self.attrs will
+ # refer to each Control in turn -- always the most recently merged
+ # control. Each merged-in Control instance corresponds to a single
+ # list item: see ListControl.__doc__.
+ self.items = []
+ self._form = None
+
+ self._select_default = select_default
+ self._clicked = False
+
+ def clear(self):
+ self.value = []
+
+ def is_of_kind(self, kind):
+ if kind == "list":
+ return True
+ elif kind == "multilist":
+ return bool(self.multiple)
+ elif kind == "singlelist":
+ return not self.multiple
+ else:
+ return False
+
+ def get_items(self, name=None, label=None, id=None,
+ exclude_disabled=False):
+ """Return matching items by name or label.
+
+ For argument docs, see the docstring for .get()
+
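+ For example (a hypothetical sketch; the item name is assumed):
+
+ items = control.get_items(name="cheddar", exclude_disabled=True)
+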
+ """
+ if name is not None and not isstringlike(name):
+ raise TypeError("item name must be string-like")
+ if label is not None and not isstringlike(label):
+ raise TypeError("item label must be string-like")
+ if id is not None and not isstringlike(id):
+ raise TypeError("item id must be string-like")
+ items = [] # order is important
+ compat = self._form.backwards_compat
+ for o in self.items:
+ if exclude_disabled and o.disabled:
+ continue
+ if name is not None and o.name != name:
+ continue
+ if label is not None:
+ for l in o.get_labels():
+ if ((compat and l.text == label) or
+ (not compat and l.text.find(label) > -1)):
+ break
+ else:
+ continue
+ if id is not None and o.id != id:
+ continue
+ items.append(o)
+ return items
+
+ def get(self, name=None, label=None, id=None, nr=None,
+ exclude_disabled=False):
+ """Return item by name or label, disambiguating if necessary with nr.
+
+ All arguments must be passed by name, with the exception of 'name',
+ which may be used as a positional argument.
+
+ If name is specified, then the item must have the indicated name.
+
+ If label is specified, then the item must have a label whose
+ whitespace-compressed, stripped, text substring-matches the indicated
+ label string (eg. label="please choose" will match
+ " Do please choose an item ").
+
+ If id is specified, then the item must have the indicated id.
+
+ nr is an optional 0-based index of the items matching the query.
+
+ If nr is the default None value and more than one item is found, raises
+ AmbiguityError (unless the HTMLForm instance's backwards_compat
+ attribute is true).
+
+ If no item is found, or if items are found but nr is specified and not
+ found, raises ItemNotFoundError.
+
+ Optionally excludes disabled items.
+
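+ For example (a hypothetical sketch; the item name is assumed to exist
+ in the control):
+
+ item = control.get(name="cheddar", nr=0)  # nr=0 picks the first match
+ item.selected = True
+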
+ """
+ if nr is None and self._form.backwards_compat:
+ nr = 0 # :-/
+ items = self.get_items(name, label, id, exclude_disabled)
+ return disambiguate(items, nr, name=name, label=label, id=id)
+
+ def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
+ # strictly for use by deprecated methods
+ if by_label:
+ name, label = None, name
+ else:
+ name, label = name, None
+ return self.get(name, label, nr=nr, exclude_disabled=exclude_disabled)
+
+ def toggle(self, name, by_label=False, nr=None):
+ """Deprecated: given a name or label and optional disambiguating index
+ nr, toggle the matching item's selection.
+
+ Selecting items follows the behavior described in the docstring of the
+ 'get' method.
+
+ If the item is disabled, or this control is disabled or readonly,
+ raise AttributeError.
+
+ """
+ deprecation(
+ "item = control.get(...); item.selected = not item.selected")
+ o = self._get(name, by_label, nr)
+ self._set_selected_state(o, not o.selected)
+
+ def set(self, selected, name, by_label=False, nr=None):
+ """Deprecated: given a name or label and optional disambiguating index
+ nr, set the matching item's selection to the bool value of selected.
+
+ Selecting items follows the behavior described in the docstring of the
+ 'get' method.
+
+ If the item is disabled, or this control is disabled or readonly,
+ raise AttributeError.
+
+ """
+ deprecation(
+ "control.get(...).selected = <boolean>")
+ self._set_selected_state(self._get(name, by_label, nr), selected)
+
+ def _set_selected_state(self, item, action):
+ # action:
+ # bool False: off
+ # bool True: on
+ if self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ action = bool(action)
+ compat = self._form.backwards_compat
+ if not compat and item.disabled:
+ raise AttributeError("item is disabled")
+ else:
+ if compat and item.disabled and action:
+ raise AttributeError("item is disabled")
+ if self.multiple:
+ item.__dict__["_selected"] = action
+ else:
+ if not action:
+ item.__dict__["_selected"] = False
+ else:
+ for o in self.items:
+ o.__dict__["_selected"] = False
+ item.__dict__["_selected"] = True
+
+ def toggle_single(self, by_label=None):
+ """Deprecated: toggle the selection of the single item in this control.
+
+ Raises ItemCountError if the control does not contain exactly one item.
+
+ by_label argument is ignored, and included only for backwards
+ compatibility.
+
+ """
+ deprecation(
+ "control.items[0].selected = not control.items[0].selected")
+ if len(self.items) != 1:
+ raise ItemCountError(
+ "'%s' is not a single-item control" % self.name)
+ item = self.items[0]
+ self._set_selected_state(item, not item.selected)
+
+ def set_single(self, selected, by_label=None):
+ """Deprecated: set the selection of the single item in this control.
+
+ Raises ItemCountError if the control does not contain exactly one item.
+
+ by_label argument is ignored, and included only for backwards
+ compatibility.
+
+ """
+ deprecation(
+ "control.items[0].selected = <boolean>")
+ if len(self.items) != 1:
+ raise ItemCountError(
+ "'%s' is not a single-item control" % self.name)
+ self._set_selected_state(self.items[0], selected)
+
+ def get_item_disabled(self, name, by_label=False, nr=None):
+ """Get disabled state of named list item in a ListControl."""
+ deprecation(
+ "control.get(...).disabled")
+ return self._get(name, by_label, nr).disabled
+
+ def set_item_disabled(self, disabled, name, by_label=False, nr=None):
+ """Set disabled state of named list item in a ListControl.
+
+ disabled: boolean disabled state
+
+ """
+ deprecation(
+ "control.get(...).disabled = <boolean>")
+ self._get(name, by_label, nr).disabled = disabled
+
+ def set_all_items_disabled(self, disabled):
+ """Set disabled state of all list items in a ListControl.
+
+ disabled: boolean disabled state
+
+ """
+ for o in self.items:
+ o.disabled = disabled
+
+ def get_item_attrs(self, name, by_label=False, nr=None):
+ """Return dictionary of HTML attributes for a single ListControl item.
+
+ The HTML element types that describe list items are: OPTION for SELECT
+ controls, INPUT for the rest. These elements have HTML attributes that
+ you may occasionally want to know about -- for example, the "alt" HTML
+ attribute gives a text string describing the item (graphical browsers
+ usually display this as a tooltip).
+
+ The returned dictionary maps HTML attribute names to values. The names
+ and values are taken from the original HTML.
+
+ """
+ deprecation(
+ "control.get(...).attrs")
+ return self._get(name, by_label, nr).attrs
+
+ def close_control(self):
+ self._closed = True
+
+ def add_to_form(self, form):
+ assert self._form is None or form == self._form, (
+ "can't add control to more than one form")
+ self._form = form
+ if self.name is None:
+ # always count nameless elements as separate controls
+ Control.add_to_form(self, form)
+ else:
+ for ii in range(len(form.controls)-1, -1, -1):
+ control = form.controls[ii]
+ if control.name == self.name and control.type == self.type:
+ if control._closed:
+ Control.add_to_form(self, form)
+ else:
+ control.merge_control(self)
+ break
+ else:
+ Control.add_to_form(self, form)
+
+ def merge_control(self, control):
+ assert bool(control.multiple) == bool(self.multiple)
+ # usually, isinstance(control, self.__class__)
+ self.items.extend(control.items)
+
+ def fixup(self):
+ """
+ ListControls are built up from component list items (which are also
+ ListControls) during parsing. This method should be called after all
+ items have been added. See ListControl.__doc__ for the reason this is
+ required.
+
+ """
+ # Need to set default selection where no item was indicated as being
+ # selected by the HTML:
+
+ # CHECKBOX:
+ # Nothing should be selected.
+ # SELECT/single, SELECT/multiple and RADIO:
+ # RFC 1866 (HTML 2.0): says first item should be selected.
+ # W3C HTML 4.01 Specification: says that client behaviour is
+ # undefined in this case. For RADIO, exactly one must be selected,
+ # though which one is undefined.
+ # Both Netscape and Microsoft Internet Explorer (IE) choose first
+ # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
+ # and Firebird 0.6) leave all items unselected for RADIO and
+ # SELECT/multiple.
+
+ # Since both Netscape and IE choose the first item for
+ # SELECT/single, we do the same. OTOH, both Netscape and IE
+ # leave SELECT/multiple with nothing selected, in violation of RFC 1866
+ # (but not in violation of the W3C HTML 4 standard); the same is true
+ # of RADIO (which *is* in violation of the HTML 4 standard). We follow
+ # RFC 1866 if the _select_default attribute is set, and Netscape and IE
+ # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
+ # can deselect all items in a RadioControl.
+
+ for o in self.items:
+ # set items' controls to self, now that we've merged
+ o.__dict__["_control"] = self
+
+ def __getattr__(self, name):
+ if name == "value":
+ compat = self._form.backwards_compat
+ if self.name is None:
+ return []
+ return [o.name for o in self.items if o.selected and
+ (not o.disabled or compat)]
+ else:
+ raise AttributeError("%s instance has no attribute '%s'" %
+ (self.__class__.__name__, name))
+
+ def __setattr__(self, name, value):
+ if name == "value":
+ if self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ self._set_value(value)
+ elif name in ("name", "type", "multiple"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def _set_value(self, value):
+ if value is None or isstringlike(value):
+ raise TypeError("ListControl, must set a sequence")
+ if not value:
+ compat = self._form.backwards_compat
+ for o in self.items:
+ if not o.disabled or compat:
+ o.selected = False
+ elif self.multiple:
+ self._multiple_set_value(value)
+ elif len(value) > 1:
+ raise ItemCountError(
+ "single selection list, must set sequence of "
+ "length 0 or 1")
+ else:
+ self._single_set_value(value)
+
+ def _get_items(self, name, target=1):
+ all_items = self.get_items(name)
+ items = [o for o in all_items if not o.disabled]
+ if len(items) < target:
+ if len(all_items) < target:
+ raise ItemNotFoundError(
+ "insufficient items with name %r" % name)
+ else:
+ raise AttributeError(
+ "insufficient non-disabled items with name %s" % name)
+ on = []
+ off = []
+ for o in items:
+ if o.selected:
+ on.append(o)
+ else:
+ off.append(o)
+ return on, off
+
+ def _single_set_value(self, value):
+ assert len(value) == 1
+ on, off = self._get_items(value[0])
+ assert len(on) <= 1
+ if not on:
+ off[0].selected = True
+
+ def _multiple_set_value(self, value):
+ compat = self._form.backwards_compat
+ turn_on = [] # transactional-ish
+ turn_off = [item for item in self.items if
+ item.selected and (not item.disabled or compat)]
+ names = {}
+ for nn in value:
+ if nn in names:
+ names[nn] += 1
+ else:
+ names[nn] = 1
+ for name, count in names.items():
+ on, off = self._get_items(name, count)
+ for i in range(count):
+ if on:
+ item = on[0]
+ del on[0]
+ del turn_off[turn_off.index(item)]
+ else:
+ item = off[0]
+ del off[0]
+ turn_on.append(item)
+ for item in turn_off:
+ item.selected = False
+ for item in turn_on:
+ item.selected = True
+
+ def set_value_by_label(self, value):
+ """Set the value of control by item labels.
+
+ value is expected to be an iterable of strings that are substrings of
+ the item labels that should be selected. Before substring matching is
+ performed, the original label text is whitespace-compressed
+ (consecutive whitespace characters are converted to a single space
+ character) and leading and trailing whitespace is stripped. Ambiguous
+ labels are accepted without complaint if the form's backwards_compat is
+ True; otherwise, it will not complain as long as all ambiguous labels
+ share the same item name (e.g. OPTION value).
+
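+ For example (a hypothetical sketch, assuming a multiple-selection
+ control whose items have labels containing these substrings):
+
+ control.set_value_by_label(["Cheddar", "Leicester"])
+ control.get_value_by_label()  # labels of the now-selected items
+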
+ """
+ if isstringlike(value):
+ raise TypeError(value)
+ if not self.multiple and len(value) > 1:
+ raise ItemCountError(
+ "single selection list, must set sequence of "
+ "length 0 or 1")
+ items = []
+ for nn in value:
+ found = self.get_items(label=nn)
+ if len(found) > 1:
+ if not self._form.backwards_compat:
+ # ambiguous labels are fine as long as item names (e.g.
+ # OPTION values) are same
+ opt_name = found[0].name
+ if [o for o in found[1:] if o.name != opt_name]:
+ raise AmbiguityError(nn)
+ else:
+ # OK, we'll guess :-( Assume first available item.
+ found = found[:1]
+ for o in found:
+ # For the multiple-item case, we could try to be smarter,
+ # saving them up and trying to resolve, but that's too much.
+ if self._form.backwards_compat or o not in items:
+ items.append(o)
+ break
+ else: # all of them are used
+ raise ItemNotFoundError(nn)
+ # now we have all the items that should be on
+ # let's just turn everything off and then back on.
+ self.value = []
+ for o in items:
+ o.selected = True
+
+ def get_value_by_label(self):
+ """Return the value of the control as given by normalized labels."""
+ res = []
+ compat = self._form.backwards_compat
+ for o in self.items:
+ if (not o.disabled or compat) and o.selected:
+ for l in o.get_labels():
+ if l.text:
+ res.append(l.text)
+ break
+ else:
+ res.append(None)
+ return res
+
+ def possible_items(self, by_label=False):
+ """Deprecated: return the names or labels of all possible items.
+
+ Includes disabled items, which may be misleading for some use cases.
+
+ """
+ deprecation(
+ "[item.name for item in self.items]")
+ if by_label:
+ res = []
+ for o in self.items:
+ for l in o.get_labels():
+ if l.text:
+ res.append(l.text)
+ break
+ else:
+ res.append(None)
+ return res
+ return [o.name for o in self.items]
+
+ def _totally_ordered_pairs(self):
+ if self.disabled or self.name is None:
+ return []
+ else:
+ return [(o._index, self.name, o.name) for o in self.items
+ if o.selected and not o.disabled]
+
+ def __str__(self):
+ name = self.name
+ if name is None: name = "<None>"
+
+ display = [str(o) for o in self.items]
+
+ infos = []
+ if self.disabled: infos.append("disabled")
+ if self.readonly: infos.append("readonly")
+ info = ", ".join(infos)
+ if info: info = " (%s)" % info
+
+ return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
+ name, ", ".join(display), info)
+
+
+class RadioControl(ListControl):
+ """
+ Covers:
+
+ INPUT/RADIO
+
+ """
+ def __init__(self, type, name, attrs, select_default=False, index=None):
+ attrs.setdefault("value", "on")
+ ListControl.__init__(self, type, name, attrs, select_default,
+ called_as_base_class=True, index=index)
+ self.__dict__["multiple"] = False
+ o = Item(self, attrs, index)
+ o.__dict__["_selected"] = attrs.has_key("checked")
+
+ def fixup(self):
+ ListControl.fixup(self)
+ found = [o for o in self.items if o.selected and not o.disabled]
+ if not found:
+ if self._select_default:
+ for o in self.items:
+ if not o.disabled:
+ o.selected = True
+ break
+ else:
+ # Ensure only one item selected. Choose the last one,
+ # following IE and Firefox.
+ for o in found[:-1]:
+ o.selected = False
+
+ def get_labels(self):
+ return []
+
+class CheckboxControl(ListControl):
+ """
+ Covers:
+
+ INPUT/CHECKBOX
+
+ """
+ def __init__(self, type, name, attrs, select_default=False, index=None):
+ attrs.setdefault("value", "on")
+ ListControl.__init__(self, type, name, attrs, select_default,
+ called_as_base_class=True, index=index)
+ self.__dict__["multiple"] = True
+ o = Item(self, attrs, index)
+ o.__dict__["_selected"] = attrs.has_key("checked")
+
+ def get_labels(self):
+ return []
+
+
+class SelectControl(ListControl):
+ """
+ Covers:
+
+ SELECT (and OPTION)
+
+
+ OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.
+
+ SELECT control values and labels are subject to some messy defaulting
+ rules. For example, if the HTML representation of the control is:
+
+ <SELECT name=year>
+ <OPTION value=0 label="2002">current year</OPTION>
+ <OPTION value=1>2001</OPTION>
+ <OPTION>2000</OPTION>
+ </SELECT>
+
+ The items, in order, have labels "2002", "2001" and "2000", whereas their
+ names (the OPTION values) are "0", "1" and "2000" respectively. Note that
+ the value of the last OPTION in this example defaults to its contents, as
+ specified by RFC 1866, as do the labels of the second and third OPTIONs.
+
+ The OPTION labels are sometimes more meaningful than the OPTION values,
+ which can make for more maintainable code.
+
+ Additional read-only public attribute: attrs
+
+ The attrs attribute is a dictionary of the original HTML attributes of the
+ SELECT element. Other ListControls do not have this attribute, because in
+ other cases the control as a whole does not correspond to any single HTML
+ element. control.get(...).attrs may be used as usual to get at the HTML
+ attributes of the HTML elements corresponding to individual list items (for
+ SELECT controls, these are OPTION elements).
+
+ Another special case is that the Item.attrs dictionaries have a special key
+ "contents" which does not correspond to any real HTML attribute, but rather
+ contains the contents of the OPTION element:
+
+ <OPTION>this bit</OPTION>
+
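+ Continuing the year example above (a sketch, assuming the first OPTION
+ is selected, as is the default for single-selection SELECTs):
+
+ form["year"]  # --> ["0"] (item names, ie. OPTION values)
+ form.find_control("year").get_value_by_label()  # --> ["2002"]
+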
+ """
+ # HTML attributes here are treated slightly differently from other list
+ # controls:
+ # -The SELECT HTML attributes dictionary is stuffed into the OPTION
+ # HTML attributes dictionary under the "__select" key.
+ # -The content of each OPTION element is stored under the special
+ # "contents" key of the dictionary.
+ # After all this, the dictionary is passed to the SelectControl constructor
+ # as the attrs argument, as usual. However:
+ # -The first SelectControl constructed when building up a SELECT control
+ # has a constructor attrs argument containing only the __select key -- so
+ # this SelectControl represents an empty SELECT control.
+ # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
+ # the __select dictionary containing the SELECT HTML-attributes.
+
+ def __init__(self, type, name, attrs, select_default=False, index=None):
+ # fish out the SELECT HTML attributes from the OPTION HTML attributes
+ # dictionary
+ self.attrs = attrs["__select"].copy()
+ self.__dict__["_label"] = _get_label(self.attrs)
+ self.__dict__["id"] = self.attrs.get("id")
+ self.__dict__["multiple"] = self.attrs.has_key("multiple")
+ # the majority of the contents, label, and value dance already happened
+ contents = attrs.get("contents")
+ attrs = attrs.copy()
+ del attrs["__select"]
+
+ ListControl.__init__(self, type, name, self.attrs, select_default,
+ called_as_base_class=True, index=index)
+ self.disabled = self.attrs.has_key("disabled")
+ self.readonly = self.attrs.has_key("readonly")
+ if attrs.has_key("value"):
+ # otherwise it is a marker 'select started' token
+ o = Item(self, attrs, index)
+ o.__dict__["_selected"] = attrs.has_key("selected")
+ # add 'label' label and contents label, if different. If both are
+ # provided, the 'label' label is used for display in HTML
+ # 4.0-compliant browsers (and any lower spec? not sure) while the
+ # contents are used for display in older or less-compliant
+ # browsers. We make label objects for both, if the values are
+ # different.
+ label = attrs.get("label")
+ if label:
+ o._labels.append(Label({"__text": label}))
+ if contents and contents != label:
+ o._labels.append(Label({"__text": contents}))
+ elif contents:
+ o._labels.append(Label({"__text": contents}))
+
+ def fixup(self):
+ ListControl.fixup(self)
+ # Firefox doesn't exclude disabled items from those considered here
+ # (i.e. from 'found', for both branches of the if below). Note that
+ # IE6 doesn't support the disabled attribute on OPTIONs at all.
+ found = [o for o in self.items if o.selected]
+ if not found:
+ if not self.multiple or self._select_default:
+ for o in self.items:
+ if not o.disabled:
+ was_disabled = self.disabled
+ self.disabled = False
+ try:
+ o.selected = True
+ finally:
+ self.disabled = was_disabled
+ break
+ elif not self.multiple:
+ # Ensure only one item selected. Choose the last one,
+ # following IE and Firefox.
+ for o in found[:-1]:
+ o.selected = False
+
+
+#---------------------------------------------------
+class SubmitControl(ScalarControl):
+ """
+ Covers:
+
+ INPUT/SUBMIT
+ BUTTON/SUBMIT
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ ScalarControl.__init__(self, type, name, attrs, index)
+ # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
+ # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
+ # to define this.
+ if self.value is None: self.value = ""
+ self.readonly = True
+
+ def get_labels(self):
+ res = []
+ if self.value:
+ res.append(Label({"__text": self.value}))
+ res.extend(ScalarControl.get_labels(self))
+ return res
+
+ def is_of_kind(self, kind): return kind == "clickable"
+
+ def _click(self, form, coord, return_type, request_class=urllib2.Request):
+ self._clicked = coord
+ r = form._switch_click(return_type, request_class)
+ self._clicked = False
+ return r
+
+ def _totally_ordered_pairs(self):
+ if not self._clicked:
+ return []
+ return ScalarControl._totally_ordered_pairs(self)
+
+
+#---------------------------------------------------
+class ImageControl(SubmitControl):
+ """
+ Covers:
+
+ INPUT/IMAGE
+
+ Coordinates are specified using one of the HTMLForm.click* methods.
+
+ """
+ def __init__(self, type, name, attrs, index=None):
+ SubmitControl.__init__(self, type, name, attrs, index)
+ self.readonly = False
+
+ def _totally_ordered_pairs(self):
+ clicked = self._clicked
+ if self.disabled or not clicked:
+ return []
+ name = self.name
+ if name is None: return []
+ pairs = [
+ (self._index, "%s.x" % name, str(clicked[0])),
+ (self._index+1, "%s.y" % name, str(clicked[1])),
+ ]
+ value = self._value
+ if value:
+ pairs.append((self._index+2, name, value))
+ return pairs
+
+ get_labels = ScalarControl.get_labels
+
+# aliases, just to make str(control) and str(form) clearer
+class PasswordControl(TextControl): pass
+class HiddenControl(TextControl): pass
+class TextareaControl(TextControl): pass
+class SubmitButtonControl(SubmitControl): pass
+
+
+def is_listcontrol(control): return control.is_of_kind("list")
+
+
+class HTMLForm:
+ """Represents a single HTML <form> ... </form> element.
+
+ A form consists of a sequence of controls that usually have names, and
+ which can take on various values. The values of the various types of
+ controls represent variously: text, zero-or-one-of-many or many-of-many
+ choices, and files to be uploaded. Some controls can be clicked on to
+ submit the form, and clickable controls' values sometimes include the
+ coordinates of the click.
+
+ Forms can be filled in with data to be returned to the server, and then
+ submitted, using the click method to generate a request object suitable for
+ passing to urllib2.urlopen (or the click_request_data or click_pairs
+ methods if you're not using urllib2).
+
+ import ClientForm
+ forms = ClientForm.ParseFile(html, base_uri)
+ form = forms[0]
+
+ form["query"] = "Python"
+ form.find_control("nr_results").get("lots").selected = True
+
+ response = urllib2.urlopen(form.click())
+
+ Usually, HTMLForm instances are not created directly. Instead, the
+ ParseFile or ParseResponse factory functions are used. If you do construct
+ HTMLForm objects yourself, however, note that an HTMLForm instance is only
+ properly initialised after the fixup method has been called (ParseFile and
+ ParseResponse do this for you). See ListControl.__doc__ for the reason
+ this is required.
+
+ Indexing a form (form["control_name"]) returns the named Control's value
+ attribute. Assignment to a form index (form["control_name"] = something)
+ is equivalent to assignment to the named Control's value attribute. If you
+ need to be more specific than just supplying the control's name, use the
+ set_value and get_value methods.
+
+ ListControl values are lists of item names (specifically, the names of the
+ items that are selected and not disabled, and hence are "successful" -- ie.
+ cause data to be returned to the server). The list item's name is the
+ value of the corresponding HTML element's "value" attribute.
+
+ Example:
+
+ <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
+ <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
+
+ defines a CHECKBOX control with name "cheeses" which has two items, named
+ "leicester" and "cheddar".
+
+ Another example:
+
+ <SELECT name="more_cheeses">
+ <OPTION>1</OPTION>
+ <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
+ </SELECT>
+
+ defines a SELECT control with name "more_cheeses" which has two items,
+ named "1" and "2" (because the OPTION element's value HTML attribute
+ defaults to the element contents -- see SelectControl.__doc__ for more on
+ these defaulting rules).
+
+ To select, deselect or otherwise manipulate individual list items, use the
+ HTMLForm.find_control() and ListControl.get() methods. To set the whole
+ value, do as for any other control: use indexing or the set_/get_value
+ methods.
+
+ Example:
+
+ # select *only* the item named "cheddar"
+ form["cheeses"] = ["cheddar"]
+ # select "cheddar", leave other items unaffected
+ form.find_control("cheeses").get("cheddar").selected = True
+
+ Some controls (RADIO and SELECT without the multiple attribute) can only
+ have zero or one items selected at a time. Some controls (CHECKBOX and
+ SELECT with the multiple attribute) can have multiple items selected at a
+ time. To set the whole value of a ListControl, assign a sequence to a form
+ index:
+
+ form["cheeses"] = ["cheddar", "leicester"]
+
+ If the ListControl is not multiple-selection, the assigned list must be of
+ length one.
+
+ To check if a control has an item, if an item is selected, or if an item is
+ successful (selected and not disabled), respectively:
+
+ "cheddar" in [item.name for item in form.find_control("cheeses").items]
+ "cheddar" in [item.name for item in form.find_control("cheeses").items and
+ item.selected]
+ "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
+
+ Note that some list items may be disabled (see below).
+
+ Note the following mistake:
+
+ form[control_name] = control_value
+ assert form[control_name] == control_value # not necessarily true
+
+ The reason for this is that form[control_name] always gives the list items
+ in the order they were listed in the HTML.
+
+ List items (hence list values, too) can be referred to in terms of list
+ item labels rather than list item names using the appropriate label
+ arguments. Note that each item may have several labels.
+
+ The question of default values of OPTION contents, labels and values is
+ somewhat complicated: see SelectControl.__doc__ and
+ ListControl.get_item_attrs.__doc__ if you think you need to know.
+
+ Controls can be disabled or readonly. In either case, the control's value
+ cannot be changed until you clear those flags (see example below).
+ Disabled is the state typically represented by browsers by 'greying out' a
+ control. Disabled controls are not 'successful' -- they don't cause data
+ to get returned to the server. Readonly controls usually appear in
+ browsers as read-only text boxes. Readonly controls are successful. List
+ items can also be disabled. Attempts to select or deselect disabled items
+ fail with AttributeError.
+
+ If a lot of controls are readonly, it can be useful to do this:
+
+ form.set_all_readonly(False)
+
+ To clear a control's value attribute, so that it is not successful (until a
+ value is subsequently set):
+
+ form.clear("cheeses")
+
+ More examples:
+
+ control = form.find_control("cheeses")
+ control.disabled = False
+ control.readonly = False
+ control.get("gruyere").disabled = True
+ control.items[0].selected = True
+
+ See the various Control classes for further documentation. Many methods
+ take name, type, kind, id, label and nr arguments to specify the control to
+ be operated on: see HTMLForm.find_control.__doc__.
+
+ ControlNotFoundError (subclass of ValueError) is raised if the specified
+ control can't be found. This includes occasions where a non-ListControl
+ is found, but the method (set, for example) requires a ListControl.
+ ItemNotFoundError (subclass of ValueError) is raised if a list item can't
+ be found. ItemCountError (subclass of ValueError) is raised if an attempt
+ is made to select more than one item and the control doesn't allow that, or
+ set/get_single are called and the control contains more than one item.
+ AttributeError is raised if a control or item is readonly or disabled and
+ an attempt is made to alter its value.
+
+ Security note: Remember that any passwords you store in HTMLForm instances
+ will be saved to disk in the clear if you pickle them (directly or
+ indirectly). The simplest solution to this is to avoid pickling HTMLForm
+ objects. You could also pickle before filling in any password, or just set
+ the password to "" before pickling.
+
+
+ Public attributes:
+
+ action: full (absolute URI) form action
+ method: "GET" or "POST"
+ enctype: form transfer encoding MIME type
+ name: name of form (None if no name was specified)
+ attrs: dictionary mapping original HTML form attributes to their values
+
+ controls: list of Control instances; do not alter this list
+ (instead, call form.new_control to make a Control and add it to the
+ form, or control.add_to_form if you already have a Control instance)
+
+
+
+ Methods for form filling:
+ -------------------------
+
+ Most of these methods have very similar arguments. See
+ HTMLForm.find_control.__doc__ for details of the name, type, kind, label
+ and nr arguments.
+
+ find_control(name=None, type=None, kind=None, id=None, predicate=None,
+ nr=None, label=None)
+
+ get_value(name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False, # by_label is deprecated
+ label=None)
+ set_value(value,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False, # by_label is deprecated
+ label=None)
+
+ clear_all()
+ clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
+
+ set_all_readonly(readonly)
+
+
+ Method applying only to FileControls:
+
+ add_file(file_object,
+ content_type="application/octet-stream", filename=None,
+ name=None, id=None, nr=None, label=None)
+
+
+ Methods applying only to clickable controls:
+
+ click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
+ click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
+ label=None)
+ click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
+
+ """
+
+ type2class = {
+ "text": TextControl,
+ "password": PasswordControl,
+ "hidden": HiddenControl,
+ "textarea": TextareaControl,
+
+ "isindex": IsindexControl,
+
+ "file": FileControl,
+
+ "button": IgnoreControl,
+ "buttonbutton": IgnoreControl,
+ "reset": IgnoreControl,
+ "resetbutton": IgnoreControl,
+
+ "submit": SubmitControl,
+ "submitbutton": SubmitButtonControl,
+ "image": ImageControl,
+
+ "radio": RadioControl,
+ "checkbox": CheckboxControl,
+ "select": SelectControl,
+ }
+
+#---------------------------------------------------
+# Initialisation. Use ParseResponse / ParseFile instead.
+
+ def __init__(self, action, method="GET",
+ enctype="application/x-www-form-urlencoded",
+ name=None, attrs=None,
+ request_class=urllib2.Request,
+ forms=None, labels=None, id_to_labels=None,
+ backwards_compat=True):
+ """
+ In the usual case, use ParseResponse (or ParseFile) to create new
+ HTMLForm objects.
+
+ action: full (absolute URI) form action
+ method: "GET" or "POST"
+ enctype: form transfer encoding MIME type
+ name: name of form
+ attrs: dictionary mapping original HTML form attributes to their values
+
+ """
+ self.action = action
+ self.method = method
+ self.enctype = enctype
+ self.name = name
+ if attrs is not None:
+ self.attrs = attrs.copy()
+ else:
+ self.attrs = {}
+ self.controls = []
+ self._request_class = request_class
+
+ # these attributes are used by zope.testbrowser
+ self._forms = forms # this is a semi-public API!
+ self._labels = labels # this is a semi-public API!
+ self._id_to_labels = id_to_labels # this is a semi-public API!
+
+ self.backwards_compat = backwards_compat # note __setattr__
+
+ self._urlunparse = urlparse.urlunparse
+ self._urlparse = urlparse.urlparse
+
+ def __getattr__(self, name):
+ if name == "backwards_compat":
+ return self._backwards_compat
+ return getattr(HTMLForm, name)
+
+ def __setattr__(self, name, value):
+ # yuck
+ if name == "backwards_compat":
+ name = "_backwards_compat"
+ value = bool(value)
+ for cc in self.controls:
+ try:
+ items = cc.items
+ except AttributeError:
+ continue
+ else:
+ for ii in items:
+ for ll in ii.get_labels():
+ ll._backwards_compat = value
+ self.__dict__[name] = value
+
+ def new_control(self, type, name, attrs,
+ ignore_unknown=False, select_default=False, index=None):
+ """Adds a new control to the form.
+
+ This is usually called by ParseFile and ParseResponse. Don't call it
+ yourself unless you're building your own Control instances.
+
+ Note that controls representing lists of items are built up from
+ controls holding only a single list item. See ListControl.__doc__ for
+ further information.
+
+ type: type of control (see Control.__doc__ for a list)
+ name: name of control
+ attrs: HTML attributes of control
+ ignore_unknown: if true, use a dummy Control instance for controls of
+ unknown type; otherwise, use a TextControl
+ select_default: for RADIO and multiple-selection SELECT controls, pick
+ the first item as the default if no 'selected' HTML attribute is
+ present (this defaulting happens when the HTMLForm.fixup method is
+ called)
+ index: index of corresponding element in HTML (see
+ MoreFormTests.test_interspersed_controls for motivation)
+
+ """
+ type = type.lower()
+ klass = self.type2class.get(type)
+ if klass is None:
+ if ignore_unknown:
+ klass = IgnoreControl
+ else:
+ klass = TextControl
+
+ a = attrs.copy()
+ if issubclass(klass, ListControl):
+ control = klass(type, name, a, select_default, index)
+ else:
+ control = klass(type, name, a, index)
+
+ if type == "select" and len(attrs) == 1:
+ for ii in range(len(self.controls)-1, -1, -1):
+ ctl = self.controls[ii]
+ if ctl.type == "select":
+ ctl.close_control()
+ break
+
+ control.add_to_form(self)
+ control._urlparse = self._urlparse
+ control._urlunparse = self._urlunparse
+
+ def fixup(self):
+ """Normalise form after all controls have been added.
+
+ This is usually called by ParseFile and ParseResponse. Don't call it
+ yourself unless you're building your own Control instances.
+
+ This method should only be called once, after all controls have been
+ added to the form.
+
+ """
+ for control in self.controls:
+ control.fixup()
+ self.backwards_compat = self._backwards_compat
+
+#---------------------------------------------------
+ def __str__(self):
+ header = "%s%s %s %s" % (
+ (self.name and self.name+" " or ""),
+ self.method, self.action, self.enctype)
+ rep = [header]
+ for control in self.controls:
+ rep.append(" %s" % str(control))
+ return "<%s>" % "\n".join(rep)
+
+#---------------------------------------------------
+# Form-filling methods.
+
+ def __getitem__(self, name):
+ return self.find_control(name).value
+ def __contains__(self, name):
+ return bool(self.find_control(name))
+ def __setitem__(self, name, value):
+ control = self.find_control(name)
+ try:
+ control.value = value
+ except AttributeError, e:
+ raise ValueError(str(e))
+
+ def get_value(self,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False, # by_label is deprecated
+ label=None):
+ """Return value of control.
+
+ If only name and value arguments are supplied, equivalent to
+
+ form[name]
+
+ """
+ if by_label:
+ deprecation("form.get_value_by_label(...)")
+ c = self.find_control(name, type, kind, id, label=label, nr=nr)
+ if by_label:
+ try:
+ meth = c.get_value_by_label
+ except AttributeError:
+ raise NotImplementedError(
+ "control '%s' does not yet support by_label" % c.name)
+ else:
+ return meth()
+ else:
+ return c.value
+ def set_value(self, value,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False, # by_label is deprecated
+ label=None):
+ """Set value of control.
+
+ If only name and value arguments are supplied, equivalent to
+
+ form[name] = value
+
+ """
+ if by_label:
+ deprecation("form.get_value_by_label(...)")
+ c = self.find_control(name, type, kind, id, label=label, nr=nr)
+ if by_label:
+ try:
+ meth = c.set_value_by_label
+ except AttributeError:
+ raise NotImplementedError(
+ "control '%s' does not yet support by_label" % c.name)
+ else:
+ meth(value)
+ else:
+ c.value = value
+ def get_value_by_label(
+ self, name=None, type=None, kind=None, id=None, label=None, nr=None):
+ """
+
+ All arguments should be passed by name.
+
+ """
+ c = self.find_control(name, type, kind, id, label=label, nr=nr)
+ return c.get_value_by_label()
+
+ def set_value_by_label(
+ self, value,
+ name=None, type=None, kind=None, id=None, label=None, nr=None):
+ """
+
+ All arguments should be passed by name.
+
+ """
+ c = self.find_control(name, type, kind, id, label=label, nr=nr)
+ c.set_value_by_label(value)
+
+ def set_all_readonly(self, readonly):
+ for control in self.controls:
+ control.readonly = bool(readonly)
+
+ def clear_all(self):
+ """Clear the value attributes of all controls in the form.
+
+ See HTMLForm.clear.__doc__.
+
+ """
+ for control in self.controls:
+ control.clear()
+
+ def clear(self,
+ name=None, type=None, kind=None, id=None, nr=None, label=None):
+ """Clear the value attribute of a control.
+
+ As a result, the affected control will not be successful until a value
+ is subsequently set. AttributeError is raised on readonly controls.
+
+ """
+ c = self.find_control(name, type, kind, id, label=label, nr=nr)
+ c.clear()
+
+
+#---------------------------------------------------
+# Form-filling methods applying only to ListControls.
+
+ def possible_items(self, # deprecated
+ name=None, type=None, kind=None, id=None,
+ nr=None, by_label=False, label=None):
+ """Return a list of all values that the specified control can take."""
+ c = self._find_list_control(name, type, kind, id, label, nr)
+ return c.possible_items(by_label)
+
+ def set(self, selected, item_name, # deprecated
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False, label=None):
+ """Select / deselect named list item.
+
+ selected: boolean selected state
+
+ """
+ self._find_list_control(name, type, kind, id, label, nr).set(
+ selected, item_name, by_label)
+ def toggle(self, item_name, # deprecated
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False, label=None):
+ """Toggle selected state of named list item."""
+ self._find_list_control(name, type, kind, id, label, nr).toggle(
+ item_name, by_label)
+
+ def set_single(self, selected, # deprecated
+ name=None, type=None, kind=None, id=None,
+ nr=None, by_label=None, label=None):
+ """Select / deselect list item in a control having only one item.
+
+ If the control has multiple list items, ItemCountError is raised.
+
+ This is just a convenience method, so you don't need to know the item's
+ name -- the item name in these single-item controls is usually
+ something meaningless like "1" or "on".
+
+ For example, if a checkbox has a single item named "on", the following
+ two calls are equivalent:
+
+ control.toggle("on")
+ control.toggle_single()
+
+ """ # by_label ignored and deprecated
+ self._find_list_control(
+ name, type, kind, id, label, nr).set_single(selected)
+ def toggle_single(self, name=None, type=None, kind=None, id=None,
+ nr=None, by_label=None, label=None): # deprecated
+ """Toggle selected state of list item in control having only one item.
+
+ The rest is as for HTMLForm.set_single.__doc__.
+
+ """ # by_label ignored and deprecated
+ self._find_list_control(name, type, kind, id, label, nr).toggle_single()
+
+#---------------------------------------------------
+# Form-filling method applying only to FileControls.
+
+ def add_file(self, file_object, content_type=None, filename=None,
+ name=None, id=None, nr=None, label=None):
+ """Add a file to be uploaded.
+
+ file_object: file-like object (with read method) from which to read
+ data to upload
+ content_type: MIME content type of data to upload
+ filename: filename to pass to server
+
+ If filename is None, no filename is sent to the server.
+
+ If content_type is None, the content type is guessed based on the
+ filename and the data read from the file object.
+
+ XXX
+ At the moment, guessed content type is always application/octet-stream.
+ Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
+ plain text.
+
+ Note the following useful HTML attributes of file upload controls (see
+ HTML 4.01 spec, section 17):
+
+ accept: comma-separated list of content types that the server will
+ handle correctly; you can use this to filter out non-conforming files
+ size: XXX IIRC, this is indicative of whether form wants multiple or
+ single files
+ maxlength: XXX hint of max content length in bytes?
+
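+ A minimal sketch (the file, content type and control name here are
+ hypothetical):
+
+ form.add_file(open("example.png", "rb"), "image/png", "example.png",
+ name="upload")  # "upload" is an assumed control name
+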
+ """
+ self.find_control(name, "file", id=id, label=label, nr=nr).add_file(
+ file_object, content_type, filename)
+
+#---------------------------------------------------
+# Form submission methods, applying only to clickable controls.
+
+ def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
+ request_class=urllib2.Request,
+ label=None):
+ """Return request that would result from clicking on a control.
+
+ The request object is a urllib2.Request instance, which you can pass to
+ urllib2.urlopen (or ClientCookie.urlopen).
+
+ Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
+ IMAGEs) can be clicked.
+
+ Will click on the first clickable control, subject to the name, type
+ and nr arguments (as for find_control). If no name, type, id or number
+ is specified and there are no clickable controls, a request will be
+ returned for the form in its current, un-clicked, state.
+
+ IndexError is raised if any of name, type, id or nr is specified but no
+ matching control is found. ValueError is raised if the HTMLForm has an
+ enctype attribute that is not recognised.
+
+ You can optionally specify a coordinate to click at, which only makes a
+ difference if you clicked on an image.
+
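+ For example (a hypothetical sketch; the control name is assumed):
+
+ request = form.click(name="submit_button")  # assumed name
+ response = urllib2.urlopen(request)
+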
+ """
+ return self._click(name, type, id, label, nr, coord, "request",
+ self._request_class)
+
+ def click_request_data(self,
+ name=None, type=None, id=None,
+ nr=0, coord=(1,1),
+ request_class=urllib2.Request,
+ label=None):
+ """As for click method, but return a tuple (url, data, headers).
+
+ You can use this data to send a request to the server. This is useful
+ if you're using httplib or urllib rather than urllib2. Otherwise, use
+ the click method.
+
+ # Untested. Have to subclass to add headers, I think -- so use urllib2
+ # instead!
+ import urllib
+ url, data, hdrs = form.click_request_data()
+ r = urllib.urlopen(url, data)
+
+ # Untested. I don't know of any reason to use httplib -- you can get
+ # just as much control with urllib2.
+ import httplib, urlparse
+ url, data, hdrs = form.click_request_data()
+ tup = urlparse.urlparse(url)
+ host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
+ conn = httplib.HTTPConnection(host)
+ if data:
+ conn.request("POST", path, data, hdrs)
+ else:
+ conn.request("GET", path, headers=hdrs)
+ r = conn.getresponse()
+
+ """
+ return self._click(name, type, id, label, nr, coord, "request_data",
+ self._request_class)
+
+ def click_pairs(self, name=None, type=None, id=None,
+ nr=0, coord=(1,1),
+ label=None):
+ """As for click_request_data, but returns a list of (key, value) pairs.
+
+ You can use this list as an argument to ClientForm.urlencode. This is
+ usually only useful if you're using httplib or urllib rather than
+ urllib2 or ClientCookie. It may also be useful if you want to manually
+ tweak the keys and/or values, but this should not be necessary.
+ Otherwise, use the click method.
+
+ Note that this method is only useful for forms of MIME type
+ x-www-form-urlencoded. In particular, it does not return the
+ information required for file upload. If you need file upload and are
+ not using urllib2, use click_request_data.
+
+ Also note that Python 2.0's urllib.urlencode is slightly broken: it
+ only accepts a mapping, not a sequence of pairs, as an argument. This
+ messes up any ordering in the argument. Use ClientForm.urlencode
+ instead.
+
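+ For example (a sketch, assuming an x-www-form-urlencoded form):
+
+ pairs = form.click_pairs()
+ data = ClientForm.urlencode(pairs)  # order-preserving, unlike urllib
+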
+ """
+ return self._click(name, type, id, label, nr, coord, "pairs",
+ self._request_class)
+
+#---------------------------------------------------
+
+ def find_control(self,
+ name=None, type=None, kind=None, id=None,
+ predicate=None, nr=None,
+ label=None):
+ """Locate and return some specific control within the form.
+
+ At least one of the name, type, kind, predicate and nr arguments must
+ be supplied. If no matching control is found, ControlNotFoundError is
+ raised.
+
+ If name is specified, then the control must have the indicated name.
+
+ If type is specified then the control must have the specified type (in
+ addition to the types possible for <input> HTML tags: "text",
+ "password", "hidden", "submit", "image", "button", "radio", "checkbox",
+ "file" we also have "reset", "buttonbutton", "submitbutton",
+ "resetbutton", "textarea", "select" and "isindex").
+
+ If kind is specified, then the control must fall into the specified
+ group, each of which satisfies a particular interface. The types are
+ "text", "list", "multilist", "singlelist", "clickable" and "file".
+
+ If id is specified, then the control must have the indicated id.
+
+ If predicate is specified, then the control must match that function.
+ The predicate function is passed the control as its single argument,
+ and should return a boolean value indicating whether the control
+ matched.
+
+ nr, if supplied, is the sequence number of the control (where 0 is the
+ first). Note that control 0 is the first control matching all the
+ other arguments (if supplied); it is not necessarily the first control
+ in the form. If no nr is supplied, AmbiguityError is raised if
+ multiple controls match the other arguments (unless the
+ .backwards_compat attribute is true).
+
+ If label is specified, then the control must have this label. Note
+ that radio controls and checkboxes never have labels: their items do.
+
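+ For example (a hypothetical sketch; names and types are assumed):
+
+ control = form.find_control(type="select", nr=0)  # first SELECT
+ cheeses = form.find_control(name="cheeses", kind="list")
+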
+ """
+ if ((name is None) and (type is None) and (kind is None) and
+ (id is None) and (label is None) and (predicate is None) and
+ (nr is None)):
+ raise ValueError(
+ "at least one argument must be supplied to specify control")
+ return self._find_control(name, type, kind, id, label, predicate, nr)
+
+#---------------------------------------------------
+# Private methods.
+
+ def _find_list_control(self,
+ name=None, type=None, kind=None, id=None,
+ label=None, nr=None):
+ if ((name is None) and (type is None) and (kind is None) and
+ (id is None) and (label is None) and (nr is None)):
+ raise ValueError(
+ "at least one argument must be supplied to specify control")
+
+ return self._find_control(name, type, kind, id, label,
+ is_listcontrol, nr)
+
+ def _find_control(self, name, type, kind, id, label, predicate, nr):
+ if ((name is not None) and (name is not Missing) and
+ not isstringlike(name)):
+ raise TypeError("control name must be string-like")
+ if (type is not None) and not isstringlike(type):
+ raise TypeError("control type must be string-like")
+ if (kind is not None) and not isstringlike(kind):
+ raise TypeError("control kind must be string-like")
+ if (id is not None) and not isstringlike(id):
+ raise TypeError("control id must be string-like")
+ if (label is not None) and not isstringlike(label):
+ raise TypeError("control label must be string-like")
+ if (predicate is not None) and not callable(predicate):
+ raise TypeError("control predicate must be callable")
+ if (nr is not None) and nr < 0:
+ raise ValueError("control number must be a positive integer")
+
+ orig_nr = nr
+ found = None
+ ambiguous = False
+ if nr is None and self.backwards_compat:
+ nr = 0
+
+ for control in self.controls:
+ if ((name is not None and name != control.name) and
+ (name is not Missing or control.name is not None)):
+ continue
+ if type is not None and type != control.type:
+ continue
+ if kind is not None and not control.is_of_kind(kind):
+ continue
+ if id is not None and id != control.id:
+ continue
+ if predicate and not predicate(control):
+ continue
+ if label:
+ for l in control.get_labels():
+ if l.text.find(label) > -1:
+ break
+ else:
+ continue
+ if nr is not None:
+ if nr == 0:
+ return control # early exit: unambiguous due to nr
+ nr -= 1
+ continue
+ if found:
+ ambiguous = True
+ break
+ found = control
+
+ if found and not ambiguous:
+ return found
+
+ description = []
+ if name is not None: description.append("name %s" % repr(name))
+ if type is not None: description.append("type '%s'" % type)
+ if kind is not None: description.append("kind '%s'" % kind)
+ if id is not None: description.append("id '%s'" % id)
+ if label is not None: description.append("label '%s'" % label)
+ if predicate is not None:
+ description.append("predicate %s" % predicate)
+ if orig_nr: description.append("nr %d" % orig_nr)
+ description = ", ".join(description)
+
+ if ambiguous:
+ raise AmbiguityError("more than one control matching "+description)
+ elif not found:
+ raise ControlNotFoundError("no control matching "+description)
+ assert False
+
+ def _click(self, name, type, id, label, nr, coord, return_type,
+ request_class=urllib2.Request):
+ try:
+ control = self._find_control(
+ name, type, "clickable", id, label, None, nr)
+ except ControlNotFoundError:
+ if ((name is not None) or (type is not None) or (id is not None) or
+ (nr != 0)):
+ raise
+ # no clickable controls, but no control was explicitly requested,
+ # so return state without clicking any control
+ return self._switch_click(return_type, request_class)
+ else:
+ return control._click(self, coord, return_type, request_class)
+
+ def _pairs(self):
+ """Return sequence of (key, value) pairs suitable for urlencoding."""
+ return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()]
+
+
+ def _pairs_and_controls(self):
+ """Return sequence of (index, key, value, control_index)
+ of totally ordered pairs suitable for urlencoding.
+
+ control_index is the index of the control in self.controls
+ """
+ pairs = []
+ for control_index in range(len(self.controls)):
+ control = self.controls[control_index]
+ for ii, key, val in control._totally_ordered_pairs():
+ pairs.append((ii, key, val, control_index))
+
+        # sort by the total-ordering index in the first slot (the indices
+        # are unique, so the rest of each tuple never affects the order)
+ pairs.sort()
+
+ return pairs
+
+ def _request_data(self):
+ """Return a tuple (url, data, headers)."""
+ method = self.method.upper()
+ #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action)
+ parts = self._urlparse(self.action)
+ rest, (query, frag) = parts[:-2], parts[-2:]
+
+ if method == "GET":
+ if self.enctype != "application/x-www-form-urlencoded":
+ raise ValueError(
+ "unknown GET form encoding type '%s'" % self.enctype)
+ parts = rest + (urlencode(self._pairs()), None)
+ uri = self._urlunparse(parts)
+ return uri, None, []
+ elif method == "POST":
+ parts = rest + (query, None)
+ uri = self._urlunparse(parts)
+ if self.enctype == "application/x-www-form-urlencoded":
+ return (uri, urlencode(self._pairs()),
+ [("Content-Type", self.enctype)])
+ elif self.enctype == "multipart/form-data":
+ data = StringIO()
+ http_hdrs = []
+ mw = MimeWriter(data, http_hdrs)
+ f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
+ prefix=0)
+ for ii, k, v, control_index in self._pairs_and_controls():
+ self.controls[control_index]._write_mime_data(mw, k, v)
+ mw.lastpart()
+ return uri, data.getvalue(), http_hdrs
+ else:
+ raise ValueError(
+ "unknown POST form encoding type '%s'" % self.enctype)
+ else:
+ raise ValueError("Unknown method '%s'" % method)
+
+ def _switch_click(self, return_type, request_class=urllib2.Request):
+ # This is called by HTMLForm and clickable Controls to hide switching
+ # on return_type.
+ if return_type == "pairs":
+ return self._pairs()
+ elif return_type == "request_data":
+ return self._request_data()
+ else:
+ req_data = self._request_data()
+ req = request_class(req_data[0], req_data[1])
+ for key, val in req_data[2]:
+ add_hdr = req.add_header
+ if key.lower() == "content-type":
+ try:
+ add_hdr = req.add_unredirected_header
+ except AttributeError:
+ # pre-2.4 and not using ClientCookie
+ pass
+ add_hdr(key, val)
+ return req
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py
new file mode 100644
index 0000000..c1e4c6d
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py
@@ -0,0 +1 @@
+# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url
new file mode 100644
index 0000000..f34e243
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url
@@ -0,0 +1 @@
+http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url
new file mode 100644
index 0000000..f34e243
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url
@@ -0,0 +1 @@
+http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py
new file mode 100644
index 0000000..c1e4c6d
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py
@@ -0,0 +1 @@
+# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py
new file mode 100644
index 0000000..6f29a65
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py
@@ -0,0 +1,438 @@
+# Copyright (C) 1999--2002 Joel Rosdahl
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Joel Rosdahl <joel@rosdahl.net>
+#
+# $Id: ircbot.py,v 1.23 2008/09/11 07:38:30 keltus Exp $
+
+"""ircbot -- Simple IRC bot library.
+
+This module contains a single-server IRC bot class that can be used to
+write simpler bots.
+"""
+
+import sys
+from UserDict import UserDict
+
+from irclib import SimpleIRCClient
+from irclib import nm_to_n, irc_lower, all_events
+from irclib import parse_channel_modes, is_channel
+from irclib import ServerConnectionError
+
+class SingleServerIRCBot(SimpleIRCClient):
+ """A single-server IRC bot class.
+
+ The bot tries to reconnect if it is disconnected.
+
+    The bot keeps track of the channels it has joined, the other
+    clients present in those channels, and which of them have
+    operator or voice modes. The "database" is kept in the
+ self.channels attribute, which is an IRCDict of Channels.
+ """
+ def __init__(self, server_list, nickname, realname, reconnection_interval=60):
+ """Constructor for SingleServerIRCBot objects.
+
+ Arguments:
+
+ server_list -- A list of tuples (server, port) that
+ defines which servers the bot should try to
+ connect to.
+
+ nickname -- The bot's nickname.
+
+ realname -- The bot's realname.
+
+ reconnection_interval -- How long the bot should wait
+ before trying to reconnect.
+
+        (dcc_connections -- a list of initiated/accepted DCC
+        connections -- is an instance attribute initialized by
+        SimpleIRCClient, not a constructor argument.)
+        """
+
+ SimpleIRCClient.__init__(self)
+ self.channels = IRCDict()
+ self.server_list = server_list
+ if not reconnection_interval or reconnection_interval < 0:
+ reconnection_interval = 2**31
+ self.reconnection_interval = reconnection_interval
+
+ self._nickname = nickname
+ self._realname = realname
+ for i in ["disconnect", "join", "kick", "mode",
+ "namreply", "nick", "part", "quit"]:
+ self.connection.add_global_handler(i,
+ getattr(self, "_on_" + i),
+ -10)
+
+    def _connected_checker(self):
+ """[Internal]"""
+ if not self.connection.is_connected():
+ self.connection.execute_delayed(self.reconnection_interval,
+ self._connected_checker)
+ self.jump_server()
+
+ def _connect(self):
+ """[Internal]"""
+ password = None
+ if len(self.server_list[0]) > 2:
+ password = self.server_list[0][2]
+ try:
+ self.connect(self.server_list[0][0],
+ self.server_list[0][1],
+ self._nickname,
+ password,
+ ircname=self._realname)
+ except ServerConnectionError:
+ pass
+
+ def _on_disconnect(self, c, e):
+ """[Internal]"""
+ self.channels = IRCDict()
+ self.connection.execute_delayed(self.reconnection_interval,
+ self._connected_checker)
+
+ def _on_join(self, c, e):
+ """[Internal]"""
+ ch = e.target()
+ nick = nm_to_n(e.source())
+ if nick == c.get_nickname():
+ self.channels[ch] = Channel()
+ self.channels[ch].add_user(nick)
+
+ def _on_kick(self, c, e):
+ """[Internal]"""
+ nick = e.arguments()[0]
+ channel = e.target()
+
+ if nick == c.get_nickname():
+ del self.channels[channel]
+ else:
+ self.channels[channel].remove_user(nick)
+
+ def _on_mode(self, c, e):
+ """[Internal]"""
+ modes = parse_channel_modes(" ".join(e.arguments()))
+ t = e.target()
+ if is_channel(t):
+ ch = self.channels[t]
+ for mode in modes:
+ if mode[0] == "+":
+ f = ch.set_mode
+ else:
+ f = ch.clear_mode
+ f(mode[1], mode[2])
+ else:
+ # Mode on self... XXX
+ pass
+
+ def _on_namreply(self, c, e):
+ """[Internal]"""
+
+ # e.arguments()[0] == "@" for secret channels,
+ # "*" for private channels,
+ # "=" for others (public channels)
+ # e.arguments()[1] == channel
+ # e.arguments()[2] == nick list
+
+ ch = e.arguments()[1]
+ for nick in e.arguments()[2].split():
+ if nick[0] == "@":
+ nick = nick[1:]
+ self.channels[ch].set_mode("o", nick)
+ elif nick[0] == "+":
+ nick = nick[1:]
+ self.channels[ch].set_mode("v", nick)
+ self.channels[ch].add_user(nick)
+
+ def _on_nick(self, c, e):
+ """[Internal]"""
+ before = nm_to_n(e.source())
+ after = e.target()
+ for ch in self.channels.values():
+ if ch.has_user(before):
+ ch.change_nick(before, after)
+
+ def _on_part(self, c, e):
+ """[Internal]"""
+ nick = nm_to_n(e.source())
+ channel = e.target()
+
+ if nick == c.get_nickname():
+ del self.channels[channel]
+ else:
+ self.channels[channel].remove_user(nick)
+
+ def _on_quit(self, c, e):
+ """[Internal]"""
+ nick = nm_to_n(e.source())
+ for ch in self.channels.values():
+ if ch.has_user(nick):
+ ch.remove_user(nick)
+
+ def die(self, msg="Bye, cruel world!"):
+ """Let the bot die.
+
+ Arguments:
+
+ msg -- Quit message.
+ """
+
+ self.connection.disconnect(msg)
+ sys.exit(0)
+
+ def disconnect(self, msg="I'll be back!"):
+ """Disconnect the bot.
+
+ The bot will try to reconnect after a while.
+
+ Arguments:
+
+ msg -- Quit message.
+ """
+ self.connection.disconnect(msg)
+
+ def get_version(self):
+ """Returns the bot version.
+
+ Used when answering a CTCP VERSION request.
+ """
+ return "ircbot.py by Joel Rosdahl <joel@rosdahl.net>"
+
+ def jump_server(self, msg="Changing servers"):
+ """Connect to a new server, possibly disconnecting from the current.
+
+        The bot will skip to the next server in the server_list each
+        time jump_server is called.
+ """
+ if self.connection.is_connected():
+ self.connection.disconnect(msg)
+
+ self.server_list.append(self.server_list.pop(0))
+ self._connect()
+
+ def on_ctcp(self, c, e):
+ """Default handler for ctcp events.
+
+ Replies to VERSION and PING requests and relays DCC requests
+ to the on_dccchat method.
+ """
+ if e.arguments()[0] == "VERSION":
+ c.ctcp_reply(nm_to_n(e.source()),
+ "VERSION " + self.get_version())
+ elif e.arguments()[0] == "PING":
+ if len(e.arguments()) > 1:
+ c.ctcp_reply(nm_to_n(e.source()),
+ "PING " + e.arguments()[1])
+ elif e.arguments()[0] == "DCC" and e.arguments()[1].split(" ", 1)[0] == "CHAT":
+ self.on_dccchat(c, e)
+
+ def on_dccchat(self, c, e):
+ pass
+
+ def start(self):
+ """Start the bot."""
+ self._connect()
+ SimpleIRCClient.start(self)
+
+
+class IRCDict:
+ """A dictionary suitable for storing IRC-related things.
+
+ Dictionary keys a and b are considered equal if and only if
+ irc_lower(a) == irc_lower(b)
+
+ Otherwise, it should behave exactly as a normal dictionary.
+ """
+
+ def __init__(self, dict=None):
+ self.data = {}
+ self.canon_keys = {} # Canonical keys
+ if dict is not None:
+ self.update(dict)
+ def __repr__(self):
+ return repr(self.data)
+ def __cmp__(self, dict):
+ if isinstance(dict, IRCDict):
+ return cmp(self.data, dict.data)
+ else:
+ return cmp(self.data, dict)
+ def __len__(self):
+ return len(self.data)
+ def __getitem__(self, key):
+ return self.data[self.canon_keys[irc_lower(key)]]
+ def __setitem__(self, key, item):
+ if key in self:
+ del self[key]
+ self.data[key] = item
+ self.canon_keys[irc_lower(key)] = key
+ def __delitem__(self, key):
+ ck = irc_lower(key)
+ del self.data[self.canon_keys[ck]]
+ del self.canon_keys[ck]
+ def __iter__(self):
+ return iter(self.data)
+ def __contains__(self, key):
+ return self.has_key(key)
+ def clear(self):
+ self.data.clear()
+ self.canon_keys.clear()
+ def copy(self):
+ if self.__class__ is UserDict:
+ return UserDict(self.data)
+ import copy
+ return copy.copy(self)
+ def keys(self):
+ return self.data.keys()
+ def items(self):
+ return self.data.items()
+ def values(self):
+ return self.data.values()
+ def has_key(self, key):
+ return irc_lower(key) in self.canon_keys
+ def update(self, dict):
+ for k, v in dict.items():
+ self.data[k] = v
+ def get(self, key, failobj=None):
+ return self.data.get(key, failobj)
+
+
+class Channel:
+ """A class for keeping information about an IRC channel.
+
+ This class can be improved a lot.
+ """
+
+ def __init__(self):
+ self.userdict = IRCDict()
+ self.operdict = IRCDict()
+ self.voiceddict = IRCDict()
+ self.modes = {}
+
+ def users(self):
+ """Returns an unsorted list of the channel's users."""
+ return self.userdict.keys()
+
+ def opers(self):
+ """Returns an unsorted list of the channel's operators."""
+ return self.operdict.keys()
+
+ def voiced(self):
+ """Returns an unsorted list of the persons that have voice
+ mode set in the channel."""
+ return self.voiceddict.keys()
+
+ def has_user(self, nick):
+ """Check whether the channel has a user."""
+ return nick in self.userdict
+
+ def is_oper(self, nick):
+ """Check whether a user has operator status in the channel."""
+ return nick in self.operdict
+
+ def is_voiced(self, nick):
+ """Check whether a user has voice mode set in the channel."""
+ return nick in self.voiceddict
+
+ def add_user(self, nick):
+ self.userdict[nick] = 1
+
+ def remove_user(self, nick):
+ for d in self.userdict, self.operdict, self.voiceddict:
+ if nick in d:
+ del d[nick]
+
+ def change_nick(self, before, after):
+ self.userdict[after] = 1
+ del self.userdict[before]
+ if before in self.operdict:
+ self.operdict[after] = 1
+ del self.operdict[before]
+ if before in self.voiceddict:
+ self.voiceddict[after] = 1
+ del self.voiceddict[before]
+
+ def set_mode(self, mode, value=None):
+ """Set mode on the channel.
+
+ Arguments:
+
+ mode -- The mode (a single-character string).
+
+        value -- The mode argument (a nick for the "o" and "v" modes).
+ """
+ if mode == "o":
+ self.operdict[value] = 1
+ elif mode == "v":
+ self.voiceddict[value] = 1
+ else:
+ self.modes[mode] = value
+
+ def clear_mode(self, mode, value=None):
+ """Clear mode on the channel.
+
+ Arguments:
+
+ mode -- The mode (a single-character string).
+
+        value -- The mode argument (a nick for the "o" and "v" modes).
+ """
+ try:
+ if mode == "o":
+ del self.operdict[value]
+ elif mode == "v":
+ del self.voiceddict[value]
+ else:
+ del self.modes[mode]
+ except KeyError:
+ pass
+
+ def has_mode(self, mode):
+ return mode in self.modes
+
+ def is_moderated(self):
+ return self.has_mode("m")
+
+ def is_secret(self):
+ return self.has_mode("s")
+
+ def is_protected(self):
+ return self.has_mode("p")
+
+ def has_topic_lock(self):
+ return self.has_mode("t")
+
+ def is_invite_only(self):
+ return self.has_mode("i")
+
+ def has_allow_external_messages(self):
+ return self.has_mode("n")
+
+ def has_limit(self):
+ return self.has_mode("l")
+
+ def limit(self):
+ if self.has_limit():
+            return self.modes["l"]
+ else:
+ return None
+
+ def has_key(self):
+ return self.has_mode("k")
+
+ def key(self):
+ if self.has_key():
+ return self.modes["k"]
+ else:
+ return None
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py
new file mode 100644
index 0000000..5f7141c
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py
@@ -0,0 +1,1560 @@
+# Copyright (C) 1999--2002 Joel Rosdahl
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# keltus <keltus@users.sourceforge.net>
+#
+# $Id: irclib.py,v 1.47 2008/09/25 22:00:59 keltus Exp $
+
+"""irclib -- Internet Relay Chat (IRC) protocol client library.
+
+This library is intended to encapsulate the IRC protocol at a quite
+low level. It provides an event-driven IRC client framework. It has
+fairly thorough support for the basic IRC protocol, CTCP and DCC chat,
+but DCC file transfers are not yet supported.
+
+In order to understand how to make an IRC client, I'm afraid you more
+or less must understand the IRC specifications. They are available
+here: [IRC specifications].
+
+The main features of the IRC client framework are:
+
+ * Abstraction of the IRC protocol.
+ * Handles multiple simultaneous IRC server connections.
+ * Handles server PONGing transparently.
+  * Messages to the IRC server are sent by calling methods on an IRC
+    connection object.
+  * Messages from an IRC server trigger events, which can be caught
+ by event handlers.
+ * Reading from and writing to IRC server sockets are normally done
+ by an internal select() loop, but the select()ing may be done by
+ an external main loop.
+ * Functions can be registered to execute at specified times by the
+ event-loop.
+ * Decodes CTCP tagging correctly (hopefully); I haven't seen any
+ other IRC client implementation that handles the CTCP
+    specification subtleties.
+ * A kind of simple, single-server, object-oriented IRC client class
+ that dispatches events to instance methods is included.
+
+Current limitations:
+
+ * The IRC protocol shines through the abstraction a bit too much.
+ * Data is not written asynchronously to the server, i.e. the write()
+ may block if the TCP buffers are stuffed.
+  * There is no support for DCC file transfers.
+  * The author hasn't even read RFC 2810, 2811, 2812 and 2813.
+ * Like most projects, documentation is lacking...
+
+.. [IRC specifications] http://www.irchelp.org/irchelp/rfc/
+"""
+
+import bisect
+import re
+import select
+import socket
+import string
+import sys
+import time
+import types
+
+VERSION = 0, 4, 8
+DEBUG = 0
+
+# TODO
+# ----
+# (maybe) thread safety
+# (maybe) color parser convenience functions
+# documentation (including all event types)
+# (maybe) add awareness of different types of ircds
+# send data asynchronously to the server (and DCC connections)
+# (maybe) automatically close unused, passive DCC connections after a while
+
+# NOTES
+# -----
+# connection.quit() only sends QUIT to the server.
+# ERROR from the server triggers the error event and the disconnect event.
+# dropping of the connection triggers the disconnect event.
+
+class IRCError(Exception):
+ """Represents an IRC exception."""
+ pass
+
+
+class IRC:
+ """Class that handles one or several IRC server connections.
+
+ When an IRC object has been instantiated, it can be used to create
+ Connection objects that represent the IRC connections. The
+ responsibility of the IRC object is to provide an event-driven
+ framework for the connections and to keep the connections alive.
+ It runs a select loop to poll each connection's TCP socket and
+ hands over the sockets with incoming data for processing by the
+ corresponding connection.
+
+ The methods of most interest for an IRC client writer are server,
+ add_global_handler, remove_global_handler, execute_at,
+ execute_delayed, process_once and process_forever.
+
+ Here is an example:
+
+ irc = irclib.IRC()
+ server = irc.server()
+ server.connect(\"irc.some.where\", 6667, \"my_nickname\")
+ server.privmsg(\"a_nickname\", \"Hi there!\")
+ irc.process_forever()
+
+ This will connect to the IRC server irc.some.where on port 6667
+ using the nickname my_nickname and send the message \"Hi there!\"
+ to the nickname a_nickname.
+ """
+
+ def __init__(self, fn_to_add_socket=None,
+ fn_to_remove_socket=None,
+ fn_to_add_timeout=None):
+ """Constructor for IRC objects.
+
+ Optional arguments are fn_to_add_socket, fn_to_remove_socket
+ and fn_to_add_timeout. The first two specify functions that
+ will be called with a socket object as argument when the IRC
+ object wants to be notified (or stop being notified) of data
+ coming on a new socket. When new data arrives, the method
+ process_data should be called. Similarly, fn_to_add_timeout
+ is called with a number of seconds (a floating point number)
+ as first argument when the IRC object wants to receive a
+ notification (by calling the process_timeout method). So, if
+ e.g. the argument is 42.17, the object wants the
+ process_timeout method to be called after 42 seconds and 170
+ milliseconds.
+
+ The three arguments mainly exist to be able to use an external
+ main loop (for example Tkinter's or PyGTK's main app loop)
+ instead of calling the process_forever method.
+
+        An alternative is to just call IRC.process_once() once in a
+        while.
+ """
+
+ if fn_to_add_socket and fn_to_remove_socket:
+ self.fn_to_add_socket = fn_to_add_socket
+ self.fn_to_remove_socket = fn_to_remove_socket
+ else:
+ self.fn_to_add_socket = None
+ self.fn_to_remove_socket = None
+
+ self.fn_to_add_timeout = fn_to_add_timeout
+ self.connections = []
+ self.handlers = {}
+ self.delayed_commands = [] # list of tuples in the format (time, function, arguments)
+
+ self.add_global_handler("ping", _ping_ponger, -42)
+
+ def server(self):
+ """Creates and returns a ServerConnection object."""
+
+ c = ServerConnection(self)
+ self.connections.append(c)
+ return c
+
+ def process_data(self, sockets):
+ """Called when there is more data to read on connection sockets.
+
+ Arguments:
+
+ sockets -- A list of socket objects.
+
+ See documentation for IRC.__init__.
+ """
+ for s in sockets:
+ for c in self.connections:
+ if s == c._get_socket():
+ c.process_data()
+
+ def process_timeout(self):
+ """Called when a timeout notification is due.
+
+ See documentation for IRC.__init__.
+ """
+ t = time.time()
+ while self.delayed_commands:
+ if t >= self.delayed_commands[0][0]:
+ self.delayed_commands[0][1](*self.delayed_commands[0][2])
+ del self.delayed_commands[0]
+ else:
+ break
+
+ def process_once(self, timeout=0):
+ """Process data from connections once.
+
+ Arguments:
+
+ timeout -- How long the select() call should wait if no
+ data is available.
+
+ This method should be called periodically to check and process
+ incoming data, if there are any. If that seems boring, look
+ at the process_forever method.
+ """
+ sockets = map(lambda x: x._get_socket(), self.connections)
+        sockets = filter(lambda x: x is not None, sockets)
+ if sockets:
+ (i, o, e) = select.select(sockets, [], [], timeout)
+ self.process_data(i)
+ else:
+ time.sleep(timeout)
+ self.process_timeout()
+
+ def process_forever(self, timeout=0.2):
+ """Run an infinite loop, processing data from connections.
+
+ This method repeatedly calls process_once.
+
+ Arguments:
+
+ timeout -- Parameter to pass to process_once.
+ """
+ while 1:
+ self.process_once(timeout)
+
+ def disconnect_all(self, message=""):
+ """Disconnects all connections."""
+ for c in self.connections:
+ c.disconnect(message)
+
+ def add_global_handler(self, event, handler, priority=0):
+ """Adds a global handler function for a specific event type.
+
+ Arguments:
+
+ event -- Event type (a string). Check the values of the
+ numeric_events dictionary in irclib.py for possible event
+ types.
+
+ handler -- Callback function.
+
+ priority -- A number (the lower number, the higher priority).
+
+ The handler function is called whenever the specified event is
+ triggered in any of the connections. See documentation for
+ the Event class.
+
+ The handler functions are called in priority order (lowest
+ number is highest priority). If a handler function returns
+ \"NO MORE\", no more handlers will be called.
+ """
+        if event not in self.handlers:
+ self.handlers[event] = []
+ bisect.insort(self.handlers[event], ((priority, handler)))
+
+ def remove_global_handler(self, event, handler):
+ """Removes a global handler function.
+
+ Arguments:
+
+ event -- Event type (a string).
+
+ handler -- Callback function.
+
+ Returns 1 on success, otherwise 0.
+ """
+        if event not in self.handlers:
+ return 0
+ for h in self.handlers[event]:
+ if handler == h[1]:
+ self.handlers[event].remove(h)
+ return 1
+
+ def execute_at(self, at, function, arguments=()):
+ """Execute a function at a specified time.
+
+ Arguments:
+
+ at -- Execute at this time (standard \"time_t\" time).
+
+ function -- Function to call.
+
+ arguments -- Arguments to give the function.
+ """
+ self.execute_delayed(at-time.time(), function, arguments)
+
+ def execute_delayed(self, delay, function, arguments=()):
+ """Execute a function after a specified time.
+
+ Arguments:
+
+ delay -- How many seconds to wait.
+
+ function -- Function to call.
+
+ arguments -- Arguments to give the function.
+ """
+ bisect.insort(self.delayed_commands, (delay+time.time(), function, arguments))
+ if self.fn_to_add_timeout:
+ self.fn_to_add_timeout(delay)
+
+ def dcc(self, dcctype="chat"):
+ """Creates and returns a DCCConnection object.
+
+ Arguments:
+
+ dcctype -- "chat" for DCC CHAT connections or "raw" for
+ DCC SEND (or other DCC types). If "chat",
+ incoming data will be split in newline-separated
+ chunks. If "raw", incoming data is not touched.
+ """
+ c = DCCConnection(self, dcctype)
+ self.connections.append(c)
+ return c
+
+ def _handle_event(self, connection, event):
+ """[Internal]"""
+ h = self.handlers
+ for handler in h.get("all_events", []) + h.get(event.eventtype(), []):
+ if handler[1](connection, event) == "NO MORE":
+ return
+
+ def _remove_connection(self, connection):
+ """[Internal]"""
+ self.connections.remove(connection)
+ if self.fn_to_remove_socket:
+ self.fn_to_remove_socket(connection._get_socket())
+
+_rfc_1459_command_regexp = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?")
+
+class Connection:
+ """Base class for IRC connections.
+
+ Must be overridden.
+ """
+ def __init__(self, irclibobj):
+ self.irclibobj = irclibobj
+
+    def _get_socket(self):
+ raise IRCError, "Not overridden"
+
+ ##############################
+ ### Convenience wrappers.
+
+ def execute_at(self, at, function, arguments=()):
+ self.irclibobj.execute_at(at, function, arguments)
+
+ def execute_delayed(self, delay, function, arguments=()):
+ self.irclibobj.execute_delayed(delay, function, arguments)
+
+
+class ServerConnectionError(IRCError):
+ pass
+
+class ServerNotConnectedError(ServerConnectionError):
+ pass
+
+
+# Huh!? Crrrrazy EFNet doesn't follow the RFC: their ircd seems to
+# use \n as message separator! :P
+_linesep_regexp = re.compile("\r?\n")
+
+class ServerConnection(Connection):
+ """This class represents an IRC server connection.
+
+ ServerConnection objects are instantiated by calling the server
+ method on an IRC object.
+ """
+
+ def __init__(self, irclibobj):
+ Connection.__init__(self, irclibobj)
+ self.connected = 0 # Not connected yet.
+ self.socket = None
+ self.ssl = None
+
+ def connect(self, server, port, nickname, password=None, username=None,
+ ircname=None, localaddress="", localport=0, ssl=False, ipv6=False):
+ """Connect/reconnect to a server.
+
+ Arguments:
+
+ server -- Server name.
+
+ port -- Port number.
+
+ nickname -- The nickname.
+
+ password -- Password (if any).
+
+ username -- The username.
+
+ ircname -- The IRC name ("realname").
+
+ localaddress -- Bind the connection to a specific local IP address.
+
+ localport -- Bind the connection to a specific local port.
+
+ ssl -- Enable support for ssl.
+
+ ipv6 -- Enable support for ipv6.
+
+ This function can be called to reconnect a closed connection.
+
+ Returns the ServerConnection object.
+ """
+ if self.connected:
+ self.disconnect("Changing servers")
+
+ self.previous_buffer = ""
+ self.handlers = {}
+ self.real_server_name = ""
+ self.real_nickname = nickname
+ self.server = server
+ self.port = port
+ self.nickname = nickname
+ self.username = username or nickname
+ self.ircname = ircname or nickname
+ self.password = password
+ self.localaddress = localaddress
+ self.localport = localport
+ self.localhost = socket.gethostname()
+ if ipv6:
+ self.socket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ else:
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ self.socket.bind((self.localaddress, self.localport))
+ self.socket.connect((self.server, self.port))
+ if ssl:
+ self.ssl = socket.ssl(self.socket)
+ except socket.error, x:
+ self.socket.close()
+ self.socket = None
+ raise ServerConnectionError, "Couldn't connect to socket: %s" % x
+ self.connected = 1
+ if self.irclibobj.fn_to_add_socket:
+ self.irclibobj.fn_to_add_socket(self.socket)
+
+ # Log on...
+ if self.password:
+ self.pass_(self.password)
+ self.nick(self.nickname)
+ self.user(self.username, self.ircname)
+ return self
+
+ def close(self):
+ """Close the connection.
+
+ This method closes the connection permanently; after it has
+ been called, the object is unusable.
+ """
+
+ self.disconnect("Closing object")
+ self.irclibobj._remove_connection(self)
+
+ def _get_socket(self):
+ """[Internal]"""
+ return self.socket
+
+ def get_server_name(self):
+ """Get the (real) server name.
+
+ This method returns the (real) server name, or, more
+ specifically, what the server calls itself.
+ """
+
+ if self.real_server_name:
+ return self.real_server_name
+ else:
+ return ""
+
+ def get_nickname(self):
+ """Get the (real) nick name.
+
+ This method returns the (real) nickname. The library keeps
+ track of nick changes, so it might not be the nick name that
+        was passed to the connect() method.
+        """
+
+ return self.real_nickname
+
+ def process_data(self):
+ """[Internal]"""
+
+ try:
+ if self.ssl:
+ new_data = self.ssl.read(2**14)
+ else:
+ new_data = self.socket.recv(2**14)
+ except socket.error, x:
+ # The server hung up.
+ self.disconnect("Connection reset by peer")
+ return
+ if not new_data:
+ # Read nothing: connection must be down.
+ self.disconnect("Connection reset by peer")
+ return
+
+ lines = _linesep_regexp.split(self.previous_buffer + new_data)
+
+ # Save the last, unfinished line.
+ self.previous_buffer = lines.pop()
+
+ for line in lines:
+ if DEBUG:
+ print "FROM SERVER:", line
+
+ if not line:
+ continue
+
+ prefix = None
+ command = None
+ arguments = None
+ self._handle_event(Event("all_raw_messages",
+ self.get_server_name(),
+ None,
+ [line]))
+
+ m = _rfc_1459_command_regexp.match(line)
+ if m.group("prefix"):
+ prefix = m.group("prefix")
+ if not self.real_server_name:
+ self.real_server_name = prefix
+
+ if m.group("command"):
+ command = m.group("command").lower()
+
+ if m.group("argument"):
+ a = m.group("argument").split(" :", 1)
+ arguments = a[0].split()
+ if len(a) == 2:
+ arguments.append(a[1])
+
+ # Translate numerics into more readable strings.
+ if command in numeric_events:
+ command = numeric_events[command]
+
+ if command == "nick":
+ if nm_to_n(prefix) == self.real_nickname:
+ self.real_nickname = arguments[0]
+ elif command == "welcome":
+ # Record the nickname in case the client changed nick
+ # in a nicknameinuse callback.
+ self.real_nickname = arguments[0]
+
+ if command in ["privmsg", "notice"]:
+ target, message = arguments[0], arguments[1]
+ messages = _ctcp_dequote(message)
+
+ if command == "privmsg":
+ if is_channel(target):
+ command = "pubmsg"
+ else:
+ if is_channel(target):
+ command = "pubnotice"
+ else:
+ command = "privnotice"
+
+ for m in messages:
+ if type(m) is types.TupleType:
+ if command in ["privmsg", "pubmsg"]:
+ command = "ctcp"
+ else:
+ command = "ctcpreply"
+
+ m = list(m)
+ if DEBUG:
+ print "command: %s, source: %s, target: %s, arguments: %s" % (
+ command, prefix, target, m)
+ self._handle_event(Event(command, prefix, target, m))
+ if command == "ctcp" and m[0] == "ACTION":
+ self._handle_event(Event("action", prefix, target, m[1:]))
+ else:
+ if DEBUG:
+ print "command: %s, source: %s, target: %s, arguments: %s" % (
+ command, prefix, target, [m])
+ self._handle_event(Event(command, prefix, target, [m]))
+ else:
+ target = None
+
+ if command == "quit":
+ arguments = [arguments[0]]
+ elif command == "ping":
+ target = arguments[0]
+ else:
+ target = arguments[0]
+ arguments = arguments[1:]
+
+ if command == "mode":
+ if not is_channel(target):
+ command = "umode"
+
+ if DEBUG:
+ print "command: %s, source: %s, target: %s, arguments: %s" % (
+ command, prefix, target, arguments)
+ self._handle_event(Event(command, prefix, target, arguments))
+
+ def _handle_event(self, event):
+ """[Internal]"""
+ self.irclibobj._handle_event(self, event)
+ if event.eventtype() in self.handlers:
+ for fn in self.handlers[event.eventtype()]:
+ fn(self, event)
+
+ def is_connected(self):
+ """Return connection status.
+
+ Returns true if connected, otherwise false.
+ """
+ return self.connected
+
+ def add_global_handler(self, *args):
+ """Add global handler.
+
+ See documentation for IRC.add_global_handler.
+ """
+ self.irclibobj.add_global_handler(*args)
+
+ def remove_global_handler(self, *args):
+ """Remove global handler.
+
+ See documentation for IRC.remove_global_handler.
+ """
+ self.irclibobj.remove_global_handler(*args)
+
+ def action(self, target, action):
+ """Send a CTCP ACTION command."""
+ self.ctcp("ACTION", target, action)
+
+ def admin(self, server=""):
+ """Send an ADMIN command."""
+ self.send_raw(" ".join(["ADMIN", server]).strip())
+
+ def ctcp(self, ctcptype, target, parameter=""):
+ """Send a CTCP command."""
+ ctcptype = ctcptype.upper()
+ self.privmsg(target, "\001%s%s\001" % (ctcptype, parameter and (" " + parameter) or ""))
+
+ def ctcp_reply(self, target, parameter):
+ """Send a CTCP REPLY command."""
+ self.notice(target, "\001%s\001" % parameter)
+
+ def disconnect(self, message=""):
+ """Hang up the connection.
+
+ Arguments:
+
+ message -- Quit message.
+ """
+ if not self.connected:
+ return
+
+ self.connected = 0
+
+ self.quit(message)
+
+ try:
+ self.socket.close()
+ except socket.error, x:
+ pass
+ self.socket = None
+ self._handle_event(Event("disconnect", self.server, "", [message]))
+
+ def globops(self, text):
+ """Send a GLOBOPS command."""
+ self.send_raw("GLOBOPS :" + text)
+
+ def info(self, server=""):
+ """Send an INFO command."""
+ self.send_raw(" ".join(["INFO", server]).strip())
+
+ def invite(self, nick, channel):
+ """Send an INVITE command."""
+ self.send_raw(" ".join(["INVITE", nick, channel]).strip())
+
+ def ison(self, nicks):
+ """Send an ISON command.
+
+ Arguments:
+
+ nicks -- List of nicks.
+ """
+ self.send_raw("ISON " + " ".join(nicks))
+
+ def join(self, channel, key=""):
+ """Send a JOIN command."""
+ self.send_raw("JOIN %s%s" % (channel, (key and (" " + key))))
+
+ def kick(self, channel, nick, comment=""):
+ """Send a KICK command."""
+ self.send_raw("KICK %s %s%s" % (channel, nick, (comment and (" :" + comment))))
+
+ def links(self, remote_server="", server_mask=""):
+ """Send a LINKS command."""
+ command = "LINKS"
+ if remote_server:
+ command = command + " " + remote_server
+ if server_mask:
+ command = command + " " + server_mask
+ self.send_raw(command)
+
+ def list(self, channels=None, server=""):
+ """Send a LIST command."""
+ command = "LIST"
+ if channels:
+ command = command + " " + ",".join(channels)
+ if server:
+ command = command + " " + server
+ self.send_raw(command)
+
+ def lusers(self, server=""):
+ """Send a LUSERS command."""
+ self.send_raw("LUSERS" + (server and (" " + server)))
+
+ def mode(self, target, command):
+ """Send a MODE command."""
+ self.send_raw("MODE %s %s" % (target, command))
+
+ def motd(self, server=""):
+ """Send an MOTD command."""
+ self.send_raw("MOTD" + (server and (" " + server)))
+
+ def names(self, channels=None):
+ """Send a NAMES command."""
+ self.send_raw("NAMES" + (channels and (" " + ",".join(channels)) or ""))
+
+ def nick(self, newnick):
+ """Send a NICK command."""
+ self.send_raw("NICK " + newnick)
+
+ def notice(self, target, text):
+ """Send a NOTICE command."""
+ # Should limit len(text) here!
+ self.send_raw("NOTICE %s :%s" % (target, text))
+
+ def oper(self, nick, password):
+ """Send an OPER command."""
+ self.send_raw("OPER %s %s" % (nick, password))
+
+ def part(self, channels, message=""):
+ """Send a PART command."""
+ if type(channels) == types.StringType:
+ self.send_raw("PART " + channels + (message and (" " + message)))
+ else:
+ self.send_raw("PART " + ",".join(channels) + (message and (" " + message)))
+
+ def pass_(self, password):
+ """Send a PASS command."""
+ self.send_raw("PASS " + password)
+
+ def ping(self, target, target2=""):
+ """Send a PING command."""
+ self.send_raw("PING %s%s" % (target, target2 and (" " + target2)))
+
+ def pong(self, target, target2=""):
+ """Send a PONG command."""
+ self.send_raw("PONG %s%s" % (target, target2 and (" " + target2)))
+
+ def privmsg(self, target, text):
+ """Send a PRIVMSG command."""
+ # Should limit len(text) here!
+ self.send_raw("PRIVMSG %s :%s" % (target, text))
+
+ def privmsg_many(self, targets, text):
+ """Send a PRIVMSG command to multiple targets."""
+ # Should limit len(text) here!
+ self.send_raw("PRIVMSG %s :%s" % (",".join(targets), text))
+
+ def quit(self, message=""):
+ """Send a QUIT command."""
+ # Note that many IRC servers don't use your QUIT message
+ # unless you've been connected for at least 5 minutes!
+ self.send_raw("QUIT" + (message and (" :" + message)))
+
+ def send_raw(self, string):
+ """Send raw string to the server.
+
+ The string will be padded with appropriate CR LF.
+ """
+ if self.socket is None:
+ raise ServerNotConnectedError, "Not connected."
+ try:
+ if self.ssl:
+ self.ssl.write(string + "\r\n")
+ else:
+ self.socket.send(string + "\r\n")
+ if DEBUG:
+ print "TO SERVER:", string
+ except socket.error, x:
+ # Ouch!
+ self.disconnect("Connection reset by peer.")
+
+ def squit(self, server, comment=""):
+ """Send an SQUIT command."""
+ self.send_raw("SQUIT %s%s" % (server, comment and (" :" + comment)))
+
+ def stats(self, statstype, server=""):
+ """Send a STATS command."""
+ self.send_raw("STATS %s%s" % (statstype, server and (" " + server)))
+
+ def time(self, server=""):
+ """Send a TIME command."""
+ self.send_raw("TIME" + (server and (" " + server)))
+
+ def topic(self, channel, new_topic=None):
+ """Send a TOPIC command."""
+ if new_topic is None:
+ self.send_raw("TOPIC " + channel)
+ else:
+ self.send_raw("TOPIC %s :%s" % (channel, new_topic))
+
+ def trace(self, target=""):
+ """Send a TRACE command."""
+ self.send_raw("TRACE" + (target and (" " + target)))
+
+ def user(self, username, realname):
+ """Send a USER command."""
+ self.send_raw("USER %s 0 * :%s" % (username, realname))
+
+ def userhost(self, nicks):
+ """Send a USERHOST command."""
+ self.send_raw("USERHOST " + ",".join(nicks))
+
+ def users(self, server=""):
+ """Send a USERS command."""
+ self.send_raw("USERS" + (server and (" " + server)))
+
+ def version(self, server=""):
+ """Send a VERSION command."""
+ self.send_raw("VERSION" + (server and (" " + server)))
+
+ def wallops(self, text):
+ """Send a WALLOPS command."""
+ self.send_raw("WALLOPS :" + text)
+
+ def who(self, target="", op=""):
+ """Send a WHO command."""
+ self.send_raw("WHO%s%s" % (target and (" " + target), op and (" o")))
+
+ def whois(self, targets):
+ """Send a WHOIS command."""
+ self.send_raw("WHOIS " + ",".join(targets))
+
+ def whowas(self, nick, max="", server=""):
+ """Send a WHOWAS command."""
+ self.send_raw("WHOWAS %s%s%s" % (nick,
+ max and (" " + max),
+ server and (" " + server)))
+
+class DCCConnectionError(IRCError):
+ pass
+
+
+class DCCConnection(Connection):
+ """This class represents a DCC connection.
+
+ DCCConnection objects are instantiated by calling the dcc
+ method on an IRC object.
+ """
+ def __init__(self, irclibobj, dcctype):
+ Connection.__init__(self, irclibobj)
+ self.connected = 0
+ self.passive = 0
+ self.dcctype = dcctype
+ self.peeraddress = None
+ self.peerport = None
+
+ def connect(self, address, port):
+ """Connect/reconnect to a DCC peer.
+
+ Arguments:
+ address -- Host/IP address of the peer.
+
+ port -- The port number to connect to.
+
+ Returns the DCCConnection object.
+ """
+ self.peeraddress = socket.gethostbyname(address)
+ self.peerport = port
+ self.socket = None
+ self.previous_buffer = ""
+ self.handlers = {}
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.passive = 0
+ try:
+ self.socket.connect((self.peeraddress, self.peerport))
+ except socket.error, x:
+ raise DCCConnectionError, "Couldn't connect to socket: %s" % x
+ self.connected = 1
+ if self.irclibobj.fn_to_add_socket:
+ self.irclibobj.fn_to_add_socket(self.socket)
+ return self
+
+ def listen(self):
+ """Wait for a connection/reconnection from a DCC peer.
+
+ Returns the DCCConnection object.
+
+ The local IP address and port are available as
+ self.localaddress and self.localport. After connection from a
+ peer, the peer address and port are available as
+ self.peeraddress and self.peerport.
+ """
+ self.previous_buffer = ""
+ self.handlers = {}
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.passive = 1
+ try:
+ self.socket.bind((socket.gethostbyname(socket.gethostname()), 0))
+ self.localaddress, self.localport = self.socket.getsockname()
+ self.socket.listen(10)
+ except socket.error, x:
+ raise DCCConnectionError, "Couldn't bind socket: %s" % x
+ return self
+
+ def disconnect(self, message=""):
+ """Hang up the connection and close the object.
+
+ Arguments:
+
+ message -- Quit message.
+ """
+ if not self.connected:
+ return
+
+ self.connected = 0
+ try:
+ self.socket.close()
+ except socket.error, x:
+ pass
+ self.socket = None
+ self.irclibobj._handle_event(
+ self,
+ Event("dcc_disconnect", self.peeraddress, "", [message]))
+ self.irclibobj._remove_connection(self)
+
+ def process_data(self):
+ """[Internal]"""
+
+ if self.passive and not self.connected:
+ conn, (self.peeraddress, self.peerport) = self.socket.accept()
+ self.socket.close()
+ self.socket = conn
+ self.connected = 1
+ if DEBUG:
+ print "DCC connection from %s:%d" % (
+ self.peeraddress, self.peerport)
+ self.irclibobj._handle_event(
+ self,
+ Event("dcc_connect", self.peeraddress, None, None))
+ return
+
+ try:
+ new_data = self.socket.recv(2**14)
+ except socket.error, x:
+ # The server hung up.
+ self.disconnect("Connection reset by peer")
+ return
+ if not new_data:
+ # Read nothing: connection must be down.
+ self.disconnect("Connection reset by peer")
+ return
+
+ if self.dcctype == "chat":
+ # The specification says lines are terminated with LF, but
+ # it seems safer to handle CR LF terminations too.
+ chunks = _linesep_regexp.split(self.previous_buffer + new_data)
+
+ # Save the last, unfinished line.
+ self.previous_buffer = chunks[-1]
+ if len(self.previous_buffer) > 2**14:
+ # Bad peer! Naughty peer!
+ self.disconnect()
+ return
+ chunks = chunks[:-1]
+ else:
+ chunks = [new_data]
+
+ command = "dccmsg"
+ prefix = self.peeraddress
+ target = None
+ for chunk in chunks:
+ if DEBUG:
+ print "FROM PEER:", chunk
+ arguments = [chunk]
+ if DEBUG:
+ print "command: %s, source: %s, target: %s, arguments: %s" % (
+ command, prefix, target, arguments)
+ self.irclibobj._handle_event(
+ self,
+ Event(command, prefix, target, arguments))
+
+ def _get_socket(self):
+ """[Internal]"""
+ return self.socket
+
+ def privmsg(self, string):
+ """Send data to DCC peer.
+
+ The string will be padded with appropriate LF if it's a DCC
+ CHAT session.
+ """
+ try:
+ self.socket.send(string)
+ if self.dcctype == "chat":
+ self.socket.send("\n")
+ if DEBUG:
+ print "TO PEER: %s\n" % string
+ except socket.error, x:
+ # Ouch!
+ self.disconnect("Connection reset by peer.")
+
+class SimpleIRCClient:
+ """A simple single-server IRC client class.
+
+ This is an example of an object-oriented wrapper of the IRC
+ framework. A real IRC client can be made by subclassing this
+ class and adding appropriate methods.
+
+ The method on_join will be called when a "join" event is created
+    (which is done when the server sends a JOIN message/command),
+ on_privmsg will be called for "privmsg" events, and so on. The
+ handler methods get two arguments: the connection object (same as
+ self.connection) and the event object.
+
+    Instance attributes that can be used by subclasses:
+
+ ircobj -- The IRC instance.
+
+ connection -- The ServerConnection instance.
+
+ dcc_connections -- A list of DCCConnection instances.
+ """
+ def __init__(self):
+ self.ircobj = IRC()
+ self.connection = self.ircobj.server()
+ self.dcc_connections = []
+ self.ircobj.add_global_handler("all_events", self._dispatcher, -10)
+ self.ircobj.add_global_handler("dcc_disconnect", self._dcc_disconnect, -10)
+
+ def _dispatcher(self, c, e):
+ """[Internal]"""
+ m = "on_" + e.eventtype()
+ if hasattr(self, m):
+ getattr(self, m)(c, e)
+
+ def _dcc_disconnect(self, c, e):
+ self.dcc_connections.remove(c)
+
+ def connect(self, server, port, nickname, password=None, username=None,
+ ircname=None, localaddress="", localport=0, ssl=False, ipv6=False):
+ """Connect/reconnect to a server.
+
+ Arguments:
+
+ server -- Server name.
+
+ port -- Port number.
+
+ nickname -- The nickname.
+
+ password -- Password (if any).
+
+ username -- The username.
+
+ ircname -- The IRC name.
+
+ localaddress -- Bind the connection to a specific local IP address.
+
+ localport -- Bind the connection to a specific local port.
+
+ ssl -- Enable support for ssl.
+
+ ipv6 -- Enable support for ipv6.
+
+ This function can be called to reconnect a closed connection.
+ """
+ self.connection.connect(server, port, nickname,
+ password, username, ircname,
+ localaddress, localport, ssl, ipv6)
+
+ def dcc_connect(self, address, port, dcctype="chat"):
+ """Connect to a DCC peer.
+
+ Arguments:
+
+ address -- IP address of the peer.
+
+ port -- Port to connect to.
+
+ Returns a DCCConnection instance.
+ """
+ dcc = self.ircobj.dcc(dcctype)
+ self.dcc_connections.append(dcc)
+ dcc.connect(address, port)
+ return dcc
+
+ def dcc_listen(self, dcctype="chat"):
+ """Listen for connections from a DCC peer.
+
+ Returns a DCCConnection instance.
+ """
+ dcc = self.ircobj.dcc(dcctype)
+ self.dcc_connections.append(dcc)
+ dcc.listen()
+ return dcc
+
+ def start(self):
+ """Start the IRC client."""
+ self.ircobj.process_forever()
+
+
+class Event:
+ """Class representing an IRC event."""
+ def __init__(self, eventtype, source, target, arguments=None):
+ """Constructor of Event objects.
+
+ Arguments:
+
+ eventtype -- A string describing the event.
+
+ source -- The originator of the event (a nick mask or a server).
+
+ target -- The target of the event (a nick or a channel).
+
+ arguments -- Any event specific arguments.
+ """
+ self._eventtype = eventtype
+ self._source = source
+ self._target = target
+ if arguments:
+ self._arguments = arguments
+ else:
+ self._arguments = []
+
+ def eventtype(self):
+ """Get the event type."""
+ return self._eventtype
+
+ def source(self):
+ """Get the event source."""
+ return self._source
+
+ def target(self):
+ """Get the event target."""
+ return self._target
+
+ def arguments(self):
+ """Get the event arguments."""
+ return self._arguments
+
+_LOW_LEVEL_QUOTE = "\020"
+_CTCP_LEVEL_QUOTE = "\134"
+_CTCP_DELIMITER = "\001"
+
+_low_level_mapping = {
+ "0": "\000",
+ "n": "\n",
+ "r": "\r",
+ _LOW_LEVEL_QUOTE: _LOW_LEVEL_QUOTE
+}
+
+_low_level_regexp = re.compile(_LOW_LEVEL_QUOTE + "(.)")
+
+def mask_matches(nick, mask):
+ """Check if a nick matches a mask.
+
+ Returns true if the nick matches, otherwise false.
+ """
+ nick = irc_lower(nick)
+ mask = irc_lower(mask)
+ mask = mask.replace("\\", "\\\\")
+ for ch in ".$|[](){}+":
+ mask = mask.replace(ch, "\\" + ch)
+ mask = mask.replace("?", ".")
+ mask = mask.replace("*", ".*")
+ r = re.compile(mask, re.IGNORECASE)
+ return r.match(nick)
+
+_special = "-[]\\`^{}"
+nick_characters = string.ascii_letters + string.digits + _special
+_ircstring_translation = string.maketrans(string.ascii_uppercase + "[]\\^",
+ string.ascii_lowercase + "{}|~")
+
+def irc_lower(s):
+ """Returns a lowercased string.
+
+ The definition of lowercased comes from the IRC specification (RFC
+ 1459).
+ """
+ return s.translate(_ircstring_translation)
+
+def _ctcp_dequote(message):
+ """[Internal] Dequote a message according to CTCP specifications.
+
+ The function returns a list where each element can be either a
+ string (normal message) or a tuple of one or two strings (tagged
+    messages). If a tuple has only one element (i.e. is a singleton),
+ that element is the tag; otherwise the tuple has two elements: the
+ tag and the data.
+
+ Arguments:
+
+ message -- The message to be decoded.
+ """
+
+ def _low_level_replace(match_obj):
+ ch = match_obj.group(1)
+
+ # If low_level_mapping doesn't have the character as key, we
+ # should just return the character.
+ return _low_level_mapping.get(ch, ch)
+
+ if _LOW_LEVEL_QUOTE in message:
+ # Yup, there was a quote. Release the dequoter, man!
+ message = _low_level_regexp.sub(_low_level_replace, message)
+
+ if _CTCP_DELIMITER not in message:
+ return [message]
+ else:
+ # Split it into parts. (Does any IRC client actually *use*
+ # CTCP stacking like this?)
+ chunks = message.split(_CTCP_DELIMITER)
+
+ messages = []
+ i = 0
+ while i < len(chunks)-1:
+ # Add message if it's non-empty.
+ if len(chunks[i]) > 0:
+ messages.append(chunks[i])
+
+ if i < len(chunks)-2:
+ # Aye! CTCP tagged data ahead!
+ messages.append(tuple(chunks[i+1].split(" ", 1)))
+
+ i = i + 2
+
+ if len(chunks) % 2 == 0:
+ # Hey, a lonely _CTCP_DELIMITER at the end! This means
+ # that the last chunk, including the delimiter, is a
+ # normal message! (This is according to the CTCP
+ # specification.)
+ messages.append(_CTCP_DELIMITER + chunks[-1])
+
+ return messages
+
+def is_channel(string):
+ """Check if a string is a channel name.
+
+ Returns true if the argument is a channel name, otherwise false.
+ """
+ return string and string[0] in "#&+!"
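+# e.g. is_channel("#webkit") is truthy; is_channel("webkit") is not.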
+
+def ip_numstr_to_quad(num):
+ """Convert an IP number as an integer given in ASCII
+ representation (e.g. '3232235521') to an IP address string
+ (e.g. '192.168.0.1')."""
+ n = long(num)
+ p = map(str, map(int, [n >> 24 & 0xFF, n >> 16 & 0xFF,
+ n >> 8 & 0xFF, n & 0xFF]))
+ return ".".join(p)
+
+def ip_quad_to_numstr(quad):
+ """Convert an IP address string (e.g. '192.168.0.1') to an IP
+ number as an integer given in ASCII representation
+ (e.g. '3232235521')."""
+ p = map(long, quad.split("."))
+ s = str((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
+ if s[-1] == "L":
+ s = s[:-1]
+ return s
+
+def nm_to_n(s):
+ """Get the nick part of a nickmask.
+
+ (The source of an Event is a nickmask.)
+ """
+ return s.split("!")[0]
+
+def nm_to_uh(s):
+ """Get the userhost part of a nickmask.
+
+ (The source of an Event is a nickmask.)
+ """
+ return s.split("!")[1]
+
+def nm_to_h(s):
+ """Get the host part of a nickmask.
+
+ (The source of an Event is a nickmask.)
+ """
+ return s.split("@")[1]
+
+def nm_to_u(s):
+ """Get the user part of a nickmask.
+
+ (The source of an Event is a nickmask.)
+ """
+ s = s.split("!")[1]
+ return s.split("@")[0]
+
+def parse_nick_modes(mode_string):
+ """Parse a nick mode string.
+
+ The function returns a list of lists with three members: sign,
+ mode and argument. The sign is \"+\" or \"-\". The argument is
+ always None.
+
+ Example:
+
+ >>> irclib.parse_nick_modes(\"+ab-c\")
+ [['+', 'a', None], ['+', 'b', None], ['-', 'c', None]]
+ """
+
+ return _parse_modes(mode_string, "")
+
+def parse_channel_modes(mode_string):
+ """Parse a channel mode string.
+
+ The function returns a list of lists with three members: sign,
+ mode and argument. The sign is \"+\" or \"-\". The argument is
+ None if mode isn't one of \"b\", \"k\", \"l\", \"v\" or \"o\".
+
+ Example:
+
+ >>> irclib.parse_channel_modes(\"+ab-c foo\")
+ [['+', 'a', None], ['+', 'b', 'foo'], ['-', 'c', None]]
+ """
+
+ return _parse_modes(mode_string, "bklvo")
+
+def _parse_modes(mode_string, unary_modes=""):
+ """[Internal]"""
+ modes = []
+ arg_count = 0
+
+ # State variable.
+ sign = ""
+
+ a = mode_string.split()
+ if len(a) == 0:
+ return []
+ else:
+ mode_part, args = a[0], a[1:]
+
+ if mode_part[0] not in "+-":
+ return []
+ for ch in mode_part:
+ if ch in "+-":
+ sign = ch
+ elif ch == " ":
+ collecting_arguments = 1
+ elif ch in unary_modes:
+ if len(args) >= arg_count + 1:
+ modes.append([sign, ch, args[arg_count]])
+ arg_count = arg_count + 1
+ else:
+ modes.append([sign, ch, None])
+ else:
+ modes.append([sign, ch, None])
+ return modes
+
+def _ping_ponger(connection, event):
+ """[Internal]"""
+ connection.pong(event.target())
+
+# Numeric table mostly stolen from the Perl IRC module (Net::IRC).
+numeric_events = {
+ "001": "welcome",
+ "002": "yourhost",
+ "003": "created",
+ "004": "myinfo",
+ "005": "featurelist", # XXX
+ "200": "tracelink",
+ "201": "traceconnecting",
+ "202": "tracehandshake",
+ "203": "traceunknown",
+ "204": "traceoperator",
+ "205": "traceuser",
+ "206": "traceserver",
+ "207": "traceservice",
+ "208": "tracenewtype",
+ "209": "traceclass",
+ "210": "tracereconnect",
+ "211": "statslinkinfo",
+ "212": "statscommands",
+ "213": "statscline",
+ "214": "statsnline",
+ "215": "statsiline",
+ "216": "statskline",
+ "217": "statsqline",
+ "218": "statsyline",
+ "219": "endofstats",
+ "221": "umodeis",
+ "231": "serviceinfo",
+ "232": "endofservices",
+ "233": "service",
+ "234": "servlist",
+ "235": "servlistend",
+ "241": "statslline",
+ "242": "statsuptime",
+ "243": "statsoline",
+ "244": "statshline",
+ "250": "luserconns",
+ "251": "luserclient",
+ "252": "luserop",
+ "253": "luserunknown",
+ "254": "luserchannels",
+ "255": "luserme",
+ "256": "adminme",
+ "257": "adminloc1",
+ "258": "adminloc2",
+ "259": "adminemail",
+ "261": "tracelog",
+ "262": "endoftrace",
+ "263": "tryagain",
+ "265": "n_local",
+ "266": "n_global",
+ "300": "none",
+ "301": "away",
+ "302": "userhost",
+ "303": "ison",
+ "305": "unaway",
+ "306": "nowaway",
+ "311": "whoisuser",
+ "312": "whoisserver",
+ "313": "whoisoperator",
+ "314": "whowasuser",
+ "315": "endofwho",
+ "316": "whoischanop",
+ "317": "whoisidle",
+ "318": "endofwhois",
+ "319": "whoischannels",
+ "321": "liststart",
+ "322": "list",
+ "323": "listend",
+ "324": "channelmodeis",
+ "329": "channelcreate",
+ "331": "notopic",
+ "332": "currenttopic",
+ "333": "topicinfo",
+ "341": "inviting",
+ "342": "summoning",
+ "346": "invitelist",
+ "347": "endofinvitelist",
+ "348": "exceptlist",
+ "349": "endofexceptlist",
+ "351": "version",
+ "352": "whoreply",
+ "353": "namreply",
+ "361": "killdone",
+ "362": "closing",
+ "363": "closeend",
+ "364": "links",
+ "365": "endoflinks",
+ "366": "endofnames",
+ "367": "banlist",
+ "368": "endofbanlist",
+ "369": "endofwhowas",
+ "371": "info",
+ "372": "motd",
+ "373": "infostart",
+ "374": "endofinfo",
+ "375": "motdstart",
+ "376": "endofmotd",
+ "377": "motd2", # 1997-10-16 -- tkil
+ "381": "youreoper",
+ "382": "rehashing",
+ "384": "myportis",
+ "391": "time",
+ "392": "usersstart",
+ "393": "users",
+ "394": "endofusers",
+ "395": "nousers",
+ "401": "nosuchnick",
+ "402": "nosuchserver",
+ "403": "nosuchchannel",
+ "404": "cannotsendtochan",
+ "405": "toomanychannels",
+ "406": "wasnosuchnick",
+ "407": "toomanytargets",
+ "409": "noorigin",
+ "411": "norecipient",
+ "412": "notexttosend",
+ "413": "notoplevel",
+ "414": "wildtoplevel",
+ "421": "unknowncommand",
+ "422": "nomotd",
+ "423": "noadmininfo",
+ "424": "fileerror",
+ "431": "nonicknamegiven",
+ "432": "erroneusnickname", # Thiss iz how its speld in thee RFC.
+ "433": "nicknameinuse",
+ "436": "nickcollision",
+ "437": "unavailresource", # "Nick temporally unavailable"
+ "441": "usernotinchannel",
+ "442": "notonchannel",
+ "443": "useronchannel",
+ "444": "nologin",
+ "445": "summondisabled",
+ "446": "usersdisabled",
+ "451": "notregistered",
+ "461": "needmoreparams",
+ "462": "alreadyregistered",
+ "463": "nopermforhost",
+ "464": "passwdmismatch",
+ "465": "yourebannedcreep", # I love this one...
+ "466": "youwillbebanned",
+ "467": "keyset",
+ "471": "channelisfull",
+ "472": "unknownmode",
+ "473": "inviteonlychan",
+ "474": "bannedfromchan",
+ "475": "badchannelkey",
+ "476": "badchanmask",
+ "477": "nochanmodes", # "Channel doesn't support modes"
+ "478": "banlistfull",
+ "481": "noprivileges",
+ "482": "chanoprivsneeded",
+ "483": "cantkillserver",
+ "484": "restricted", # Connection is restricted
+ "485": "uniqopprivsneeded",
+ "491": "nooperhost",
+ "492": "noservicehost",
+ "501": "umodeunknownflag",
+ "502": "usersdontmatch",
+}
+
+generated_events = [
+ # Generated events
+ "dcc_connect",
+ "dcc_disconnect",
+ "dccmsg",
+ "disconnect",
+ "ctcp",
+ "ctcpreply",
+]
+
+protocol_events = [
+ # IRC protocol events
+ "error",
+ "join",
+ "kick",
+ "mode",
+ "part",
+ "ping",
+ "privmsg",
+ "privnotice",
+ "pubmsg",
+ "pubnotice",
+ "quit",
+ "invite",
+ "pong",
+]
+
+all_events = generated_events + protocol_events + numeric_events.values()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py
new file mode 100644
index 0000000..4bb20aa
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py
@@ -0,0 +1,140 @@
+__all__ = [
+ 'AbstractBasicAuthHandler',
+ 'AbstractDigestAuthHandler',
+ 'BaseHandler',
+ 'Browser',
+ 'BrowserStateError',
+ 'CacheFTPHandler',
+ 'ContentTooShortError',
+ 'Cookie',
+ 'CookieJar',
+ 'CookiePolicy',
+ 'DefaultCookiePolicy',
+ 'DefaultFactory',
+ 'FTPHandler',
+ 'Factory',
+ 'FileCookieJar',
+ 'FileHandler',
+ 'FormNotFoundError',
+ 'FormsFactory',
+ 'HTTPBasicAuthHandler',
+ 'HTTPCookieProcessor',
+ 'HTTPDefaultErrorHandler',
+ 'HTTPDigestAuthHandler',
+ 'HTTPEquivProcessor',
+ 'HTTPError',
+ 'HTTPErrorProcessor',
+ 'HTTPHandler',
+ 'HTTPPasswordMgr',
+ 'HTTPPasswordMgrWithDefaultRealm',
+ 'HTTPProxyPasswordMgr',
+ 'HTTPRedirectDebugProcessor',
+ 'HTTPRedirectHandler',
+ 'HTTPRefererProcessor',
+ 'HTTPRefreshProcessor',
+ 'HTTPRequestUpgradeProcessor',
+ 'HTTPResponseDebugProcessor',
+ 'HTTPRobotRulesProcessor',
+ 'HTTPSClientCertMgr',
+ 'HTTPSHandler',
+ 'HeadParser',
+ 'History',
+ 'LWPCookieJar',
+ 'Link',
+ 'LinkNotFoundError',
+ 'LinksFactory',
+ 'LoadError',
+ 'MSIECookieJar',
+ 'MozillaCookieJar',
+ 'OpenerDirector',
+ 'OpenerFactory',
+ 'ParseError',
+ 'ProxyBasicAuthHandler',
+ 'ProxyDigestAuthHandler',
+ 'ProxyHandler',
+ 'Request',
+ 'ResponseUpgradeProcessor',
+ 'RobotExclusionError',
+ 'RobustFactory',
+ 'RobustFormsFactory',
+ 'RobustLinksFactory',
+ 'RobustTitleFactory',
+ 'SeekableProcessor',
+ 'SeekableResponseOpener',
+ 'TitleFactory',
+ 'URLError',
+ 'USE_BARE_EXCEPT',
+ 'UnknownHandler',
+ 'UserAgent',
+ 'UserAgentBase',
+ 'XHTMLCompatibleHeadParser',
+ '__version__',
+ 'build_opener',
+ 'install_opener',
+ 'lwp_cookie_str',
+ 'make_response',
+ 'request_host',
+ 'response_seek_wrapper', # XXX deprecate in public interface?
+ 'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper()
+ 'str2time',
+ 'urlopen',
+ 'urlretrieve']
+
+import logging
+import sys
+
+from _mechanize import __version__
+
+# high-level stateful browser-style interface
+from _mechanize import \
+ Browser, History, \
+ BrowserStateError, LinkNotFoundError, FormNotFoundError
+
+# configurable URL-opener interface
+from _useragent import UserAgentBase, UserAgent
+from _html import \
+ ParseError, \
+ Link, \
+ Factory, DefaultFactory, RobustFactory, \
+ FormsFactory, LinksFactory, TitleFactory, \
+ RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
+
+# urllib2 work-alike interface (part from mechanize, part from urllib2)
+# This is a superset of the urllib2 interface.
+from _urllib2 import *
+
+# misc
+from _opener import ContentTooShortError, OpenerFactory, urlretrieve
+from _util import http2time as str2time
+from _response import \
+ response_seek_wrapper, seek_wrapped_response, make_response
+from _http import HeadParser
+try:
+ from _http import XHTMLCompatibleHeadParser
+except ImportError:
+ pass
+
+# cookies
+from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
+ CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \
+ effective_request_host
+from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
+# 2.4 raises SyntaxError due to generator / try/finally use
+if sys.version_info[:2] > (2,4):
+ try:
+ import sqlite3
+ except ImportError:
+ pass
+ else:
+ from _firefox3cookiejar import Firefox3CookieJar
+from _mozillacookiejar import MozillaCookieJar
+from _msiecookiejar import MSIECookieJar
+
+# If you hate the idea of turning bugs into warnings, do:
+# import mechanize; mechanize.USE_BARE_EXCEPT = False
+USE_BARE_EXCEPT = True
+
+logger = logging.getLogger("mechanize")
+if logger.level is logging.NOTSET:
+ logger.setLevel(logging.CRITICAL)
+del logger
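+
+# Illustrative usage sketch (not part of this file): the high-level
+# stateful interface re-exported above.
+#
+# import mechanize
+# br = mechanize.Browser()
+# response = br.open("http://example.com/")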
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py
new file mode 100644
index 0000000..232f7d8
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py
@@ -0,0 +1,522 @@
+"""HTTP Authentication and Proxy support.
+
+All but HTTPProxyPasswordMgr come from Python 2.5.
+
+
+Copyright 2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import base64
+import copy
+import os
+import posixpath
+import random
+import re
+import time
+import urlparse
+
+try:
+ import hashlib
+except ImportError:
+ import md5
+ import sha
+ def sha1_digest(bytes):
+ return sha.new(bytes).hexdigest()
+ def md5_digest(bytes):
+ return md5.new(bytes).hexdigest()
+else:
+ def sha1_digest(bytes):
+ return hashlib.sha1(bytes).hexdigest()
+ def md5_digest(bytes):
+ return hashlib.md5(bytes).hexdigest()
+
+from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
+from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
+ splitport
+
+
+def _parse_proxy(proxy):
+ """Return (scheme, user, password, host/port) given a URL or an authority.
+
+ If a URL is supplied, it must have an authority (host:port) component.
+ According to RFC 3986, having an authority component means the URL must
+ have two slashes after the scheme:
+
+ >>> _parse_proxy('file:/ftp.example.com/')
+ Traceback (most recent call last):
+ ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+ The first three items of the returned tuple may be None.
+
+ Examples of authority parsing:
+
+ >>> _parse_proxy('proxy.example.com')
+ (None, None, None, 'proxy.example.com')
+ >>> _parse_proxy('proxy.example.com:3128')
+ (None, None, None, 'proxy.example.com:3128')
+
+ The authority component may optionally include userinfo (assumed to be
+ username:password):
+
+ >>> _parse_proxy('joe:password@proxy.example.com')
+ (None, 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('joe:password@proxy.example.com:3128')
+ (None, 'joe', 'password', 'proxy.example.com:3128')
+
+ Same examples, but with URLs instead:
+
+ >>> _parse_proxy('http://proxy.example.com/')
+ ('http', None, None, 'proxy.example.com')
+ >>> _parse_proxy('http://proxy.example.com:3128/')
+ ('http', None, None, 'proxy.example.com:3128')
+ >>> _parse_proxy('http://joe:password@proxy.example.com/')
+ ('http', 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
+ ('http', 'joe', 'password', 'proxy.example.com:3128')
+
+ Everything after the authority is ignored:
+
+ >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
+ ('ftp', 'joe', 'password', 'proxy.example.com')
+
+ Test for no trailing '/' case:
+
+ >>> _parse_proxy('http://joe:password@proxy.example.com')
+ ('http', 'joe', 'password', 'proxy.example.com')
+
+ """
+ scheme, r_scheme = splittype(proxy)
+ if not r_scheme.startswith("/"):
+ # authority
+ scheme = None
+ authority = proxy
+ else:
+ # URL
+ if not r_scheme.startswith("//"):
+ raise ValueError("proxy URL with no authority: %r" % proxy)
+ # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2.2
+ # and 3.3.), path is empty or starts with '/'
+ end = r_scheme.find("/", 2)
+ if end == -1:
+ end = None
+ authority = r_scheme[2:end]
+ userinfo, hostport = splituser(authority)
+ if userinfo is not None:
+ user, password = splitpasswd(userinfo)
+ else:
+ user = password = None
+ return scheme, user, password, hostport
+
+class ProxyHandler(BaseHandler):
+ # Proxies must be in front
+ handler_order = 100
+
+ def __init__(self, proxies=None):
+ if proxies is None:
+ proxies = getproxies()
+ assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
+ self.proxies = proxies
+ for type, url in proxies.items():
+ setattr(self, '%s_open' % type,
+ lambda r, proxy=url, type=type, meth=self.proxy_open: \
+ meth(r, proxy, type))
+
+ def proxy_open(self, req, proxy, type):
+ orig_type = req.get_type()
+ proxy_type, user, password, hostport = _parse_proxy(proxy)
+ if proxy_type is None:
+ proxy_type = orig_type
+ if user and password:
+ user_pass = '%s:%s' % (unquote(user), unquote(password))
+ creds = base64.encodestring(user_pass).strip()
+ req.add_header('Proxy-authorization', 'Basic ' + creds)
+ hostport = unquote(hostport)
+ req.set_proxy(hostport, proxy_type)
+ if orig_type == proxy_type:
+ # let other handlers take care of it
+ return None
+ else:
+ # need to start over, because the other handlers don't
+ # grok the proxy's URL type
+ # e.g. if we have a constructor arg proxies like so:
+ # {'http': 'ftp://proxy.example.com'}, we may end up turning
+ # a request for http://acme.example.com/a into one for
+ # ftp://proxy.example.com/a
+ return self.parent.open(req)
+
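+# Illustrative sketch (not in the original source): an explicit
+# scheme-to-proxy-URL mapping of the kind proxy_open() above expects.
+#
+# handler = ProxyHandler({"http": "http://joe:password@proxy.example.com:3128/"})
+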
+class HTTPPasswordMgr:
+
+ def __init__(self):
+ self.passwd = {}
+
+ def add_password(self, realm, uri, user, passwd):
+ # uri could be a single URI or a sequence
+ if isinstance(uri, basestring):
+ uri = [uri]
+ if not realm in self.passwd:
+ self.passwd[realm] = {}
+ for default_port in True, False:
+ reduced_uri = tuple(
+ [self.reduce_uri(u, default_port) for u in uri])
+ self.passwd[realm][reduced_uri] = (user, passwd)
+
+ def find_user_password(self, realm, authuri):
+ domains = self.passwd.get(realm, {})
+ for default_port in True, False:
+ reduced_authuri = self.reduce_uri(authuri, default_port)
+ for uris, authinfo in domains.iteritems():
+ for uri in uris:
+ if self.is_suburi(uri, reduced_authuri):
+ return authinfo
+ return None, None
+
+ def reduce_uri(self, uri, default_port=True):
+ """Accept authority or URI and extract only the authority and path."""
+ # note HTTP URLs do not have a userinfo component
+ parts = urlparse.urlsplit(uri)
+ if parts[1]:
+ # URI
+ scheme = parts[0]
+ authority = parts[1]
+ path = parts[2] or '/'
+ else:
+ # host or host:port
+ scheme = None
+ authority = uri
+ path = '/'
+ host, port = splitport(authority)
+ if default_port and port is None and scheme is not None:
+ dport = {"http": 80,
+ "https": 443,
+ }.get(scheme)
+ if dport is not None:
+ authority = "%s:%d" % (host, dport)
+ return authority, path
+
+ def is_suburi(self, base, test):
+ """Check if test is below base in a URI tree
+
+ Both args must be URIs in reduced form.
+ """
+ if base == test:
+ return True
+ if base[0] != test[0]:
+ return False
+ common = posixpath.commonprefix((base[1], test[1]))
+ if len(common) == len(base[1]):
+ return True
+ return False
+
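+# Illustrative sketch (not in the original): how reduce_uri() and
+# is_suburi() above cooperate, assuming the default-port handling shown.
+#
+# >>> mgr = HTTPPasswordMgr()
+# >>> mgr.reduce_uri("http://example.com/spam/eggs.html")
+# ('example.com:80', '/spam/eggs.html')
+# >>> mgr.is_suburi(('example.com:80', '/spam'),
+# ... ('example.com:80', '/spam/eggs.html'))
+# True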
+
+class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
+
+ def find_user_password(self, realm, authuri):
+ user, password = HTTPPasswordMgr.find_user_password(self, realm,
+ authuri)
+ if user is not None:
+ return user, password
+ return HTTPPasswordMgr.find_user_password(self, None, authuri)
+
+
+class AbstractBasicAuthHandler:
+
+ rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)
+
+ # XXX there can actually be multiple auth-schemes in a
+ # www-authenticate header. should probably be a lot more careful
+ # in parsing them to extract multiple alternatives
+
+ def __init__(self, password_mgr=None):
+ if password_mgr is None:
+ password_mgr = HTTPPasswordMgr()
+ self.passwd = password_mgr
+ self.add_password = self.passwd.add_password
+
+ def http_error_auth_reqed(self, authreq, host, req, headers):
+ # host may be an authority (without userinfo) or a URL with an
+ # authority
+ # XXX could be multiple headers
+ authreq = headers.get(authreq, None)
+ if authreq:
+ mo = AbstractBasicAuthHandler.rx.search(authreq)
+ if mo:
+ scheme, realm = mo.groups()
+ if scheme.lower() == 'basic':
+ return self.retry_http_basic_auth(host, req, realm)
+
+ def retry_http_basic_auth(self, host, req, realm):
+ user, pw = self.passwd.find_user_password(realm, host)
+ if pw is not None:
+ raw = "%s:%s" % (user, pw)
+ auth = 'Basic %s' % base64.encodestring(raw).strip()
+ if req.headers.get(self.auth_header, None) == auth:
+ return None
+ newreq = copy.copy(req)
+ newreq.add_header(self.auth_header, auth)
+ newreq.visit = False
+ return self.parent.open(newreq)
+ else:
+ return None
+
+
+class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Authorization'
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ url = req.get_full_url()
+ return self.http_error_auth_reqed('www-authenticate',
+ url, req, headers)
+
+
+class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Proxy-authorization'
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ # http_error_auth_reqed requires that there is no userinfo component in
+ # authority. Assume there isn't one, since urllib2 does not (and
+ # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+ # userinfo.
+ authority = req.get_host()
+ return self.http_error_auth_reqed('proxy-authenticate',
+ authority, req, headers)
+
+
+def randombytes(n):
+ """Return n random bytes."""
+ # Use /dev/urandom if it is available. Fall back to random module
+ # if not. It might be worthwhile to extend this function to use
+ # other platform-specific mechanisms for getting random bytes.
+ if os.path.exists("/dev/urandom"):
+ f = open("/dev/urandom")
+ s = f.read(n)
+ f.close()
+ return s
+ else:
+ L = [chr(random.randrange(0, 256)) for i in range(n)]
+ return "".join(L)
+
+class AbstractDigestAuthHandler:
+ # Digest authentication is specified in RFC 2617.
+
+ # XXX The client does not inspect the Authentication-Info header
+ # in a successful response.
+
+ # XXX It should be possible to test this implementation against
+ # a mock server that just generates a static set of challenges.
+
+ # XXX qop="auth-int" supports is shaky
+
+ def __init__(self, passwd=None):
+ if passwd is None:
+ passwd = HTTPPasswordMgr()
+ self.passwd = passwd
+ self.add_password = self.passwd.add_password
+ self.retried = 0
+ self.nonce_count = 0
+
+ def reset_retry_count(self):
+ self.retried = 0
+
+ def http_error_auth_reqed(self, auth_header, host, req, headers):
+ authreq = headers.get(auth_header, None)
+ if self.retried > 5:
+ # Don't fail endlessly - if we failed once, we'll probably
+ # fail a second time. Hm. Unless the Password Manager is
+ # prompting for the information. Crap. This isn't great
+ # but it's better than the current 'repeat until recursion
+ # depth exceeded' approach <wink>
+ raise HTTPError(req.get_full_url(), 401, "digest auth failed",
+ headers, None)
+ else:
+ self.retried += 1
+ if authreq:
+ scheme = authreq.split()[0]
+ if scheme.lower() == 'digest':
+ return self.retry_http_digest_auth(req, authreq)
+
+ def retry_http_digest_auth(self, req, auth):
+ token, challenge = auth.split(' ', 1)
+ chal = parse_keqv_list(parse_http_list(challenge))
+ auth = self.get_authorization(req, chal)
+ if auth:
+ auth_val = 'Digest %s' % auth
+ if req.headers.get(self.auth_header, None) == auth_val:
+ return None
+ newreq = copy.copy(req)
+ newreq.add_unredirected_header(self.auth_header, auth_val)
+ newreq.visit = False
+ return self.parent.open(newreq)
+
+ def get_cnonce(self, nonce):
+ # The cnonce-value is an opaque
+ # quoted string value provided by the client and used by both client
+ # and server to avoid chosen plaintext attacks, to provide mutual
+ # authentication, and to provide some message integrity protection.
+ # This isn't a fabulous effort, but it's probably Good Enough.
+ dig = sha1_digest("%s:%s:%s:%s" % (self.nonce_count, nonce,
+ time.ctime(), randombytes(8)))
+ return dig[:16]
+
+ def get_authorization(self, req, chal):
+ try:
+ realm = chal['realm']
+ nonce = chal['nonce']
+ qop = chal.get('qop')
+ algorithm = chal.get('algorithm', 'MD5')
+ # mod_digest doesn't send an opaque, even though it isn't
+ # supposed to be optional
+ opaque = chal.get('opaque', None)
+ except KeyError:
+ return None
+
+ H, KD = self.get_algorithm_impls(algorithm)
+ if H is None:
+ return None
+
+ user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+ if user is None:
+ return None
+
+ # XXX not implemented yet
+ if req.has_data():
+ entdig = self.get_entity_digest(req.get_data(), chal)
+ else:
+ entdig = None
+
+ A1 = "%s:%s:%s" % (user, realm, pw)
+ A2 = "%s:%s" % (req.get_method(),
+ # XXX selector: what about proxies and full urls
+ req.get_selector())
+ if qop == 'auth':
+ self.nonce_count += 1
+ ncvalue = '%08x' % self.nonce_count
+ cnonce = self.get_cnonce(nonce)
+ noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
+ respdig = KD(H(A1), noncebit)
+ elif qop is None:
+ respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
+ else:
+ # XXX handle auth-int.
+ pass
+
+ # XXX should the partial digests be encoded too?
+
+ base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
+ 'response="%s"' % (user, realm, nonce, req.get_selector(),
+ respdig)
+ if opaque:
+ base += ', opaque="%s"' % opaque
+ if entdig:
+ base += ', digest="%s"' % entdig
+ base += ', algorithm="%s"' % algorithm
+ if qop:
+ base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
+ return base
+
+ def get_algorithm_impls(self, algorithm):
+ # lambdas assume digest modules are imported at the top level
+ if algorithm == 'MD5':
+ H = md5_digest
+ elif algorithm == 'SHA':
+ H = sha1_digest
+ # XXX MD5-sess
+ KD = lambda s, d: H("%s:%s" % (s, d))
+ return H, KD
+
+ def get_entity_digest(self, data, chal):
+ # XXX not implemented yet
+ return None
+
+
+class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+ """An authentication protocol defined by RFC 2069
+
+ Digest authentication improves on basic authentication because it
+ does not transmit passwords in the clear.
+ """
+
+ auth_header = 'Authorization'
+ handler_order = 490
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ host = urlparse.urlparse(req.get_full_url())[1]
+ retry = self.http_error_auth_reqed('www-authenticate',
+ host, req, headers)
+ self.reset_retry_count()
+ return retry
+
+
+class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+
+ auth_header = 'Proxy-Authorization'
+ handler_order = 490
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ host = req.get_host()
+ retry = self.http_error_auth_reqed('proxy-authenticate',
+ host, req, headers)
+ self.reset_retry_count()
+ return retry
+
+
+# XXX ugly implementation, should probably not bother deriving
+class HTTPProxyPasswordMgr(HTTPPasswordMgr):
+ # has default realm and host/port
+ def add_password(self, realm, uri, user, passwd):
+ # uri could be a single URI or a sequence
+ if uri is None or isinstance(uri, basestring):
+ uris = [uri]
+ else:
+ uris = uri
+ passwd_by_domain = self.passwd.setdefault(realm, {})
+ for uri in uris:
+ for default_port in True, False:
+ reduced_uri = self.reduce_uri(uri, default_port)
+ passwd_by_domain[reduced_uri] = (user, passwd)
+
+ def find_user_password(self, realm, authuri):
+ attempts = [(realm, authuri), (None, authuri)]
+ # bleh, want default realm to take precedence over default
+ # URI/authority, hence this outer loop
+ for default_uri in False, True:
+ for realm, authuri in attempts:
+ authinfo_by_domain = self.passwd.get(realm, {})
+ for default_port in True, False:
+ reduced_authuri = self.reduce_uri(authuri, default_port)
+ for uri, authinfo in authinfo_by_domain.iteritems():
+ if uri is None and not default_uri:
+ continue
+ if self.is_suburi(uri, reduced_authuri):
+ return authinfo
+ user, password = None, None
+
+ if user is not None:
+ break
+ return user, password
+
+ def reduce_uri(self, uri, default_port=True):
+ if uri is None:
+ return None
+ return HTTPPasswordMgr.reduce_uri(self, uri, default_port)
+
+ def is_suburi(self, base, test):
+ if base is None:
+ # default to the proxy's host/port
+ hostport, path = test
+ base = (hostport, "/")
+ return HTTPPasswordMgr.is_suburi(self, base, test)
+
+
+class HTTPSClientCertMgr(HTTPPasswordMgr):
+ # implementation inheritance: this is not a proper subclass
+ def add_key_cert(self, uri, key_file, cert_file):
+ self.add_password(None, uri, key_file, cert_file)
+ def find_key_cert(self, authuri):
+ return HTTPPasswordMgr.find_user_password(self, None, authuri)
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py
new file mode 100644
index 0000000..268b305
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py
@@ -0,0 +1,1080 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+v2.1.1
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance
+into a tree representation. It provides methods and Pythonic idioms
+that make it easy to search and modify the tree.
+
+A well-formed XML/HTML document will yield a well-formed data
+structure. An ill-formed XML/HTML document will yield a
+correspondingly ill-formed data structure. If your document is only
+locally well-formed, you can use this library to find and process the
+well-formed part of it. The BeautifulSoup class has heuristics for
+obtaining a sensible parse tree in the face of common HTML errors.
+
+Beautiful Soup has no external dependencies. It works with Python 2.2
+and up.
+
+Beautiful Soup defines classes for four different parsing strategies:
+
+ * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
+ language that kind of looks like XML.
+
+ * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
+ or invalid.
+
+ * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML
+ that trips up BeautifulSoup.
+
+ * BeautifulSOAP, for making it easier to parse XML documents that use
+ lots of subelements containing a single string, where you'd prefer
+ they put that string into an attribute (such as SOAP messages).
+
+You can subclass BeautifulStoneSoup or BeautifulSoup to create a
+parsing strategy specific to an XML schema or a particular bizarre
+HTML document. Typically your subclass would just override
+SELF_CLOSING_TAGS and/or NESTABLE_TAGS.
+""" #"
+from __future__ import generators
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "2.1.1"
+__date__ = "$Date: 2004/10/18 00:14:20 $"
+__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson"
+__license__ = "PSF"
+
+from sgmllib import SGMLParser, SGMLParseError
+import types
+import re
+import sgmllib
+
+#This code makes Beautiful Soup able to parse XML with namespaces
+sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
+
+class NullType(object):
+
+ """Similar to NoneType with a corresponding singleton instance
+ 'Null' that, unlike None, accepts any message and returns itself.
+
+ Examples:
+ >>> Null("send", "a", "message")("and one more",
+ ... "and what you get still") is Null
+ True
+ """
+
+ def __new__(cls): return Null
+ def __call__(self, *args, **kwargs): return Null
+## def __getstate__(self, *args): return Null
+ def __getattr__(self, attr): return Null
+ def __getitem__(self, item): return Null
+ def __setattr__(self, attr, value): pass
+ def __setitem__(self, item, value): pass
+ def __len__(self): return 0
+ # FIXME: is this a python bug? otherwise ``for x in Null: pass``
+ # never terminates...
+ def __iter__(self): return iter([])
+ def __contains__(self, item): return False
+ def __repr__(self): return "Null"
+Null = object.__new__(NullType)
+
+class PageElement:
+ """Contains the navigational information for some part of the page
+ (either a tag or a piece of text)"""
+
+ def setup(self, parent=Null, previous=Null):
+ """Sets up the initial relations between this element and
+ other elements."""
+ self.parent = parent
+ self.previous = previous
+ self.next = Null
+ self.previousSibling = Null
+ self.nextSibling = Null
+ if self.parent and self.parent.contents:
+ self.previousSibling = self.parent.contents[-1]
+ self.previousSibling.nextSibling = self
+
+ def findNext(self, name=None, attrs={}, text=None):
+ """Returns the first item that matches the given criteria and
+ appears after this Tag in the document."""
+ return self._first(self.fetchNext, name, attrs, text)
+ firstNext = findNext
+
+ def fetchNext(self, name=None, attrs={}, text=None, limit=None):
+ """Returns all items that match the given criteria and appear
+ after this Tag in the document."""
+ return self._fetch(name, attrs, text, limit, self.nextGenerator)
+
+ def findNextSibling(self, name=None, attrs={}, text=None):
+ """Returns the closest sibling to this Tag that matches the
+ given criteria and appears after this Tag in the document."""
+ return self._first(self.fetchNextSiblings, name, attrs, text)
+ firstNextSibling = findNextSibling
+
+ def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None):
+ """Returns the siblings of this Tag that match the given
+ criteria and appear after this Tag in the document."""
+ return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator)
+
+ def findPrevious(self, name=None, attrs={}, text=None):
+ """Returns the first item that matches the given criteria and
+ appears before this Tag in the document."""
+ return self._first(self.fetchPrevious, name, attrs, text)
+
+ def fetchPrevious(self, name=None, attrs={}, text=None, limit=None):
+ """Returns all items that match the given criteria and appear
+ before this Tag in the document."""
+ return self._fetch(name, attrs, text, limit, self.previousGenerator)
+ firstPrevious = findPrevious
+
+ def findPreviousSibling(self, name=None, attrs={}, text=None):
+ """Returns the closest sibling to this Tag that matches the
+ given criteria and appears before this Tag in the document."""
+ return self._first(self.fetchPreviousSiblings, name, attrs, text)
+ firstPreviousSibling = findPreviousSibling
+
+ def fetchPreviousSiblings(self, name=None, attrs={}, text=None,
+ limit=None):
+ """Returns the siblings of this Tag that match the given
+ criteria and appear before this Tag in the document."""
+ return self._fetch(name, attrs, text, limit,
+ self.previousSiblingGenerator)
+
+ def findParent(self, name=None, attrs={}):
+ """Returns the closest parent of this Tag that matches the given
+ criteria."""
+ r = Null
+ l = self.fetchParents(name, attrs, 1)
+ if l:
+ r = l[0]
+ return r
+ firstParent = findParent
+
+ def fetchParents(self, name=None, attrs={}, limit=None):
+ """Returns the parents of this Tag that match the given
+ criteria."""
+ return self._fetch(name, attrs, None, limit, self.parentGenerator)
+
+ #These methods do the real heavy lifting.
+
+ def _first(self, method, name, attrs, text):
+ r = Null
+ l = method(name, attrs, text, 1)
+ if l:
+ r = l[0]
+ return r
+
+ def _fetch(self, name, attrs, text, limit, generator):
+ "Iterates over a generator looking for things that match."
+ if not hasattr(attrs, 'items'):
+ attrs = {'class' : attrs}
+
+ results = []
+ g = generator()
+ while True:
+ try:
+ i = g.next()
+ except StopIteration:
+ break
+ found = None
+ if isinstance(i, Tag):
+ if not text:
+ if not name or self._matches(i, name):
+ match = True
+ for attr, matchAgainst in attrs.items():
+ check = i.get(attr)
+ if not self._matches(check, matchAgainst):
+ match = False
+ break
+ if match:
+ found = i
+ elif text:
+ if self._matches(i, text):
+ found = i
+ if found:
+ results.append(found)
+ if limit and len(results) >= limit:
+ break
+ return results
+
+ #Generators that can be used to navigate starting from both
+ #NavigableTexts and Tags.
+ def nextGenerator(self):
+ i = self
+ while i:
+ i = i.next
+ yield i
+
+ def nextSiblingGenerator(self):
+ i = self
+ while i:
+ i = i.nextSibling
+ yield i
+
+ def previousGenerator(self):
+ i = self
+ while i:
+ i = i.previous
+ yield i
+
+ def previousSiblingGenerator(self):
+ i = self
+ while i:
+ i = i.previousSibling
+ yield i
+
+ def parentGenerator(self):
+ i = self
+ while i:
+ i = i.parent
+ yield i
+
+ def _matches(self, chunk, howToMatch):
+ #print 'looking for %s in %s' % (howToMatch, chunk)
+ #
+ # If given a list of items, return true if the list contains a
+ # text element that matches.
+ if isList(chunk) and not isinstance(chunk, Tag):
+ for tag in chunk:
+ if isinstance(tag, NavigableText) and self._matches(tag, howToMatch):
+ return True
+ return False
+ if callable(howToMatch):
+ return howToMatch(chunk)
+ if isinstance(chunk, Tag):
+ #Custom match methods take the tag as an argument, but all other
+ #ways of matching match the tag name as a string
+ chunk = chunk.name
+ #Now we know that chunk is a string
+ if not isinstance(chunk, basestring):
+ chunk = str(chunk)
+ if hasattr(howToMatch, 'match'):
+ # It's a regexp object.
+ return howToMatch.search(chunk)
+ if isList(howToMatch):
+ return chunk in howToMatch
+ if hasattr(howToMatch, 'items'):
+ return howToMatch.has_key(chunk)
+ #It's just a string
+ return str(howToMatch) == chunk
+
+class NavigableText(PageElement):
+
+ def __getattr__(self, attr):
+ "For backwards compatibility, text.string gives you text"
+ if attr == 'string':
+ return self
+ else:
+ raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+
+class NavigableString(str, NavigableText):
+ pass
+
+class NavigableUnicodeString(unicode, NavigableText):
+ pass
+
+class Tag(PageElement):
+
+ """Represents a found HTML tag with its attributes and contents."""
+
+ def __init__(self, name, attrs=None, parent=Null, previous=Null):
+ "Basic constructor."
+ self.name = name
+ if attrs == None:
+ attrs = []
+ self.attrs = attrs
+ self.contents = []
+ self.setup(parent, previous)
+ self.hidden = False
+
+ def get(self, key, default=None):
+ """Returns the value of the 'key' attribute for the tag, or
+ the value given for 'default' if it doesn't have that
+ attribute."""
+ return self._getAttrMap().get(key, default)
+
+ def __getitem__(self, key):
+ """tag[key] returns the value of the 'key' attribute for the tag,
+ and throws an exception if it's not there."""
+ return self._getAttrMap()[key]
+
+ def __iter__(self):
+ "Iterating over a tag iterates over its contents."
+ return iter(self.contents)
+
+ def __len__(self):
+ "The length of a tag is the length of its list of contents."
+ return len(self.contents)
+
+ def __contains__(self, x):
+ return x in self.contents
+
+ def __nonzero__(self):
+ "A tag is non-None even if it has no contents."
+ return True
+
+ def __setitem__(self, key, value):
+ """Setting tag[key] sets the value of the 'key' attribute for the
+ tag."""
+ self._getAttrMap()
+ self.attrMap[key] = value
+ found = False
+ for i in range(0, len(self.attrs)):
+ if self.attrs[i][0] == key:
+ self.attrs[i] = (key, value)
+ found = True
+ if not found:
+ self.attrs.append((key, value))
+ self._getAttrMap()[key] = value
+
+ def __delitem__(self, key):
+ "Deleting tag[key] deletes all 'key' attributes for the tag."
+ for item in self.attrs:
+ if item[0] == key:
+ self.attrs.remove(item)
+ #We don't break because bad HTML can define the same
+ #attribute multiple times.
+ self._getAttrMap()
+ if self.attrMap.has_key(key):
+ del self.attrMap[key]
+
+ def __call__(self, *args, **kwargs):
+ """Calling a tag like a function is the same as calling its
+ fetch() method. Eg. tag('a') returns a list of all the A tags
+ found within this tag."""
+ return apply(self.fetch, args, kwargs)
+
+ def __getattr__(self, tag):
+ if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
+ return self.first(tag[:-3])
+ elif tag.find('__') != 0:
+ return self.first(tag)
+
+ def __eq__(self, other):
+ """Returns true iff this tag has the same name, the same attributes,
+ and the same contents (recursively) as the given tag.
+
+ NOTE: right now this will return false if two tags have the
+ same attributes in a different order. Should this be fixed?"""
+ if not hasattr(other, 'name') or not hasattr(other, 'attrs') \
+ or not hasattr(other, 'contents') or self.name != other.name \
+ or self.attrs != other.attrs or len(self) != len(other):
+ return False
+ for i in range(0, len(self.contents)):
+ if self.contents[i] != other.contents[i]:
+ return False
+ return True
+
+ def __ne__(self, other):
+ """Returns true iff this tag is not identical to the other tag,
+ as defined in __eq__."""
+ return not self == other
+
+ def __repr__(self):
+ """Renders this tag as a string."""
+ return str(self)
+
+ def __unicode__(self):
+ return self.__str__(1)
+
+ def __str__(self, needUnicode=None, showStructureIndent=None):
+ """Returns a string or Unicode representation of this tag and
+ its contents.
+
+ NOTE: since Python's HTML parser consumes whitespace, this
+ method is not certain to reproduce the whitespace present in
+ the original string."""
+
+ attrs = []
+ if self.attrs:
+ for key, val in self.attrs:
+ attrs.append('%s="%s"' % (key, val))
+ close = ''
+ closeTag = ''
+ if self.isSelfClosing():
+ close = ' /'
+ else:
+ closeTag = '</%s>' % self.name
+ indentIncrement = None
+ if showStructureIndent != None:
+ indentIncrement = showStructureIndent
+ if not self.hidden:
+ indentIncrement += 1
+ contents = self.renderContents(indentIncrement, needUnicode=needUnicode)
+ if showStructureIndent:
+ space = '\n%s' % (' ' * showStructureIndent)
+ if self.hidden:
+ s = contents
+ else:
+ s = []
+ attributeString = ''
+ if attrs:
+ attributeString = ' ' + ' '.join(attrs)
+ if showStructureIndent:
+ s.append(space)
+ s.append('<%s%s%s>' % (self.name, attributeString, close))
+ s.append(contents)
+ if closeTag and showStructureIndent != None:
+ s.append(space)
+ s.append(closeTag)
+ s = ''.join(s)
+ isUnicode = type(s) == types.UnicodeType
+ if needUnicode and not isUnicode:
+ s = unicode(s)
+ elif isUnicode and needUnicode==False:
+ s = str(s)
+ return s
+
+ def prettify(self, needUnicode=None):
+ return self.__str__(needUnicode, showStructureIndent=True)
+
+ def renderContents(self, showStructureIndent=None, needUnicode=None):
+ """Renders the contents of this tag as a (possibly Unicode)
+ string."""
+ s=[]
+ for c in self:
+ text = None
+ if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType:
+ text = unicode(c)
+ elif isinstance(c, Tag):
+ s.append(c.__str__(needUnicode, showStructureIndent))
+ elif needUnicode:
+ text = unicode(c)
+ else:
+ text = str(c)
+ if text:
+ if showStructureIndent != None:
+ if text[-1] == '\n':
+ text = text[:-1]
+ s.append(text)
+ return ''.join(s)
+
+ #Soup methods
+
+ def firstText(self, text, recursive=True):
+ """Convenience method to retrieve the first piece of text matching the
+ given criteria. 'text' can be a string, a regular expression object,
+ a callable that takes a string and returns whether or not the
+ string 'matches', etc."""
+ return self.first(recursive=recursive, text=text)
+
+ def fetchText(self, text, recursive=True, limit=None):
+ """Convenience method to retrieve all pieces of text matching the
+ given criteria. 'text' can be a string, a regular expression object,
+ a callable that takes a string and returns whether or not the
+ string 'matches', etc."""
+ return self.fetch(recursive=recursive, text=text, limit=limit)
+
+ def first(self, name=None, attrs={}, recursive=True, text=None):
+ """Return only the first child of this
+ Tag matching the given criteria."""
+ r = Null
+ l = self.fetch(name, attrs, recursive, text, 1)
+ if l:
+ r = l[0]
+ return r
+ findChild = first
+
+ def fetch(self, name=None, attrs={}, recursive=True, text=None,
+ limit=None):
+ """Extracts a list of Tag objects that match the given
+ criteria. You can specify the name of the Tag and any
+ attributes you want the Tag to have.
+
+ The value of a key-value pair in the 'attrs' map can be a
+ string, a list of strings, a regular expression object, or a
+ callable that takes a string and returns whether or not the
+ string matches for some custom definition of 'matches'. The
+ same is true of the tag name."""
+ generator = self.recursiveChildGenerator
+ if not recursive:
+ generator = self.childGenerator
+ return self._fetch(name, attrs, text, limit, generator)
+ fetchChildren = fetch
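+
+ # Illustrative sketch (not in the original): name and attrs matchers in
+ # action, assuming a parsed soup.
+ #
+ # >>> soup = BeautifulSoup('<p class="x">One</p><p>Two</p>')
+ # >>> len(soup.fetch('p', {'class': 'x'}))
+ # 1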
+
+ #Utility methods
+
+ def isSelfClosing(self):
+ """Returns true iff this is a self-closing tag as defined in the HTML
+ standard.
+
+ TODO: This is specific to BeautifulSoup and its subclasses, but it's
+ used by __str__"""
+ return self.name in BeautifulSoup.SELF_CLOSING_TAGS
+
+ def append(self, tag):
+ """Appends the given tag to the contents of this tag."""
+ self.contents.append(tag)
+
+ #Private methods
+
+ def _getAttrMap(self):
+ """Initializes a map representation of this tag's attributes,
+ if not already initialized."""
+ if not getattr(self, 'attrMap'):
+ self.attrMap = {}
+ for (key, value) in self.attrs:
+ self.attrMap[key] = value
+ return self.attrMap
+
+ #Generator methods
+ def childGenerator(self):
+ for i in range(0, len(self.contents)):
+ yield self.contents[i]
+ raise StopIteration
+
+ def recursiveChildGenerator(self):
+ stack = [(self, 0)]
+ while stack:
+ tag, start = stack.pop()
+ if isinstance(tag, Tag):
+ for i in range(start, len(tag.contents)):
+ a = tag.contents[i]
+ yield a
+ if isinstance(a, Tag) and tag.contents:
+ if i < len(tag.contents) - 1:
+ stack.append((tag, i+1))
+ stack.append((a, 0))
+ break
+ raise StopIteration
+
+
+def isList(l):
+ """Convenience method that works with all 2.x versions of Python
+ to determine whether or not something is listlike."""
+ return hasattr(l, '__iter__') \
+ or (type(l) in (types.ListType, types.TupleType))
+
+def buildTagMap(default, *args):
+ """Turns a list of maps, lists, or scalars into a single map.
+ Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out
+ of lists and partial maps."""
+ built = {}
+ for portion in args:
+ if hasattr(portion, 'items'):
+ #It's a map. Merge it.
+ for k,v in portion.items():
+ built[k] = v
+ elif isList(portion):
+ #It's a list. Map each item to the default.
+ for k in portion:
+ built[k] = default
+ else:
+ #It's a scalar. Map it to the default.
+ built[portion] = default
+ return built
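+
+# Illustrative sketch (not in the original):
+#
+# >>> m = buildTagMap(None, ['br', 'hr'], {'table': []})
+# >>> m == {'br': None, 'hr': None, 'table': []}
+# True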
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+ """This class contains the basic parser and fetch code. It defines
+ a parser that knows nothing about tag behavior except for the
+ following:
+
+ You can't close a tag without closing all the tags it encloses.
+ That is, "<foo><bar></foo>" actually means
+ "<foo><bar></bar></foo>".
+
+ [Another possible explanation is "<foo><bar /></foo>", but since
+ this class defines no SELF_CLOSING_TAGS, it will never use that
+ explanation.]
+
+ This class is useful for parsing XML or made-up markup languages,
+ or when BeautifulSoup makes an assumption counter to what you were
+ expecting."""
+
+ SELF_CLOSING_TAGS = {}
+ NESTABLE_TAGS = {}
+ RESET_NESTING_TAGS = {}
+ QUOTE_TAGS = {}
+
+ #As a public service we will by default silently replace MS smart quotes
+ #and similar characters with their HTML or ASCII equivalents.
+ MS_CHARS = { '\x80' : '&euro;',
+ '\x81' : ' ',
+ '\x82' : '&sbquo;',
+ '\x83' : '&fnof;',
+ '\x84' : '&bdquo;',
+ '\x85' : '&hellip;',
+ '\x86' : '&dagger;',
+ '\x87' : '&Dagger;',
+ '\x88' : '&caret;',
+ '\x89' : '%',
+ '\x8A' : '&Scaron;',
+ '\x8B' : '&lt;',
+ '\x8C' : '&OElig;',
+ '\x8D' : '?',
+ '\x8E' : 'Z',
+ '\x8F' : '?',
+ '\x90' : '?',
+ '\x91' : '&lsquo;',
+ '\x92' : '&rsquo;',
+ '\x93' : '&ldquo;',
+ '\x94' : '&rdquo;',
+ '\x95' : '&bull;',
+ '\x96' : '&ndash;',
+ '\x97' : '&mdash;',
+ '\x98' : '&tilde;',
+ '\x99' : '&trade;',
+ '\x9a' : '&scaron;',
+ '\x9b' : '&gt;',
+ '\x9c' : '&oelig;',
+ '\x9d' : '?',
+ '\x9e' : 'z',
+ '\x9f' : '&Yuml;',}
+
+ PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+ lambda(x):x.group(1) + ' />'),
+ (re.compile('<!\s+([^<>]*)>'),
+ lambda(x):'<!' + x.group(1) + '>'),
+ (re.compile("([\x80-\x9f])"),
+ lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1)))
+ ]
+
+ ROOT_TAG_NAME = '[document]'
+
+ def __init__(self, text=None, avoidParserProblems=True,
+ initialTextIsEverything=True):
+ """Initialize this as the 'root tag' and feed in any text to
+ the parser.
+
+ NOTE about avoidParserProblems: sgmllib will process most bad
+ HTML, and BeautifulSoup has tricks for dealing with some HTML
+ that kills sgmllib, but Beautiful Soup can nonetheless choke
+ or lose data if your data uses self-closing tags or
+ declarations incorrectly. By default, Beautiful Soup sanitizes
+ its input to avoid the vast majority of these problems. The
+ problems are relatively rare, even in bad HTML, so feel free
+ to pass in False to avoidParserProblems if they don't apply to
+ you, and you'll get better performance. The only reason I have
+ this turned on by default is so I don't get so many tech
+ support questions.
+
+ The two most common instances of invalid HTML that will choke
+ sgmllib are fixed by the default parser massage techniques:
+
+ <br/> (No space between name of closing tag and tag close)
+ <! --Comment--> (Extraneous whitespace in declaration)
+
+ You can pass in a custom list of (RE object, replace method)
+ tuples to get Beautiful Soup to scrub your input the way you
+ want."""
+ Tag.__init__(self, self.ROOT_TAG_NAME)
+ if avoidParserProblems \
+ and not isList(avoidParserProblems):
+ avoidParserProblems = self.PARSER_MASSAGE
+ self.avoidParserProblems = avoidParserProblems
+ SGMLParser.__init__(self)
+ self.quoteStack = []
+ self.hidden = 1
+ self.reset()
+ if hasattr(text, 'read'):
+ #It's a file-type object.
+ text = text.read()
+ if text:
+ self.feed(text)
+ if initialTextIsEverything:
+ self.done()
+
+ def __getattr__(self, methodName):
+ """This method routes method call requests to either the SGMLParser
+ superclass or the Tag superclass, depending on the method name."""
+ if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
+ or methodName.find('do_') == 0:
+ return SGMLParser.__getattr__(self, methodName)
+ elif methodName.find('__') != 0:
+ return Tag.__getattr__(self, methodName)
+ else:
+ raise AttributeError
+
+ def feed(self, text):
+ if self.avoidParserProblems:
+ for fix, m in self.avoidParserProblems:
+ text = fix.sub(m, text)
+ SGMLParser.feed(self, text)
+
+ def done(self):
+ """Called when you're done parsing, so that the unclosed tags can be
+ correctly processed."""
+ self.endData() #NEW
+ while self.currentTag.name != self.ROOT_TAG_NAME:
+ self.popTag()
+
+ def reset(self):
+ SGMLParser.reset(self)
+ self.currentData = []
+ self.currentTag = None
+ self.tagStack = []
+ self.pushTag(self)
+
+ def popTag(self):
+ tag = self.tagStack.pop()
+ # Tags with just one string-owning child get the child as a
+ # 'string' property, so that soup.tag.string is shorthand for
+ # soup.tag.contents[0]
+ if len(self.currentTag.contents) == 1 and \
+ isinstance(self.currentTag.contents[0], NavigableText):
+ self.currentTag.string = self.currentTag.contents[0]
+
+ #print "Pop", tag.name
+ if self.tagStack:
+ self.currentTag = self.tagStack[-1]
+ return self.currentTag
+
+ def pushTag(self, tag):
+ #print "Push", tag.name
+ if self.currentTag:
+ self.currentTag.append(tag)
+ self.tagStack.append(tag)
+ self.currentTag = self.tagStack[-1]
+
+ def endData(self):
+ currentData = ''.join(self.currentData)
+ if currentData:
+ if not currentData.strip():
+ if '\n' in currentData:
+ currentData = '\n'
+ else:
+ currentData = ' '
+ c = NavigableString
+ if type(currentData) == types.UnicodeType:
+ c = NavigableUnicodeString
+ o = c(currentData)
+ o.setup(self.currentTag, self.previous)
+ if self.previous:
+ self.previous.next = o
+ self.previous = o
+ self.currentTag.contents.append(o)
+ self.currentData = []
+
+ def _popToTag(self, name, inclusivePop=True):
+ """Pops the tag stack up to and including the most recent
+ instance of the given tag. If inclusivePop is false, pops the tag
+ stack up to but *not* including the most recent instance of
+ the given tag."""
+ if name == self.ROOT_TAG_NAME:
+ return
+
+ numPops = 0
+ mostRecentTag = None
+ for i in range(len(self.tagStack)-1, 0, -1):
+ if name == self.tagStack[i].name:
+ numPops = len(self.tagStack)-i
+ break
+ if not inclusivePop:
+ numPops = numPops - 1
+
+ for i in range(0, numPops):
+ mostRecentTag = self.popTag()
+ return mostRecentTag
+
+ def _smartPop(self, name):
+
+ """We need to pop up to the previous tag of this type, unless
+ one of this tag's nesting reset triggers comes between this
+ tag and the previous tag of this type, OR unless this tag is a
+ generic nesting trigger and another generic nesting trigger
+ comes between this tag and the previous tag of this type.
+
+ Examples:
+ <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+ <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
+ <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
+ <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+
+ <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+ <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+ <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+ """
+
+ nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+ isNestable = nestingResetTriggers != None
+ isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+ popTo = None
+ inclusive = True
+ for i in range(len(self.tagStack)-1, 0, -1):
+ p = self.tagStack[i]
+ if (not p or p.name == name) and not isNestable:
+ #Non-nestable tags get popped to the top or to their
+ #last occurrence.
+ popTo = name
+ break
+ if (nestingResetTriggers != None
+ and p.name in nestingResetTriggers) \
+ or (nestingResetTriggers == None and isResetNesting
+ and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+ #If we encounter one of the nesting reset triggers
+ #peculiar to this tag, or we encounter another tag
+ #that causes nesting to reset, pop up to but not
+ #including that tag.
+
+ popTo = p.name
+ inclusive = False
+ break
+ p = p.parent
+ if popTo:
+ self._popToTag(popTo, inclusive)
+
+ def unknown_starttag(self, name, attrs, selfClosing=0):
+ #print "Start tag %s" % name
+ if self.quoteStack:
+ #This is not a real tag.
+ #print "<%s> is not real!" % name
+ attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
+ self.handle_data('<%s%s>' % (name, attrs))
+ return
+ self.endData()
+ if not name in self.SELF_CLOSING_TAGS and not selfClosing:
+ self._smartPop(name)
+ tag = Tag(name, attrs, self.currentTag, self.previous)
+ if self.previous:
+ self.previous.next = tag
+ self.previous = tag
+ self.pushTag(tag)
+ if selfClosing or name in self.SELF_CLOSING_TAGS:
+ self.popTag()
+ if name in self.QUOTE_TAGS:
+ #print "Beginning quote (%s)" % name
+ self.quoteStack.append(name)
+ self.literal = 1
+
+ def unknown_endtag(self, name):
+ if self.quoteStack and self.quoteStack[-1] != name:
+ #This is not a real end tag.
+ #print "</%s> is not real!" % name
+ self.handle_data('</%s>' % name)
+ return
+ self.endData()
+ self._popToTag(name)
+ if self.quoteStack and self.quoteStack[-1] == name:
+ self.quoteStack.pop()
+ self.literal = (len(self.quoteStack) > 0)
+
+ def handle_data(self, data):
+ self.currentData.append(data)
+
+ def handle_pi(self, text):
+ "Propagate processing instructions right through."
+ self.handle_data("<?%s>" % text)
+
+ def handle_comment(self, text):
+ "Propagate comments right through."
+ self.handle_data("<!--%s-->" % text)
+
+ def handle_charref(self, ref):
+ "Propagate char refs right through."
+ self.handle_data('&#%s;' % ref)
+
+ def handle_entityref(self, ref):
+ "Propagate entity refs right through."
+ self.handle_data('&%s;' % ref)
+
+ def handle_decl(self, data):
+ "Propagate DOCTYPEs and the like right through."
+ self.handle_data('<!%s>' % data)
+
+ def parse_declaration(self, i):
+ """Treat a bogus SGML declaration as raw data. Treat a CDATA
+ declaration as regular data."""
+ j = None
+ if self.rawdata[i:i+9] == '<![CDATA[':
+ k = self.rawdata.find(']]>', i)
+ if k == -1:
+ k = len(self.rawdata)
+ self.handle_data(self.rawdata[i+9:k])
+ j = k+3
+ else:
+ try:
+ j = SGMLParser.parse_declaration(self, i)
+ except SGMLParseError:
+ toHandle = self.rawdata[i:]
+ self.handle_data(toHandle)
+ j = i + len(toHandle)
+ return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+ """This parser knows the following facts about HTML:
+
+ * Some tags have no closing tag and should be interpreted as being
+ closed as soon as they are encountered.
+
+ * The text inside some tags (ie. 'script') may contain tags which
+ are not really part of the document and which should be parsed
+ as text, not tags. If you want to parse the text as tags, you can
+ always fetch it and parse it explicitly.
+
+ * Tag nesting rules:
+
+ Most tags can't be nested at all. For instance, the occurrence of
+ a <p> tag should implicitly close the previous <p> tag.
+
+ <p>Para1<p>Para2
+ should be transformed into:
+ <p>Para1</p><p>Para2
+
+ Some tags can be nested arbitrarily. For instance, the occurrence
+ of a <blockquote> tag should _not_ implicitly close the previous
+ <blockquote> tag.
+
+ Alice said: <blockquote>Bob said: <blockquote>Blah
+ should NOT be transformed into:
+ Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+ Some tags can be nested, but the nesting is reset by the
+ interposition of other tags. For instance, a <tr> tag should
+ implicitly close the previous <tr> tag within the same <table>,
+ but not close a <tr> tag in another table.
+
+ <table><tr>Blah<tr>Blah
+ should be transformed into:
+ <table><tr>Blah</tr><tr>Blah
+ but,
+ <tr>Blah<table><tr>Blah
+ should NOT be transformed into
+ <tr>Blah<table></tr><tr>Blah
+
+ Differing assumptions about tag nesting rules are a major source
+ of problems with the BeautifulSoup class. If BeautifulSoup is not
+ treating as nestable a tag your page author treats as nestable,
+ try ICantBelieveItsBeautifulSoup before writing your own
+ subclass."""
+
+ SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta',
+ 'spacer', 'link', 'frame', 'base'])
+
+ QUOTE_TAGS = {'script': None}
+
+ #According to the HTML standard, each of these inline tags can
+ #contain another tag of the same type. Furthermore, it's common
+ #to actually use these tags this way.
+ NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
+ 'center']
+
+ #According to the HTML standard, these block tags can contain
+ #another tag of the same type. Furthermore, it's common
+ #to actually use these tags this way.
+ NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']
+
+ #Lists can contain other lists, but there are restrictions.
+ NESTABLE_LIST_TAGS = { 'ol' : [],
+ 'ul' : [],
+ 'li' : ['ul', 'ol'],
+ 'dl' : [],
+ 'dd' : ['dl'],
+ 'dt' : ['dl'] }
+
+ #Tables can contain other tables, but there are restrictions.
+ NESTABLE_TABLE_TAGS = {'table' : [],
+ 'tr' : ['table', 'tbody', 'tfoot', 'thead'],
+ 'td' : ['tr'],
+ 'th' : ['tr'],
+ }
+
+ NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']
+
+ #If one of these tags is encountered, all tags up to the next tag of
+ #this type are popped.
+ RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
+ NON_NESTABLE_BLOCK_TAGS,
+ NESTABLE_LIST_TAGS,
+ NESTABLE_TABLE_TAGS)
+
+ NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
+ NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
+
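+# Illustrative sketch (not in the original): a minimal subclass of the
+# kind the module docstring suggests, treating a hypothetical <foo> tag
+# as self-closing.
+#
+# class FooSoup(BeautifulSoup):
+#     SELF_CLOSING_TAGS = buildTagMap(
+#         None, BeautifulSoup.SELF_CLOSING_TAGS, ['foo'])
+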
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+ """The BeautifulSoup class is oriented towards skipping over
+ common HTML errors like unclosed tags. However, sometimes it makes
+ errors of its own. For instance, consider this fragment:
+
+ <b>Foo<b>Bar</b></b>
+
+ This is perfectly valid (if bizarre) HTML. However, the
+ BeautifulSoup class will implicitly close the first b tag when it
+ encounters the second 'b'. It will think the author wrote
+ "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+ there's no real-world reason to bold something that's already
+ bold. When it encounters '</b></b>' it will close two more 'b'
+ tags, for a grand total of three tags closed instead of two. This
+ can throw off the rest of your document structure. The same is
+ true of a number of other tags, listed below.
+
+ It's much more common for someone to forget to close (eg.) a 'b'
+ tag than to actually use nested 'b' tags, and the BeautifulSoup
+ class handles the common case. This class handles the
+ not-so-common case: where you can't believe someone wrote what
+ they did, but it's valid HTML and BeautifulSoup screwed up by
+ assuming it wouldn't be.
+
+ If this doesn't do what you need, try subclassing this class or
+ BeautifulSoup, and providing your own list of NESTABLE_TAGS."""
+
+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+ ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+ 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+ 'big']
+
+ I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
+
+ NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+ I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class BeautifulSOAP(BeautifulStoneSoup):
+ """This class will push a tag with only a single string child into
+ the tag's parent as an attribute. The attribute's name is the tag
+ name, and the value is the string child. An example should give
+ the flavor of the change:
+
+ <foo><bar>baz</bar></foo>
+ =>
+ <foo bar="baz"><bar>baz</bar></foo>
+
+ You can then access fooTag['bar'] instead of fooTag.barTag.string.
+
+ This is, of course, useful for scraping structures that tend to
+ use subelements instead of attributes, such as SOAP messages. Note
+ that it modifies its input, so don't print the modified version
+ out.
+
+ I'm not sure how many people really want to use this class; let me
+ know if you do. Mainly I like the name."""
+
+ def popTag(self):
+ if len(self.tagStack) > 1:
+ tag = self.tagStack[-1]
+ parent = self.tagStack[-2]
+ parent._getAttrMap()
+ if (isinstance(tag, Tag) and len(tag.contents) == 1 and
+ isinstance(tag.contents[0], NavigableText) and
+ not parent.attrMap.has_key(tag.name)):
+ parent[tag.name] = tag.contents[0]
+ BeautifulStoneSoup.popTag(self)
+
+#Enterprise class names! It has come to our attention that some people
+#think the names of the Beautiful Soup parser classes are too silly
+#and "unprofessional" for use in enterprise screen-scraping. We feel
+#your pain! For such-minded folk, the Beautiful Soup Consortium And
+#All-Night Kosher Bakery recommends renaming this file to
+#"RobustParser.py" (or, in cases of extreme enterprisitude,
+#"RobustParserBeanInterface.class") and using the following
+#enterprise-friendly class aliases:
+class RobustXMLParser(BeautifulStoneSoup):
+ pass
+class RobustHTMLParser(BeautifulSoup):
+ pass
+class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
+ pass
+class SimplifyingSOAPParser(BeautifulSOAP):
+ pass
+
+###
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+ import sys
+ soup = BeautifulStoneSoup(sys.stdin.read())
+ print soup.prettify()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py
new file mode 100644
index 0000000..caeb82b
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py
@@ -0,0 +1,1707 @@
+"""HTTP cookie handling for web clients.
+
+This module originally developed from my port of Gisle Aas' Perl module
+HTTP::Cookies, from the libwww-perl library.
+
+Docstrings, comments and debug strings in this code refer to the
+attributes of the HTTP cookie system as cookie-attributes, to distinguish
+them clearly from Python attributes.
+
+ CookieJar____
+ / \ \
+ FileCookieJar \ \
+ / | \ \ \
+ MozillaCookieJar | LWPCookieJar \ \
+ | | \
+ | ---MSIEBase | \
+ | / | | \
+ | / MSIEDBCookieJar BSDDBCookieJar
+ |/
+ MSIECookieJar
+
+Comments to John J Lee <jjl@pobox.com>.
+
+
+Copyright 2002-2006 John J Lee <jjl@pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import sys, re, copy, time, urllib, types, logging
+try:
+ import threading
+ _threading = threading; del threading
+except ImportError:
+ import dummy_threading
+ _threading = dummy_threading; del dummy_threading
+
+MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
+ "instance initialised with one)")
+DEFAULT_HTTP_PORT = "80"
+
+from _headersutil import split_header_words, parse_ns_headers
+from _util import isstringlike
+import _rfc3986
+
+debug = logging.getLogger("mechanize.cookies").debug
+
+
+def reraise_unmasked_exceptions(unmasked=()):
+ # There are a few catch-all except: statements in this module, for
+ # catching input that's bad in unexpected ways.
+ # This function re-raises some exceptions we don't want to trap.
+ import mechanize, warnings
+ if not mechanize.USE_BARE_EXCEPT:
+ raise
+ unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
+ etype = sys.exc_info()[0]
+ if issubclass(etype, unmasked):
+ raise
+ # swallowed an exception
+ import traceback, StringIO
+ f = StringIO.StringIO()
+ traceback.print_exc(None, f)
+ msg = f.getvalue()
+ warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
+
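+# A sketch of the intended calling pattern (it mirrors the use in
+# CookieJar._make_cookies below; risky_parse is hypothetical):
+#
+#     try:
+#         cookies = risky_parse()
+#     except:
+#         reraise_unmasked_exceptions()
+#         cookies = []  # exception was swallowed; fall back to a default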
+
+IPV4_RE = re.compile(r"\.\d+$")
+def is_HDN(text):
+ """Return True if text is a host domain name."""
+ # XXX
+ # This may well be wrong. Which RFC is HDN defined in, if any (for
+ # the purposes of RFC 2965)?
+ # For the current implementation, what about IPv6? Remember to look
+    # at other uses of IPV4_RE also, if you change this.
+ return not (IPV4_RE.search(text) or
+ text == "" or
+ text[0] == "." or text[-1] == ".")
+
+def domain_match(A, B):
+ """Return True if domain A domain-matches domain B, according to RFC 2965.
+
+ A and B may be host domain names or IP addresses.
+
+ RFC 2965, section 1:
+
+ Host names can be specified either as an IP address or a HDN string.
+ Sometimes we compare one host name with another. (Such comparisons SHALL
+ be case-insensitive.) Host A's name domain-matches host B's if
+
+ * their host name strings string-compare equal; or
+
+ * A is a HDN string and has the form NB, where N is a non-empty
+ name string, B has the form .B', and B' is a HDN string. (So,
+ x.y.com domain-matches .Y.com but not Y.com.)
+
+ Note that domain-match is not a commutative operation: a.b.c.com
+ domain-matches .c.com, but not the reverse.
+
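+    Illustrative examples, following the rules above:
+
+    >>> domain_match("x.y.com", ".y.com")
+    True
+    >>> domain_match("y.com", ".y.com")
+    False
+    >>> domain_match("x.y.com", "x.y.com")
+    True
+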
+ """
+ # Note that, if A or B are IP addresses, the only relevant part of the
+ # definition of the domain-match algorithm is the direct string-compare.
+ A = A.lower()
+ B = B.lower()
+ if A == B:
+ return True
+ if not is_HDN(A):
+ return False
+ i = A.rfind(B)
+ has_form_nb = not (i == -1 or i == 0)
+ return (
+ has_form_nb and
+ B.startswith(".") and
+ is_HDN(B[1:])
+ )
+
+def liberal_is_HDN(text):
+ """Return True if text is a sort-of-like a host domain name.
+
+ For accepting/blocking domains.
+
+ """
+ return not IPV4_RE.search(text)
+
+def user_domain_match(A, B):
+ """For blocking/accepting domains.
+
+ A and B may be host domain names or IP addresses.
+
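+    Illustrative examples: a leading dot matches subdomains, while a
+    dotless pattern must compare string-equal:
+
+    >>> user_domain_match("www.acme.com", ".acme.com")
+    True
+    >>> user_domain_match("www.acme.com", "acme.com")
+    False
+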
+ """
+ A = A.lower()
+ B = B.lower()
+ if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
+ if A == B:
+ # equal IP addresses
+ return True
+ return False
+ initial_dot = B.startswith(".")
+ if initial_dot and A.endswith(B):
+ return True
+ if not initial_dot and A == B:
+ return True
+ return False
+
+cut_port_re = re.compile(r":\d+$")
+def request_host(request):
+ """Return request-host, as defined by RFC 2965.
+
+ Variation from RFC: returned value is lowercased, for convenient
+ comparison.
+
+ """
+ url = request.get_full_url()
+ host = _rfc3986.urlsplit(url)[1]
+ if host is None:
+ host = request.get_header("Host", "")
+ # remove port, if present
+ return cut_port_re.sub("", host, 1)
+
+def request_host_lc(request):
+ return request_host(request).lower()
+
+def eff_request_host(request):
+ """Return a tuple (request-host, effective request-host name)."""
+ erhn = req_host = request_host(request)
+ if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
+ erhn = req_host + ".local"
+ return req_host, erhn
+
+def eff_request_host_lc(request):
+ req_host, erhn = eff_request_host(request)
+ return req_host.lower(), erhn.lower()
+
+def effective_request_host(request):
+ """Return the effective request-host, as defined by RFC 2965."""
+ return eff_request_host(request)[1]
+
+def request_path(request):
+ """request-URI, as defined by RFC 2965."""
+ url = request.get_full_url()
+ path, query, frag = _rfc3986.urlsplit(url)[2:]
+ path = escape_path(path)
+ req_path = _rfc3986.urlunsplit((None, None, path, query, frag))
+ if not req_path.startswith("/"):
+ req_path = "/"+req_path
+ return req_path
+
+def request_port(request):
+ host = request.get_host()
+ i = host.find(':')
+ if i >= 0:
+ port = host[i+1:]
+ try:
+ int(port)
+ except ValueError:
+ debug("nonnumeric port: '%s'", port)
+ return None
+ else:
+ port = DEFAULT_HTTP_PORT
+ return port
+
+def request_is_unverifiable(request):
+ try:
+ return request.is_unverifiable()
+ except AttributeError:
+ if hasattr(request, "unverifiable"):
+ return request.unverifiable
+ else:
+ raise
+
+# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
+# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
+HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
+ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
+def uppercase_escaped_char(match):
+ return "%%%s" % match.group(1).upper()
+def escape_path(path):
+ """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
+ # There's no knowing what character encoding was used to create URLs
+ # containing %-escapes, but since we have to pick one to escape invalid
+ # path characters, we pick UTF-8, as recommended in the HTML 4.0
+ # specification:
+ # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
+ # And here, kind of: draft-fielding-uri-rfc2396bis-03
+ # (And in draft IRI specification: draft-duerst-iri-05)
+ # (And here, for new URI schemes: RFC 2718)
+ if isinstance(path, types.UnicodeType):
+ path = path.encode("utf-8")
+ path = urllib.quote(path, HTTP_PATH_SAFE)
+ path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
+ return path
+
+def reach(h):
+ """Return reach of host h, as defined by RFC 2965, section 1.
+
+ The reach R of a host name H is defined as follows:
+
+ * If
+
+ - H is the host domain name of a host; and,
+
+ - H has the form A.B; and
+
+ - A has no embedded (that is, interior) dots; and
+
+ - B has at least one embedded dot, or B is the string "local".
+ then the reach of H is .B.
+
+ * Otherwise, the reach of H is H.
+
+ >>> reach("www.acme.com")
+ '.acme.com'
+ >>> reach("acme.com")
+ 'acme.com'
+ >>> reach("acme.local")
+ '.local'
+
+ """
+ i = h.find(".")
+ if i >= 0:
+ #a = h[:i] # this line is only here to show what a is
+ b = h[i+1:]
+ i = b.find(".")
+ if is_HDN(h) and (i >= 0 or b == "local"):
+ return "."+b
+ return h
+
+def is_third_party(request):
+ """
+
+ RFC 2965, section 3.3.6:
+
+ An unverifiable transaction is to a third-party host if its request-
+ host U does not domain-match the reach R of the request-host O in the
+ origin transaction.
+
+ """
+ req_host = request_host_lc(request)
+ # the origin request's request-host was stuffed into request by
+ # _urllib2_support.AbstractHTTPHandler
+ return not domain_match(req_host, reach(request.origin_req_host))
+
+
+class Cookie:
+ """HTTP Cookie.
+
+ This class represents both Netscape and RFC 2965 cookies.
+
+ This is deliberately a very simple class. It just holds attributes. It's
+ possible to construct Cookie instances that don't comply with the cookie
+ standards. CookieJar.make_cookies is the factory function for Cookie
+ objects -- it deals with cookie parsing, supplying defaults, and
+ normalising to the representation used in this class. CookiePolicy is
+ responsible for checking them to see whether they should be accepted from
+ and returned to the server.
+
+ version: integer;
+ name: string;
+ value: string (may be None);
+ port: string; None indicates no attribute was supplied (eg. "Port", rather
+ than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
+ string (eg. "80,8080")
+ port_specified: boolean; true if a value was supplied with the Port
+ cookie-attribute
+ domain: string;
+ domain_specified: boolean; true if Domain was explicitly set
+ domain_initial_dot: boolean; true if Domain as set in HTTP header by server
+ started with a dot (yes, this really is necessary!)
+ path: string;
+ path_specified: boolean; true if Path was explicitly set
+ secure: boolean; true if should only be returned over secure connection
+ expires: integer; seconds since epoch (RFC 2965 cookies should calculate
+ this value from the Max-Age attribute)
+ discard: boolean, true if this is a session cookie; (if no expires value,
+ this should be true)
+ comment: string;
+ comment_url: string;
+ rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
+ Set-Cookie2:) header, but had a version cookie-attribute of 1
+ rest: mapping of other cookie-attributes
+
+ Note that the port may be present in the headers, but unspecified ("Port"
+ rather than"Port=80", for example); if this is the case, port is None.
+
+ """
+
+ def __init__(self, version, name, value,
+ port, port_specified,
+ domain, domain_specified, domain_initial_dot,
+ path, path_specified,
+ secure,
+ expires,
+ discard,
+ comment,
+ comment_url,
+ rest,
+ rfc2109=False,
+ ):
+
+ if version is not None: version = int(version)
+ if expires is not None: expires = int(expires)
+ if port is None and port_specified is True:
+ raise ValueError("if port is None, port_specified must be false")
+
+ self.version = version
+ self.name = name
+ self.value = value
+ self.port = port
+ self.port_specified = port_specified
+ # normalise case, as per RFC 2965 section 3.3.3
+ self.domain = domain.lower()
+ self.domain_specified = domain_specified
+ # Sigh. We need to know whether the domain given in the
+ # cookie-attribute had an initial dot, in order to follow RFC 2965
+ # (as clarified in draft errata). Needed for the returned $Domain
+ # value.
+ self.domain_initial_dot = domain_initial_dot
+ self.path = path
+ self.path_specified = path_specified
+ self.secure = secure
+ self.expires = expires
+ self.discard = discard
+ self.comment = comment
+ self.comment_url = comment_url
+ self.rfc2109 = rfc2109
+
+ self._rest = copy.copy(rest)
+
+ def has_nonstandard_attr(self, name):
+ return self._rest.has_key(name)
+ def get_nonstandard_attr(self, name, default=None):
+ return self._rest.get(name, default)
+ def set_nonstandard_attr(self, name, value):
+ self._rest[name] = value
+ def nonstandard_attr_keys(self):
+ return self._rest.keys()
+
+ def is_expired(self, now=None):
+ if now is None: now = time.time()
+ return (self.expires is not None) and (self.expires <= now)
+
+ def __str__(self):
+ if self.port is None: p = ""
+ else: p = ":"+self.port
+ limit = self.domain + p + self.path
+ if self.value is not None:
+ namevalue = "%s=%s" % (self.name, self.value)
+ else:
+ namevalue = self.name
+ return "<Cookie %s for %s>" % (namevalue, limit)
+
+ def __repr__(self):
+ args = []
+ for name in ["version", "name", "value",
+ "port", "port_specified",
+ "domain", "domain_specified", "domain_initial_dot",
+ "path", "path_specified",
+ "secure", "expires", "discard", "comment", "comment_url",
+ ]:
+ attr = getattr(self, name)
+ args.append("%s=%s" % (name, repr(attr)))
+ args.append("rest=%s" % repr(self._rest))
+ args.append("rfc2109=%s" % repr(self.rfc2109))
+ return "Cookie(%s)" % ", ".join(args)
+
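+# A minimal construction sketch (illustration only -- real code should
+# normally let CookieJar.make_cookies build Cookie instances from a
+# response):
+#
+#     c = Cookie(version=0, name="session", value="abc123",
+#                port=None, port_specified=False,
+#                domain=".example.com", domain_specified=True,
+#                domain_initial_dot=True,
+#                path="/", path_specified=True,
+#                secure=False, expires=None, discard=True,
+#                comment=None, comment_url=None, rest={})
+#     assert not c.is_expired()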
+
+class CookiePolicy:
+ """Defines which cookies get accepted from and returned to server.
+
+ May also modify cookies.
+
+ The subclass DefaultCookiePolicy defines the standard rules for Netscape
+ and RFC 2965 cookies -- override that if you want a customised policy.
+
+ As well as implementing set_ok and return_ok, implementations of this
+ interface must also supply the following attributes, indicating which
+ protocols should be used, and how. These can be read and set at any time,
+ though whether that makes complete sense from the protocol point of view is
+ doubtful.
+
+ Public attributes:
+
+ netscape: implement netscape protocol
+ rfc2965: implement RFC 2965 protocol
+ rfc2109_as_netscape:
+        WARNING: This argument will change or go away if it is not accepted into
+ the Python standard library in this form!
+ If true, treat RFC 2109 cookies as though they were Netscape cookies. The
+ default is for this attribute to be None, which means treat 2109 cookies
+ as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
+ by default), and as Netscape cookies otherwise.
+ hide_cookie2: don't add Cookie2 header to requests (the presence of
+ this header indicates to the server that we understand RFC 2965
+ cookies)
+
+ """
+ def set_ok(self, cookie, request):
+ """Return true if (and only if) cookie should be accepted from server.
+
+ Currently, pre-expired cookies never get this far -- the CookieJar
+ class deletes such cookies itself.
+
+ cookie: mechanize.Cookie object
+ request: object implementing the interface defined by
+ CookieJar.extract_cookies.__doc__
+
+ """
+ raise NotImplementedError()
+
+ def return_ok(self, cookie, request):
+ """Return true if (and only if) cookie should be returned to server.
+
+ cookie: mechanize.Cookie object
+ request: object implementing the interface defined by
+ CookieJar.add_cookie_header.__doc__
+
+ """
+ raise NotImplementedError()
+
+ def domain_return_ok(self, domain, request):
+ """Return false if cookies should not be returned, given cookie domain.
+
+ This is here as an optimization, to remove the need for checking every
+ cookie with a particular domain (which may involve reading many files).
+ The default implementations of domain_return_ok and path_return_ok
+ (return True) leave all the work to return_ok.
+
+ If domain_return_ok returns true for the cookie domain, path_return_ok
+ is called for the cookie path. Otherwise, path_return_ok and return_ok
+ are never called for that cookie domain. If path_return_ok returns
+ true, return_ok is called with the Cookie object itself for a full
+ check. Otherwise, return_ok is never called for that cookie path.
+
+ Note that domain_return_ok is called for every *cookie* domain, not
+ just for the *request* domain. For example, the function might be
+ called with both ".acme.com" and "www.acme.com" if the request domain
+ is "www.acme.com". The same goes for path_return_ok.
+
+ For argument documentation, see the docstring for return_ok.
+
+ """
+ return True
+
+ def path_return_ok(self, path, request):
+ """Return false if cookies should not be returned, given cookie path.
+
+ See the docstring for domain_return_ok.
+
+ """
+ return True
+
+
+class DefaultCookiePolicy(CookiePolicy):
+ """Implements the standard rules for accepting and returning cookies.
+
+ Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
+ switched off by default.
+
+ The easiest way to provide your own policy is to override this class and
+    call its methods in your overridden implementations before adding your own
+ additional checks.
+
+ import mechanize
+ class MyCookiePolicy(mechanize.DefaultCookiePolicy):
+ def set_ok(self, cookie, request):
+ if not mechanize.DefaultCookiePolicy.set_ok(
+ self, cookie, request):
+ return False
+ if i_dont_want_to_store_this_cookie():
+ return False
+ return True
+
+ In addition to the features required to implement the CookiePolicy
+ interface, this class allows you to block and allow domains from setting
+ and receiving cookies. There are also some strictness switches that allow
+ you to tighten up the rather loose Netscape protocol rules a little bit (at
+ the cost of blocking some benign cookies).
+
+ A domain blacklist and whitelist is provided (both off by default). Only
+ domains not in the blacklist and present in the whitelist (if the whitelist
+ is active) participate in cookie setting and returning. Use the
+ blocked_domains constructor argument, and blocked_domains and
+ set_blocked_domains methods (and the corresponding argument and methods for
+ allowed_domains). If you set a whitelist, you can turn it off again by
+ setting it to None.
+
+ Domains in block or allow lists that do not start with a dot must
+ string-compare equal. For example, "acme.com" matches a blacklist entry of
+ "acme.com", but "www.acme.com" does not. Domains that do start with a dot
+ are matched by more specific domains too. For example, both "www.acme.com"
+ and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
+ not). IP addresses are an exception, and must match exactly. For example,
+    if blocked_domains contains "192.168.1.2" and ".168.1.2", 192.168.1.2 is
+ blocked, but 193.168.1.2 is not.
+
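+    A minimal illustrative setup (the domain name is hypothetical):
+
+    policy = DefaultCookiePolicy(blocked_domains=[".doubleclick.net"])
+    jar = CookieJar(policy=policy)
+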
+ Additional Public Attributes:
+
+ General strictness switches
+
+ strict_domain: don't allow sites to set two-component domains with
+        country-code top-level domains like .co.uk, .gov.uk, .co.nz, etc.
+ This is far from perfect and isn't guaranteed to work!
+
+ RFC 2965 protocol strictness switches
+
+ strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
+ transactions (usually, an unverifiable transaction is one resulting from
+ a redirect or an image hosted on another site); if this is false, cookies
+ are NEVER blocked on the basis of verifiability
+
+ Netscape protocol strictness switches
+
+ strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
+ even to Netscape cookies
+ strict_ns_domain: flags indicating how strict to be with domain-matching
+ rules for Netscape cookies:
+ DomainStrictNoDots: when setting cookies, host prefix must not contain a
+ dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
+ www.foo contains a dot)
+ DomainStrictNonDomain: cookies that did not explicitly specify a Domain
+ cookie-attribute can only be returned to a domain that string-compares
+ equal to the domain that set the cookie (eg. rockets.acme.com won't
+ be returned cookies from acme.com that had no Domain cookie-attribute)
+ DomainRFC2965Match: when setting cookies, require a full RFC 2965
+ domain-match
+ DomainLiberal and DomainStrict are the most useful combinations of the
+ above flags, for convenience
+ strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
+ have names starting with '$'
+ strict_ns_set_path: don't allow setting cookies whose path doesn't
+ path-match request URI
+
+ """
+
+ DomainStrictNoDots = 1
+ DomainStrictNonDomain = 2
+ DomainRFC2965Match = 4
+
+ DomainLiberal = 0
+ DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
+
+ def __init__(self,
+ blocked_domains=None, allowed_domains=None,
+ netscape=True, rfc2965=False,
+                 # WARNING: this argument will change or go away if it is not
+ # accepted into the Python standard library in this form!
+ # default, ie. treat 2109 as netscape iff not rfc2965
+ rfc2109_as_netscape=None,
+ hide_cookie2=False,
+ strict_domain=False,
+ strict_rfc2965_unverifiable=True,
+ strict_ns_unverifiable=False,
+ strict_ns_domain=DomainLiberal,
+ strict_ns_set_initial_dollar=False,
+ strict_ns_set_path=False,
+ ):
+ """
+ Constructor arguments should be used as keyword arguments only.
+
+ blocked_domains: sequence of domain names that we never accept cookies
+ from, nor return cookies to
+ allowed_domains: if not None, this is a sequence of the only domains
+ for which we accept and return cookies
+
+ For other arguments, see CookiePolicy.__doc__ and
+        DefaultCookiePolicy.__doc__.
+
+ """
+ self.netscape = netscape
+ self.rfc2965 = rfc2965
+ self.rfc2109_as_netscape = rfc2109_as_netscape
+ self.hide_cookie2 = hide_cookie2
+ self.strict_domain = strict_domain
+ self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
+ self.strict_ns_unverifiable = strict_ns_unverifiable
+ self.strict_ns_domain = strict_ns_domain
+ self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
+ self.strict_ns_set_path = strict_ns_set_path
+
+ if blocked_domains is not None:
+ self._blocked_domains = tuple(blocked_domains)
+ else:
+ self._blocked_domains = ()
+
+ if allowed_domains is not None:
+ allowed_domains = tuple(allowed_domains)
+ self._allowed_domains = allowed_domains
+
+ def blocked_domains(self):
+ """Return the sequence of blocked domains (as a tuple)."""
+ return self._blocked_domains
+ def set_blocked_domains(self, blocked_domains):
+ """Set the sequence of blocked domains."""
+ self._blocked_domains = tuple(blocked_domains)
+
+ def is_blocked(self, domain):
+ for blocked_domain in self._blocked_domains:
+ if user_domain_match(domain, blocked_domain):
+ return True
+ return False
+
+ def allowed_domains(self):
+ """Return None, or the sequence of allowed domains (as a tuple)."""
+ return self._allowed_domains
+ def set_allowed_domains(self, allowed_domains):
+ """Set the sequence of allowed domains, or None."""
+ if allowed_domains is not None:
+ allowed_domains = tuple(allowed_domains)
+ self._allowed_domains = allowed_domains
+
+ def is_not_allowed(self, domain):
+ if self._allowed_domains is None:
+ return False
+ for allowed_domain in self._allowed_domains:
+ if user_domain_match(domain, allowed_domain):
+ return False
+ return True
+
+ def set_ok(self, cookie, request):
+ """
+ If you override set_ok, be sure to call this method. If it returns
+ false, so should your subclass (assuming your subclass wants to be more
+ strict about which cookies to accept).
+
+ """
+ debug(" - checking cookie %s", cookie)
+
+ assert cookie.name is not None
+
+ for n in "version", "verifiability", "name", "path", "domain", "port":
+ fn_name = "set_ok_"+n
+ fn = getattr(self, fn_name)
+ if not fn(cookie, request):
+ return False
+
+ return True
+
+ def set_ok_version(self, cookie, request):
+ if cookie.version is None:
+ # Version is always set to 0 by parse_ns_headers if it's a Netscape
+ # cookie, so this must be an invalid RFC 2965 cookie.
+ debug(" Set-Cookie2 without version attribute (%s)", cookie)
+ return False
+ if cookie.version > 0 and not self.rfc2965:
+ debug(" RFC 2965 cookies are switched off")
+ return False
+ elif cookie.version == 0 and not self.netscape:
+ debug(" Netscape cookies are switched off")
+ return False
+ return True
+
+ def set_ok_verifiability(self, cookie, request):
+ if request_is_unverifiable(request) and is_third_party(request):
+ if cookie.version > 0 and self.strict_rfc2965_unverifiable:
+ debug(" third-party RFC 2965 cookie during "
+ "unverifiable transaction")
+ return False
+ elif cookie.version == 0 and self.strict_ns_unverifiable:
+ debug(" third-party Netscape cookie during "
+ "unverifiable transaction")
+ return False
+ return True
+
+ def set_ok_name(self, cookie, request):
+ # Try and stop servers setting V0 cookies designed to hack other
+ # servers that know both V0 and V1 protocols.
+ if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
+ cookie.name.startswith("$")):
+ debug(" illegal name (starts with '$'): '%s'", cookie.name)
+ return False
+ return True
+
+ def set_ok_path(self, cookie, request):
+ if cookie.path_specified:
+ req_path = request_path(request)
+ if ((cookie.version > 0 or
+ (cookie.version == 0 and self.strict_ns_set_path)) and
+ not req_path.startswith(cookie.path)):
+ debug(" path attribute %s is not a prefix of request "
+ "path %s", cookie.path, req_path)
+ return False
+ return True
+
+ def set_ok_countrycode_domain(self, cookie, request):
+ """Return False if explicit cookie domain is not acceptable.
+
+ Called by set_ok_domain, for convenience of overriding by
+ subclasses.
+
+ """
+ if cookie.domain_specified and self.strict_domain:
+ domain = cookie.domain
+ # since domain was specified, we know that:
+ assert domain.startswith(".")
+ if domain.count(".") == 2:
+ # domain like .foo.bar
+ i = domain.rfind(".")
+ tld = domain[i+1:]
+ sld = domain[1:i]
+ if (sld.lower() in [
+ "co", "ac",
+ "com", "edu", "org", "net", "gov", "mil", "int",
+ "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
+ "museum", "name", "pro", "travel",
+ ] and
+ len(tld) == 2):
+ # domain like .co.uk
+ return False
+ return True
+
+ def set_ok_domain(self, cookie, request):
+ if self.is_blocked(cookie.domain):
+ debug(" domain %s is in user block-list", cookie.domain)
+ return False
+ if self.is_not_allowed(cookie.domain):
+ debug(" domain %s is not in user allow-list", cookie.domain)
+ return False
+ if not self.set_ok_countrycode_domain(cookie, request):
+ debug(" country-code second level domain %s", cookie.domain)
+ return False
+ if cookie.domain_specified:
+ req_host, erhn = eff_request_host_lc(request)
+ domain = cookie.domain
+ if domain.startswith("."):
+ undotted_domain = domain[1:]
+ else:
+ undotted_domain = domain
+ embedded_dots = (undotted_domain.find(".") >= 0)
+ if not embedded_dots and domain != ".local":
+ debug(" non-local domain %s contains no embedded dot",
+ domain)
+ return False
+ if cookie.version == 0:
+ if (not erhn.endswith(domain) and
+ (not erhn.startswith(".") and
+ not ("."+erhn).endswith(domain))):
+ debug(" effective request-host %s (even with added "
+ "initial dot) does not end end with %s",
+ erhn, domain)
+ return False
+ if (cookie.version > 0 or
+ (self.strict_ns_domain & self.DomainRFC2965Match)):
+ if not domain_match(erhn, domain):
+ debug(" effective request-host %s does not domain-match "
+ "%s", erhn, domain)
+ return False
+ if (cookie.version > 0 or
+ (self.strict_ns_domain & self.DomainStrictNoDots)):
+ host_prefix = req_host[:-len(domain)]
+ if (host_prefix.find(".") >= 0 and
+ not IPV4_RE.search(req_host)):
+ debug(" host prefix %s for domain %s contains a dot",
+ host_prefix, domain)
+ return False
+ return True
+
+ def set_ok_port(self, cookie, request):
+ if cookie.port_specified:
+ req_port = request_port(request)
+ if req_port is None:
+ req_port = "80"
+ else:
+ req_port = str(req_port)
+ for p in cookie.port.split(","):
+ try:
+ int(p)
+ except ValueError:
+ debug(" bad port %s (not numeric)", p)
+ return False
+ if p == req_port:
+ break
+ else:
+ debug(" request port (%s) not found in %s",
+ req_port, cookie.port)
+ return False
+ return True
+
+ def return_ok(self, cookie, request):
+ """
+ If you override return_ok, be sure to call this method. If it returns
+ false, so should your subclass (assuming your subclass wants to be more
+ strict about which cookies to return).
+
+ """
+ # Path has already been checked by path_return_ok, and domain blocking
+ # done by domain_return_ok.
+ debug(" - checking cookie %s", cookie)
+
+ for n in ("version", "verifiability", "secure", "expires", "port",
+ "domain"):
+ fn_name = "return_ok_"+n
+ fn = getattr(self, fn_name)
+ if not fn(cookie, request):
+ return False
+ return True
+
+ def return_ok_version(self, cookie, request):
+ if cookie.version > 0 and not self.rfc2965:
+ debug(" RFC 2965 cookies are switched off")
+ return False
+ elif cookie.version == 0 and not self.netscape:
+ debug(" Netscape cookies are switched off")
+ return False
+ return True
+
+ def return_ok_verifiability(self, cookie, request):
+ if request_is_unverifiable(request) and is_third_party(request):
+ if cookie.version > 0 and self.strict_rfc2965_unverifiable:
+ debug(" third-party RFC 2965 cookie during unverifiable "
+ "transaction")
+ return False
+ elif cookie.version == 0 and self.strict_ns_unverifiable:
+ debug(" third-party Netscape cookie during unverifiable "
+ "transaction")
+ return False
+ return True
+
+ def return_ok_secure(self, cookie, request):
+ if cookie.secure and request.get_type() != "https":
+ debug(" secure cookie with non-secure request")
+ return False
+ return True
+
+ def return_ok_expires(self, cookie, request):
+ if cookie.is_expired(self._now):
+ debug(" cookie expired")
+ return False
+ return True
+
+ def return_ok_port(self, cookie, request):
+ if cookie.port:
+ req_port = request_port(request)
+ if req_port is None:
+ req_port = "80"
+ for p in cookie.port.split(","):
+ if p == req_port:
+ break
+ else:
+ debug(" request port %s does not match cookie port %s",
+ req_port, cookie.port)
+ return False
+ return True
+
+ def return_ok_domain(self, cookie, request):
+ req_host, erhn = eff_request_host_lc(request)
+ domain = cookie.domain
+
+ # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
+ if (cookie.version == 0 and
+ (self.strict_ns_domain & self.DomainStrictNonDomain) and
+ not cookie.domain_specified and domain != erhn):
+ debug(" cookie with unspecified domain does not string-compare "
+ "equal to request domain")
+ return False
+
+ if cookie.version > 0 and not domain_match(erhn, domain):
+ debug(" effective request-host name %s does not domain-match "
+ "RFC 2965 cookie domain %s", erhn, domain)
+ return False
+ if cookie.version == 0 and not ("."+erhn).endswith(domain):
+ debug(" request-host %s does not match Netscape cookie domain "
+ "%s", req_host, domain)
+ return False
+ return True
+
+ def domain_return_ok(self, domain, request):
+ # Liberal check of domain. This is here as an optimization to avoid
+ # having to load lots of MSIE cookie files unless necessary.
+
+ # Munge req_host and erhn to always start with a dot, so as to err on
+ # the side of letting cookies through.
+ dotted_req_host, dotted_erhn = eff_request_host_lc(request)
+ if not dotted_req_host.startswith("."):
+ dotted_req_host = "."+dotted_req_host
+ if not dotted_erhn.startswith("."):
+ dotted_erhn = "."+dotted_erhn
+ if not (dotted_req_host.endswith(domain) or
+ dotted_erhn.endswith(domain)):
+ #debug(" request domain %s does not match cookie domain %s",
+ # req_host, domain)
+ return False
+
+ if self.is_blocked(domain):
+ debug(" domain %s is in user block-list", domain)
+ return False
+ if self.is_not_allowed(domain):
+ debug(" domain %s is not in user allow-list", domain)
+ return False
+
+ return True
+
+ def path_return_ok(self, path, request):
+ debug("- checking cookie path=%s", path)
+ req_path = request_path(request)
+ if not req_path.startswith(path):
+ debug(" %s does not path-match %s", req_path, path)
+ return False
+ return True
+
+
+def vals_sorted_by_key(adict):
+ keys = adict.keys()
+ keys.sort()
+ return map(adict.get, keys)
+
+class MappingIterator:
+ """Iterates over nested mapping, depth-first, in sorted order by key."""
+ def __init__(self, mapping):
+ self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack
+
+ def __iter__(self): return self
+
+ def next(self):
+ # this is hairy because of lack of generators
+ while 1:
+ try:
+ vals, i, prev_item = self._s.pop()
+ except IndexError:
+ raise StopIteration()
+ if i < len(vals):
+ item = vals[i]
+ i = i + 1
+ self._s.append((vals, i, prev_item))
+ try:
+ item.items
+ except AttributeError:
+ # non-mapping
+ break
+ else:
+ # mapping
+ self._s.append((vals_sorted_by_key(item), 0, item))
+ continue
+ return item
+
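+# Illustrative traversal (the nesting shape mirrors CookieJar._cookies,
+# i.e. domain -> path -> name -> Cookie):
+#
+#     list(MappingIterator({"b": {"x": 1}, "a": 2}))  # -> [2, 1]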
+
+# Used as second parameter to dict.get method, to distinguish absent
+# dict key from one with a None value.
+class Absent: pass
+
+class CookieJar:
+ """Collection of HTTP cookies.
+
+ You may not need to know about this class: try mechanize.urlopen().
+
+ The major methods are extract_cookies and add_cookie_header; these are all
+ you are likely to need.
+
+ CookieJar supports the iterator protocol:
+
+ for cookie in cookiejar:
+ # do something with cookie
+
+ Methods:
+
+ add_cookie_header(request)
+ extract_cookies(response, request)
+ get_policy()
+ set_policy(policy)
+ cookies_for_request(request)
+ make_cookies(response, request)
+ set_cookie_if_ok(cookie, request)
+ set_cookie(cookie)
+ clear_session_cookies()
+ clear_expired_cookies()
+ clear(domain=None, path=None, name=None)
+
+ Public attributes
+
+ policy: CookiePolicy object
+
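+    A typical round-trip, sketched with urllib2-style objects:
+
+      import urllib2
+      jar = CookieJar()
+      request = urllib2.Request("http://example.com/")
+      response = urllib2.urlopen(request)
+      jar.extract_cookies(response, request)  # remember any cookies set
+      jar.add_cookie_header(request)          # attach them to the request
+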
+ """
+
+ non_word_re = re.compile(r"\W")
+ quote_re = re.compile(r"([\"\\])")
+ strict_domain_re = re.compile(r"\.?[^.]*")
+ domain_re = re.compile(r"[^.]*")
+ dots_re = re.compile(r"^\.+")
+
+ def __init__(self, policy=None):
+ """
+ See CookieJar.__doc__ for argument documentation.
+
+ """
+ if policy is None:
+ policy = DefaultCookiePolicy()
+ self._policy = policy
+
+ self._cookies = {}
+
+ # for __getitem__ iteration in pre-2.2 Pythons
+ self._prev_getitem_index = 0
+
+ def get_policy(self):
+ return self._policy
+
+ def set_policy(self, policy):
+ self._policy = policy
+
+ def _cookies_for_domain(self, domain, request):
+ cookies = []
+ if not self._policy.domain_return_ok(domain, request):
+ return []
+ debug("Checking %s for cookies to return", domain)
+ cookies_by_path = self._cookies[domain]
+ for path in cookies_by_path.keys():
+ if not self._policy.path_return_ok(path, request):
+ continue
+ cookies_by_name = cookies_by_path[path]
+ for cookie in cookies_by_name.values():
+ if not self._policy.return_ok(cookie, request):
+ debug(" not returning cookie")
+ continue
+ debug(" it's a match")
+ cookies.append(cookie)
+ return cookies
+
+ def cookies_for_request(self, request):
+ """Return a list of cookies to be returned to server.
+
+ The returned list of cookie instances is sorted in the order they
+ should appear in the Cookie: header for return to the server.
+
+ See add_cookie_header.__doc__ for the interface required of the
+ request argument.
+
+ New in version 0.1.10
+
+ """
+ self._policy._now = self._now = int(time.time())
+ cookies = self._cookies_for_request(request)
+ # add cookies in order of most specific (i.e. longest) path first
+ def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
+ cookies.sort(decreasing_size)
+ return cookies
+
+ def _cookies_for_request(self, request):
+ """Return a list of cookies to be returned to server."""
+ # this method still exists (alongside cookies_for_request) because it
+ # is part of an implied protected interface for subclasses of cookiejar
+ # XXX document that implied interface, or provide another way of
+ # implementing cookiejars than subclassing
+ cookies = []
+ for domain in self._cookies.keys():
+ cookies.extend(self._cookies_for_domain(domain, request))
+ return cookies
+
+ def _cookie_attrs(self, cookies):
+ """Return a list of cookie-attributes to be returned to server.
+
+ The $Version attribute is also added when appropriate (currently only
+ once per request).
+
+ >>> jar = CookieJar()
+ >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False,
+ ... "example.com", False, False,
+ ... "/", False, False, None, True,
+ ... None, None, {})
+ >>> jar._cookie_attrs([ns_cookie])
+ ['foo="bar"']
+ >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False,
+ ... ".example.com", True, False,
+ ... "/", False, False, None, True,
+ ... None, None, {})
+ >>> jar._cookie_attrs([rfc2965_cookie])
+ ['$Version=1', 'foo=bar', '$Domain="example.com"']
+
+ """
+ version_set = False
+
+ attrs = []
+ for cookie in cookies:
+ # set version of Cookie header
+ # XXX
+ # What should it be if multiple matching Set-Cookie headers have
+ # different versions themselves?
+ # Answer: there is no answer; was supposed to be settled by
+ # RFC 2965 errata, but that may never appear...
+ version = cookie.version
+ if not version_set:
+ version_set = True
+ if version > 0:
+ attrs.append("$Version=%s" % version)
+
+ # quote cookie value if necessary
+ # (not for Netscape protocol, which already has any quotes
+ # intact, due to the poorly-specified Netscape Cookie: syntax)
+ if ((cookie.value is not None) and
+ self.non_word_re.search(cookie.value) and version > 0):
+ value = self.quote_re.sub(r"\\\1", cookie.value)
+ else:
+ value = cookie.value
+
+ # add cookie-attributes to be returned in Cookie header
+ if cookie.value is None:
+ attrs.append(cookie.name)
+ else:
+ attrs.append("%s=%s" % (cookie.name, value))
+ if version > 0:
+ if cookie.path_specified:
+ attrs.append('$Path="%s"' % cookie.path)
+ if cookie.domain.startswith("."):
+ domain = cookie.domain
+ if (not cookie.domain_initial_dot and
+ domain.startswith(".")):
+ domain = domain[1:]
+ attrs.append('$Domain="%s"' % domain)
+ if cookie.port is not None:
+ p = "$Port"
+ if cookie.port_specified:
+ p = p + ('="%s"' % cookie.port)
+ attrs.append(p)
+
+ return attrs
+
+ def add_cookie_header(self, request):
+ """Add correct Cookie: header to request (urllib2.Request object).
+
+ The Cookie2 header is also added unless policy.hide_cookie2 is true.
+
+ The request object (usually a urllib2.Request instance) must support
+ the methods get_full_url, get_host, is_unverifiable, get_type,
+ has_header, get_header, header_items and add_unredirected_header, as
+ documented by urllib2, and the port attribute (the port number).
+ Actually, RequestUpgradeProcessor will automatically upgrade your
+ Request object to one with has_header, get_header, header_items and
+ add_unredirected_header, if it lacks those methods, for compatibility
+ with pre-2.4 versions of urllib2.
+
+ """
+ debug("add_cookie_header")
+ cookies = self.cookies_for_request(request)
+
+ attrs = self._cookie_attrs(cookies)
+ if attrs:
+ if not request.has_header("Cookie"):
+ request.add_unredirected_header("Cookie", "; ".join(attrs))
+
+ # if necessary, advertise that we know RFC 2965
+ if self._policy.rfc2965 and not self._policy.hide_cookie2:
+ for cookie in cookies:
+ if cookie.version != 1 and not request.has_header("Cookie2"):
+ request.add_unredirected_header("Cookie2", '$Version="1"')
+ break
+
+ self.clear_expired_cookies()
+
+ def _normalized_cookie_tuples(self, attrs_set):
+ """Return list of tuples containing normalised cookie information.
+
+ attrs_set is the list of lists of key,value pairs extracted from
+ the Set-Cookie or Set-Cookie2 headers.
+
+ Tuples are name, value, standard, rest, where name and value are the
+ cookie name and value, standard is a dictionary containing the standard
+ cookie-attributes (discard, secure, version, expires or max-age,
+ domain, path and port) and rest is a dictionary containing the rest of
+ the cookie-attributes.
+
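+        For example (illustrative), the parsed header attributes
+        [("foo", "bar"), ("Version", "1"), ("Port", None)] yield the tuple:
+
+            ("foo", "bar", {"version": "1", "port": None}, {})
+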
+ """
+ cookie_tuples = []
+
+ boolean_attrs = "discard", "secure"
+ value_attrs = ("version",
+ "expires", "max-age",
+ "domain", "path", "port",
+ "comment", "commenturl")
+
+ for cookie_attrs in attrs_set:
+ name, value = cookie_attrs[0]
+
+ # Build dictionary of standard cookie-attributes (standard) and
+ # dictionary of other cookie-attributes (rest).
+
+ # Note: expiry time is normalised to seconds since epoch. V0
+ # cookies should have the Expires cookie-attribute, and V1 cookies
+ # should have Max-Age, but since V1 includes RFC 2109 cookies (and
+ # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
+ # accept either (but prefer Max-Age).
+ max_age_set = False
+
+ bad_cookie = False
+
+ standard = {}
+ rest = {}
+ for k, v in cookie_attrs[1:]:
+ lc = k.lower()
+ # don't lose case distinction for unknown fields
+ if lc in value_attrs or lc in boolean_attrs:
+ k = lc
+ if k in boolean_attrs and v is None:
+ # boolean cookie-attribute is present, but has no value
+ # (like "discard", rather than "port=80")
+ v = True
+ if standard.has_key(k):
+ # only first value is significant
+ continue
+ if k == "domain":
+ if v is None:
+ debug(" missing value for domain attribute")
+ bad_cookie = True
+ break
+ # RFC 2965 section 3.3.3
+ v = v.lower()
+ if k == "expires":
+ if max_age_set:
+ # Prefer max-age to expires (like Mozilla)
+ continue
+ if v is None:
+ debug(" missing or invalid value for expires "
+ "attribute: treating as session cookie")
+ continue
+ if k == "max-age":
+ max_age_set = True
+ if v is None:
+ debug(" missing value for max-age attribute")
+ bad_cookie = True
+ break
+ try:
+ v = int(v)
+ except ValueError:
+ debug(" missing or invalid (non-numeric) value for "
+ "max-age attribute")
+ bad_cookie = True
+ break
+ # convert RFC 2965 Max-Age to seconds since epoch
+ # XXX Strictly you're supposed to follow RFC 2616
+                    # age-calculation rules. Remember that zero Max-Age is
+                    # a request to discard (old and new) cookie, though.
+ k = "expires"
+ v = self._now + v
+ if (k in value_attrs) or (k in boolean_attrs):
+ if (v is None and
+ k not in ["port", "comment", "commenturl"]):
+ debug(" missing value for %s attribute" % k)
+ bad_cookie = True
+ break
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ if bad_cookie:
+ continue
+
+ cookie_tuples.append((name, value, standard, rest))
+
+ return cookie_tuples
+
+ def _cookie_from_cookie_tuple(self, tup, request):
+ # standard is dict of standard cookie-attributes, rest is dict of the
+ # rest of them
+ name, value, standard, rest = tup
+
+ domain = standard.get("domain", Absent)
+ path = standard.get("path", Absent)
+ port = standard.get("port", Absent)
+ expires = standard.get("expires", Absent)
+
+ # set the easy defaults
+ version = standard.get("version", None)
+ if version is not None:
+ try:
+ version = int(version)
+ except ValueError:
+ return None # invalid version, ignore cookie
+ secure = standard.get("secure", False)
+ # (discard is also set if expires is Absent)
+ discard = standard.get("discard", False)
+ comment = standard.get("comment", None)
+ comment_url = standard.get("commenturl", None)
+
+ # set default path
+ if path is not Absent and path != "":
+ path_specified = True
+ path = escape_path(path)
+ else:
+ path_specified = False
+ path = request_path(request)
+ i = path.rfind("/")
+ if i != -1:
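+                # e.g. a request path of "/foo/bar" gives default cookie
+                # path "/foo" for Netscape (v0) cookies and "/foo/" otherwise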
+ if version == 0:
+ # Netscape spec parts company from reality here
+ path = path[:i]
+ else:
+ path = path[:i+1]
+ if len(path) == 0: path = "/"
+
+ # set default domain
+ domain_specified = domain is not Absent
+ # but first we have to remember whether it starts with a dot
+ domain_initial_dot = False
+ if domain_specified:
+ domain_initial_dot = bool(domain.startswith("."))
+ if domain is Absent:
+ req_host, erhn = eff_request_host_lc(request)
+ domain = erhn
+ elif not domain.startswith("."):
+ domain = "."+domain
+
+ # set default port
+ port_specified = False
+ if port is not Absent:
+ if port is None:
+ # Port attr present, but has no value: default to request port.
+ # Cookie should then only be sent back on that port.
+ port = request_port(request)
+ else:
+ port_specified = True
+ port = re.sub(r"\s+", "", port)
+ else:
+ # No port attr present. Cookie can be sent back on any port.
+ port = None
+
+ # set default expires and discard
+ if expires is Absent:
+ expires = None
+ discard = True
+
+ return Cookie(version,
+ name, value,
+ port, port_specified,
+ domain, domain_specified, domain_initial_dot,
+ path, path_specified,
+ secure,
+ expires,
+ discard,
+ comment,
+ comment_url,
+ rest)
+
+ def _cookies_from_attrs_set(self, attrs_set, request):
+ cookie_tuples = self._normalized_cookie_tuples(attrs_set)
+
+ cookies = []
+ for tup in cookie_tuples:
+ cookie = self._cookie_from_cookie_tuple(tup, request)
+ if cookie: cookies.append(cookie)
+ return cookies
+
+ def _process_rfc2109_cookies(self, cookies):
+ if self._policy.rfc2109_as_netscape is None:
+ rfc2109_as_netscape = not self._policy.rfc2965
+ else:
+ rfc2109_as_netscape = self._policy.rfc2109_as_netscape
+ for cookie in cookies:
+ if cookie.version == 1:
+ cookie.rfc2109 = True
+ if rfc2109_as_netscape:
+ # treat 2109 cookies as Netscape cookies rather than
+ # as RFC2965 cookies
+ cookie.version = 0
+
+ def _make_cookies(self, response, request):
+ # get cookie-attributes for RFC 2965 and Netscape protocols
+ headers = response.info()
+ rfc2965_hdrs = headers.getheaders("Set-Cookie2")
+ ns_hdrs = headers.getheaders("Set-Cookie")
+
+ rfc2965 = self._policy.rfc2965
+ netscape = self._policy.netscape
+
+ if ((not rfc2965_hdrs and not ns_hdrs) or
+ (not ns_hdrs and not rfc2965) or
+ (not rfc2965_hdrs and not netscape) or
+ (not netscape and not rfc2965)):
+ return [] # no relevant cookie headers: quick exit
+
+ try:
+ cookies = self._cookies_from_attrs_set(
+ split_header_words(rfc2965_hdrs), request)
+ except:
+ reraise_unmasked_exceptions()
+ cookies = []
+
+ if ns_hdrs and netscape:
+ try:
+ # RFC 2109 and Netscape cookies
+ ns_cookies = self._cookies_from_attrs_set(
+ parse_ns_headers(ns_hdrs), request)
+ except:
+ reraise_unmasked_exceptions()
+ ns_cookies = []
+ self._process_rfc2109_cookies(ns_cookies)
+
+ # Look for Netscape cookies (from Set-Cookie headers) that match
+ # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
+ # For each match, keep the RFC 2965 cookie and ignore the Netscape
+ # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
+ # bundled in with the Netscape cookies for this purpose, which is
+ # reasonable behaviour.
+ if rfc2965:
+ lookup = {}
+ for cookie in cookies:
+ lookup[(cookie.domain, cookie.path, cookie.name)] = None
+
+ def no_matching_rfc2965(ns_cookie, lookup=lookup):
+ key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
+ return not lookup.has_key(key)
+ ns_cookies = filter(no_matching_rfc2965, ns_cookies)
+
+ if ns_cookies:
+ cookies.extend(ns_cookies)
+
+ return cookies
+
+ def make_cookies(self, response, request):
+ """Return sequence of Cookie objects extracted from response object.
+
+ See extract_cookies.__doc__ for the interface required of the
+ response and request arguments.
+
+ """
+ self._policy._now = self._now = int(time.time())
+ return [cookie for cookie in self._make_cookies(response, request)
+ if cookie.expires is None or not cookie.expires <= self._now]
+
+ def set_cookie_if_ok(self, cookie, request):
+ """Set a cookie if policy says it's OK to do so.
+
+ cookie: mechanize.Cookie instance
+ request: see extract_cookies.__doc__ for the required interface
+
+ """
+ self._policy._now = self._now = int(time.time())
+
+ if self._policy.set_ok(cookie, request):
+ self.set_cookie(cookie)
+
+ def set_cookie(self, cookie):
+ """Set a cookie, without checking whether or not it should be set.
+
+ cookie: mechanize.Cookie instance
+ """
+ c = self._cookies
+ if not c.has_key(cookie.domain): c[cookie.domain] = {}
+ c2 = c[cookie.domain]
+ if not c2.has_key(cookie.path): c2[cookie.path] = {}
+ c3 = c2[cookie.path]
+ c3[cookie.name] = cookie
+
+ def extract_cookies(self, response, request):
+ """Extract cookies from response, where allowable given the request.
+
+ Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
+ object passed as argument. Any of these headers that are found are
+ used to update the state of the object (subject to the policy.set_ok
+ method's approval).
+
+        The response object (usually the result of a call to
+ mechanize.urlopen, or similar) should support an info method, which
+ returns a mimetools.Message object (in fact, the 'mimetools.Message
+ object' may be any object that provides a getheaders method).
+
+ The request object (usually a urllib2.Request instance) must support
+ the methods get_full_url, get_type, get_host, and is_unverifiable, as
+ documented by urllib2, and the port attribute (the port number). The
+ request is used to set default values for cookie-attributes as well as
+ for checking that the cookie is OK to be set.
+
+ """
+ debug("extract_cookies: %s", response.info())
+ self._policy._now = self._now = int(time.time())
+
+ for cookie in self._make_cookies(response, request):
+ if cookie.expires is not None and cookie.expires <= self._now:
+                # An expiry date in the past is a request to delete the
+                # cookie. This can't be done in DefaultCookiePolicy,
+                # because cookies can't be deleted there.
+ try:
+ self.clear(cookie.domain, cookie.path, cookie.name)
+ except KeyError:
+ pass
+ debug("Expiring cookie, domain='%s', path='%s', name='%s'",
+ cookie.domain, cookie.path, cookie.name)
+ elif self._policy.set_ok(cookie, request):
+ debug(" setting cookie: %s", cookie)
+ self.set_cookie(cookie)
+
+ def clear(self, domain=None, path=None, name=None):
+ """Clear some cookies.
+
+ Invoking this method without arguments will clear all cookies. If
+ given a single argument, only cookies belonging to that domain will be
+ removed. If given two arguments, cookies belonging to the specified
+ path within that domain are removed. If given three arguments, then
+ the cookie with the specified name, path and domain is removed.
+
+ Raises KeyError if no matching cookie exists.
+
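+        For example (domain and name are illustrative):
+
+            jar.clear(".example.com")               # whole domain
+            jar.clear(".example.com", "/", "spam")  # one named cookie
+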
+ """
+ if name is not None:
+ if (domain is None) or (path is None):
+ raise ValueError(
+ "domain and path must be given to remove a cookie by name")
+ del self._cookies[domain][path][name]
+ elif path is not None:
+ if domain is None:
+ raise ValueError(
+ "domain must be given to remove cookies by path")
+ del self._cookies[domain][path]
+ elif domain is not None:
+ del self._cookies[domain]
+ else:
+ self._cookies = {}
+
+ def clear_session_cookies(self):
+ """Discard all session cookies.
+
+ Discards all cookies held by object which had either no Max-Age or
+ Expires cookie-attribute or an explicit Discard cookie-attribute, or
+ which otherwise have ended up with a true discard attribute. For
+ interactive browsers, the end of a session usually corresponds to
+ closing the browser window.
+
+ Note that the save method won't save session cookies anyway, unless you
+ ask otherwise by passing a true ignore_discard argument.
+
+ """
+ for cookie in self:
+ if cookie.discard:
+ self.clear(cookie.domain, cookie.path, cookie.name)
+
+ def clear_expired_cookies(self):
+ """Discard all expired cookies.
+
+ You probably don't need to call this method: expired cookies are never
+ sent back to the server (provided you're using DefaultCookiePolicy),
+ this method is called by CookieJar itself every so often, and the save
+ method won't save expired cookies anyway (unless you ask otherwise by
+ passing a true ignore_expires argument).
+
+ """
+ now = time.time()
+ for cookie in self:
+ if cookie.is_expired(now):
+ self.clear(cookie.domain, cookie.path, cookie.name)
+
+ def __getitem__(self, i):
+ if i == 0:
+ self._getitem_iterator = self.__iter__()
+ elif self._prev_getitem_index != i-1: raise IndexError(
+ "CookieJar.__getitem__ only supports sequential iteration")
+ self._prev_getitem_index = i
+ try:
+ return self._getitem_iterator.next()
+ except StopIteration:
+ raise IndexError()
+
+ def __iter__(self):
+ return MappingIterator(self._cookies)
+
+ def __len__(self):
+ """Return number of contained cookies."""
+ i = 0
+ for cookie in self: i = i + 1
+ return i
+
+ def __repr__(self):
+ r = []
+ for cookie in self: r.append(repr(cookie))
+ return "<%s[%s]>" % (self.__class__, ", ".join(r))
+
+ def __str__(self):
+ r = []
+ for cookie in self: r.append(str(cookie))
+ return "<%s[%s]>" % (self.__class__, ", ".join(r))
+
+
+class LoadError(Exception): pass
+
+class FileCookieJar(CookieJar):
+ """CookieJar that can be loaded from and saved to a file.
+
+ Additional methods
+
+ save(filename=None, ignore_discard=False, ignore_expires=False)
+ load(filename=None, ignore_discard=False, ignore_expires=False)
+ revert(filename=None, ignore_discard=False, ignore_expires=False)
+
+ Additional public attributes
+
+ filename: filename for loading and saving cookies
+
+ Additional public readable attributes
+
+    delayload: request that cookies are lazily loaded from disk; this is only
+        a hint, since it affects performance only, not behaviour (unless the
+ cookies on disk are changing); a CookieJar object may ignore it (in fact,
+ only MSIECookieJar lazily loads cookies at the moment)
+
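+    Illustrative use with a concrete subclass (LWPCookieJar; FileCookieJar
+    itself leaves save unimplemented):
+
+      jar = LWPCookieJar("cookies.lwp")
+      jar.load(ignore_discard=True)
+      # ... make requests, then persist again:
+      jar.save(ignore_discard=True)
+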
+ """
+
+ def __init__(self, filename=None, delayload=False, policy=None):
+ """
+ See FileCookieJar.__doc__ for argument documentation.
+
+ Cookies are NOT loaded from the named file until either the load or
+ revert method is called.
+
+ """
+ CookieJar.__init__(self, policy)
+ if filename is not None and not isstringlike(filename):
+ raise ValueError("filename must be string-like")
+ self.filename = filename
+ self.delayload = bool(delayload)
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Save cookies to a file.
+
+ filename: name of file in which to save cookies
+ ignore_discard: save even cookies set to be discarded
+ ignore_expires: save even cookies that have expired
+
+ The file is overwritten if it already exists, thus wiping all its
+ cookies. Saved cookies can be restored later using the load or revert
+ methods. If filename is not specified, self.filename is used; if
+ self.filename is None, ValueError is raised.
+
+ """
+ raise NotImplementedError()
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file.
+
+ Old cookies are kept unless overwritten by newly loaded ones.
+
+ Arguments are as for .save().
+
+ If filename is not specified, self.filename is used; if self.filename
+ is None, ValueError is raised. The named file must be in the format
+ understood by the class, or LoadError will be raised. This format will
+ be identical to that written by the save method, unless the load format
+ is not sufficiently well understood (as is the case for MSIECookieJar).
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename)
+ try:
+ self._really_load(f, filename, ignore_discard, ignore_expires)
+ finally:
+ f.close()
+
+ def revert(self, filename=None,
+ ignore_discard=False, ignore_expires=False):
+ """Clear all cookies and reload cookies from a saved file.
+
+ Raises LoadError (or IOError) if reversion is not successful; the
+ object's state will not be altered if this happens.
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ old_state = copy.deepcopy(self._cookies)
+ self._cookies = {}
+ try:
+ self.load(filename, ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ self._cookies = old_state
+ raise
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py
new file mode 100644
index 0000000..596b114
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py
@@ -0,0 +1,28 @@
+import logging
+
+from urllib2 import BaseHandler
+from _response import response_seek_wrapper
+
+
+class HTTPResponseDebugProcessor(BaseHandler):
+ handler_order = 900 # before redirections, after everything else
+
+ def http_response(self, request, response):
+ if not hasattr(response, "seek"):
+ response = response_seek_wrapper(response)
+ info = logging.getLogger("mechanize.http_responses").info
+ try:
+ info(response.read())
+ finally:
+ response.seek(0)
+ info("*****************************************************")
+ return response
+
+ https_response = http_response
+
+class HTTPRedirectDebugProcessor(BaseHandler):
+ def http_request(self, request):
+ if hasattr(request, "redirect_dict"):
+ info = logging.getLogger("mechanize.http_redirects").info
+ info("redirecting to %s", request.get_full_url())
+ return request
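+
+# Illustrative wiring (a sketch; logging configuration is assumed):
+#
+#     import logging, mechanize
+#     logging.getLogger("mechanize.http_responses").setLevel(logging.INFO)
+#     opener = mechanize.build_opener(HTTPResponseDebugProcessor())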
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py
new file mode 100644
index 0000000..db662a8
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py
@@ -0,0 +1,60 @@
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+import mimetools
+import os
+import socket
+import urllib
+from urllib2 import BaseHandler, URLError
+
+
+class FileHandler(BaseHandler):
+ # Use local file or FTP depending on form of URL
+ def file_open(self, req):
+ url = req.get_selector()
+ if url[:2] == '//' and url[2:3] != '/':
+ req.type = 'ftp'
+ return self.parent.open(req)
+ else:
+ return self.open_local_file(req)
+
+ # names for the localhost
+ names = None
+ def get_names(self):
+ if FileHandler.names is None:
+ try:
+ FileHandler.names = (socket.gethostbyname('localhost'),
+ socket.gethostbyname(socket.gethostname()))
+ except socket.gaierror:
+ FileHandler.names = (socket.gethostbyname('localhost'),)
+ return FileHandler.names
+
+ # not entirely sure what the rules are here
+ def open_local_file(self, req):
+ try:
+ import email.utils as emailutils
+ except ImportError:
+ import email.Utils as emailutils
+ import mimetypes
+ host = req.get_host()
+ file = req.get_selector()
+ localfile = urllib.url2pathname(file)
+ try:
+ stats = os.stat(localfile)
+ size = stats.st_size
+ modified = emailutils.formatdate(stats.st_mtime, usegmt=True)
+ mtype = mimetypes.guess_type(file)[0]
+ headers = mimetools.Message(StringIO(
+ 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
+ (mtype or 'text/plain', size, modified)))
+ if host:
+ host, port = urllib.splitport(host)
+ if not host or \
+ (not port and socket.gethostbyname(host) in self.get_names()):
+ return urllib.addinfourl(open(localfile, 'rb'),
+ headers, 'file:'+file)
+ except OSError, msg:
+ # urllib2 users shouldn't expect OSErrors coming from urlopen()
+ raise URLError(msg)
+ raise URLError('file not on local host')
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py
new file mode 100644
index 0000000..34fe979
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py
@@ -0,0 +1,249 @@
+"""Firefox 3 "cookies.sqlite" cookie persistence.
+
+Copyright 2008 John J Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import logging
+import time
+import sqlite3
+
+from _clientcookie import CookieJar, Cookie, MappingIterator
+from _util import isstringlike, experimental
+debug = logging.getLogger("mechanize.cookies").debug
+
+
+class Firefox3CookieJar(CookieJar):
+
+ """Firefox 3 cookie jar.
+
+ The cookies are stored in Firefox 3's "cookies.sqlite" format.
+
+ Constructor arguments:
+
+ filename: filename of cookies.sqlite (typically found at the top level
+ of a Firefox profile directory)
+ autoconnect: as a convenience, connect to the SQLite cookies database at
+ Firefox3CookieJar construction time (default True)
+ policy: an object satisfying the mechanize.CookiePolicy interface
+
+ Note that this is NOT a FileCookieJar, and there are no .load(),
+ .save() or .restore() methods. The database is in sync with the
+ cookiejar object's state after each public method call.
+
+ Following Firefox's own behaviour, session cookies are never saved to
+ the database.
+
+ The file is created, and an sqlite database written to it, if it does
+ not already exist. The moz_cookies database table is created if it does
+ not already exist.
+ """
+
+ # XXX
+ # handle DatabaseError exceptions
+ # add a FileCookieJar (explicit .save() / .revert() / .load() methods)
+
+ def __init__(self, filename, autoconnect=True, policy=None):
+ experimental("Firefox3CookieJar is experimental code")
+ CookieJar.__init__(self, policy)
+ if filename is not None and not isstringlike(filename):
+ raise ValueError("filename must be string-like")
+ self.filename = filename
+ self._conn = None
+ if autoconnect:
+ self.connect()
+
+ def connect(self):
+ self._conn = sqlite3.connect(self.filename)
+ self._conn.isolation_level = "DEFERRED"
+ self._create_table_if_necessary()
+
+ def close(self):
+ self._conn.close()
+
+ def _transaction(self, func):
+ try:
+ cur = self._conn.cursor()
+ try:
+ result = func(cur)
+ finally:
+ cur.close()
+ except:
+ self._conn.rollback()
+ raise
+ else:
+ self._conn.commit()
+ return result
+
+ def _execute(self, query, params=()):
+ return self._transaction(lambda cur: cur.execute(query, params))
+
+ def _query(self, query, params=()):
+ # XXX should we bother with a transaction?
+ cur = self._conn.cursor()
+ try:
+ cur.execute(query, params)
+ for row in cur.fetchall():
+ yield row
+ finally:
+ cur.close()
+
+ def _create_table_if_necessary(self):
+ self._execute("""\
+CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT,
+ value TEXT, host TEXT, path TEXT, expiry INTEGER,
+ lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""")
+
+ def _cookie_from_row(self, row):
+ (pk, name, value, domain, path, expires,
+ last_accessed, secure, http_only) = row
+
+ version = 0
+ domain = domain.encode("ascii", "ignore")
+ path = path.encode("ascii", "ignore")
+ name = name.encode("ascii", "ignore")
+ value = value.encode("ascii", "ignore")
+ secure = bool(secure)
+
+ # last_accessed isn't a cookie attribute, so isn't added to rest
+ rest = {}
+ if http_only:
+ rest["HttpOnly"] = None
+
+ if name == "":
+ name = value
+ value = None
+
+ initial_dot = domain.startswith(".")
+ domain_specified = initial_dot
+
+ discard = False
+ if expires == "":
+ expires = None
+ discard = True
+
+ return Cookie(version, name, value,
+ None, False,
+ domain, domain_specified, initial_dot,
+ path, False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ rest)
+
+ def clear(self, domain=None, path=None, name=None):
+ CookieJar.clear(self, domain, path, name)
+ where_parts = []
+ sql_params = []
+ if domain is not None:
+ where_parts.append("host = ?")
+ sql_params.append(domain)
+ if path is not None:
+ where_parts.append("path = ?")
+ sql_params.append(path)
+ if name is not None:
+ where_parts.append("name = ?")
+ sql_params.append(name)
+ where = " AND ".join(where_parts)
+ if where:
+ where = " WHERE " + where
+ def clear(cur):
+ cur.execute("DELETE FROM moz_cookies%s" % where,
+ tuple(sql_params))
+ self._transaction(clear)
+
+ def _row_from_cookie(self, cookie, cur):
+ expires = cookie.expires
+ if cookie.discard:
+ expires = ""
+
+ domain = unicode(cookie.domain)
+ path = unicode(cookie.path)
+ name = unicode(cookie.name)
+ value = unicode(cookie.value)
+ secure = bool(int(cookie.secure))
+
+ if value is None:
+ value = name
+ name = ""
+
+ last_accessed = int(time.time())
+ http_only = cookie.has_nonstandard_attr("HttpOnly")
+
+ query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""")
+ pk = query.fetchone()[0]
+ if pk is None:
+ pk = 1
+
+ return (pk, name, value, domain, path, expires,
+ last_accessed, secure, http_only)
+
+ def set_cookie(self, cookie):
+ if cookie.discard:
+ CookieJar.set_cookie(self, cookie)
+ return
+
+ def set_cookie(cur):
+ # XXX
+ # is this RFC 2965-correct?
+ # could this do an UPDATE instead?
+ row = self._row_from_cookie(cookie, cur)
+ name, unused, domain, path = row[1:5]
+ cur.execute("""\
+DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
+ (domain, path, name))
+ cur.execute("""\
+INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+""", row)
+ self._transaction(set_cookie)
+
+ def __iter__(self):
+ # session (non-persistent) cookies
+ for cookie in MappingIterator(self._cookies):
+ yield cookie
+ # persistent cookies
+ for row in self._query("""\
+SELECT * FROM moz_cookies ORDER BY name, path, host"""):
+ yield self._cookie_from_row(row)
+
+ def _cookies_for_request(self, request):
+ session_cookies = CookieJar._cookies_for_request(self, request)
+ def get_cookies(cur):
+ query = cur.execute("SELECT host from moz_cookies")
+ domains = [row[0] for row in query.fetchmany()]
+ cookies = []
+ for domain in domains:
+ cookies += self._persistent_cookies_for_domain(domain,
+ request, cur)
+ return cookies
+ persistent_cookies = self._transaction(get_cookies)
+ return session_cookies + persistent_cookies
+
+ def _persistent_cookies_for_domain(self, domain, request, cur):
+ cookies = []
+ if not self._policy.domain_return_ok(domain, request):
+ return []
+ debug("Checking %s for cookies to return", domain)
+ query = cur.execute("""\
+SELECT * from moz_cookies WHERE host = ? ORDER BY path""",
+ (domain,))
+ cookies = [self._cookie_from_row(row) for row in query.fetchmany()]
+ last_path = None
+ r = []
+ for cookie in cookies:
+ if (cookie.path != last_path and
+ not self._policy.path_return_ok(cookie.path, request)):
+ last_path = cookie.path
+ continue
+ if not self._policy.return_ok(cookie, request):
+ debug(" not returning cookie")
+ continue
+ debug(" it's a match")
+ r.append(cookie)
+ return r
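
A usage sketch for the jar above; the profile path is hypothetical and must point at a real Firefox 3 cookies.sqlite file:

    import mechanize

    jar = mechanize.Firefox3CookieJar(
        "/home/user/.mozilla/firefox/abc123.default/cookies.sqlite")
    browser = mechanize.Browser()
    browser.set_cookiejar(jar)
    browser.open("http://example.com/")   # URL illustrative
    for cookie in jar:                    # session, then persistent cookies
        print cookie.domain, cookie.name, cookie.value
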
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py
new file mode 100644
index 0000000..26c2743
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py
@@ -0,0 +1,103 @@
+import urllib2
+from cStringIO import StringIO
+import _response
+
+# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
+class GzipConsumer:
+
+ def __init__(self, consumer):
+ self.__consumer = consumer
+ self.__decoder = None
+ self.__data = ""
+
+ def __getattr__(self, key):
+ return getattr(self.__consumer, key)
+
+ def feed(self, data):
+ if self.__decoder is None:
+ # check if we have a full gzip header
+ data = self.__data + data
+ try:
+ i = 10
+ flag = ord(data[3])
+ if flag & 4: # extra
+ x = ord(data[i]) + 256*ord(data[i+1])
+ i = i + 2 + x
+ if flag & 8: # filename
+ while ord(data[i]):
+ i = i + 1
+ i = i + 1
+ if flag & 16: # comment
+ while ord(data[i]):
+ i = i + 1
+ i = i + 1
+ if flag & 2: # crc
+ i = i + 2
+ if len(data) < i:
+ raise IndexError("not enough data")
+ if data[:3] != "\x1f\x8b\x08":
+ raise IOError("invalid gzip data")
+ data = data[i:]
+ except IndexError:
+ self.__data = data
+ return # need more data
+ import zlib
+ self.__data = ""
+ self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
+ data = self.__decoder.decompress(data)
+ if data:
+ self.__consumer.feed(data)
+
+ def close(self):
+ if self.__decoder:
+ data = self.__decoder.flush()
+ if data:
+ self.__consumer.feed(data)
+ self.__consumer.close()
+
+
+# --------------------------------------------------------------------
+
+# the rest of this module is John Lee's stupid code, not
+# Fredrik's nice code :-)
+
+class stupid_gzip_consumer:
+ def __init__(self): self.data = []
+ def feed(self, data): self.data.append(data)
+
+class stupid_gzip_wrapper(_response.closeable_response):
+ def __init__(self, response):
+ self._response = response
+
+ c = stupid_gzip_consumer()
+ gzc = GzipConsumer(c)
+ gzc.feed(response.read())
+ self.__data = StringIO("".join(c.data))
+
+ def read(self, size=-1):
+ return self.__data.read(size)
+ def readline(self, size=-1):
+ return self.__data.readline(size)
+ def readlines(self, sizehint=-1):
+ return self.__data.readlines(sizehint)
+
+ def __getattr__(self, name):
+ # delegate unknown methods/attributes
+ return getattr(self._response, name)
+
+class HTTPGzipProcessor(urllib2.BaseHandler):
+ handler_order = 200 # response processing before HTTPEquivProcessor
+
+ def http_request(self, request):
+ request.add_header("Accept-Encoding", "gzip")
+ return request
+
+ def http_response(self, request, response):
+ # post-process response
+ enc_hdrs = response.info().getheaders("Content-encoding")
+ for enc_hdr in enc_hdrs:
+ if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
+ return stupid_gzip_wrapper(response)
+ return response
+
+ https_response = http_response
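
A sketch of plugging the gzip processor into a plain urllib2 opener (the URL is illustrative); the request gains an Accept-Encoding: gzip header and the response body is decompressed transparently:

    import urllib2
    from mechanize._gzip import HTTPGzipProcessor

    opener = urllib2.build_opener(HTTPGzipProcessor())
    response = opener.open("http://example.com/")
    body = response.read()   # already gunzipped when the server gzips
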
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py
new file mode 100644
index 0000000..49ba5de
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py
@@ -0,0 +1,232 @@
+"""Utility functions for HTTP header value parsing and construction.
+
+Copyright 1997-1998, Gisle Aas
+Copyright 2002-2006, John J. Lee
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import os, re
+from types import StringType
+from types import UnicodeType
+STRING_TYPES = StringType, UnicodeType
+
+from _util import http2time
+import _rfc3986
+
+def is_html(ct_headers, url, allow_xhtml=False):
+ """
+ ct_headers: Sequence of Content-Type headers
+ url: Response URL
+
+ """
+ if not ct_headers:
+ # guess
+ ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
+ html_exts = [".htm", ".html"]
+ if allow_xhtml:
+ html_exts += [".xhtml"]
+ return ext in html_exts
+ # use first header
+ ct = split_header_words(ct_headers)[0][0][0]
+ html_types = ["text/html"]
+ if allow_xhtml:
+ html_types += [
+ "text/xhtml", "text/xml",
+ "application/xml", "application/xhtml+xml",
+ ]
+ return ct in html_types
+
+def unmatched(match):
+ """Return unmatched part of re.Match object."""
+ start, end = match.span(0)
+ return match.string[:start]+match.string[end:]
+
+token_re = re.compile(r"^\s*([^=\s;,]+)")
+quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
+value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
+escape_re = re.compile(r"\\(.)")
+def split_header_words(header_values):
+ r"""Parse header values into a list of lists containing key,value pairs.
+
+ The function knows how to deal with ",", ";" and "=" as well as quoted
+ values after "=". A list of space-separated tokens is parsed as if the
+ tokens were separated by ";".
+
+ If the header_values argument contains multiple values, they are
+ treated as if they were a single value separated by comma ",".
+
+ This means that this function is useful for parsing header fields that
+ follow this syntax (BNF as given in the HTTP/1.1 specification, but we relax
+ the requirement for tokens).
+
+ headers = #header
+ header = (token | parameter) *( [";"] (token | parameter))
+
+ token = 1*<any CHAR except CTLs or separators>
+ separators = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" | "="
+ | "{" | "}" | SP | HT
+
+ quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+ qdtext = <any TEXT except <">>
+ quoted-pair = "\" CHAR
+
+ parameter = attribute "=" value
+ attribute = token
+ value = token | quoted-string
+
+ Each header is represented by a list of key/value pairs. The value for a
+ simple token (not part of a parameter) is None. Syntactically incorrect
+ headers will not necessarily be parsed as you would want.
+
+ This is easier to describe with some examples:
+
+ >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
+ [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
+ >>> split_header_words(['text/html; charset="iso-8859-1"'])
+ [[('text/html', None), ('charset', 'iso-8859-1')]]
+ >>> split_header_words([r'Basic realm="\"foo\bar\""'])
+ [[('Basic', None), ('realm', '"foobar"')]]
+
+ """
+ assert type(header_values) not in STRING_TYPES
+ result = []
+ for text in header_values:
+ orig_text = text
+ pairs = []
+ while text:
+ m = token_re.search(text)
+ if m:
+ text = unmatched(m)
+ name = m.group(1)
+ m = quoted_value_re.search(text)
+ if m: # quoted value
+ text = unmatched(m)
+ value = m.group(1)
+ value = escape_re.sub(r"\1", value)
+ else:
+ m = value_re.search(text)
+ if m: # unquoted value
+ text = unmatched(m)
+ value = m.group(1)
+ value = value.rstrip()
+ else:
+ # no value, a lone token
+ value = None
+ pairs.append((name, value))
+ elif text.lstrip().startswith(","):
+ # concatenated headers, as per RFC 2616 section 4.2
+ text = text.lstrip()[1:]
+ if pairs: result.append(pairs)
+ pairs = []
+ else:
+ # skip junk
+ non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
+ assert nr_junk_chars > 0, (
+ "split_header_words bug: '%s', '%s', %s" %
+ (orig_text, text, pairs))
+ text = non_junk
+ if pairs: result.append(pairs)
+ return result
+
+join_escape_re = re.compile(r"([\"\\])")
+def join_header_words(lists):
+ """Do the inverse of the conversion done by split_header_words.
+
+ Takes a list of lists of (key, value) pairs and produces a single header
+ value. Attribute values are quoted if needed.
+
+ >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
+ 'text/plain; charset="iso-8859/1"'
+ >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
+ 'text/plain, charset="iso-8859/1"'
+
+ """
+ headers = []
+ for pairs in lists:
+ attr = []
+ for k, v in pairs:
+ if v is not None:
+ if not re.search(r"^\w+$", v):
+ v = join_escape_re.sub(r"\\\1", v) # escape " and \
+ v = '"%s"' % v
+ if k is None: # Netscape cookies may have no name
+ k = v
+ else:
+ k = "%s=%s" % (k, v)
+ attr.append(k)
+ if attr: headers.append("; ".join(attr))
+ return ", ".join(headers)
+
+def strip_quotes(text):
+ if text.startswith('"'):
+ text = text[1:]
+ if text.endswith('"'):
+ text = text[:-1]
+ return text
+
+def parse_ns_headers(ns_headers):
+ """Ad-hoc parser for Netscape protocol cookie-attributes.
+
+ The old Netscape cookie format for Set-Cookie can for instance contain
+ an unquoted "," in the expires field, so we have to use this ad-hoc
+ parser instead of split_header_words.
+
+ XXX This may not make the best possible effort to parse all the crap
+ that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
+ parser is probably better, so we could do worse than following that if
+ this ever gives any trouble.
+
+ Currently, this is also used for parsing RFC 2109 cookies.
+
+ """
+ known_attrs = ("expires", "domain", "path", "secure",
+ # RFC 2109 attrs (may turn up in Netscape cookies, too)
+ "version", "port", "max-age")
+
+ result = []
+ for ns_header in ns_headers:
+ pairs = []
+ version_set = False
+ params = re.split(r";\s*", ns_header)
+ for ii in range(len(params)):
+ param = params[ii]
+ param = param.rstrip()
+ if param == "": continue
+ if "=" not in param:
+ k, v = param, None
+ else:
+ k, v = re.split(r"\s*=\s*", param, 1)
+ k = k.lstrip()
+ if ii != 0:
+ lc = k.lower()
+ if lc in known_attrs:
+ k = lc
+ if k == "version":
+ # This is an RFC 2109 cookie.
+ v = strip_quotes(v)
+ version_set = True
+ if k == "expires":
+ # convert expires date to seconds since epoch
+ v = http2time(strip_quotes(v)) # None if invalid
+ pairs.append((k, v))
+
+ if pairs:
+ if not version_set:
+ pairs.append(("version", "0"))
+ result.append(pairs)
+
+ return result
+
+
+def _test():
+ import doctest, _headersutil
+ return doctest.testmod(_headersutil)
+
+if __name__ == "__main__":
+ _test()
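
A short sketch of the parsers above; the header values are made up:

    from mechanize._headersutil import split_header_words, \
        join_header_words, parse_ns_headers

    words = split_header_words(['text/html; charset="utf-8"'])
    # -> [[('text/html', None), ('charset', 'utf-8')]]
    print join_header_words(words)
    # -> text/html; charset="utf-8"   (non-token values are re-quoted)

    # Netscape-style Set-Cookie value: expires is converted to an epoch
    # time (or None if unparseable) and a version attribute is appended.
    print parse_ns_headers(['sid=abc; expires=Wed, 09-Jun-2021 10:18:14 GMT'])
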
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py
new file mode 100644
index 0000000..5da0815
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py
@@ -0,0 +1,631 @@
+"""HTML handling.
+
+Copyright 2003-2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import re, copy, htmlentitydefs
+import sgmllib, ClientForm
+
+import _request
+from _headersutil import split_header_words, is_html as _is_html
+import _rfc3986
+
+DEFAULT_ENCODING = "latin-1"
+
+COMPRESS_RE = re.compile(r"\s+")
+
+
+# the base class is purely for backwards compatibility
+class ParseError(ClientForm.ParseError): pass
+
+
+class CachingGeneratorFunction(object):
+ """Caching wrapper around a no-arguments iterable."""
+
+ def __init__(self, iterable):
+ self._cache = []
+ # wrap iterable to make it non-restartable (otherwise, repeated
+ # __call__ would give incorrect results)
+ self._iterator = iter(iterable)
+
+ def __call__(self):
+ cache = self._cache
+ for item in cache:
+ yield item
+ for item in self._iterator:
+ cache.append(item)
+ yield item
+
+
+class EncodingFinder:
+ def __init__(self, default_encoding):
+ self._default_encoding = default_encoding
+ def encoding(self, response):
+ # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
+ # headers may be in the response. HTTP-EQUIV headers come last,
+ # so try in order from first to last.
+ for ct in response.info().getheaders("content-type"):
+ for k, v in split_header_words([ct])[0]:
+ if k == "charset":
+ return v
+ return self._default_encoding
+
+class ResponseTypeFinder:
+ def __init__(self, allow_xhtml):
+ self._allow_xhtml = allow_xhtml
+ def is_html(self, response, encoding):
+ ct_hdrs = response.info().getheaders("content-type")
+ url = response.geturl()
+ # XXX encoding
+ return _is_html(ct_hdrs, url, self._allow_xhtml)
+
+
+# idea for this argument-processing trick is from Peter Otten
+class Args:
+ def __init__(self, args_map):
+ self.dictionary = dict(args_map)
+ def __getattr__(self, key):
+ try:
+ return self.dictionary[key]
+ except KeyError:
+ return getattr(self.__class__, key)
+
+def form_parser_args(
+ select_default=False,
+ form_parser_class=None,
+ request_class=None,
+ backwards_compat=False,
+ ):
+ return Args(locals())
+
+
+class Link:
+ def __init__(self, base_url, url, text, tag, attrs):
+ assert None not in [url, tag, attrs]
+ self.base_url = base_url
+ self.absolute_url = _rfc3986.urljoin(base_url, url)
+ self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
+ def __cmp__(self, other):
+ try:
+ for name in "url", "text", "tag", "attrs":
+ if getattr(self, name) != getattr(other, name):
+ return -1
+ except AttributeError:
+ return -1
+ return 0
+ def __repr__(self):
+ return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
+ self.base_url, self.url, self.text, self.tag, self.attrs)
+
+
+class LinksFactory:
+
+ def __init__(self,
+ link_parser_class=None,
+ link_class=Link,
+ urltags=None,
+ ):
+ import _pullparser
+ if link_parser_class is None:
+ link_parser_class = _pullparser.TolerantPullParser
+ self.link_parser_class = link_parser_class
+ self.link_class = link_class
+ if urltags is None:
+ urltags = {
+ "a": "href",
+ "area": "href",
+ "frame": "src",
+ "iframe": "src",
+ }
+ self.urltags = urltags
+ self._response = None
+ self._encoding = None
+
+ def set_response(self, response, base_url, encoding):
+ self._response = response
+ self._encoding = encoding
+ self._base_url = base_url
+
+ def links(self):
+ """Return an iterator that provides links of the document."""
+ response = self._response
+ encoding = self._encoding
+ base_url = self._base_url
+ p = self.link_parser_class(response, encoding=encoding)
+
+ try:
+ for token in p.tags(*(self.urltags.keys()+["base"])):
+ if token.type == "endtag":
+ continue
+ if token.data == "base":
+ base_href = dict(token.attrs).get("href")
+ if base_href is not None:
+ base_url = base_href
+ continue
+ attrs = dict(token.attrs)
+ tag = token.data
+ name = attrs.get("name")
+ text = None
+ # XXX use attr_encoding for ref'd doc if that doc does not
+ # provide one by other means
+ #attr_encoding = attrs.get("charset")
+ url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL?
+ if not url:
+ # Probably an <A NAME="blah"> link or <AREA NOHREF...>.
+ # For our purposes a link is something with a URL, so
+ # ignore this.
+ continue
+
+ url = _rfc3986.clean_url(url, encoding)
+ if tag == "a":
+ if token.type != "startendtag":
+ # hmm, this'd break if end tag is missing
+ text = p.get_compressed_text(("endtag", tag))
+ # but this doesn't work for eg.
+ # <a href="blah"><b>Andy</b></a>
+ #text = p.get_compressed_text()
+
+ yield Link(base_url, url, text, tag, token.attrs)
+ except sgmllib.SGMLParseError, exc:
+ raise ParseError(exc)
+
+class FormsFactory:
+
+ """Makes a sequence of objects satisfying ClientForm.HTMLForm interface.
+
+ After calling .forms(), the .global_form attribute is a form object
+ containing all controls not a descendant of any FORM element.
+
+ For documentation of the constructor arguments, see
+ ClientForm.ParseResponse.
+
+ """
+
+ def __init__(self,
+ select_default=False,
+ form_parser_class=None,
+ request_class=None,
+ backwards_compat=False,
+ ):
+ import ClientForm
+ self.select_default = select_default
+ if form_parser_class is None:
+ form_parser_class = ClientForm.FormParser
+ self.form_parser_class = form_parser_class
+ if request_class is None:
+ request_class = _request.Request
+ self.request_class = request_class
+ self.backwards_compat = backwards_compat
+ self._response = None
+ self.encoding = None
+ self.global_form = None
+
+ def set_response(self, response, encoding):
+ self._response = response
+ self.encoding = encoding
+ self.global_form = None
+
+ def forms(self):
+ import ClientForm
+ encoding = self.encoding
+ try:
+ forms = ClientForm.ParseResponseEx(
+ self._response,
+ select_default=self.select_default,
+ form_parser_class=self.form_parser_class,
+ request_class=self.request_class,
+ encoding=encoding,
+ _urljoin=_rfc3986.urljoin,
+ _urlparse=_rfc3986.urlsplit,
+ _urlunparse=_rfc3986.urlunsplit,
+ )
+ except ClientForm.ParseError, exc:
+ raise ParseError(exc)
+ self.global_form = forms[0]
+ return forms[1:]
+
+class TitleFactory:
+ def __init__(self):
+ self._response = self._encoding = None
+
+ def set_response(self, response, encoding):
+ self._response = response
+ self._encoding = encoding
+
+ def _get_title_text(self, parser):
+ import _pullparser
+ text = []
+ tok = None
+ while 1:
+ try:
+ tok = parser.get_token()
+ except _pullparser.NoMoreTokensError:
+ break
+ if tok.type == "data":
+ text.append(str(tok))
+ elif tok.type == "entityref":
+ t = unescape("&%s;" % tok.data,
+ parser._entitydefs, parser.encoding)
+ text.append(t)
+ elif tok.type == "charref":
+ t = unescape_charref(tok.data, parser.encoding)
+ text.append(t)
+ elif tok.type in ["starttag", "endtag", "startendtag"]:
+ tag_name = tok.data
+ if tok.type == "endtag" and tag_name == "title":
+ break
+ text.append(str(tok))
+ return COMPRESS_RE.sub(" ", "".join(text).strip())
+
+ def title(self):
+ import _pullparser
+ p = _pullparser.TolerantPullParser(
+ self._response, encoding=self._encoding)
+ try:
+ try:
+ p.get_tag("title")
+ except _pullparser.NoMoreTokensError:
+ return None
+ else:
+ return self._get_title_text(p)
+ except sgmllib.SGMLParseError, exc:
+ raise ParseError(exc)
+
+
+def unescape(data, entities, encoding):
+ if data is None or "&" not in data:
+ return data
+
+ def replace_entities(match):
+ ent = match.group()
+ if ent[1] == "#":
+ return unescape_charref(ent[2:-1], encoding)
+
+ repl = entities.get(ent[1:-1])
+ if repl is not None:
+ repl = unichr(repl)
+ if type(repl) != type(""):
+ try:
+ repl = repl.encode(encoding)
+ except UnicodeError:
+ repl = ent
+ else:
+ repl = ent
+ return repl
+
+ return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
+
+def unescape_charref(data, encoding):
+ name, base = data, 10
+ if name.startswith("x"):
+ name, base = name[1:], 16
+ uc = unichr(int(name, base))
+ if encoding is None:
+ return uc
+ else:
+ try:
+ repl = uc.encode(encoding)
+ except UnicodeError:
+ repl = "&#%s;" % data
+ return repl
+
+
+# bizarre import gymnastics for bundled BeautifulSoup
+import _beautifulsoup
+import ClientForm
+RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes(
+ _beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup
+ )
+# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-(
+sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
+
+class MechanizeBs(_beautifulsoup.BeautifulSoup):
+ _entitydefs = htmlentitydefs.name2codepoint
+ # don't want the magic Microsoft-char workaround
+ PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+ lambda(x):x.group(1) + ' />'),
+ (re.compile('<!\s+([^<>]*)>'),
+ lambda(x):'<!' + x.group(1) + '>')
+ ]
+
+ def __init__(self, encoding, text=None, avoidParserProblems=True,
+ initialTextIsEverything=True):
+ self._encoding = encoding
+ _beautifulsoup.BeautifulSoup.__init__(
+ self, text, avoidParserProblems, initialTextIsEverything)
+
+ def handle_charref(self, ref):
+ t = unescape("&#%s;"%ref, self._entitydefs, self._encoding)
+ self.handle_data(t)
+ def handle_entityref(self, ref):
+ t = unescape("&%s;"%ref, self._entitydefs, self._encoding)
+ self.handle_data(t)
+ def unescape_attrs(self, attrs):
+ escaped_attrs = []
+ for key, val in attrs:
+ val = unescape(val, self._entitydefs, self._encoding)
+ escaped_attrs.append((key, val))
+ return escaped_attrs
+
+class RobustLinksFactory:
+
+ compress_re = COMPRESS_RE
+
+ def __init__(self,
+ link_parser_class=None,
+ link_class=Link,
+ urltags=None,
+ ):
+ if link_parser_class is None:
+ link_parser_class = MechanizeBs
+ self.link_parser_class = link_parser_class
+ self.link_class = link_class
+ if urltags is None:
+ urltags = {
+ "a": "href",
+ "area": "href",
+ "frame": "src",
+ "iframe": "src",
+ }
+ self.urltags = urltags
+ self._bs = None
+ self._encoding = None
+ self._base_url = None
+
+ def set_soup(self, soup, base_url, encoding):
+ self._bs = soup
+ self._base_url = base_url
+ self._encoding = encoding
+
+ def links(self):
+ import _beautifulsoup
+ bs = self._bs
+ base_url = self._base_url
+ encoding = self._encoding
+ gen = bs.recursiveChildGenerator()
+ for ch in gen:
+ if (isinstance(ch, _beautifulsoup.Tag) and
+ ch.name in self.urltags.keys()+["base"]):
+ link = ch
+ attrs = bs.unescape_attrs(link.attrs)
+ attrs_dict = dict(attrs)
+ if link.name == "base":
+ base_href = attrs_dict.get("href")
+ if base_href is not None:
+ base_url = base_href
+ continue
+ url_attr = self.urltags[link.name]
+ url = attrs_dict.get(url_attr)
+ if not url:
+ continue
+ url = _rfc3986.clean_url(url, encoding)
+ text = link.fetchText(lambda t: True)
+ if not text:
+ # follow _pullparser's weird behaviour rigidly
+ if link.name == "a":
+ text = ""
+ else:
+ text = None
+ else:
+ text = self.compress_re.sub(" ", " ".join(text).strip())
+ yield Link(base_url, url, text, link.name, attrs)
+
+
+class RobustFormsFactory(FormsFactory):
+ def __init__(self, *args, **kwds):
+ args = form_parser_args(*args, **kwds)
+ if args.form_parser_class is None:
+ args.form_parser_class = RobustFormParser
+ FormsFactory.__init__(self, **args.dictionary)
+
+ def set_response(self, response, encoding):
+ self._response = response
+ self.encoding = encoding
+
+
+class RobustTitleFactory:
+ def __init__(self):
+ self._bs = self._encoding = None
+
+ def set_soup(self, soup, encoding):
+ self._bs = soup
+ self._encoding = encoding
+
+ def title(self):
+ import _beautifulsoup
+ title = self._bs.first("title")
+ if title == _beautifulsoup.Null:
+ return None
+ else:
+ inner_html = "".join([str(node) for node in title.contents])
+ return COMPRESS_RE.sub(" ", inner_html.strip())
+
+
+class Factory:
+ """Factory for forms, links, etc.
+
+ This interface may expand in future.
+
+ Public methods:
+
+ set_request_class(request_class)
+ set_response(response)
+ forms()
+ links()
+
+ Public attributes:
+
+ Note that accessing these attributes may raise ParseError.
+
+ encoding: string specifying the encoding of response if it contains a text
+ document (this value is left unspecified for documents that do not have
+ an encoding, e.g. an image file)
+ is_html: true if response contains an HTML document (XHTML may be
+ regarded as HTML too)
+ title: page title, or None if no title or not HTML
+ global_form: form object containing all controls that are not descendants
+ of any FORM element, or None if the forms_factory does not support
+ supplying a global form
+
+ """
+
+ LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"]
+
+ def __init__(self, forms_factory, links_factory, title_factory,
+ encoding_finder=EncodingFinder(DEFAULT_ENCODING),
+ response_type_finder=ResponseTypeFinder(allow_xhtml=False),
+ ):
+ """
+
+ Pass keyword arguments only.
+
+ encoding_finder: object used to determine the character encoding when
+ it cannot be determined (or guessed) from the response; the default
+ EncodingFinder falls back to latin-1. You should turn on HTTP-EQUIV
+ handling if you want the best chance of getting the encoding right
+ without resorting to that fallback.
+
+ """
+ self._forms_factory = forms_factory
+ self._links_factory = links_factory
+ self._title_factory = title_factory
+ self._encoding_finder = encoding_finder
+ self._response_type_finder = response_type_finder
+
+ self.set_response(None)
+
+ def set_request_class(self, request_class):
+ """Set urllib2.Request class.
+
+ ClientForm.HTMLForm instances returned by .forms() will return
+ instances of this class when .click()ed.
+
+ """
+ self._forms_factory.request_class = request_class
+
+ def set_response(self, response):
+ """Set response.
+
+ The response must either be None or implement the same interface as
+ objects returned by urllib2.urlopen().
+
+ """
+ self._response = response
+ self._forms_genf = self._links_genf = None
+ self._get_title = None
+ for name in self.LAZY_ATTRS:
+ try:
+ delattr(self, name)
+ except AttributeError:
+ pass
+
+ def __getattr__(self, name):
+ if name not in self.LAZY_ATTRS:
+ return getattr(self.__class__, name)
+
+ if name == "encoding":
+ self.encoding = self._encoding_finder.encoding(
+ copy.copy(self._response))
+ return self.encoding
+ elif name == "is_html":
+ self.is_html = self._response_type_finder.is_html(
+ copy.copy(self._response), self.encoding)
+ return self.is_html
+ elif name == "title":
+ if self.is_html:
+ self.title = self._title_factory.title()
+ else:
+ self.title = None
+ return self.title
+ elif name == "global_form":
+ self.forms()
+ return self.global_form
+
+ def forms(self):
+ """Return iterable over ClientForm.HTMLForm-like objects.
+
+ Raises mechanize.ParseError on failure.
+ """
+ # this implementation sets .global_form as a side-effect, for benefit
+ # of __getattr__ impl
+ if self._forms_genf is None:
+ try:
+ self._forms_genf = CachingGeneratorFunction(
+ self._forms_factory.forms())
+ except: # XXXX define exception!
+ self.set_response(self._response)
+ raise
+ self.global_form = getattr(
+ self._forms_factory, "global_form", None)
+ return self._forms_genf()
+
+ def links(self):
+ """Return iterable over mechanize.Link-like objects.
+
+ Raises mechanize.ParseError on failure.
+ """
+ if self._links_genf is None:
+ try:
+ self._links_genf = CachingGeneratorFunction(
+ self._links_factory.links())
+ except: # XXXX define exception!
+ self.set_response(self._response)
+ raise
+ return self._links_genf()
+
+class DefaultFactory(Factory):
+ """Based on sgmllib."""
+ def __init__(self, i_want_broken_xhtml_support=False):
+ Factory.__init__(
+ self,
+ forms_factory=FormsFactory(),
+ links_factory=LinksFactory(),
+ title_factory=TitleFactory(),
+ response_type_finder=ResponseTypeFinder(
+ allow_xhtml=i_want_broken_xhtml_support),
+ )
+
+ def set_response(self, response):
+ Factory.set_response(self, response)
+ if response is not None:
+ self._forms_factory.set_response(
+ copy.copy(response), self.encoding)
+ self._links_factory.set_response(
+ copy.copy(response), response.geturl(), self.encoding)
+ self._title_factory.set_response(
+ copy.copy(response), self.encoding)
+
+class RobustFactory(Factory):
+ """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is
+ DefaultFactory.
+
+ """
+ def __init__(self, i_want_broken_xhtml_support=False,
+ soup_class=None):
+ Factory.__init__(
+ self,
+ forms_factory=RobustFormsFactory(),
+ links_factory=RobustLinksFactory(),
+ title_factory=RobustTitleFactory(),
+ response_type_finder=ResponseTypeFinder(
+ allow_xhtml=i_want_broken_xhtml_support),
+ )
+ if soup_class is None:
+ soup_class = MechanizeBs
+ self._soup_class = soup_class
+
+ def set_response(self, response):
+ Factory.set_response(self, response)
+ if response is not None:
+ data = response.read()
+ soup = self._soup_class(self.encoding, data)
+ self._forms_factory.set_response(
+ copy.copy(response), self.encoding)
+ self._links_factory.set_soup(
+ soup, response.geturl(), self.encoding)
+ self._title_factory.set_soup(soup, self.encoding)
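
These factories are normally driven indirectly through mechanize.Browser; a sketch with an illustrative URL, where RobustFactory swaps in the BeautifulSoup-based parsers defined above:

    import mechanize

    browser = mechanize.Browser(factory=mechanize.RobustFactory())
    browser.open("http://example.com/")
    print browser.title()
    for link in browser.links():
        print link.absolute_url, link.text
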
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py
new file mode 100644
index 0000000..1b80e2b
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py
@@ -0,0 +1,758 @@
+"""HTTP related handlers.
+
+Note that some other HTTP handlers live in more specific modules: _auth.py,
+_gzip.py, etc.
+
+
+Copyright 2002-2006 John J Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import time, htmlentitydefs, logging, socket, \
+ urllib2, urllib, httplib, sgmllib
+from urllib2 import URLError, HTTPError, BaseHandler
+from cStringIO import StringIO
+
+from _clientcookie import CookieJar
+from _headersutil import is_html
+from _html import unescape, unescape_charref
+from _request import Request
+from _response import closeable_response, response_seek_wrapper
+import _rfc3986
+import _sockettimeout
+
+debug = logging.getLogger("mechanize").debug
+debug_robots = logging.getLogger("mechanize.robots").debug
+
+# monkeypatch urllib2.HTTPError to show URL
+## def urllib2_str(self):
+## return 'HTTP Error %s: %s (%s)' % (
+## self.code, self.msg, self.geturl())
+## urllib2.HTTPError.__str__ = urllib2_str
+
+
+CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
+DEFAULT_ENCODING = 'latin-1'
+
+
+try:
+ socket._fileobject("fake socket", close=True)
+except TypeError:
+ # python <= 2.4
+ create_readline_wrapper = socket._fileobject
+else:
+ def create_readline_wrapper(fh):
+ return socket._fileobject(fh, close=True)
+
+
+# This adds "refresh" to the list of redirectables and provides a redirection
+# algorithm that doesn't go into a loop in the presence of cookies
+# (Python 2.4 has this new algorithm, 2.3 doesn't).
+class HTTPRedirectHandler(BaseHandler):
+ # maximum number of redirections to any single URL
+ # this is needed because of the state that cookies introduce
+ max_repeats = 4
+ # maximum total number of redirections (regardless of URL) before
+ # assuming we're in a loop
+ max_redirections = 10
+
+ # Implementation notes:
+
+ # To avoid the server sending us into an infinite loop, the request
+ # object needs to track what URLs we have already seen. Do this by
+ # adding a handler-specific attribute to the Request object; the
+ # attribute is a dict whose values count the number of times each URL
+ # has been visited. This is needed because visiting the same URL twice
+ # does not necessarily imply a loop, thanks to state introduced by
+ # cookies.
+
+ # Always unhandled redirection codes:
+ # 300 Multiple Choices: should not handle this here.
+ # 304 Not Modified: no need to handle here: only of interest to caches
+ # that do conditional GETs
+ # 305 Use Proxy: probably not worth dealing with here
+ # 306 Unused: what was this for in previous versions of the protocol?
+
+ def redirect_request(self, newurl, req, fp, code, msg, headers):
+ """Return a Request or None in response to a redirect.
+
+ This is called by the http_error_30x methods when a redirection
+ response is received. If a redirection should take place, return a
+ new Request to allow http_error_30x to perform the redirect;
+ otherwise, return None to indicate that an HTTPError should be
+ raised.
+
+ """
+ if code in (301, 302, 303, "refresh") or \
+ (code == 307 and not req.has_data()):
+ # Strictly (according to RFC 2616), 301 or 302 in response to
+ # a POST MUST NOT cause a redirection without confirmation
+ # from the user (of urllib2, in this case). In practice,
+ # essentially all clients do redirect in this case, so we do
+ # the same.
+ # XXX really, refresh redirections should count as visits; tricky to
+ # fix, so this will wait until post-stable release
+ new = Request(newurl,
+ headers=req.headers,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True,
+ visit=False,
+ )
+ new._origin_req = getattr(req, "_origin_req", req)
+ return new
+ else:
+ raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+ def http_error_302(self, req, fp, code, msg, headers):
+ # Some servers (incorrectly) return multiple Location headers
+ # (so probably the same goes for URI). Use the first header.
+ if headers.has_key('location'):
+ newurl = headers.getheaders('location')[0]
+ elif headers.has_key('uri'):
+ newurl = headers.getheaders('uri')[0]
+ else:
+ return
+ newurl = _rfc3986.clean_url(newurl, "latin-1")
+ newurl = _rfc3986.urljoin(req.get_full_url(), newurl)
+
+ # XXX Probably want to forget about the state of the current
+ # request, although that might interact poorly with other
+ # handlers that also use handler-specific request attributes
+ new = self.redirect_request(newurl, req, fp, code, msg, headers)
+ if new is None:
+ return
+
+ # loop detection
+ # .redirect_dict has a key url if url was previously visited.
+ if hasattr(req, 'redirect_dict'):
+ visited = new.redirect_dict = req.redirect_dict
+ if (visited.get(newurl, 0) >= self.max_repeats or
+ len(visited) >= self.max_redirections):
+ raise HTTPError(req.get_full_url(), code,
+ self.inf_msg + msg, headers, fp)
+ else:
+ visited = new.redirect_dict = req.redirect_dict = {}
+ visited[newurl] = visited.get(newurl, 0) + 1
+
+ # Don't close the fp until we are sure that we won't use it
+ # with HTTPError.
+ fp.read()
+ fp.close()
+
+ return self.parent.open(new)
+
+ http_error_301 = http_error_303 = http_error_307 = http_error_302
+ http_error_refresh = http_error_302
+
+ inf_msg = "The HTTP server returned a redirect error that would " \
+ "lead to an infinite loop.\n" \
+ "The last 30x error message was:\n"
+
+
+# XXX would self.reset() work, instead of raising this exception?
+class EndOfHeadError(Exception): pass
+class AbstractHeadParser:
+ # only these elements are allowed in or before HEAD of document
+ head_elems = ("html", "head",
+ "title", "base",
+ "script", "style", "meta", "link", "object")
+ _entitydefs = htmlentitydefs.name2codepoint
+ _encoding = DEFAULT_ENCODING
+
+ def __init__(self):
+ self.http_equiv = []
+
+ def start_meta(self, attrs):
+ http_equiv = content = None
+ for key, value in attrs:
+ if key == "http-equiv":
+ http_equiv = self.unescape_attr_if_required(value)
+ elif key == "content":
+ content = self.unescape_attr_if_required(value)
+ if http_equiv is not None and content is not None:
+ self.http_equiv.append((http_equiv, content))
+
+ def end_head(self):
+ raise EndOfHeadError()
+
+ def handle_entityref(self, name):
+ #debug("%s", name)
+ self.handle_data(unescape(
+ '&%s;' % name, self._entitydefs, self._encoding))
+
+ def handle_charref(self, name):
+ #debug("%s", name)
+ self.handle_data(unescape_charref(name, self._encoding))
+
+ def unescape_attr(self, name):
+ #debug("%s", name)
+ return unescape(name, self._entitydefs, self._encoding)
+
+ def unescape_attrs(self, attrs):
+ #debug("%s", attrs)
+ escaped_attrs = {}
+ for key, val in attrs.items():
+ escaped_attrs[key] = self.unescape_attr(val)
+ return escaped_attrs
+
+ def unknown_entityref(self, ref):
+ self.handle_data("&%s;" % ref)
+
+ def unknown_charref(self, ref):
+ self.handle_data("&#%s;" % ref)
+
+
+try:
+ import HTMLParser
+except ImportError:
+ pass
+else:
+ class XHTMLCompatibleHeadParser(AbstractHeadParser,
+ HTMLParser.HTMLParser):
+ def __init__(self):
+ HTMLParser.HTMLParser.__init__(self)
+ AbstractHeadParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ if tag not in self.head_elems:
+ raise EndOfHeadError()
+ try:
+ method = getattr(self, 'start_' + tag)
+ except AttributeError:
+ try:
+ method = getattr(self, 'do_' + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method(attrs)
+ else:
+ method(attrs)
+
+ def handle_endtag(self, tag):
+ if tag not in self.head_elems:
+ raise EndOfHeadError()
+ try:
+ method = getattr(self, 'end_' + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method()
+
+ def unescape(self, name):
+ # Use the entitydefs passed into constructor, not
+ # HTMLParser.HTMLParser's entitydefs.
+ return self.unescape_attr(name)
+
+ def unescape_attr_if_required(self, name):
+ return name # HTMLParser.HTMLParser already did it
+
+class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
+
+ def _not_called(self):
+ assert False
+
+ def __init__(self):
+ sgmllib.SGMLParser.__init__(self)
+ AbstractHeadParser.__init__(self)
+
+ def handle_starttag(self, tag, method, attrs):
+ if tag not in self.head_elems:
+ raise EndOfHeadError()
+ if tag == "meta":
+ method(attrs)
+
+ def unknown_starttag(self, tag, attrs):
+ self.handle_starttag(tag, self._not_called, attrs)
+
+ def handle_endtag(self, tag, method):
+ if tag in self.head_elems:
+ method()
+ else:
+ raise EndOfHeadError()
+
+ def unescape_attr_if_required(self, name):
+ return self.unescape_attr(name)
+
+def parse_head(fileobj, parser):
+ """Return a list of key, value pairs."""
+ while 1:
+ data = fileobj.read(CHUNK)
+ try:
+ parser.feed(data)
+ except EndOfHeadError:
+ break
+ if len(data) != CHUNK:
+ # this should only happen if there is no HTML body, or if
+ # CHUNK is big
+ break
+ return parser.http_equiv
+
+class HTTPEquivProcessor(BaseHandler):
+ """Append META HTTP-EQUIV headers to regular HTTP headers."""
+
+ handler_order = 300 # before handlers that look at HTTP headers
+
+ def __init__(self, head_parser_class=HeadParser,
+ i_want_broken_xhtml_support=False,
+ ):
+ self.head_parser_class = head_parser_class
+ self._allow_xhtml = i_want_broken_xhtml_support
+
+ def http_response(self, request, response):
+ if not hasattr(response, "seek"):
+ response = response_seek_wrapper(response)
+ http_message = response.info()
+ url = response.geturl()
+ ct_hdrs = http_message.getheaders("content-type")
+ if is_html(ct_hdrs, url, self._allow_xhtml):
+ try:
+ try:
+ html_headers = parse_head(response,
+ self.head_parser_class())
+ finally:
+ response.seek(0)
+ except (HTMLParser.HTMLParseError,
+ sgmllib.SGMLParseError):
+ pass
+ else:
+ for hdr, val in html_headers:
+ # add a header
+ http_message.dict[hdr.lower()] = val
+ text = hdr + ": " + val
+ for line in text.split("\n"):
+ http_message.headers.append(line + "\n")
+ return response
+
+ https_response = http_response
+
+class HTTPCookieProcessor(BaseHandler):
+ """Handle HTTP cookies.
+
+ Public attributes:
+
+ cookiejar: CookieJar instance
+
+ """
+ def __init__(self, cookiejar=None):
+ if cookiejar is None:
+ cookiejar = CookieJar()
+ self.cookiejar = cookiejar
+
+ def http_request(self, request):
+ self.cookiejar.add_cookie_header(request)
+ return request
+
+ def http_response(self, request, response):
+ self.cookiejar.extract_cookies(response, request)
+ return response
+
+ https_request = http_request
+ https_response = http_response
+
+try:
+ import robotparser
+except ImportError:
+ pass
+else:
+ class MechanizeRobotFileParser(robotparser.RobotFileParser):
+
+ def __init__(self, url='', opener=None):
+ robotparser.RobotFileParser.__init__(self, url)
+ self._opener = opener
+ self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT
+
+ def set_opener(self, opener=None):
+ import _opener
+ if opener is None:
+ opener = _opener.OpenerDirector()
+ self._opener = opener
+
+ def set_timeout(self, timeout):
+ self._timeout = timeout
+
+ def read(self):
+ """Reads the robots.txt URL and feeds it to the parser."""
+ if self._opener is None:
+ self.set_opener()
+ req = Request(self.url, unverifiable=True, visit=False,
+ timeout=self._timeout)
+ try:
+ f = self._opener.open(req)
+ except HTTPError, f:
+ pass
+ except (IOError, socket.error, OSError), exc:
+ debug_robots("ignoring error opening %r: %s" %
+ (self.url, exc))
+ return
+ lines = []
+ line = f.readline()
+ while line:
+ lines.append(line.strip())
+ line = f.readline()
+ status = f.code
+ if status == 401 or status == 403:
+ self.disallow_all = True
+ debug_robots("disallow all")
+ elif status >= 400:
+ self.allow_all = True
+ debug_robots("allow all")
+ elif status == 200 and lines:
+ debug_robots("parse lines")
+ self.parse(lines)
+
+ class RobotExclusionError(urllib2.HTTPError):
+ def __init__(self, request, *args):
+ apply(urllib2.HTTPError.__init__, (self,)+args)
+ self.request = request
+
+ class HTTPRobotRulesProcessor(BaseHandler):
+ # before redirections, after everything else
+ handler_order = 800
+
+ try:
+ from httplib import HTTPMessage
+ except ImportError:
+ from mimetools import Message
+ http_response_class = Message
+ else:
+ http_response_class = HTTPMessage
+
+ def __init__(self, rfp_class=MechanizeRobotFileParser):
+ self.rfp_class = rfp_class
+ self.rfp = None
+ self._host = None
+
+ def http_request(self, request):
+ scheme = request.get_type()
+ if scheme not in ["http", "https"]:
+ # robots exclusion only applies to HTTP
+ return request
+
+ if request.get_selector() == "/robots.txt":
+ # /robots.txt is always OK to fetch
+ return request
+
+ host = request.get_host()
+
+ # robots.txt requests don't need to be allowed by robots.txt :-)
+ origin_req = getattr(request, "_origin_req", None)
+ if (origin_req is not None and
+ origin_req.get_selector() == "/robots.txt" and
+ origin_req.get_host() == host
+ ):
+ return request
+
+ if host != self._host:
+ self.rfp = self.rfp_class()
+ try:
+ self.rfp.set_opener(self.parent)
+ except AttributeError:
+ debug("%r instance does not support set_opener" %
+ self.rfp.__class__)
+ self.rfp.set_url(scheme+"://"+host+"/robots.txt")
+ self.rfp.set_timeout(request.timeout)
+ self.rfp.read()
+ self._host = host
+
+ ua = request.get_header("User-agent", "")
+ if self.rfp.can_fetch(ua, request.get_full_url()):
+ return request
+ else:
+ # XXX This should really have raised URLError. Too late now...
+ msg = "request disallowed by robots.txt"
+ raise RobotExclusionError(
+ request,
+ request.get_full_url(),
+ 403, msg,
+ self.http_response_class(StringIO()), StringIO(msg))
+
+ https_request = http_request
+
+class HTTPRefererProcessor(BaseHandler):
+ """Add Referer header to requests.
+
+ This only makes sense if you use each RefererProcessor for a single
+ chain of requests (so, for example, if you use a single
+ HTTPRefererProcessor to fetch a series of URLs extracted from a single
+ page, this will break).
+
+ There's a proper implementation of this in mechanize.Browser.
+
+ """
+ def __init__(self):
+ self.referer = None
+
+ def http_request(self, request):
+ if ((self.referer is not None) and
+ not request.has_header("Referer")):
+ request.add_unredirected_header("Referer", self.referer)
+ return request
+
+ def http_response(self, request, response):
+ self.referer = response.geturl()
+ return response
+
+ https_request = http_request
+ https_response = http_response
+
+
+def clean_refresh_url(url):
+ # e.g. Firefox 1.5 does (something like) this
+ if ((url.startswith('"') and url.endswith('"')) or
+ (url.startswith("'") and url.endswith("'"))):
+ url = url[1:-1]
+ return _rfc3986.clean_url(url, "latin-1") # XXX encoding
+
+def parse_refresh_header(refresh):
+ """
+ >>> parse_refresh_header("1; url=http://example.com/")
+ (1.0, 'http://example.com/')
+ >>> parse_refresh_header("1; url='http://example.com/'")
+ (1.0, 'http://example.com/')
+ >>> parse_refresh_header("1")
+ (1.0, None)
+ >>> parse_refresh_header("blah")
+ Traceback (most recent call last):
+ ValueError: invalid literal for float(): blah
+
+ """
+
+ ii = refresh.find(";")
+ if ii != -1:
+ pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
+ jj = newurl_spec.find("=")
+ key = None
+ if jj != -1:
+ key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
+ newurl = clean_refresh_url(newurl)
+ if key is None or key.strip().lower() != "url":
+ raise ValueError()
+ else:
+ pause, newurl = float(refresh), None
+ return pause, newurl
+
+class HTTPRefreshProcessor(BaseHandler):
+ """Perform HTTP Refresh redirections.
+
+ Note that if a non-200 HTTP code has occurred (for example, a 30x
+ redirect), this processor will do nothing.
+
+ By default, only zero-time Refresh headers are redirected. Use the
+ max_time attribute / constructor argument to allow Refresh with longer
+ pauses. Use the honor_time attribute / constructor argument to control
+ whether the requested pause is honoured (with a time.sleep()) or
+ skipped in favour of immediate redirection.
+
+ Public attributes:
+
+ max_time: see above
+ honor_time: see above
+
+ """
+ handler_order = 1000
+
+ def __init__(self, max_time=0, honor_time=True):
+ self.max_time = max_time
+ self.honor_time = honor_time
+ self._sleep = time.sleep
+
+ def http_response(self, request, response):
+ code, msg, hdrs = response.code, response.msg, response.info()
+
+ if code == 200 and hdrs.has_key("refresh"):
+ refresh = hdrs.getheaders("refresh")[0]
+ try:
+ pause, newurl = parse_refresh_header(refresh)
+ except ValueError:
+ debug("bad Refresh header: %r" % refresh)
+ return response
+
+ if newurl is None:
+ newurl = response.geturl()
+ if (self.max_time is None) or (pause <= self.max_time):
+ if pause > 1E-3 and self.honor_time:
+ self._sleep(pause)
+ hdrs["location"] = newurl
+ # hardcoded http is NOT a bug
+ response = self.parent.error(
+ "http", request, response,
+ "refresh", msg, hdrs)
+ else:
+ debug("Refresh header ignored: %r" % refresh)
+
+ return response
+
+ https_response = http_response
+
+class HTTPErrorProcessor(BaseHandler):
+ """Process HTTP error responses.
+
+ The purpose of this handler is to allow other response processors a
+ look-in by removing the call to parent.error() from
+ AbstractHTTPHandler.
+
+ For non-200 error codes, this just passes the job on to the
+ Handler.<proto>_error_<code> methods, via the OpenerDirector.error
+ method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an
+ HTTPError if no other handler handles the error.
+
+ """
+ handler_order = 1000 # after all other processors
+
+ def http_response(self, request, response):
+ code, msg, hdrs = response.code, response.msg, response.info()
+
+ if code != 200:
+ # hardcoded http is NOT a bug
+ response = self.parent.error(
+ "http", request, response, code, msg, hdrs)
+
+ return response
+
+ https_response = http_response
+
+
+class HTTPDefaultErrorHandler(BaseHandler):
+ def http_error_default(self, req, fp, code, msg, hdrs):
+ # why these error methods took the code, msg, headers args in the first
+ # place rather than a response object, I don't know, but to avoid
+ # multiple wrapping, we're discarding them
+
+ if isinstance(fp, urllib2.HTTPError):
+ response = fp
+ else:
+ response = urllib2.HTTPError(
+ req.get_full_url(), code, msg, hdrs, fp)
+ assert code == response.code
+ assert msg == response.msg
+ assert hdrs == response.hdrs
+ raise response
+
+
+class AbstractHTTPHandler(BaseHandler):
+
+ def __init__(self, debuglevel=0):
+ self._debuglevel = debuglevel
+
+ def set_http_debuglevel(self, level):
+ self._debuglevel = level
+
+ def do_request_(self, request):
+ host = request.get_host()
+ if not host:
+ raise URLError('no host given')
+
+ if request.has_data(): # POST
+ data = request.get_data()
+ if not request.has_header('Content-type'):
+ request.add_unredirected_header(
+ 'Content-type',
+ 'application/x-www-form-urlencoded')
+ if not request.has_header('Content-length'):
+ request.add_unredirected_header(
+ 'Content-length', '%d' % len(data))
+
+ scheme, sel = urllib.splittype(request.get_selector())
+ sel_host, sel_path = urllib.splithost(sel)
+ if not request.has_header('Host'):
+ request.add_unredirected_header('Host', sel_host or host)
+ for name, value in self.parent.addheaders:
+ name = name.capitalize()
+ if not request.has_header(name):
+ request.add_unredirected_header(name, value)
+
+ return request
+
+ def do_open(self, http_class, req):
+ """Return an addinfourl object for the request, using http_class.
+
+ http_class must implement the HTTPConnection API from httplib.
+ The addinfourl return value is a file-like object. It also
+ has methods and attributes including:
+ - info(): return a mimetools.Message object for the headers
+ - geturl(): return the original request URL
+ - code: HTTP status code
+ """
+ host_port = req.get_host()
+ if not host_port:
+ raise URLError('no host given')
+
+ try:
+ h = http_class(host_port, timeout=req.timeout)
+ except TypeError:
+ # Python < 2.6, no per-connection timeout support
+ h = http_class(host_port)
+ h.set_debuglevel(self._debuglevel)
+
+ headers = dict(req.headers)
+ headers.update(req.unredirected_hdrs)
+ # We want to make an HTTP/1.1 request, but the addinfourl
+ # class isn't prepared to deal with a persistent connection.
+ # It will try to read all remaining data from the socket,
+ # which will block while the server waits for the next request.
+ # So make sure the connection gets closed after the (only)
+ # request.
+ headers["Connection"] = "close"
+ headers = dict(
+ [(name.title(), val) for name, val in headers.items()])
+ try:
+ h.request(req.get_method(), req.get_selector(), req.data, headers)
+ r = h.getresponse()
+ except socket.error, err: # XXX what error?
+ raise URLError(err)
+
+ # Pick apart the HTTPResponse object to get the addinfourl
+ # object initialized properly.
+
+ # Wrap the HTTPResponse object in socket's file object adapter
+ # for Windows. That adapter calls recv(), so delegate recv()
+ # to read(). This weird wrapping allows the returned object to
+ # have readline() and readlines() methods.
+
+ # XXX It might be better to extract the read buffering code
+ # out of socket._fileobject() and into a base class.
+
+ r.recv = r.read
+ fp = create_readline_wrapper(r)
+
+ resp = closeable_response(fp, r.msg, req.get_full_url(),
+ r.status, r.reason)
+ return resp
+
+
+class HTTPHandler(AbstractHTTPHandler):
+ def http_open(self, req):
+ return self.do_open(httplib.HTTPConnection, req)
+
+ http_request = AbstractHTTPHandler.do_request_
+
+if hasattr(httplib, 'HTTPS'):
+
+ class HTTPSConnectionFactory:
+ def __init__(self, key_file, cert_file):
+ self._key_file = key_file
+ self._cert_file = cert_file
+ def __call__(self, hostport):
+ return httplib.HTTPSConnection(
+ hostport,
+ key_file=self._key_file, cert_file=self._cert_file)
+
+ class HTTPSHandler(AbstractHTTPHandler):
+ def __init__(self, client_cert_manager=None):
+ AbstractHTTPHandler.__init__(self)
+ self.client_cert_manager = client_cert_manager
+
+ def https_open(self, req):
+ if self.client_cert_manager is not None:
+ key_file, cert_file = self.client_cert_manager.find_key_cert(
+ req.get_full_url())
+ conn_factory = HTTPSConnectionFactory(key_file, cert_file)
+ else:
+ conn_factory = httplib.HTTPSConnection
+ return self.do_open(conn_factory, req)
+
+ https_request = AbstractHTTPHandler.do_request_
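
A sketch of the Refresh machinery above: parse_refresh_header is a pure function, and mechanize.Browser installs HTTPRefreshProcessor via set_handle_refresh (the URL is illustrative):

    import mechanize
    from mechanize._http import parse_refresh_header

    print parse_refresh_header("5; url=http://example.com/next")
    # -> (5.0, 'http://example.com/next')

    browser = mechanize.Browser()
    # Follow Refresh pauses of up to 10 seconds, but skip the sleep itself.
    browser.set_handle_refresh(True, max_time=10, honor_time=False)
    browser.open("http://example.com/")
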
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py
new file mode 100644
index 0000000..f8d49cf
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py
@@ -0,0 +1,185 @@
+"""Load / save to libwww-perl (LWP) format files.
+
+Actually, the format is slightly extended from that used by LWP's
+(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
+not recorded by LWP.
+
+It uses the version string "2.0", though really there isn't an LWP Cookies
+2.0 format. This indicates that there is extra information in here
+(domain_dot and port_spec) while still being compatible with libwww-perl,
+I hope.
+
+Copyright 2002-2006 John J Lee <jjl@pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import time, re, logging
+
+from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
+ MISSING_FILENAME_TEXT, LoadError
+from _headersutil import join_header_words, split_header_words
+from _util import iso2time, time2isoz
+
+debug = logging.getLogger("mechanize").debug
+
+
+def lwp_cookie_str(cookie):
+ """Return string representation of Cookie in an the LWP cookie file format.
+
+ Actually, the format is extended a bit -- see module docstring.
+
+ """
+ h = [(cookie.name, cookie.value),
+ ("path", cookie.path),
+ ("domain", cookie.domain)]
+ if cookie.port is not None: h.append(("port", cookie.port))
+ if cookie.path_specified: h.append(("path_spec", None))
+ if cookie.port_specified: h.append(("port_spec", None))
+ if cookie.domain_initial_dot: h.append(("domain_dot", None))
+ if cookie.secure: h.append(("secure", None))
+ if cookie.expires: h.append(("expires",
+ time2isoz(float(cookie.expires))))
+ if cookie.discard: h.append(("discard", None))
+ if cookie.comment: h.append(("comment", cookie.comment))
+ if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+ if cookie.rfc2109: h.append(("rfc2109", None))
+
+ keys = cookie.nonstandard_attr_keys()
+ keys.sort()
+ for k in keys:
+ h.append((k, str(cookie.get_nonstandard_attr(k))))
+
+ h.append(("version", str(cookie.version)))
+
+ return join_header_words([h])
+
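+# Example output (a sketch, not from the original source): for a typical
+# session cookie, lwp_cookie_str() yields one header-style line such as
+#
+#     sid=abcdef; path="/"; domain="example.com"; path_spec; discard; version=0
+#
+# where boolean attributes (path_spec, discard, ...) carry no value.
+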
+class LWPCookieJar(FileCookieJar):
+ """
+    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
+    "Set-Cookie3" is the format used by the libwww-perl library, not known
+ to be compatible with any browser, but which is easy to read and
+ doesn't lose information about RFC 2965 cookies.
+
+ Additional methods
+
+    as_lwp_str(ignore_discard=True, ignore_expires=True)
+
+ """
+
+ magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
+
+ def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+ """Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
+
+ ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+ """
+ now = time.time()
+ r = []
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ debug(" Not saving %s: marked for discard", cookie.name)
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ debug(" Not saving %s: expired", cookie.name)
+ continue
+ r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+ return "\n".join(r+[""])
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ debug("Saving LWP cookies file")
+ # There really isn't an LWP Cookies 2.0 format, but this indicates
+ # that there is extra information in here (domain_dot and
+ # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n")
+ f.write(self.as_lwp_str(ignore_discard, ignore_expires))
+ finally:
+ f.close()
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ magic = f.readline()
+ if not re.search(self.magic_re, magic):
+ msg = "%s does not seem to contain cookies" % filename
+ raise LoadError(msg)
+
+ now = time.time()
+
+ header = "Set-Cookie3:"
+ boolean_attrs = ("port_spec", "path_spec", "domain_dot",
+ "secure", "discard", "rfc2109")
+ value_attrs = ("version",
+ "port", "path", "domain",
+ "expires",
+ "comment", "commenturl")
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+ if not line.startswith(header):
+ continue
+ line = line[len(header):].strip()
+
+ for data in split_header_words([line]):
+ name, value = data[0]
+ standard = {}
+ rest = {}
+ for k in boolean_attrs:
+ standard[k] = False
+ for k, v in data[1:]:
+ if k is not None:
+ lc = k.lower()
+ else:
+ lc = None
+ # don't lose case distinction for unknown fields
+ if (lc in value_attrs) or (lc in boolean_attrs):
+ k = lc
+ if k in boolean_attrs:
+ if v is None: v = True
+ standard[k] = v
+ elif k in value_attrs:
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ h = standard.get
+ expires = h("expires")
+ discard = h("discard")
+ if expires is not None:
+ expires = iso2time(expires)
+ if expires is None:
+ discard = True
+ domain = h("domain")
+ domain_specified = domain.startswith(".")
+ c = Cookie(h("version"), name, value,
+ h("port"), h("port_spec"),
+ domain, domain_specified, h("domain_dot"),
+ h("path"), h("path_spec"),
+ h("secure"),
+ expires,
+ discard,
+ h("comment"),
+ h("commenturl"),
+ rest,
+ h("rfc2109"),
+ )
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+ except:
+ reraise_unmasked_exceptions((IOError,))
+ raise LoadError("invalid Set-Cookie3 format file %s" % filename)
+
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py
new file mode 100644
index 0000000..ad729c9
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py
@@ -0,0 +1,676 @@
+"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
+
+Copyright 2003-2006 John J. Lee <jjl@pobox.com>
+Copyright 2003 Andy Lester (original Perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import urllib2, copy, re, os, urllib
+
+
+from _html import DefaultFactory
+import _response
+import _request
+import _rfc3986
+import _sockettimeout
+from _useragent import UserAgentBase
+
+__version__ = (0, 1, 11, None, None) # 0.1.11
+
+class BrowserStateError(Exception): pass
+class LinkNotFoundError(Exception): pass
+class FormNotFoundError(Exception): pass
+
+
+def sanepathname2url(path):
+ urlpath = urllib.pathname2url(path)
+ if os.name == "nt" and urlpath.startswith("///"):
+ urlpath = urlpath[2:]
+ # XXX don't ask me about the mac...
+ return urlpath
+
+
+class History:
+ """
+
+ Though this will become public, the implied interface is not yet stable.
+
+ """
+ def __init__(self):
+ self._history = [] # LIFO
+ def add(self, request, response):
+ self._history.append((request, response))
+ def back(self, n, _response):
+ response = _response # XXX move Browser._response into this class?
+ while n > 0 or response is None:
+ try:
+ request, response = self._history.pop()
+ except IndexError:
+ raise BrowserStateError("already at start of history")
+ n -= 1
+ return request, response
+ def clear(self):
+ del self._history[:]
+ def close(self):
+ for request, response in self._history:
+ if response is not None:
+ response.close()
+ del self._history[:]
+
+
+class HTTPRefererProcessor(urllib2.BaseHandler):
+ def http_request(self, request):
+ # See RFC 2616 14.36. The only times we know the source of the
+ # request URI has a URI associated with it are redirect, and
+ # Browser.click() / Browser.submit() / Browser.follow_link().
+ # Otherwise, it's the user's job to add any Referer header before
+ # .open()ing.
+ if hasattr(request, "redirect_dict"):
+ request = self.parent._add_referer_header(
+ request, origin_request=False)
+ return request
+
+ https_request = http_request
+
+
+class Browser(UserAgentBase):
+ """Browser-like class with support for history, forms and links.
+
+ BrowserStateError is raised whenever the browser is in the wrong state to
+ complete the requested operation - eg., when .back() is called when the
+ browser history is empty, or when .follow_link() is called when the current
+ response does not contain HTML data.
+
+ Public attributes:
+
+ request: current request (mechanize.Request or urllib2.Request)
+ form: currently selected form (see .select_form())
+
+ """
+
+ handler_classes = copy.copy(UserAgentBase.handler_classes)
+ handler_classes["_referer"] = HTTPRefererProcessor
+ default_features = copy.copy(UserAgentBase.default_features)
+ default_features.append("_referer")
+
+ def __init__(self,
+ factory=None,
+ history=None,
+ request_class=None,
+ ):
+ """
+
+ Only named arguments should be passed to this constructor.
+
+ factory: object implementing the mechanize.Factory interface.
+ history: object implementing the mechanize.History interface. Note
+ this interface is still experimental and may change in future.
+    request_class: Request class to use.  Defaults to mechanize.Request for
+        Pythons older than 2.4, urllib2.Request otherwise.
+
+ The Factory and History objects passed in are 'owned' by the Browser,
+ so they should not be shared across Browsers. In particular,
+ factory.set_response() should not be called except by the owning
+ Browser itself.
+
+ Note that the supplied factory's request_class is overridden by this
+ constructor, to ensure only one Request class is used.
+
+ """
+ self._handle_referer = True
+
+ if history is None:
+ history = History()
+ self._history = history
+
+ if request_class is None:
+ if not hasattr(urllib2.Request, "add_unredirected_header"):
+ request_class = _request.Request
+ else:
+ request_class = urllib2.Request # Python >= 2.4
+
+ if factory is None:
+ factory = DefaultFactory()
+ factory.set_request_class(request_class)
+ self._factory = factory
+ self.request_class = request_class
+
+ self.request = None
+ self._set_response(None, False)
+
+ # do this last to avoid __getattr__ problems
+ UserAgentBase.__init__(self)
+
+ def close(self):
+ UserAgentBase.close(self)
+ if self._response is not None:
+ self._response.close()
+ if self._history is not None:
+ self._history.close()
+ self._history = None
+
+ # make use after .close easy to spot
+ self.form = None
+ self.request = self._response = None
+        self.response = self.set_response = None
+ self.geturl = self.reload = self.back = None
+ self.clear_history = self.set_cookie = self.links = self.forms = None
+ self.viewing_html = self.encoding = self.title = None
+ self.select_form = self.click = self.submit = self.click_link = None
+ self.follow_link = self.find_link = None
+
+ def set_handle_referer(self, handle):
+ """Set whether to add Referer header to each request."""
+ self._set_handler("_referer", handle)
+ self._handle_referer = bool(handle)
+
+ def _add_referer_header(self, request, origin_request=True):
+ if self.request is None:
+ return request
+ scheme = request.get_type()
+ original_scheme = self.request.get_type()
+ if scheme not in ["http", "https"]:
+ return request
+ if not origin_request and not self.request.has_header("Referer"):
+ return request
+
+ if (self._handle_referer and
+ original_scheme in ["http", "https"] and
+ not (original_scheme == "https" and scheme != "https")):
+ # strip URL fragment (RFC 2616 14.36)
+ parts = _rfc3986.urlsplit(self.request.get_full_url())
+ parts = parts[:-1]+(None,)
+ referer = _rfc3986.urlunsplit(parts)
+ request.add_unredirected_header("Referer", referer)
+ return request
+
+ def open_novisit(self, url, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ """Open a URL without visiting it.
+
+ Browser state (including request, response, history, forms and links)
+ is left unchanged by calling this function.
+
+ The interface is the same as for .open().
+
+ This is useful for things like fetching images.
+
+ See also .retrieve().
+
+ """
+ return self._mech_open(url, data, visit=False, timeout=timeout)
+
+ def open(self, url, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ return self._mech_open(url, data, timeout=timeout)
+
+ def _mech_open(self, url, data=None, update_history=True, visit=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ try:
+ url.get_full_url
+ except AttributeError:
+ # string URL -- convert to absolute URL if required
+ scheme, authority = _rfc3986.urlsplit(url)[:2]
+ if scheme is None:
+ # relative URL
+ if self._response is None:
+ raise BrowserStateError(
+ "can't fetch relative reference: "
+ "not viewing any document")
+ url = _rfc3986.urljoin(self._response.geturl(), url)
+
+ request = self._request(url, data, visit, timeout)
+ visit = request.visit
+ if visit is None:
+ visit = True
+
+ if visit:
+ self._visit_request(request, update_history)
+
+ success = True
+ try:
+ response = UserAgentBase.open(self, request, data)
+ except urllib2.HTTPError, error:
+ success = False
+ if error.fp is None: # not a response
+ raise
+ response = error
+## except (IOError, socket.error, OSError), error:
+## # Yes, urllib2 really does raise all these :-((
+## # See test_urllib2.py for examples of socket.gaierror and OSError,
+## # plus note that FTPHandler raises IOError.
+## # XXX I don't seem to have an example of exactly socket.error being
+## # raised, only socket.gaierror...
+## # I don't want to start fixing these here, though, since this is a
+## # subclass of OpenerDirector, and it would break old code. Even in
+## # Python core, a fix would need some backwards-compat. hack to be
+## # acceptable.
+## raise
+
+ if visit:
+ self._set_response(response, False)
+ response = copy.copy(self._response)
+ elif response is not None:
+ response = _response.upgrade_response(response)
+
+ if not success:
+ raise response
+ return response
+
+ def __str__(self):
+ text = []
+ text.append("<%s " % self.__class__.__name__)
+ if self._response:
+ text.append("visiting %s" % self._response.geturl())
+ else:
+ text.append("(not visiting a URL)")
+ if self.form:
+ text.append("\n selected form:\n %s\n" % str(self.form))
+ text.append(">")
+ return "".join(text)
+
+ def response(self):
+ """Return a copy of the current response.
+
+ The returned object has the same interface as the object returned by
+ .open() (or urllib2.urlopen()).
+
+ """
+ return copy.copy(self._response)
+
+ def open_local_file(self, filename):
+ path = sanepathname2url(os.path.abspath(filename))
+ url = 'file://'+path
+ return self.open(url)
+
+ def set_response(self, response):
+ """Replace current response with (a copy of) response.
+
+ response may be None.
+
+ This is intended mostly for HTML-preprocessing.
+ """
+ self._set_response(response, True)
+
+ def _set_response(self, response, close_current):
+ # sanity check, necessary but far from sufficient
+ if not (response is None or
+ (hasattr(response, "info") and hasattr(response, "geturl") and
+ hasattr(response, "read")
+ )
+ ):
+ raise ValueError("not a response object")
+
+ self.form = None
+ if response is not None:
+ response = _response.upgrade_response(response)
+ if close_current and self._response is not None:
+ self._response.close()
+ self._response = response
+ self._factory.set_response(response)
+
+ def visit_response(self, response, request=None):
+ """Visit the response, as if it had been .open()ed.
+
+ Unlike .set_response(), this updates history rather than replacing the
+ current response.
+ """
+ if request is None:
+ request = _request.Request(response.geturl())
+ self._visit_request(request, True)
+ self._set_response(response, False)
+
+ def _visit_request(self, request, update_history):
+ if self._response is not None:
+ self._response.close()
+ if self.request is not None and update_history:
+ self._history.add(self.request, self._response)
+ self._response = None
+ # we want self.request to be assigned even if UserAgentBase.open
+ # fails
+ self.request = request
+
+ def geturl(self):
+ """Get URL of current document."""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._response.geturl()
+
+ def reload(self):
+ """Reload current document, and return response object."""
+ if self.request is None:
+ raise BrowserStateError("no URL has yet been .open()ed")
+ if self._response is not None:
+ self._response.close()
+ return self._mech_open(self.request, update_history=False)
+
+ def back(self, n=1):
+ """Go back n steps in history, and return response object.
+
+ n: go back this number of steps (default 1 step)
+
+ """
+ if self._response is not None:
+ self._response.close()
+ self.request, response = self._history.back(n, self._response)
+ self.set_response(response)
+ if not response.read_complete:
+ return self.reload()
+ return copy.copy(response)
+
+ def clear_history(self):
+ self._history.clear()
+
+ def set_cookie(self, cookie_string):
+ """Request to set a cookie.
+
+ Note that it is NOT necessary to call this method under ordinary
+ circumstances: cookie handling is normally entirely automatic. The
+ intended use case is rather to simulate the setting of a cookie by
+ client script in a web page (e.g. JavaScript). In that case, use of
+ this method is necessary because mechanize currently does not support
+ JavaScript, VBScript, etc.
+
+ The cookie is added in the same way as if it had arrived with the
+ current response, as a result of the current request. This means that,
+ for example, if it is not appropriate to set the cookie based on the
+ current request, no cookie will be set.
+
+ The cookie will be returned automatically with subsequent responses
+ made by the Browser instance whenever that's appropriate.
+
+ cookie_string should be a valid value of the Set-Cookie header.
+
+ For example:
+
+ browser.set_cookie(
+ "sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT")
+
+        Currently, this method does not allow for adding RFC 2965 cookies.
+ This limitation will be lifted if anybody requests it.
+
+ """
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ if self.request.get_type() not in ["http", "https"]:
+ raise BrowserStateError("can't set cookie for non-HTTP/HTTPS "
+ "transactions")
+ cookiejar = self._ua_handlers["_cookies"].cookiejar
+ response = self.response() # copy
+ headers = response.info()
+ headers["Set-cookie"] = cookie_string
+ cookiejar.extract_cookies(response, self.request)
+
+ def links(self, **kwds):
+ """Return iterable over links (mechanize.Link objects)."""
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ links = self._factory.links()
+ if kwds:
+ return self._filter_links(links, **kwds)
+ else:
+ return links
+
+ def forms(self):
+ """Return iterable over forms.
+
+ The returned form objects implement the ClientForm.HTMLForm interface.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.forms()
+
+ def global_form(self):
+ """Return the global form object, or None if the factory implementation
+ did not supply one.
+
+ The "global" form object contains all controls that are not descendants
+ of any FORM element.
+
+ The returned form object implements the ClientForm.HTMLForm interface.
+
+ This is a separate method since the global form is not regarded as part
+ of the sequence of forms in the document -- mostly for
+ backwards-compatibility.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.global_form
+
+ def viewing_html(self):
+ """Return whether the current response contains HTML data."""
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._factory.is_html
+
+ def encoding(self):
+ if self._response is None:
+ raise BrowserStateError("not viewing any document")
+ return self._factory.encoding
+
+ def title(self):
+ r"""Return title, or None if there is no title element in the document.
+
+        Treatment of any tag children of the title element attempts to follow
+        Firefox and IE (currently, tags are preserved).
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ return self._factory.title
+
+ def select_form(self, name=None, predicate=None, nr=None):
+ """Select an HTML form for input.
+
+ This is a bit like giving a form the "input focus" in a browser.
+
+ If a form is selected, the Browser object supports the HTMLForm
+ interface, so you can call methods like .set_value(), .set(), and
+ .click().
+
+ Another way to select a form is to assign to the .form attribute. The
+ form assigned should be one of the objects returned by the .forms()
+ method.
+
+ At least one of the name, predicate and nr arguments must be supplied.
+ If no matching form is found, mechanize.FormNotFoundError is raised.
+
+ If name is specified, then the form must have the indicated name.
+
+ If predicate is specified, then the form must match that function. The
+ predicate function is passed the HTMLForm as its single argument, and
+ should return a boolean value indicating whether the form matched.
+
+ nr, if supplied, is the sequence number of the form (where 0 is the
+        first).  Note that form 0 is the first form matching all the other
+        arguments (if supplied); it is not necessarily the first form in the
+        document.  The "global form" (consisting of all form controls not contained
+ in any FORM element) is considered not to be part of this sequence and
+ to have no name, so will not be matched unless both name and nr are
+ None.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ if (name is None) and (predicate is None) and (nr is None):
+ raise ValueError(
+ "at least one argument must be supplied to specify form")
+
+ global_form = self._factory.global_form
+ if nr is None and name is None and \
+ predicate is not None and predicate(global_form):
+ self.form = global_form
+ return
+
+ orig_nr = nr
+ for form in self.forms():
+ if name is not None and name != form.name:
+ continue
+ if predicate is not None and not predicate(form):
+ continue
+ if nr:
+ nr -= 1
+ continue
+ self.form = form
+ break # success
+ else:
+ # failure
+ description = []
+ if name is not None: description.append("name '%s'" % name)
+ if predicate is not None:
+ description.append("predicate %s" % predicate)
+ if orig_nr is not None: description.append("nr %d" % orig_nr)
+ description = ", ".join(description)
+ raise FormNotFoundError("no form matching "+description)
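+
+    # Illustrative calls (a sketch; the form name and action are hypothetical):
+    #
+    #     br.select_form(name="login")    # by name
+    #     br.select_form(nr=1)            # second form matching the other criteria
+    #     br.select_form(predicate=lambda f: f.action.endswith("/search"))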
+
+ def click(self, *args, **kwds):
+ """See ClientForm.HTMLForm.click for documentation."""
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ request = self.form.click(*args, **kwds)
+ return self._add_referer_header(request)
+
+ def submit(self, *args, **kwds):
+ """Submit current form.
+
+ Arguments are as for ClientForm.HTMLForm.click().
+
+ Return value is same as for Browser.open().
+
+ """
+ return self.open(self.click(*args, **kwds))
+
+ def click_link(self, link=None, **kwds):
+ """Find a link and return a Request object for it.
+
+ Arguments are as for .find_link(), except that a link may be supplied
+ as the first argument.
+
+ """
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+ if not link:
+ link = self.find_link(**kwds)
+ else:
+ if kwds:
+ raise ValueError(
+ "either pass a Link, or keyword arguments, not both")
+ request = self.request_class(link.absolute_url)
+ return self._add_referer_header(request)
+
+ def follow_link(self, link=None, **kwds):
+ """Find a link and .open() it.
+
+ Arguments are as for .click_link().
+
+ Return value is same as for Browser.open().
+
+ """
+ return self.open(self.click_link(link, **kwds))
+
+ def find_link(self, **kwds):
+ """Find a link in current page.
+
+ Links are returned as mechanize.Link objects.
+
+ # Return third link that .search()-matches the regexp "python"
+ # (by ".search()-matches", I mean that the regular expression method
+ # .search() is used, rather than .match()).
+ find_link(text_regex=re.compile("python"), nr=2)
+
+ # Return first http link in the current page that points to somewhere
+ # on python.org whose link text (after tags have been removed) is
+ # exactly "monty python".
+ find_link(text="monty python",
+ url_regex=re.compile("http.*python.org"))
+
+ # Return first link with exactly three HTML attributes.
+ find_link(predicate=lambda link: len(link.attrs) == 3)
+
+ Links include anchors (<a>), image maps (<area>), and frames (<frame>,
+ <iframe>).
+
+ All arguments must be passed by keyword, not position. Zero or more
+ arguments may be supplied. In order to find a link, all arguments
+ supplied must match.
+
+ If a matching link is not found, mechanize.LinkNotFoundError is raised.
+
+ text: link text between link tags: eg. <a href="blah">this bit</a> (as
+ returned by pullparser.get_compressed_text(), ie. without tags but
+ with opening tags "textified" as per the pullparser docs) must compare
+ equal to this argument, if supplied
+        text_regex: link text between tags (as defined above) must match the
+ regular expression object or regular expression string passed as this
+ argument, if supplied
+ name, name_regex: as for text and text_regex, but matched against the
+ name HTML attribute of the link tag
+ url, url_regex: as for text and text_regex, but matched against the
+ URL of the link tag (note this matches against Link.url, which is a
+ relative or absolute URL according to how it was written in the HTML)
+ tag: element name of opening tag, eg. "a"
+ predicate: a function taking a Link object as its single argument,
+         returning a boolean result, indicating whether the link matches
+ nr: matches the nth link that matches all other criteria (default 0)
+
+ """
+ try:
+ return self._filter_links(self._factory.links(), **kwds).next()
+ except StopIteration:
+ raise LinkNotFoundError()
+
+ def __getattr__(self, name):
+ # pass through ClientForm / DOMForm methods and attributes
+ form = self.__dict__.get("form")
+ if form is None:
+ raise AttributeError(
+ "%s instance has no attribute %s (perhaps you forgot to "
+ ".select_form()?)" % (self.__class__, name))
+ return getattr(form, name)
+
+ def _filter_links(self, links,
+ text=None, text_regex=None,
+ name=None, name_regex=None,
+ url=None, url_regex=None,
+ tag=None,
+ predicate=None,
+ nr=0
+ ):
+ if not self.viewing_html():
+ raise BrowserStateError("not viewing HTML")
+
+ found_links = []
+ orig_nr = nr
+
+ for link in links:
+ if url is not None and url != link.url:
+ continue
+ if url_regex is not None and not re.search(url_regex, link.url):
+ continue
+ if (text is not None and
+ (link.text is None or text != link.text)):
+ continue
+ if (text_regex is not None and
+ (link.text is None or not re.search(text_regex, link.text))):
+ continue
+ if name is not None and name != dict(link.attrs).get("name"):
+ continue
+ if name_regex is not None:
+ link_name = dict(link.attrs).get("name")
+ if link_name is None or not re.search(name_regex, link_name):
+ continue
+ if tag is not None and tag != link.tag:
+ continue
+ if predicate is not None and not predicate(link):
+ continue
+ if nr:
+ nr -= 1
+ continue
+ yield link
+ nr = orig_nr
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py
new file mode 100644
index 0000000..51e81bb
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py
@@ -0,0 +1,161 @@
+"""Mozilla / Netscape cookie loading / saving.
+
+Copyright 2002-2006 John J Lee <jjl@pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import re, time, logging
+
+from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
+ MISSING_FILENAME_TEXT, LoadError
+debug = logging.getLogger("ClientCookie").debug
+
+
+class MozillaCookieJar(FileCookieJar):
+ """
+
+    WARNING: you may want to back up your browser's cookies file if you use
+ this class to save cookies. I *think* it works, but there have been
+ bugs in the past!
+
+ This class differs from CookieJar only in the format it uses to save and
+ load cookies to and from a file. This class uses the Mozilla/Netscape
+ `cookies.txt' format. lynx uses this file format, too.
+
+ Don't expect cookies saved while the browser is running to be noticed by
+ the browser (in fact, Mozilla on unix will overwrite your saved cookies if
+ you change them on disk while it's running; on Windows, you probably can't
+ save at all while the browser is running).
+
+ Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
+ Netscape cookies on saving.
+
+ In particular, the cookie version and port number information is lost,
+ together with information about whether or not Path, Port and Discard were
+ specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
+ domain as set in the HTTP header started with a dot (yes, I'm aware some
+ domains in Netscape files start with a dot and some don't -- trust me, you
+ really don't want to know any more about this).
+
+ Note that though Mozilla and Netscape use the same format, they use
+ slightly different headers. The class saves cookies using the Netscape
+ header by default (Mozilla can cope with that).
+
+ """
+ magic_re = "#( Netscape)? HTTP Cookie File"
+ header = """\
+ # Netscape HTTP Cookie File
+ # http://www.netscape.com/newsref/std/cookie_spec.html
+ # This is a generated file! Do not edit.
+
+"""
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ now = time.time()
+
+ magic = f.readline()
+ if not re.search(self.magic_re, magic):
+ f.close()
+ raise LoadError(
+ "%s does not look like a Netscape format cookies file" %
+ filename)
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+
+ # last field may be absent, so keep any trailing tab
+ if line.endswith("\n"): line = line[:-1]
+
+ # skip comments and blank lines XXX what is $ for?
+ if (line.strip().startswith("#") or
+ line.strip().startswith("$") or
+ line.strip() == ""):
+ continue
+
+ domain, domain_specified, path, secure, expires, name, value = \
+ line.split("\t", 6)
+ secure = (secure == "TRUE")
+ domain_specified = (domain_specified == "TRUE")
+ if name == "":
+ name = value
+ value = None
+
+ initial_dot = domain.startswith(".")
+ if domain_specified != initial_dot:
+ raise LoadError("domain and domain specified flag don't "
+ "match in %s: %s" % (filename, line))
+
+ discard = False
+ if expires == "":
+ expires = None
+ discard = True
+
+ # assume path_specified is false
+ c = Cookie(0, name, value,
+ None, False,
+ domain, domain_specified, initial_dot,
+ path, False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except:
+ reraise_unmasked_exceptions((IOError, LoadError))
+ raise LoadError("invalid Netscape format file %s: %s" %
+ (filename, line))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ debug("Saving Netscape cookies.txt file")
+ f.write(self.header)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ debug(" Not saving %s: marked for discard", cookie.name)
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ debug(" Not saving %s: expired", cookie.name)
+ continue
+ if cookie.secure: secure = "TRUE"
+ else: secure = "FALSE"
+ if cookie.domain.startswith("."): initial_dot = "TRUE"
+ else: initial_dot = "FALSE"
+ if cookie.expires is not None:
+ expires = str(cookie.expires)
+ else:
+ expires = ""
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas cookielib regards it as a
+ # cookie with no value.
+ name = ""
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ "\t".join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value])+
+ "\n")
+ finally:
+ f.close()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py
new file mode 100644
index 0000000..1057811
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py
@@ -0,0 +1,388 @@
+"""Microsoft Internet Explorer cookie loading on Windows.
+
+Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
+Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+# XXX names and comments are not great here
+
+import os, re, time, struct, logging
+if os.name == "nt":
+ import _winreg
+
+from _clientcookie import FileCookieJar, CookieJar, Cookie, \
+ MISSING_FILENAME_TEXT, LoadError
+
+debug = logging.getLogger("mechanize").debug
+
+
+def regload(path, leaf):
+ key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
+ _winreg.KEY_ALL_ACCESS)
+ try:
+ value = _winreg.QueryValueEx(key, leaf)[0]
+ except WindowsError:
+ value = None
+ return value
+
+WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
+
+def epoch_time_offset_from_win32_filetime(filetime):
+ """Convert from win32 filetime to seconds-since-epoch value.
+
+ MSIE stores create and expire times as Win32 FILETIME, which is 64
+ bits of 100 nanosecond intervals since Jan 01 1601.
+
+ mechanize expects time in 32-bit value expressed in seconds since the
+ epoch (Jan 01 1970).
+
+ """
+ if filetime < WIN32_EPOCH:
+ raise ValueError("filetime (%d) is before epoch (%d)" %
+ (filetime, WIN32_EPOCH))
+
+ return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
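+
+# Worked example (a sketch, not from the original source):
+#
+#     epoch_time_offset_from_win32_filetime(WIN32_EPOCH)                  # -> 0
+#     # one day is 24*60*60 seconds == 864000000000 FILETIME ticks:
+#     epoch_time_offset_from_win32_filetime(WIN32_EPOCH + 864000000000L)  # -> 86400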
+
+def binary_to_char(c): return "%02X" % ord(c)
+def binary_to_str(d): return "".join(map(binary_to_char, list(d)))
+
+class MSIEBase:
+ magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
+ padding = "\x0d\xf0\xad\x0b"
+
+ msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
+ cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
+ "(.+\@[\x21-\xFF]+\.txt)")
+
+ # path under HKEY_CURRENT_USER from which to get location of index.dat
+ reg_path = r"software\microsoft\windows" \
+ r"\currentversion\explorer\shell folders"
+ reg_key = "Cookies"
+
+ def __init__(self):
+ self._delayload_domains = {}
+
+ def _delayload_domain(self, domain):
+ # if necessary, lazily load cookies for this domain
+ delayload_info = self._delayload_domains.get(domain)
+ if delayload_info is not None:
+ cookie_file, ignore_discard, ignore_expires = delayload_info
+ try:
+ self.load_cookie_data(cookie_file,
+ ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ debug("error reading cookie file, skipping: %s", cookie_file)
+ else:
+ del self._delayload_domains[domain]
+
+ def _load_cookies_from_file(self, filename):
+ debug("Loading MSIE cookies file: %s", filename)
+ cookies = []
+
+ cookies_fh = open(filename)
+
+ try:
+ while 1:
+ key = cookies_fh.readline()
+ if key == "": break
+
+ rl = cookies_fh.readline
+ def getlong(rl=rl): return long(rl().rstrip())
+ def getstr(rl=rl): return rl().rstrip()
+
+ key = key.rstrip()
+ value = getstr()
+ domain_path = getstr()
+ flags = getlong() # 0x2000 bit is for secure I think
+ lo_expire = getlong()
+ hi_expire = getlong()
+ lo_create = getlong()
+ hi_create = getlong()
+ sep = getstr()
+
+ if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
+ hi_create, lo_create, sep) or (sep != "*"):
+ break
+
+ m = self.msie_domain_re.search(domain_path)
+ if m:
+ domain = m.group(1)
+ path = m.group(2)
+
+ cookies.append({"KEY": key, "VALUE": value,
+ "DOMAIN": domain, "PATH": path,
+ "FLAGS": flags, "HIXP": hi_expire,
+ "LOXP": lo_expire, "HICREATE": hi_create,
+ "LOCREATE": lo_create})
+ finally:
+ cookies_fh.close()
+
+ return cookies
+
+ def load_cookie_data(self, filename,
+ ignore_discard=False, ignore_expires=False):
+ """Load cookies from file containing actual cookie data.
+
+ Old cookies are kept unless overwritten by newly loaded ones.
+
+ You should not call this method if the delayload attribute is set.
+
+        I think each of these files contains all cookies for one user, domain,
+ and path.
+
+ filename: file containing cookies -- usually found in a file like
+ C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
+
+ """
+ now = int(time.time())
+
+ cookie_data = self._load_cookies_from_file(filename)
+
+ for cookie in cookie_data:
+ flags = cookie["FLAGS"]
+ secure = ((flags & 0x2000) != 0)
+ filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
+ expires = epoch_time_offset_from_win32_filetime(filetime)
+ if expires < now:
+ discard = True
+ else:
+ discard = False
+ domain = cookie["DOMAIN"]
+ initial_dot = domain.startswith(".")
+ if initial_dot:
+ domain_specified = True
+ else:
+ # MSIE 5 does not record whether the domain cookie-attribute
+ # was specified.
+ # Assuming it wasn't is conservative, because with strict
+ # domain matching this will match less frequently; with regular
+ # Netscape tail-matching, this will match at exactly the same
+ # times that domain_specified = True would. It also means we
+ # don't have to prepend a dot to achieve consistency with our
+ # own & Mozilla's domain-munging scheme.
+ domain_specified = False
+
+ # assume path_specified is false
+ # XXX is there other stuff in here? -- eg. comment, commentURL?
+ c = Cookie(0,
+ cookie["KEY"], cookie["VALUE"],
+ None, False,
+ domain, domain_specified, initial_dot,
+ cookie["PATH"], False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {"flags": flags})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ CookieJar.set_cookie(self, c)
+
+ def load_from_registry(self, ignore_discard=False, ignore_expires=False,
+ username=None):
+ """
+ username: only required on win9x
+
+ """
+ cookies_dir = regload(self.reg_path, self.reg_key)
+ filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
+ self.load(filename, ignore_discard, ignore_expires, username)
+
+ def _really_load(self, index, filename, ignore_discard, ignore_expires,
+ username):
+ now = int(time.time())
+
+ if username is None:
+ username = os.environ['USERNAME'].lower()
+
+ cookie_dir = os.path.dirname(filename)
+
+ data = index.read(256)
+ if len(data) != 256:
+ raise LoadError("%s file is too short" % filename)
+
+ # Cookies' index.dat file starts with 32 bytes of signature
+ # followed by an offset to the first record, stored as a little-
+ # endian DWORD.
+ sig, size, data = data[:32], data[32:36], data[36:]
+ size = struct.unpack("<L", size)[0]
+
+ # check that sig is valid
+ if not self.magic_re.match(sig) or size != 0x4000:
+ raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
+ (str(filename), sig, size))
+
+ # skip to start of first record
+ index.seek(size, 0)
+
+ sector = 128 # size of sector in bytes
+
+ while 1:
+ data = ""
+
+ # Cookies are usually in two contiguous sectors, so read in two
+ # sectors and adjust if not a Cookie.
+ to_read = 2 * sector
+ d = index.read(to_read)
+ if len(d) != to_read:
+ break
+ data = data + d
+
+ # Each record starts with a 4-byte signature and a count
+ # (little-endian DWORD) of sectors for the record.
+ sig, size, data = data[:4], data[4:8], data[8:]
+ size = struct.unpack("<L", size)[0]
+
+ to_read = (size - 2) * sector
+
+## from urllib import quote
+## print "data", quote(data)
+## print "sig", quote(sig)
+## print "size in sectors", size
+## print "size in bytes", size*sector
+## print "size in units of 16 bytes", (size*sector) / 16
+## print "size to read in bytes", to_read
+## print
+
+ if sig != "URL ":
+ assert sig in ("HASH", "LEAK", \
+ self.padding, "\x00\x00\x00\x00"), \
+ "unrecognized MSIE index.dat record: %s" % \
+ binary_to_str(sig)
+ if sig == "\x00\x00\x00\x00":
+ # assume we've got all the cookies, and stop
+ break
+ if sig == self.padding:
+ continue
+ # skip the rest of this record
+ assert to_read >= 0
+ if size != 2:
+ assert to_read != 0
+ index.seek(to_read, 1)
+ continue
+
+ # read in rest of record if necessary
+ if size > 2:
+ more_data = index.read(to_read)
+ if len(more_data) != to_read: break
+ data = data + more_data
+
+ cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
+ "(%s\@[\x21-\xFF]+\.txt)" % username)
+ m = re.search(cookie_re, data, re.I)
+ if m:
+ cookie_file = os.path.join(cookie_dir, m.group(2))
+ if not self.delayload:
+ try:
+ self.load_cookie_data(cookie_file,
+ ignore_discard, ignore_expires)
+ except (LoadError, IOError):
+ debug("error reading cookie file, skipping: %s",
+ cookie_file)
+ else:
+ domain = m.group(1)
+ i = domain.find("/")
+ if i != -1:
+ domain = domain[:i]
+
+ self._delayload_domains[domain] = (
+ cookie_file, ignore_discard, ignore_expires)
+
+
+class MSIECookieJar(MSIEBase, FileCookieJar):
+ """FileCookieJar that reads from the Windows MSIE cookies database.
+
+ MSIECookieJar can read the cookie files of Microsoft Internet Explorer
+ (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
+ Windows 98. Other configurations may also work, but are untested. Saving
+ cookies in MSIE format is NOT supported. If you save cookies, they'll be
+ in the usual Set-Cookie3 format, which you can read back in using an
+ instance of the plain old CookieJar class. Don't save using the same
+ filename that you loaded cookies from, because you may succeed in
+ clobbering your MSIE cookies index file!
+
+    You should be able to have mechanize share Internet Explorer's cookies like
+ this (note you need to supply a username to load_from_registry if you're on
+ Windows 9x or Windows ME):
+
+ cj = MSIECookieJar(delayload=1)
+ # find cookies index file in registry and load cookies from it
+ cj.load_from_registry()
+ opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
+ response = opener.open("http://example.com/")
+
+ Iterating over a delayloaded MSIECookieJar instance will not cause any
+ cookies to be read from disk. To force reading of all cookies from disk,
+ call read_all_cookies. Note that the following methods iterate over self:
+ clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
+ and as_string.
+
+ Additional methods:
+
+ load_from_registry(ignore_discard=False, ignore_expires=False,
+ username=None)
+ load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
+ read_all_cookies()
+
+ """
+ def __init__(self, filename=None, delayload=False, policy=None):
+ MSIEBase.__init__(self)
+ FileCookieJar.__init__(self, filename, delayload, policy)
+
+ def set_cookie(self, cookie):
+ if self.delayload:
+ self._delayload_domain(cookie.domain)
+ CookieJar.set_cookie(self, cookie)
+
+ def _cookies_for_request(self, request):
+ """Return a list of cookies to be returned to server."""
+ domains = self._cookies.copy()
+ domains.update(self._delayload_domains)
+ domains = domains.keys()
+
+ cookies = []
+ for domain in domains:
+ cookies.extend(self._cookies_for_domain(domain, request))
+ return cookies
+
+ def _cookies_for_domain(self, domain, request):
+ if not self._policy.domain_return_ok(domain, request):
+ return []
+ debug("Checking %s for cookies to return", domain)
+ if self.delayload:
+ self._delayload_domain(domain)
+ return CookieJar._cookies_for_domain(self, domain, request)
+
+ def read_all_cookies(self):
+ """Eagerly read in all cookies."""
+ if self.delayload:
+ for domain in self._delayload_domains.keys():
+ self._delayload_domain(domain)
+
+ def load(self, filename, ignore_discard=False, ignore_expires=False,
+ username=None):
+ """Load cookies from an MSIE 'index.dat' cookies index file.
+
+ filename: full path to cookie index file
+ username: only required on win9x
+
+ """
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ index = open(filename, "rb")
+
+ try:
+ self._really_load(index, filename, ignore_discard, ignore_expires,
+ username)
+ finally:
+ index.close()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py
new file mode 100644
index 0000000..d94eacf
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py
@@ -0,0 +1,436 @@
+"""Integration with Python standard library module urllib2: OpenerDirector
+class.
+
+Copyright 2004-2006 John J Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import os, urllib2, bisect, httplib, types, tempfile
+try:
+ import threading as _threading
+except ImportError:
+ import dummy_threading as _threading
+try:
+ set
+except NameError:
+ import sets
+ set = sets.Set
+
+import _file
+import _http
+from _request import Request
+import _response
+import _rfc3986
+import _sockettimeout
+import _upgrade
+from _util import isstringlike
+
+
+class ContentTooShortError(urllib2.URLError):
+ def __init__(self, reason, result):
+ urllib2.URLError.__init__(self, reason)
+ self.result = result
+
+
+def set_request_attr(req, name, value, default):
+ try:
+ getattr(req, name)
+ except AttributeError:
+ setattr(req, name, default)
+ if value is not default:
+ setattr(req, name, value)
+
+
+class OpenerDirector(urllib2.OpenerDirector):
+ def __init__(self):
+ urllib2.OpenerDirector.__init__(self)
+ # really none of these are (sanely) public -- the lack of initial
+ # underscore on some is just due to following urllib2
+ self.process_response = {}
+ self.process_request = {}
+ self._any_request = {}
+ self._any_response = {}
+ self._handler_index_valid = True
+ self._tempfiles = []
+
+ def add_handler(self, handler):
+ if handler in self.handlers:
+ return
+ # XXX why does self.handlers need to be sorted?
+ bisect.insort(self.handlers, handler)
+ handler.add_parent(self)
+ self._handler_index_valid = False
+
+ def _maybe_reindex_handlers(self):
+ if self._handler_index_valid:
+ return
+
+ handle_error = {}
+ handle_open = {}
+ process_request = {}
+ process_response = {}
+ any_request = set()
+ any_response = set()
+ unwanted = []
+
+ for handler in self.handlers:
+ added = False
+ for meth in dir(handler):
+ if meth in ["redirect_request", "do_open", "proxy_open"]:
+ # oops, coincidental match
+ continue
+
+ if meth == "any_request":
+ any_request.add(handler)
+ added = True
+ continue
+ elif meth == "any_response":
+ any_response.add(handler)
+ added = True
+ continue
+
+ ii = meth.find("_")
+ scheme = meth[:ii]
+ condition = meth[ii+1:]
+
+ if condition.startswith("error"):
+ jj = meth[ii+1:].find("_") + ii + 1
+ kind = meth[jj+1:]
+ try:
+ kind = int(kind)
+ except ValueError:
+ pass
+ lookup = handle_error.setdefault(scheme, {})
+ elif condition == "open":
+ kind = scheme
+ lookup = handle_open
+ elif condition == "request":
+ kind = scheme
+ lookup = process_request
+ elif condition == "response":
+ kind = scheme
+ lookup = process_response
+ else:
+ continue
+
+ lookup.setdefault(kind, set()).add(handler)
+ added = True
+
+ if not added:
+ unwanted.append(handler)
+
+ for handler in unwanted:
+ self.handlers.remove(handler)
+
+ # sort indexed methods
+ # XXX could be cleaned up
+ for lookup in [process_request, process_response]:
+ for scheme, handlers in lookup.iteritems():
+ lookup[scheme] = handlers
+ for scheme, lookup in handle_error.iteritems():
+ for code, handlers in lookup.iteritems():
+ handlers = list(handlers)
+ handlers.sort()
+ lookup[code] = handlers
+ for scheme, handlers in handle_open.iteritems():
+ handlers = list(handlers)
+ handlers.sort()
+ handle_open[scheme] = handlers
+
+ # cache the indexes
+ self.handle_error = handle_error
+ self.handle_open = handle_open
+ self.process_request = process_request
+ self.process_response = process_response
+ self._any_request = any_request
+ self._any_response = any_response
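+
+        # Descriptive note (not in the original source): dispatch is driven by
+        # handler method names.  For example, a handler defining
+        #     http_open(req)           is indexed under handle_open["http"]
+        #     http_request(req)        under process_request["http"]
+        #     http_response(req, resp) under process_response["http"]
+        #     http_error_302(...)      under handle_error["http"][302]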
+
+ def _request(self, url_or_req, data, visit,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ if isstringlike(url_or_req):
+ req = Request(url_or_req, data, visit=visit, timeout=timeout)
+ else:
+ # already a urllib2.Request or mechanize.Request instance
+ req = url_or_req
+ if data is not None:
+ req.add_data(data)
+ # XXX yuck
+ set_request_attr(req, "visit", visit, None)
+ set_request_attr(req, "timeout", timeout,
+ _sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
+ return req
+
+ def open(self, fullurl, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ req = self._request(fullurl, data, None, timeout)
+ req_scheme = req.get_type()
+
+ self._maybe_reindex_handlers()
+
+ # pre-process request
+ # XXX should we allow a Processor to change the URL scheme
+ # of the request?
+ request_processors = set(self.process_request.get(req_scheme, []))
+ request_processors.update(self._any_request)
+ request_processors = list(request_processors)
+ request_processors.sort()
+ for processor in request_processors:
+ for meth_name in ["any_request", req_scheme+"_request"]:
+ meth = getattr(processor, meth_name, None)
+ if meth:
+ req = meth(req)
+
+ # In Python >= 2.4, .open() supports processors already, so we must
+ # call ._open() instead.
+ urlopen = getattr(urllib2.OpenerDirector, "_open",
+ urllib2.OpenerDirector.open)
+ response = urlopen(self, req, data)
+
+ # post-process response
+ response_processors = set(self.process_response.get(req_scheme, []))
+ response_processors.update(self._any_response)
+ response_processors = list(response_processors)
+ response_processors.sort()
+ for processor in response_processors:
+ for meth_name in ["any_response", req_scheme+"_response"]:
+ meth = getattr(processor, meth_name, None)
+ if meth:
+ response = meth(req, response)
+
+ return response
+
+ def error(self, proto, *args):
+ if proto in ['http', 'https']:
+ # XXX http[s] protocols are special-cased
+            dict = self.handle_error['http'] # https is no different from http
+ proto = args[2] # YUCK!
+ meth_name = 'http_error_%s' % proto
+ http_err = 1
+ orig_args = args
+ else:
+ dict = self.handle_error
+ meth_name = proto + '_error'
+ http_err = 0
+ args = (dict, proto, meth_name) + args
+ result = apply(self._call_chain, args)
+ if result:
+ return result
+
+ if http_err:
+ args = (dict, 'default', 'http_error_default') + orig_args
+ return apply(self._call_chain, args)
+
+ BLOCK_SIZE = 1024*8
+ def retrieve(self, fullurl, filename=None, reporthook=None, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ """Returns (filename, headers).
+
+ For remote objects, the default filename will refer to a temporary
+ file. Temporary files are removed when the OpenerDirector.close()
+ method is called.
+
+ For file: URLs, at present the returned filename is None. This may
+ change in future.
+
+ If the actual number of bytes read is less than indicated by the
+ Content-Length header, raises ContentTooShortError (a URLError
+ subclass). The exception's .result attribute contains the (filename,
+ headers) that would have been returned.
+
+ """
+ req = self._request(fullurl, data, False, timeout)
+ scheme = req.get_type()
+ fp = self.open(req)
+ headers = fp.info()
+ if filename is None and scheme == 'file':
+ # XXX req.get_selector() seems broken here, return None,
+ # pending sanity :-/
+ return None, headers
+ #return urllib.url2pathname(req.get_selector()), headers
+ if filename:
+ tfp = open(filename, 'wb')
+ else:
+ path = _rfc3986.urlsplit(req.get_full_url())[2]
+ suffix = os.path.splitext(path)[1]
+ fd, filename = tempfile.mkstemp(suffix)
+ self._tempfiles.append(filename)
+ tfp = os.fdopen(fd, 'wb')
+
+ result = filename, headers
+ bs = self.BLOCK_SIZE
+ size = -1
+ read = 0
+ blocknum = 0
+ if reporthook:
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
+ reporthook(blocknum, bs, size)
+ while 1:
+ block = fp.read(bs)
+ if block == "":
+ break
+ read += len(block)
+ tfp.write(block)
+ blocknum += 1
+ if reporthook:
+ reporthook(blocknum, bs, size)
+ fp.close()
+ tfp.close()
+ del fp
+ del tfp
+
+ # raise exception if actual size does not match content-length header
+ if size >= 0 and read < size:
+ raise ContentTooShortError(
+ "retrieval incomplete: "
+ "got only %i out of %i bytes" % (read, size),
+ result
+ )
+
+ return result
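+
+    # Usage sketch (not part of the original source; the URL is illustrative):
+    #
+    #     def report(block_count, block_size, total_size):
+    #         print block_count * block_size, "of", total_size
+    #     filename, headers = opener.retrieve(
+    #         "http://example.com/big.tar.gz", reporthook=report)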
+
+ def close(self):
+ urllib2.OpenerDirector.close(self)
+
+ # make it very obvious this object is no longer supposed to be used
+ self.open = self.error = self.retrieve = self.add_handler = None
+
+ if self._tempfiles:
+ for filename in self._tempfiles:
+ try:
+ os.unlink(filename)
+ except OSError:
+ pass
+ del self._tempfiles[:]
+
+
+def wrapped_open(urlopen, process_response_object, fullurl, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ success = True
+ try:
+ response = urlopen(fullurl, data, timeout)
+ except urllib2.HTTPError, error:
+ success = False
+ if error.fp is None: # not a response
+ raise
+ response = error
+
+ if response is not None:
+ response = process_response_object(response)
+
+ if not success:
+ raise response
+ return response
+
+class ResponseProcessingOpener(OpenerDirector):
+
+ def open(self, fullurl, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ def bound_open(fullurl, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ return OpenerDirector.open(self, fullurl, data, timeout)
+ return wrapped_open(
+ bound_open, self.process_response_object, fullurl, data, timeout)
+
+ def process_response_object(self, response):
+ return response
+
+
+class SeekableResponseOpener(ResponseProcessingOpener):
+ def process_response_object(self, response):
+ return _response.seek_wrapped_response(response)
+
+
+class OpenerFactory:
+ """This class's interface is quite likely to change."""
+
+ default_classes = [
+ # handlers
+ urllib2.ProxyHandler,
+ urllib2.UnknownHandler,
+ _http.HTTPHandler, # derived from new AbstractHTTPHandler
+ _http.HTTPDefaultErrorHandler,
+ _http.HTTPRedirectHandler, # bugfixed
+ urllib2.FTPHandler,
+ _file.FileHandler,
+ # processors
+ _upgrade.HTTPRequestUpgradeProcessor,
+ _http.HTTPCookieProcessor,
+ _http.HTTPErrorProcessor,
+ ]
+ if hasattr(httplib, 'HTTPS'):
+ default_classes.append(_http.HTTPSHandler)
+ handlers = []
+ replacement_handlers = []
+
+ def __init__(self, klass=OpenerDirector):
+ self.klass = klass
+
+ def build_opener(self, *handlers):
+ """Create an opener object from a list of handlers and processors.
+
+ The opener will use several default handlers and processors, including
+ support for HTTP and FTP.
+
+ If any of the handlers passed as arguments are subclasses of the
+ default handlers, the default handlers will not be used.
+
+ """
+ opener = self.klass()
+ default_classes = list(self.default_classes)
+ skip = []
+ for klass in default_classes:
+ for check in handlers:
+ if type(check) == types.ClassType:
+ if issubclass(check, klass):
+ skip.append(klass)
+ elif type(check) == types.InstanceType:
+ if isinstance(check, klass):
+ skip.append(klass)
+ for klass in skip:
+ default_classes.remove(klass)
+
+ for klass in default_classes:
+ opener.add_handler(klass())
+ for h in handlers:
+ if type(h) == types.ClassType:
+ h = h()
+ opener.add_handler(h)
+
+ return opener
+
+
+build_opener = OpenerFactory().build_opener
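+
+# Example (a sketch; the handler subclass is hypothetical).  Passing a subclass
+# of a default handler replaces that default rather than duplicating it:
+#
+#     class LoggingHTTPHandler(_http.HTTPHandler):
+#         def http_open(self, req):
+#             print "opening", req.get_full_url()
+#             return _http.HTTPHandler.http_open(self, req)
+#
+#     opener = build_opener(LoggingHTTPHandler)  # replaces _http.HTTPHandler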
+
+_opener = None
+urlopen_lock = _threading.Lock()
+def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ global _opener
+ if _opener is None:
+ urlopen_lock.acquire()
+ try:
+ if _opener is None:
+ _opener = build_opener()
+ finally:
+ urlopen_lock.release()
+ return _opener.open(url, data, timeout)
+
+def urlretrieve(url, filename=None, reporthook=None, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ global _opener
+ if _opener is None:
+ urlopen_lock.acquire()
+ try:
+ if _opener is None:
+ _opener = build_opener()
+ finally:
+ urlopen_lock.release()
+ return _opener.retrieve(url, filename, reporthook, data, timeout)
+
+def install_opener(opener):
+ global _opener
+ _opener = opener
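+
+# Module-level convenience sketch (not from the original source):
+#
+#     import mechanize
+#     response = mechanize.urlopen("http://example.com/")  # builds the global
+#     print response.read()[:80]                           # opener lazily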
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py
new file mode 100644
index 0000000..4d8d9d3
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py
@@ -0,0 +1,390 @@
+"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser.
+
+Examples
+
+This program extracts all links from a document. It will print one
+line for each link, containing the URL and the textual description
+between the <A>...</A> tags:
+
+import pullparser, sys
+f = file(sys.argv[1])
+p = pullparser.PullParser(f)
+for token in p.tags("a"):
+ if token.type == "endtag": continue
+ url = dict(token.attrs).get("href", "-")
+ text = p.get_compressed_text(endat=("endtag", "a"))
+ print "%s\t%s" % (url, text)
+
+This program extracts the <TITLE> from the document:
+
+import pullparser, sys
+f = file(sys.argv[1])
+p = pullparser.PullParser(f)
+if p.get_tag("title"):
+ title = p.get_compressed_text()
+ print "Title: %s" % title
+
+
+Copyright 2003-2006 John J. Lee <jjl@pobox.com>
+Copyright 1998-2001 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses.
+
+"""
+
+import re, htmlentitydefs
+import sgmllib, HTMLParser
+from xml.sax import saxutils
+
+from _html import unescape, unescape_charref
+
+
+class NoMoreTokensError(Exception): pass
+
+class Token:
+ """Represents an HTML tag, declaration, processing instruction etc.
+
+    Behaves both as a tuple-like object (ie. iterable) and as an object with
+    .type, .data and .attrs attributes.
+
+ >>> t = Token("starttag", "a", [("href", "http://www.python.org/")])
+ >>> t == ("starttag", "a", [("href", "http://www.python.org/")])
+ True
+ >>> (t.type, t.data) == ("starttag", "a")
+ True
+ >>> t.attrs == [("href", "http://www.python.org/")]
+ True
+
+ Public attributes
+
+ type: one of "starttag", "endtag", "startendtag", "charref", "entityref",
+ "data", "comment", "decl", "pi", after the corresponding methods of
+ HTMLParser.HTMLParser
+ data: For a tag, the tag name; otherwise, the relevant data carried by the
+ tag, as a string
+ attrs: list of (name, value) pairs representing HTML attributes
+ (or None if token does not represent an opening tag)
+
+ """
+ def __init__(self, type, data, attrs=None):
+ self.type = type
+ self.data = data
+ self.attrs = attrs
+ def __iter__(self):
+ return iter((self.type, self.data, self.attrs))
+ def __eq__(self, other):
+ type, data, attrs = other
+ if (self.type == type and
+ self.data == data and
+ self.attrs == attrs):
+ return True
+ else:
+ return False
+ def __ne__(self, other): return not self.__eq__(other)
+ def __repr__(self):
+ args = ", ".join(map(repr, [self.type, self.data, self.attrs]))
+ return self.__class__.__name__+"(%s)" % args
+
+ def __str__(self):
+ """
+ >>> print Token("starttag", "br")
+ <br>
+ >>> print Token("starttag", "a",
+ ... [("href", "http://www.python.org/"), ("alt", '"foo"')])
+ <a href="http://www.python.org/" alt='"foo"'>
+ >>> print Token("startendtag", "br")
+ <br />
+ >>> print Token("startendtag", "br", [("spam", "eggs")])
+ <br spam="eggs" />
+ >>> print Token("endtag", "p")
+ </p>
+ >>> print Token("charref", "38")
+ &#38;
+ >>> print Token("entityref", "amp")
+ &amp;
+ >>> print Token("data", "foo\\nbar")
+ foo
+ bar
+ >>> print Token("comment", "Life is a bowl\\nof cherries.")
+ <!--Life is a bowl
+ of cherries.-->
+ >>> print Token("decl", "decl")
+ <!decl>
+ >>> print Token("pi", "pi")
+ <?pi>
+ """
+ if self.attrs is not None:
+ attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for
+ k, v in self.attrs])
+ else:
+ attrs = ""
+ if self.type == "starttag":
+ return "<%s%s>" % (self.data, attrs)
+ elif self.type == "startendtag":
+ return "<%s%s />" % (self.data, attrs)
+ elif self.type == "endtag":
+ return "</%s>" % self.data
+ elif self.type == "charref":
+ return "&#%s;" % self.data
+ elif self.type == "entityref":
+ return "&%s;" % self.data
+ elif self.type == "data":
+ return self.data
+ elif self.type == "comment":
+ return "<!--%s-->" % self.data
+ elif self.type == "decl":
+ return "<!%s>" % self.data
+ elif self.type == "pi":
+ return "<?%s>" % self.data
+ assert False
+
+
+def iter_until_exception(fn, exception, *args, **kwds):
+ while 1:
+ try:
+ yield fn(*args, **kwds)
+ except exception:
+ raise StopIteration
+
+
+class _AbstractParser:
+ chunk = 1024
+ compress_re = re.compile(r"\s+")
+ def __init__(self, fh, textify={"img": "alt", "applet": "alt"},
+ encoding="ascii", entitydefs=None):
+ """
+ fh: file-like object (only a .read() method is required) from which to
+ read HTML to be parsed
+ textify: mapping used by .get_text() and .get_compressed_text() methods
+ to represent opening tags as text
+ encoding: encoding used to encode numeric character references by
+ .get_text() and .get_compressed_text() ("ascii" by default)
+
+ entitydefs: mapping like {"amp": "&", ...} containing HTML entity
+ definitions (a sensible default is used). This is used to unescape
+ entities in .get_text() (and .get_compressed_text()) and attribute
+ values. If the encoding can not represent the character, the entity
+ reference is left unescaped. Note that entity references (both
+ numeric - e.g. &#123; or &#xabc; - and non-numeric - e.g. &amp;) are
+ unescaped in attribute values and the return value of .get_text(), but
+ not in data outside of tags. Instead, entity references outside of
+ tags are represented as tokens. This is a bit odd, it's true :-/
+
+ If the element name of an opening tag matches a key in the textify
+ mapping then that tag is converted to text. The corresponding value is
+ used to specify which tag attribute to obtain the text from. textify
+ maps from element names to either:
+
+ - an HTML attribute name, in which case the HTML attribute value is
+ used as its text value along with the element name in square
+      brackets (eg. "alt text goes here[IMG]", or, if the alt attribute
+ were missing, just "[IMG]")
+ - a callable object (eg. a function) which takes a Token and returns
+ the string to be used as its text value
+
+ If textify has no key for an element name, nothing is substituted for
+ the opening tag.
+
+ Public attributes:
+
+ encoding and textify: see above
+
+ """
+ self._fh = fh
+ self._tokenstack = [] # FIFO
+ self.textify = textify
+ self.encoding = encoding
+ if entitydefs is None:
+ entitydefs = htmlentitydefs.name2codepoint
+ self._entitydefs = entitydefs
+
+ def __iter__(self): return self
+
+ def tags(self, *names):
+ return iter_until_exception(self.get_tag, NoMoreTokensError, *names)
+
+ def tokens(self, *tokentypes):
+ return iter_until_exception(self.get_token, NoMoreTokensError,
+ *tokentypes)
+
+ def next(self):
+ try:
+ return self.get_token()
+ except NoMoreTokensError:
+ raise StopIteration()
+
+ def get_token(self, *tokentypes):
+ """Pop the next Token object from the stack of parsed tokens.
+
+ If arguments are given, they are taken to be token types in which the
+ caller is interested: tokens representing other elements will be
+ skipped. Element names must be given in lower case.
+
+ Raises NoMoreTokensError.
+
+ """
+ while 1:
+ while self._tokenstack:
+ token = self._tokenstack.pop(0)
+ if tokentypes:
+ if token.type in tokentypes:
+ return token
+ else:
+ return token
+ data = self._fh.read(self.chunk)
+ if not data:
+ raise NoMoreTokensError()
+ self.feed(data)
+
+ def unget_token(self, token):
+ """Push a Token back onto the stack."""
+ self._tokenstack.insert(0, token)
+
+ def get_tag(self, *names):
+ """Return the next Token that represents an opening or closing tag.
+
+ If arguments are given, they are taken to be element names in which the
+ caller is interested: tags representing other elements will be skipped.
+ Element names must be given in lower case.
+
+ Raises NoMoreTokensError.
+
+ """
+ while 1:
+ tok = self.get_token()
+ if tok.type not in ["starttag", "endtag", "startendtag"]:
+ continue
+ if names:
+ if tok.data in names:
+ return tok
+ else:
+ return tok
+
+ def get_text(self, endat=None):
+ """Get some text.
+
+ endat: stop reading text at this tag (the tag is included in the
+    returned text); endat is a tuple (type, name) where type is
+ "starttag", "endtag" or "startendtag", and name is the element name of
+ the tag (element names must be given in lower case)
+
+ If endat is not given, .get_text() will stop at the next opening or
+ closing tag, or when there are no more tokens (no exception is raised).
+ Note that .get_text() includes the text representation (if any) of the
+ opening tag, but pushes the opening tag back onto the stack. As a
+ result, if you want to call .get_text() again, you need to call
+ .get_tag() first (unless you want an empty string returned when you
+ next call .get_text()).
+
+ Entity references are translated using the value of the entitydefs
+ constructor argument (a mapping from names to characters like that
+ provided by the standard module htmlentitydefs). Named entity
+ references that are not in this mapping are left unchanged.
+
+ The textify attribute is used to translate opening tags into text: see
+ the class docstring.
+
+ """
+ text = []
+ tok = None
+ while 1:
+ try:
+ tok = self.get_token()
+ except NoMoreTokensError:
+ # unget last token (not the one we just failed to get)
+ if tok: self.unget_token(tok)
+ break
+ if tok.type == "data":
+ text.append(tok.data)
+ elif tok.type == "entityref":
+ t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
+ text.append(t)
+ elif tok.type == "charref":
+ t = unescape_charref(tok.data, self.encoding)
+ text.append(t)
+ elif tok.type in ["starttag", "endtag", "startendtag"]:
+ tag_name = tok.data
+ if tok.type in ["starttag", "startendtag"]:
+ alt = self.textify.get(tag_name)
+ if alt is not None:
+ if callable(alt):
+ text.append(alt(tok))
+ elif tok.attrs is not None:
+ for k, v in tok.attrs:
+ if k == alt:
+ text.append(v)
+ text.append("[%s]" % tag_name.upper())
+ if endat is None or endat == (tok.type, tag_name):
+ self.unget_token(tok)
+ break
+ return "".join(text)
+
+ def get_compressed_text(self, *args, **kwds):
+ """
+ As .get_text(), but collapses each group of contiguous whitespace to a
+ single space character, and removes all initial and trailing
+ whitespace.
+
+ """
+ text = self.get_text(*args, **kwds)
+ text = text.strip()
+ return self.compress_re.sub(" ", text)
+
+ def handle_startendtag(self, tag, attrs):
+ self._tokenstack.append(Token("startendtag", tag, attrs))
+ def handle_starttag(self, tag, attrs):
+ self._tokenstack.append(Token("starttag", tag, attrs))
+ def handle_endtag(self, tag):
+ self._tokenstack.append(Token("endtag", tag))
+ def handle_charref(self, name):
+ self._tokenstack.append(Token("charref", name))
+ def handle_entityref(self, name):
+ self._tokenstack.append(Token("entityref", name))
+ def handle_data(self, data):
+ self._tokenstack.append(Token("data", data))
+ def handle_comment(self, data):
+ self._tokenstack.append(Token("comment", data))
+ def handle_decl(self, decl):
+ self._tokenstack.append(Token("decl", decl))
+ def unknown_decl(self, data):
+ # XXX should this call self.error instead?
+ #self.error("unknown declaration: " + `data`)
+ self._tokenstack.append(Token("decl", data))
+ def handle_pi(self, data):
+ self._tokenstack.append(Token("pi", data))
+
+ def unescape_attr(self, name):
+ return unescape(name, self._entitydefs, self.encoding)
+ def unescape_attrs(self, attrs):
+ escaped_attrs = []
+ for key, val in attrs:
+ escaped_attrs.append((key, self.unescape_attr(val)))
+ return escaped_attrs
+
+class PullParser(_AbstractParser, HTMLParser.HTMLParser):
+ def __init__(self, *args, **kwds):
+ HTMLParser.HTMLParser.__init__(self)
+ _AbstractParser.__init__(self, *args, **kwds)
+ def unescape(self, name):
+ # Use the entitydefs passed into constructor, not
+ # HTMLParser.HTMLParser's entitydefs.
+ return self.unescape_attr(name)
+
+class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
+ def __init__(self, *args, **kwds):
+ sgmllib.SGMLParser.__init__(self)
+ _AbstractParser.__init__(self, *args, **kwds)
+ def unknown_starttag(self, tag, attrs):
+ attrs = self.unescape_attrs(attrs)
+ self._tokenstack.append(Token("starttag", tag, attrs))
+ def unknown_endtag(self, tag):
+ self._tokenstack.append(Token("endtag", tag))
+
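+## Illustrative sketch: TolerantPullParser (sgmllib-based) copes with tag
+## soup that the stricter HTMLParser-based PullParser may reject, e.g.:
+##   from cStringIO import StringIO
+##   p = TolerantPullParser(StringIO("<p>one<p>two"))
+##   [tok.data for tok in p.tokens("data")]  # -> ["one", "two"]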
+
+def _test():
+ import doctest, _pullparser
+ return doctest.testmod(_pullparser)
+
+if __name__ == "__main__":
+ _test()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py
new file mode 100644
index 0000000..7824441
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py
@@ -0,0 +1,87 @@
+"""Integration with Python standard library module urllib2: Request class.
+
+Copyright 2004-2006 John J Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import urllib2, urllib, logging
+
+from _clientcookie import request_host_lc
+import _rfc3986
+import _sockettimeout
+
+warn = logging.getLogger("mechanize").warning
+
+
+class Request(urllib2.Request):
+ def __init__(self, url, data=None, headers={},
+ origin_req_host=None, unverifiable=False, visit=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ # In mechanize 0.2, the interpretation of a unicode url argument will
+ # change: A unicode url argument will be interpreted as an IRI, and a
+ # bytestring as a URI. For now, we accept unicode or bytestring. We
+ # don't insist that the value is always a URI (specifically, must only
+ # contain characters which are legal), because that might break working
+ # code (who knows what bytes some servers want to see, especially with
+ # browser plugins for internationalised URIs).
+ if not _rfc3986.is_clean_uri(url):
+ warn("url argument is not a URI "
+ "(contains illegal characters) %r" % url)
+ urllib2.Request.__init__(self, url, data, headers)
+ self.selector = None
+ self.unredirected_hdrs = {}
+ self.visit = visit
+ self.timeout = timeout
+
+ # All the terminology below comes from RFC 2965.
+ self.unverifiable = unverifiable
+ # Set request-host of origin transaction.
+ # The origin request-host is needed in order to decide whether
+ # unverifiable sub-requests (automatic redirects, images embedded
+ # in HTML, etc.) are to third-party hosts. If they are, the
+ # resulting transactions might need to be conducted with cookies
+ # turned off.
+ if origin_req_host is None:
+ origin_req_host = request_host_lc(self)
+ self.origin_req_host = origin_req_host
+
+ def get_selector(self):
+ return urllib.splittag(self.__r_host)[0]
+
+ def get_origin_req_host(self):
+ return self.origin_req_host
+
+ def is_unverifiable(self):
+ return self.unverifiable
+
+ def add_unredirected_header(self, key, val):
+ """Add a header that will not be added to a redirected request."""
+ self.unredirected_hdrs[key.capitalize()] = val
+
+ def has_header(self, header_name):
+ """True iff request has named header (regular or unredirected)."""
+ return (header_name in self.headers or
+ header_name in self.unredirected_hdrs)
+
+ def get_header(self, header_name, default=None):
+ return self.headers.get(
+ header_name,
+ self.unredirected_hdrs.get(header_name, default))
+
+ def header_items(self):
+ hdrs = self.unredirected_hdrs.copy()
+ hdrs.update(self.headers)
+ return hdrs.items()
+
+ def __str__(self):
+ return "<Request for %s>" % self.get_full_url()
+
+ def get_method(self):
+ if self.has_data():
+ return "POST"
+ else:
+ return "GET"
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py
new file mode 100644
index 0000000..fad9b57
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py
@@ -0,0 +1,527 @@
+"""Response classes.
+
+The seek_wrapper code is not used if you're using UserAgent with
+.set_seekable_responses(False), or if you're using the urllib2-level interface
+without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is
+instantiated by some handlers (AbstractHTTPHandler), but the closeable_response
+interface is only depended upon by Browser-level code. Function
+upgrade_response is only used if you're using Browser or
+ResponseUpgradeProcessor.
+
+
+Copyright 2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import copy, mimetools
+from cStringIO import StringIO
+import urllib2
+
+
+def len_of_seekable(file_):
+ # this function exists because evaluation of len(file_.getvalue()) on every
+ # .read() from seek_wrapper would be O(N**2) in number of .read()s
+ pos = file_.tell()
+ file_.seek(0, 2) # to end
+ try:
+ return file_.tell()
+ finally:
+ file_.seek(pos)
+
+
+# XXX Andrew Dalke kindly sent me a similar class in response to my request on
+# comp.lang.python, which I then proceeded to lose. I wrote this class
+# instead, but I think he's released his code publicly since, could pinch the
+# tests from it, at least...
+
+# For testing seek_wrapper invariant (note that
+# test_urllib2.HandlerTest.test_seekable is expected to fail when this
+# invariant checking is turned on). The invariant checking is done by module
+# ipdbc, which is available here:
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
+## from ipdbc import ContractBase
+## class seek_wrapper(ContractBase):
+class seek_wrapper:
+ """Adds a seek method to a file object.
+
+ This is only designed for seeking on readonly file-like objects.
+
+ Wrapped file-like object must have a read method. The readline method is
+ only supported if that method is present on the wrapped object. The
+ readlines method is always supported. xreadlines and iteration are
+ supported only for Python 2.2 and above.
+
+ Public attributes:
+
+ wrapped: the wrapped file object
+ is_closed: true iff .close() has been called
+
+ WARNING: All other attributes of the wrapped object (ie. those that are not
+ one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
+ are passed through unaltered, which may or may not make sense for your
+ particular file object.
+
+ """
+ # General strategy is to check that cache is full enough, then delegate to
+ # the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
+ # position (self.__pos) is maintained independently of the cache, in order
+ # that a single cache may be shared between multiple seek_wrapper objects.
+ # Copying using module copy shares the cache in this way.
+
+ def __init__(self, wrapped):
+ self.wrapped = wrapped
+ self.__read_complete_state = [False]
+ self.__is_closed_state = [False]
+ self.__have_readline = hasattr(self.wrapped, "readline")
+ self.__cache = StringIO()
+ self.__pos = 0 # seek position
+
+ def invariant(self):
+ # The end of the cache is always at the same place as the end of the
+ # wrapped file (though the .tell() method is not required to be present
+ # on wrapped file).
+ return self.wrapped.tell() == len(self.__cache.getvalue())
+
+ def close(self):
+ self.wrapped.close()
+ self.is_closed = True
+
+ def __getattr__(self, name):
+ if name == "is_closed":
+ return self.__is_closed_state[0]
+ elif name == "read_complete":
+ return self.__read_complete_state[0]
+
+ wrapped = self.__dict__.get("wrapped")
+ if wrapped:
+ return getattr(wrapped, name)
+
+ return getattr(self.__class__, name)
+
+ def __setattr__(self, name, value):
+ if name == "is_closed":
+ self.__is_closed_state[0] = bool(value)
+ elif name == "read_complete":
+ if not self.is_closed:
+ self.__read_complete_state[0] = bool(value)
+ else:
+ self.__dict__[name] = value
+
+ def seek(self, offset, whence=0):
+ assert whence in [0,1,2]
+
+ # how much data, if any, do we need to read?
+ if whence == 2: # 2: relative to end of *wrapped* file
+ if offset < 0: raise ValueError("negative seek offset")
+ # since we don't know yet where the end of that file is, we must
+ # read everything
+ to_read = None
+ else:
+ if whence == 0: # 0: absolute
+ if offset < 0: raise ValueError("negative seek offset")
+ dest = offset
+ else: # 1: relative to current position
+ pos = self.__pos
+                if pos + offset < 0:  # a negative offset may seek backwards
+                    raise ValueError("seek to before start of file")
+ dest = pos + offset
+ end = len_of_seekable(self.__cache)
+ to_read = dest - end
+ if to_read < 0:
+ to_read = 0
+
+ if to_read != 0:
+ self.__cache.seek(0, 2)
+ if to_read is None:
+ assert whence == 2
+ self.__cache.write(self.wrapped.read())
+ self.read_complete = True
+ self.__pos = self.__cache.tell() - offset
+ else:
+ data = self.wrapped.read(to_read)
+ if not data:
+ self.read_complete = True
+ else:
+ self.__cache.write(data)
+ # Don't raise an exception even if we've seek()ed past the end
+ # of .wrapped, since fseek() doesn't complain in that case.
+ # Also like fseek(), pretend we have seek()ed past the end,
+ # i.e. not:
+ #self.__pos = self.__cache.tell()
+ # but rather:
+ self.__pos = dest
+ else:
+ self.__pos = dest
+
+ def tell(self):
+ return self.__pos
+
+ def __copy__(self):
+ cpy = self.__class__(self.wrapped)
+ cpy.__cache = self.__cache
+ cpy.__read_complete_state = self.__read_complete_state
+ cpy.__is_closed_state = self.__is_closed_state
+ return cpy
+
+ def get_data(self):
+ pos = self.__pos
+ try:
+ self.seek(0)
+ return self.read(-1)
+ finally:
+ self.__pos = pos
+
+ def read(self, size=-1):
+ pos = self.__pos
+ end = len_of_seekable(self.__cache)
+ available = end - pos
+
+ # enough data already cached?
+ if size <= available and size != -1:
+ self.__cache.seek(pos)
+ self.__pos = pos+size
+ return self.__cache.read(size)
+
+ # no, so read sufficient data from wrapped file and cache it
+ self.__cache.seek(0, 2)
+ if size == -1:
+ self.__cache.write(self.wrapped.read())
+ self.read_complete = True
+ else:
+ to_read = size - available
+ assert to_read > 0
+ data = self.wrapped.read(to_read)
+ if not data:
+ self.read_complete = True
+ else:
+ self.__cache.write(data)
+ self.__cache.seek(pos)
+
+ data = self.__cache.read(size)
+ self.__pos = self.__cache.tell()
+ assert self.__pos == pos + len(data)
+ return data
+
+ def readline(self, size=-1):
+ if not self.__have_readline:
+ raise NotImplementedError("no readline method on wrapped object")
+
+ # line we're about to read might not be complete in the cache, so
+ # read another line first
+ pos = self.__pos
+ self.__cache.seek(0, 2)
+ data = self.wrapped.readline()
+ if not data:
+ self.read_complete = True
+ else:
+ self.__cache.write(data)
+ self.__cache.seek(pos)
+
+ data = self.__cache.readline()
+ if size != -1:
+            r = data[:size]
+            self.__pos = pos + len(r)  # len(r) may be < size at end of data
+ else:
+ r = data
+ self.__pos = pos+len(data)
+ return r
+
+ def readlines(self, sizehint=-1):
+ pos = self.__pos
+ self.__cache.seek(0, 2)
+ self.__cache.write(self.wrapped.read())
+ self.read_complete = True
+ self.__cache.seek(pos)
+ data = self.__cache.readlines(sizehint)
+ self.__pos = self.__cache.tell()
+ return data
+
+ def __iter__(self): return self
+ def next(self):
+ line = self.readline()
+ if line == "": raise StopIteration
+ return line
+
+ xreadlines = __iter__
+
+ def __repr__(self):
+ return ("<%s at %s whose wrapped object = %r>" %
+ (self.__class__.__name__, hex(abs(id(self))), self.wrapped))
+
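+## Illustrative sketch: wrapping a stream makes it seekable by caching what
+## has been read (StringIO used here only for brevity):
+##   from cStringIO import StringIO
+##   f = seek_wrapper(StringIO("spam eggs"))
+##   f.read(4)     # -> "spam", now cached
+##   f.seek(0)     # rewinds within the cache; no seek on the wrapped object
+##   f.get_data()  # -> "spam eggs", leaving .tell() unchanged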
+
+class response_seek_wrapper(seek_wrapper):
+
+ """
+ Supports copying response objects and setting response body data.
+
+ """
+
+ def __init__(self, wrapped):
+ seek_wrapper.__init__(self, wrapped)
+ self._headers = self.wrapped.info()
+
+ def __copy__(self):
+ cpy = seek_wrapper.__copy__(self)
+ # copy headers from delegate
+ cpy._headers = copy.copy(self.info())
+ return cpy
+
+ # Note that .info() and .geturl() (the only two urllib2 response methods
+ # that are not implemented by seek_wrapper) must be here explicitly rather
+    # than via seek_wrapper's __getattr__ delegation, so that the nasty
+ # dynamically-created HTTPError classes in get_seek_wrapper_class() get the
+ # wrapped object's implementation, and not HTTPError's.
+
+ def info(self):
+ return self._headers
+
+ def geturl(self):
+ return self.wrapped.geturl()
+
+ def set_data(self, data):
+ self.seek(0)
+ self.read()
+ self.close()
+ cache = self._seek_wrapper__cache = StringIO()
+ cache.write(data)
+ self.seek(0)
+
+
+class eoffile:
+ # file-like object that always claims to be at end-of-file...
+ def read(self, size=-1): return ""
+ def readline(self, size=-1): return ""
+ def __iter__(self): return self
+ def next(self): return ""
+ def close(self): pass
+
+class eofresponse(eoffile):
+ def __init__(self, url, headers, code, msg):
+ self._url = url
+ self._headers = headers
+ self.code = code
+ self.msg = msg
+ def geturl(self): return self._url
+ def info(self): return self._headers
+
+
+class closeable_response:
+ """Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
+
+ Only supports responses returned by mechanize.HTTPHandler.
+
+ After .close(), the following methods are supported:
+
+ .read()
+ .readline()
+ .info()
+ .geturl()
+ .__iter__()
+ .next()
+ .close()
+
+ and the following attributes are supported:
+
+ .code
+ .msg
+
+ Also supports pickling (but the stdlib currently does something to prevent
+ it: http://python.org/sf/1144636).
+
+ """
+    # presence of this attr indicates the response is usable after .close()
+ closeable_response = None
+
+ def __init__(self, fp, headers, url, code, msg):
+ self._set_fp(fp)
+ self._headers = headers
+ self._url = url
+ self.code = code
+ self.msg = msg
+
+ def _set_fp(self, fp):
+ self.fp = fp
+ self.read = self.fp.read
+ self.readline = self.fp.readline
+ if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
+ if hasattr(self.fp, "fileno"):
+ self.fileno = self.fp.fileno
+ else:
+ self.fileno = lambda: None
+ self.__iter__ = self.fp.__iter__
+ self.next = self.fp.next
+
+ def __repr__(self):
+ return '<%s at %s whose fp = %r>' % (
+ self.__class__.__name__, hex(abs(id(self))), self.fp)
+
+ def info(self):
+ return self._headers
+
+ def geturl(self):
+ return self._url
+
+ def close(self):
+ wrapped = self.fp
+ wrapped.close()
+ new_wrapped = eofresponse(
+ self._url, self._headers, self.code, self.msg)
+ self._set_fp(new_wrapped)
+
+ def __getstate__(self):
+ # There are three obvious options here:
+ # 1. truncate
+ # 2. read to end
+ # 3. close socket, pickle state including read position, then open
+ # again on unpickle and use Range header
+ # XXXX um, 4. refuse to pickle unless .close()d. This is better,
+ # actually ("errors should never pass silently"). Pickling doesn't
+ # work anyway ATM, because of http://python.org/sf/1144636 so fix
+ # this later
+
+ # 2 breaks pickle protocol, because one expects the original object
+ # to be left unscathed by pickling. 3 is too complicated and
+ # surprising (and too much work ;-) to happen in a sane __getstate__.
+ # So we do 1.
+
+ state = self.__dict__.copy()
+ new_wrapped = eofresponse(
+ self._url, self._headers, self.code, self.msg)
+ state["wrapped"] = new_wrapped
+ return state
+
+def test_response(data='test data', headers=[],
+ url="http://example.com/", code=200, msg="OK"):
+ return make_response(data, headers, url, code, msg)
+
+def test_html_response(data='test data', headers=[],
+ url="http://example.com/", code=200, msg="OK"):
+    headers = headers + [("Content-type", "text/html")]  # don't mutate the default arg
+ return make_response(data, headers, url, code, msg)
+
+def make_response(data, headers, url, code, msg):
+ """Convenient factory for objects implementing response interface.
+
+ data: string containing response body data
+ headers: sequence of (name, value) pairs
+ url: URL of response
+ code: integer response code (e.g. 200)
+ msg: string response code message (e.g. "OK")
+
+ """
+ mime_headers = make_headers(headers)
+ r = closeable_response(StringIO(data), mime_headers, url, code, msg)
+ return response_seek_wrapper(r)
+
+
+def make_headers(headers):
+ """
+ headers: sequence of (name, value) pairs
+ """
+ hdr_text = []
+ for name_value in headers:
+ hdr_text.append("%s: %s" % name_value)
+ return mimetools.Message(StringIO("\n".join(hdr_text)))
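+## Illustrative sketch of the test factories above:
+##   r = test_html_response("<html>hi</html>")
+##   r.info()["Content-type"]  # -> "text/html"
+##   r.read(); r.seek(0)       # responses from make_response are seekable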
+
+
+# Rest of this module is especially horrible, but needed, at least until we
+# fork urllib2. Even then, we may want to preserve urllib2 compatibility.
+
+def get_seek_wrapper_class(response):
+ # in order to wrap response objects that are also exceptions, we must
+ # dynamically subclass the exception :-(((
+ if (isinstance(response, urllib2.HTTPError) and
+ not hasattr(response, "seek")):
+ if response.__class__.__module__ == "__builtin__":
+ exc_class_name = response.__class__.__name__
+ else:
+ exc_class_name = "%s.%s" % (
+ response.__class__.__module__, response.__class__.__name__)
+
+ class httperror_seek_wrapper(response_seek_wrapper, response.__class__):
+ # this only derives from HTTPError in order to be a subclass --
+ # the HTTPError behaviour comes from delegation
+
+ _exc_class_name = exc_class_name
+
+ def __init__(self, wrapped):
+ response_seek_wrapper.__init__(self, wrapped)
+ # be compatible with undocumented HTTPError attributes :-(
+ self.hdrs = wrapped.info()
+ self.filename = wrapped.geturl()
+
+ def __repr__(self):
+ return (
+ "<%s (%s instance) at %s "
+ "whose wrapped object = %r>" % (
+ self.__class__.__name__, self._exc_class_name,
+ hex(abs(id(self))), self.wrapped)
+ )
+ wrapper_class = httperror_seek_wrapper
+ else:
+ wrapper_class = response_seek_wrapper
+ return wrapper_class
+
+def seek_wrapped_response(response):
+ """Return a copy of response that supports seekable response interface.
+
+ Accepts responses from both mechanize and urllib2 handlers.
+
+    Copes with both ordinary response instances and HTTPError instances (which
+ can't be simply wrapped due to the requirement of preserving the exception
+ base class).
+ """
+ if not hasattr(response, "seek"):
+ wrapper_class = get_seek_wrapper_class(response)
+ response = wrapper_class(response)
+ assert hasattr(response, "get_data")
+ return response
+
+def upgrade_response(response):
+ """Return a copy of response that supports Browser response interface.
+
+ Browser response interface is that of "seekable responses"
+ (response_seek_wrapper), plus the requirement that responses must be
+ useable after .close() (closeable_response).
+
+ Accepts responses from both mechanize and urllib2 handlers.
+
+ Copes with both ordinary response instances and HTTPError instances (which
+ can't be simply wrapped due to the requirement of preserving the exception
+ base class).
+ """
+ wrapper_class = get_seek_wrapper_class(response)
+ if hasattr(response, "closeable_response"):
+ if not hasattr(response, "seek"):
+ response = wrapper_class(response)
+ assert hasattr(response, "get_data")
+ return copy.copy(response)
+
+ # a urllib2 handler constructed the response, i.e. the response is an
+ # urllib.addinfourl or a urllib2.HTTPError, instead of a
+ # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
+ try:
+ code = response.code
+ except AttributeError:
+ code = None
+ try:
+ msg = response.msg
+ except AttributeError:
+ msg = None
+
+ # may have already-.read() data from .seek() cache
+ data = None
+ get_data = getattr(response, "get_data", None)
+ if get_data:
+ data = get_data()
+
+ response = closeable_response(
+ response.fp, response.info(), response.geturl(), code, msg)
+ response = wrapper_class(response)
+ if data:
+ response.set_data(data)
+ return response
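+## Illustrative sketch: upgraded responses remain readable after .close():
+##   r = upgrade_response(test_response("spam"))
+##   r.read()   # -> "spam"
+##   r.close()
+##   r.read()   # -> "" rather than raising (see closeable_response)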
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py
new file mode 100644
index 0000000..1bb5021
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py
@@ -0,0 +1,241 @@
+"""RFC 3986 URI parsing and relative reference resolution / absolutization.
+
+(aka splitting and joining)
+
+Copyright 2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
+
+import re, urllib
+
+## def chr_range(a, b):
+## return "".join(map(chr, range(ord(a), ord(b)+1)))
+
+## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+## "abcdefghijklmnopqrstuvwxyz"
+## "0123456789"
+## "-_.~")
+## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
+## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
+# this re matches any character that's not in URI_CHARS
+BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
+
+
+def clean_url(url, encoding):
+ # percent-encode illegal URI characters
+ # Trying to come up with test cases for this gave me a headache, revisit
+    # when we do switch to unicode.
+ # Somebody else's comments (lost the attribution):
+## - IE will return you the url in the encoding you send it
+## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
+## characters in your link. It will send you utf-8 however if there are...
+ if type(url) == type(""):
+ url = url.decode(encoding, "replace")
+ url = url.strip()
+ # for second param to urllib.quote(), we want URI_CHARS, minus the
+ # 'always_safe' characters that urllib.quote() never percent-encodes
+ return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
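+## e.g. (illustrative): clean_url(u"http://example.com/a b|c", "utf-8")
+## returns "http://example.com/a%20b%7Cc" -- the space and pipe are
+## percent-encoded, while reserved characters like ":" and "/" are kept.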
+
+def is_clean_uri(uri):
+ """
+ >>> is_clean_uri("ABC!")
+ True
+ >>> is_clean_uri(u"ABC!")
+ True
+ >>> is_clean_uri("ABC|")
+ False
+ >>> is_clean_uri(u"ABC|")
+ False
+ >>> is_clean_uri("http://example.com/0")
+ True
+ >>> is_clean_uri(u"http://example.com/0")
+ True
+ """
+    # note module re treats bytestrings as though they were decoded as latin-1
+ # so this function accepts both unicode and bytestrings
+ return not bool(BAD_URI_CHARS_RE.search(uri))
+
+
+SPLIT_MATCH = re.compile(
+ r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match
+def urlsplit(absolute_uri):
+ """Return scheme, authority, path, query, fragment."""
+ match = SPLIT_MATCH(absolute_uri)
+ if match:
+ g = match.groups()
+ return g[1], g[3], g[4], g[6], g[8]
+
+def urlunsplit(parts):
+ scheme, authority, path, query, fragment = parts
+ r = []
+ append = r.append
+ if scheme is not None:
+ append(scheme)
+ append(":")
+ if authority is not None:
+ append("//")
+ append(authority)
+ append(path)
+ if query is not None:
+ append("?")
+ append(query)
+ if fragment is not None:
+ append("#")
+ append(fragment)
+ return "".join(r)
+
+def urljoin(base_uri, uri_reference):
+ return urlunsplit(urljoin_parts(urlsplit(base_uri),
+ urlsplit(uri_reference)))
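+## e.g. (illustrative):
+##   urlsplit("http://example.com/a/b?q#f")
+##       -> ("http", "example.com", "/a/b", "q", "f")
+##   urljoin("http://example.com/a/b", "../c") -> "http://example.com/c"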
+
+# oops, this doesn't do the same thing as the literal translation
+# from the RFC below
+## import posixpath
+## def urljoin_parts(base_parts, reference_parts):
+## scheme, authority, path, query, fragment = base_parts
+## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
+
+## # compute target URI path
+## if rpath == "":
+## tpath = path
+## else:
+## tpath = rpath
+## if not tpath.startswith("/"):
+## tpath = merge(authority, path, tpath)
+## tpath = posixpath.normpath(tpath)
+
+## if rscheme is not None:
+## return (rscheme, rauthority, tpath, rquery, rfragment)
+## elif rauthority is not None:
+## return (scheme, rauthority, tpath, rquery, rfragment)
+## elif rpath == "":
+## if rquery is not None:
+## tquery = rquery
+## else:
+## tquery = query
+## return (scheme, authority, tpath, tquery, rfragment)
+## else:
+## return (scheme, authority, tpath, rquery, rfragment)
+
+def urljoin_parts(base_parts, reference_parts):
+ scheme, authority, path, query, fragment = base_parts
+ rscheme, rauthority, rpath, rquery, rfragment = reference_parts
+
+ if rscheme == scheme:
+ rscheme = None
+
+ if rscheme is not None:
+ tscheme, tauthority, tpath, tquery = (
+ rscheme, rauthority, remove_dot_segments(rpath), rquery)
+ else:
+ if rauthority is not None:
+ tauthority, tpath, tquery = (
+ rauthority, remove_dot_segments(rpath), rquery)
+ else:
+ if rpath == "":
+ tpath = path
+ if rquery is not None:
+ tquery = rquery
+ else:
+ tquery = query
+ else:
+ if rpath.startswith("/"):
+ tpath = remove_dot_segments(rpath)
+ else:
+ tpath = merge(authority, path, rpath)
+ tpath = remove_dot_segments(tpath)
+ tquery = rquery
+ tauthority = authority
+ tscheme = scheme
+ tfragment = rfragment
+ return (tscheme, tauthority, tpath, tquery, tfragment)
+
+# um, something *vaguely* like this is what I want, but I have to generate
+# lots of test cases first, if only to understand what it is that
+# remove_dot_segments really does...
+## def remove_dot_segments(path):
+## if path == '':
+## return ''
+## comps = path.split('/')
+## new_comps = []
+## for comp in comps:
+## if comp in ['.', '']:
+## if not new_comps or new_comps[-1]:
+## new_comps.append('')
+## continue
+## if comp != '..':
+## new_comps.append(comp)
+## elif new_comps:
+## new_comps.pop()
+## return '/'.join(new_comps)
+
+
+def remove_dot_segments(path):
+ r = []
+ while path:
+ # A
+ if path.startswith("../"):
+ path = path[3:]
+ continue
+ if path.startswith("./"):
+ path = path[2:]
+ continue
+ # B
+ if path.startswith("/./"):
+ path = path[2:]
+ continue
+ if path == "/.":
+ path = "/"
+ continue
+ # C
+ if path.startswith("/../"):
+ path = path[3:]
+ if r:
+ r.pop()
+ continue
+ if path == "/..":
+ path = "/"
+ if r:
+ r.pop()
+ continue
+ # D
+ if path == ".":
+ path = path[1:]
+ continue
+ if path == "..":
+ path = path[2:]
+ continue
+ # E
+ start = 0
+ if path.startswith("/"):
+ start = 1
+ ii = path.find("/", start)
+ if ii < 0:
+ ii = None
+ r.append(path[:ii])
+ if ii is None:
+ break
+ path = path[ii:]
+ return "".join(r)
+
+def merge(base_authority, base_path, ref_path):
+ # XXXX Oddly, the sample Perl implementation of this by Roy Fielding
+ # doesn't even take base_authority as a parameter, despite the wording in
+ # the RFC suggesting otherwise. Perhaps I'm missing some obvious identity.
+ #if base_authority is not None and base_path == "":
+ if base_path == "":
+ return "/" + ref_path
+ ii = base_path.rfind("/")
+ if ii >= 0:
+ return base_path[:ii+1] + ref_path
+ return ref_path
+
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py
new file mode 100644
index 0000000..4086d52
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py
@@ -0,0 +1,16 @@
+from urllib2 import BaseHandler
+from _util import deprecation
+from _response import response_seek_wrapper
+
+
+class SeekableProcessor(BaseHandler):
+ """Deprecated: Make responses seekable."""
+
+ def __init__(self):
+ deprecation(
+ "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable")
+
+ def any_response(self, request, response):
+ if not hasattr(response, "seek"):
+ return response_seek_wrapper(response)
+ return response
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py
new file mode 100644
index 0000000..c22b734
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py
@@ -0,0 +1,6 @@
+import socket
+
+try:
+ _GLOBAL_DEFAULT_TIMEOUT = socket._GLOBAL_DEFAULT_TIMEOUT
+except AttributeError:
+ _GLOBAL_DEFAULT_TIMEOUT = object()
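+# socket._GLOBAL_DEFAULT_TIMEOUT appeared in Python 2.6; on older versions a
+# unique sentinel object is substituted so that callers can still compare
+# timeout arguments by identity.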
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py
new file mode 100644
index 0000000..a13cca3
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py
@@ -0,0 +1,73 @@
+import shutil
+import tempfile
+import unittest
+
+
+class SetupStack(object):
+
+ def __init__(self):
+ self._on_teardown = []
+
+ def add_teardown(self, teardown):
+ self._on_teardown.append(teardown)
+
+ def tear_down(self):
+ for func in reversed(self._on_teardown):
+ func()
+
+
+class TearDownConvenience(object):
+
+ def __init__(self, setup_stack=None):
+ self._own_setup_stack = setup_stack is None
+ if setup_stack is None:
+ setup_stack = SetupStack()
+ self._setup_stack = setup_stack
+
+ # only call this convenience method if no setup_stack was supplied to c'tor
+ def tear_down(self):
+ assert self._own_setup_stack
+ self._setup_stack.tear_down()
+
+
+class TempDirMaker(TearDownConvenience):
+
+ def make_temp_dir(self):
+ temp_dir = tempfile.mkdtemp(prefix="tmp-%s-" % self.__class__.__name__)
+ def tear_down():
+ shutil.rmtree(temp_dir)
+ self._setup_stack.add_teardown(tear_down)
+ return temp_dir
+
+
+class MonkeyPatcher(TearDownConvenience):
+
+ def monkey_patch(self, obj, name, value):
+ orig_value = getattr(obj, name)
+ setattr(obj, name, value)
+ def reverse_patch():
+ setattr(obj, name, orig_value)
+ self._setup_stack.add_teardown(reverse_patch)
+
+
+class TestCase(unittest.TestCase):
+
+ def setUp(self):
+ self._setup_stack = SetupStack()
+
+ def tearDown(self):
+ self._setup_stack.tear_down()
+
+ def make_temp_dir(self, *args, **kwds):
+ return TempDirMaker(self._setup_stack).make_temp_dir(*args, **kwds)
+
+ def monkey_patch(self, *args, **kwds):
+ return MonkeyPatcher(self._setup_stack).monkey_patch(*args, **kwds)
+
+ def assert_contains(self, container, containee):
+ self.assertTrue(containee in container, "%r not in %r" %
+ (containee, container))
+
+ def assert_less_than(self, got, expected):
+ self.assertTrue(got < expected, "%r >= %r" %
+ (got, expected))
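+## Illustrative sketch (names assumed): subclasses get automatic cleanup:
+##   class MyTest(TestCase):
+##       def test_scratch_space(self):
+##           temp_dir = self.make_temp_dir()          # rmtree'd in tearDown
+##           self.monkey_patch(mymod, "DEBUG", True)  # restored in tearDown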
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py
new file mode 100644
index 0000000..df59c01
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py
@@ -0,0 +1,40 @@
+from urllib2 import BaseHandler
+
+from _request import Request
+from _response import upgrade_response
+from _util import deprecation
+
+
+class HTTPRequestUpgradeProcessor(BaseHandler):
+ # upgrade urllib2.Request to this module's Request
+ # yuck!
+ handler_order = 0 # before anything else
+
+ def http_request(self, request):
+ if not hasattr(request, "add_unredirected_header"):
+ newrequest = Request(request.get_full_url(), request.data,
+ request.headers)
+ try: newrequest.origin_req_host = request.origin_req_host
+ except AttributeError: pass
+ try: newrequest.unverifiable = request.unverifiable
+ except AttributeError: pass
+ try: newrequest.visit = request.visit
+ except AttributeError: pass
+ request = newrequest
+ return request
+
+ https_request = http_request
+
+
+class ResponseUpgradeProcessor(BaseHandler):
+ # upgrade responses to be .close()able without becoming unusable
+ handler_order = 0 # before anything else
+
+ def __init__(self):
+ deprecation(
+ "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable")
+
+ def any_response(self, request, response):
+ if not hasattr(response, 'closeable_response'):
+ response = upgrade_response(response)
+ return response
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py
new file mode 100644
index 0000000..cbb761b
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py
@@ -0,0 +1,55 @@
+# urllib2 work-alike interface
+# ...from urllib2...
+from urllib2 import \
+ URLError, \
+ HTTPError, \
+ BaseHandler, \
+ UnknownHandler, \
+ FTPHandler, \
+ CacheFTPHandler
+# ...and from mechanize
+from _auth import \
+ HTTPPasswordMgr, \
+ HTTPPasswordMgrWithDefaultRealm, \
+ AbstractBasicAuthHandler, \
+ AbstractDigestAuthHandler, \
+ HTTPProxyPasswordMgr, \
+ ProxyHandler, \
+ ProxyBasicAuthHandler, \
+ ProxyDigestAuthHandler, \
+ HTTPBasicAuthHandler, \
+ HTTPDigestAuthHandler, \
+ HTTPSClientCertMgr
+from _debug import \
+ HTTPResponseDebugProcessor, \
+ HTTPRedirectDebugProcessor
+from _file import \
+ FileHandler
+# crap ATM
+## from _gzip import \
+## HTTPGzipProcessor
+from _http import \
+ HTTPHandler, \
+ HTTPDefaultErrorHandler, \
+ HTTPRedirectHandler, \
+ HTTPEquivProcessor, \
+ HTTPCookieProcessor, \
+ HTTPRefererProcessor, \
+ HTTPRefreshProcessor, \
+ HTTPErrorProcessor, \
+ HTTPRobotRulesProcessor, \
+ RobotExclusionError
+import httplib
+if hasattr(httplib, 'HTTPS'):
+ from _http import HTTPSHandler
+del httplib
+from _opener import OpenerDirector, \
+ SeekableResponseOpener, \
+ build_opener, install_opener, urlopen
+from _request import \
+ Request
+from _seek import \
+ SeekableProcessor
+from _upgrade import \
+ HTTPRequestUpgradeProcessor, \
+ ResponseUpgradeProcessor
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py
new file mode 100644
index 0000000..723f87c
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py
@@ -0,0 +1,352 @@
+"""Convenient HTTP UserAgent class.
+
+This is a subclass of urllib2.OpenerDirector.
+
+
+Copyright 2003-2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import warnings
+
+import _auth
+import _gzip
+import _opener
+import _response
+import _sockettimeout
+import _urllib2
+
+
+class UserAgentBase(_opener.OpenerDirector):
+ """Convenient user-agent class.
+
+ Do not use .add_handler() to add a handler for something already dealt with
+ by this code.
+
+ The only reason at present for the distinction between UserAgent and
+ UserAgentBase is so that classes that depend on .seek()able responses
+ (e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass
+ UserAgent exposes a .set_seekable_responses() method that allows switching
+ off the adding of a .seek() method to responses.
+
+ Public attributes:
+
+ addheaders: list of (name, value) pairs specifying headers to send with
+ every request, unless they are overridden in the Request instance.
+
+ >>> ua = UserAgentBase()
+ >>> ua.addheaders = [
+ ... ("User-agent", "Mozilla/5.0 (compatible)"),
+ ... ("From", "responsible.person@example.com")]
+
+ """
+
+ handler_classes = {
+ # scheme handlers
+ "http": _urllib2.HTTPHandler,
+ # CacheFTPHandler is buggy, at least in 2.3, so we don't use it
+ "ftp": _urllib2.FTPHandler,
+ "file": _urllib2.FileHandler,
+
+ # other handlers
+ "_unknown": _urllib2.UnknownHandler,
+ # HTTP{S,}Handler depend on HTTPErrorProcessor too
+ "_http_error": _urllib2.HTTPErrorProcessor,
+ "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor,
+ "_http_default_error": _urllib2.HTTPDefaultErrorHandler,
+
+ # feature handlers
+ "_basicauth": _urllib2.HTTPBasicAuthHandler,
+ "_digestauth": _urllib2.HTTPDigestAuthHandler,
+ "_redirect": _urllib2.HTTPRedirectHandler,
+ "_cookies": _urllib2.HTTPCookieProcessor,
+ "_refresh": _urllib2.HTTPRefreshProcessor,
+ "_equiv": _urllib2.HTTPEquivProcessor,
+ "_proxy": _urllib2.ProxyHandler,
+ "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
+ "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
+ "_robots": _urllib2.HTTPRobotRulesProcessor,
+ "_gzip": _gzip.HTTPGzipProcessor, # experimental!
+
+ # debug handlers
+ "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
+ "_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
+ }
+
+ default_schemes = ["http", "ftp", "file"]
+ default_others = ["_unknown", "_http_error", "_http_request_upgrade",
+ "_http_default_error",
+ ]
+ default_features = ["_redirect", "_cookies",
+ "_refresh", "_equiv",
+ "_basicauth", "_digestauth",
+ "_proxy", "_proxy_basicauth", "_proxy_digestauth",
+ "_robots",
+ ]
+ if hasattr(_urllib2, 'HTTPSHandler'):
+ handler_classes["https"] = _urllib2.HTTPSHandler
+ default_schemes.append("https")
+
+ def __init__(self):
+ _opener.OpenerDirector.__init__(self)
+
+ ua_handlers = self._ua_handlers = {}
+ for scheme in (self.default_schemes+
+ self.default_others+
+ self.default_features):
+ klass = self.handler_classes[scheme]
+ ua_handlers[scheme] = klass()
+ for handler in ua_handlers.itervalues():
+ self.add_handler(handler)
+
+ # Yuck.
+ # Ensure correct default constructor args were passed to
+ # HTTPRefreshProcessor and HTTPEquivProcessor.
+ if "_refresh" in ua_handlers:
+ self.set_handle_refresh(True)
+ if "_equiv" in ua_handlers:
+ self.set_handle_equiv(True)
+ # Ensure default password managers are installed.
+ pm = ppm = None
+ if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
+ pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
+ if ("_proxy_basicauth" in ua_handlers or
+ "_proxy_digestauth" in ua_handlers):
+ ppm = _auth.HTTPProxyPasswordMgr()
+ self.set_password_manager(pm)
+ self.set_proxy_password_manager(ppm)
+ # set default certificate manager
+ if "https" in ua_handlers:
+ cm = _urllib2.HTTPSClientCertMgr()
+ self.set_client_cert_manager(cm)
+
+ def close(self):
+ _opener.OpenerDirector.close(self)
+ self._ua_handlers = None
+
+ # XXX
+## def set_timeout(self, timeout):
+## self._timeout = timeout
+## def set_http_connection_cache(self, conn_cache):
+## self._http_conn_cache = conn_cache
+## def set_ftp_connection_cache(self, conn_cache):
+## # XXX ATM, FTP has cache as part of handler; should it be separate?
+## self._ftp_conn_cache = conn_cache
+
+ def set_handled_schemes(self, schemes):
+ """Set sequence of URL scheme (protocol) strings.
+
+ For example: ua.set_handled_schemes(["http", "ftp"])
+
+ If this fails (with ValueError) because you've passed an unknown
+ scheme, the set of handled schemes will not be changed.
+
+ """
+ want = {}
+ for scheme in schemes:
+ if scheme.startswith("_"):
+ raise ValueError("not a scheme '%s'" % scheme)
+ if scheme not in self.handler_classes:
+ raise ValueError("unknown scheme '%s'")
+ want[scheme] = None
+
+ # get rid of scheme handlers we don't want
+ for scheme, oldhandler in self._ua_handlers.items():
+ if scheme.startswith("_"): continue # not a scheme handler
+ if scheme not in want:
+ self._replace_handler(scheme, None)
+ else:
+ del want[scheme] # already got it
+ # add the scheme handlers that are missing
+ for scheme in want.keys():
+ self._set_handler(scheme, True)
+
+ def set_cookiejar(self, cookiejar):
+ """Set a mechanize.CookieJar, or None."""
+ self._set_handler("_cookies", obj=cookiejar)
+
+ # XXX could use Greg Stein's httpx for some of this instead?
+ # or httplib2??
+ def set_proxies(self, proxies):
+ """Set a dictionary mapping URL scheme to proxy specification, or None.
+
+ e.g. {"http": "joe:password@myproxy.example.com:3128",
+ "ftp": "proxy.example.com"}
+
+ """
+ self._set_handler("_proxy", obj=proxies)
+
+ def add_password(self, url, user, password, realm=None):
+ self._password_manager.add_password(realm, url, user, password)
+ def add_proxy_password(self, user, password, hostport=None, realm=None):
+ self._proxy_password_manager.add_password(
+ realm, hostport, user, password)
+
+ def add_client_certificate(self, url, key_file, cert_file):
+ """Add an SSL client certificate, for HTTPS client auth.
+
+ key_file and cert_file must be filenames of the key and certificate
+ files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS
+ 12) file to PEM format:
+
+ openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
+ openssl pkcs12 -nocerts -in cert.p12 -out key.pem
+
+
+ Note that client certificate password input is very inflexible ATM. At
+ the moment this seems to be console only, which is presumably the
+ default behaviour of libopenssl. In future mechanize may support
+ third-party libraries that (I assume) allow more options here.
+
+ """
+ self._client_cert_manager.add_key_cert(url, key_file, cert_file)
+
+ # the following are rarely useful -- use add_password / add_proxy_password
+ # instead
+ def set_password_manager(self, password_manager):
+ """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
+ self._password_manager = password_manager
+ self._set_handler("_basicauth", obj=password_manager)
+ self._set_handler("_digestauth", obj=password_manager)
+ def set_proxy_password_manager(self, password_manager):
+ """Set a mechanize.HTTPProxyPasswordMgr, or None."""
+ self._proxy_password_manager = password_manager
+ self._set_handler("_proxy_basicauth", obj=password_manager)
+ self._set_handler("_proxy_digestauth", obj=password_manager)
+ def set_client_cert_manager(self, cert_manager):
+ """Set a mechanize.HTTPClientCertMgr, or None."""
+ self._client_cert_manager = cert_manager
+ handler = self._ua_handlers["https"]
+ handler.client_cert_manager = cert_manager
+
+ # these methods all take a boolean parameter
+ def set_handle_robots(self, handle):
+ """Set whether to observe rules from robots.txt."""
+ self._set_handler("_robots", handle)
+ def set_handle_redirect(self, handle):
+ """Set whether to handle HTTP 30x redirections."""
+ self._set_handler("_redirect", handle)
+ def set_handle_refresh(self, handle, max_time=None, honor_time=True):
+ """Set whether to handle HTTP Refresh headers."""
+ self._set_handler("_refresh", handle, constructor_kwds=
+ {"max_time": max_time, "honor_time": honor_time})
+ def set_handle_equiv(self, handle, head_parser_class=None):
+ """Set whether to treat HTML http-equiv headers like HTTP headers.
+
+ Response objects may be .seek()able if this is set (currently returned
+ responses are, raised HTTPError exception responses are not).
+
+ """
+ if head_parser_class is not None:
+ constructor_kwds = {"head_parser_class": head_parser_class}
+ else:
+ constructor_kwds={}
+ self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)
+ def set_handle_gzip(self, handle):
+ """Handle gzip transfer encoding.
+
+ """
+ if handle:
+ warnings.warn(
+ "gzip transfer encoding is experimental!", stacklevel=2)
+ self._set_handler("_gzip", handle)
+ def set_debug_redirects(self, handle):
+ """Log information about HTTP redirects (including refreshes).
+
+ Logging is performed using module logging. The logger name is
+ "mechanize.http_redirects". To actually print some debug output,
+ eg:
+
+ import sys, logging
+ logger = logging.getLogger("mechanize.http_redirects")
+ logger.addHandler(logging.StreamHandler(sys.stdout))
+ logger.setLevel(logging.INFO)
+
+ Other logger names relevant to this module:
+
+ "mechanize.http_responses"
+ "mechanize.cookies" (or "cookielib" if running Python 2.4)
+
+ To turn on everything:
+
+ import sys, logging
+ logger = logging.getLogger("mechanize")
+ logger.addHandler(logging.StreamHandler(sys.stdout))
+ logger.setLevel(logging.INFO)
+
+ """
+ self._set_handler("_debug_redirect", handle)
+ def set_debug_responses(self, handle):
+ """Log HTTP response bodies.
+
+ See docstring for .set_debug_redirects() for details of logging.
+
+ Response objects may be .seek()able if this is set (currently returned
+ responses are, raised HTTPError exception responses are not).
+
+ """
+ self._set_handler("_debug_response_body", handle)
+ def set_debug_http(self, handle):
+ """Print HTTP headers to sys.stdout."""
+ level = int(bool(handle))
+ for scheme in "http", "https":
+ h = self._ua_handlers.get(scheme)
+ if h is not None:
+ h.set_http_debuglevel(level)
+
+ def _set_handler(self, name, handle=None, obj=None,
+ constructor_args=(), constructor_kwds={}):
+ if handle is None:
+ handle = obj is not None
+ if handle:
+ handler_class = self.handler_classes[name]
+ if obj is not None:
+ newhandler = handler_class(obj)
+ else:
+ newhandler = handler_class(
+ *constructor_args, **constructor_kwds)
+ else:
+ newhandler = None
+ self._replace_handler(name, newhandler)
+
+ def _replace_handler(self, name, newhandler=None):
+ # first, if handler was previously added, remove it
+ if name is not None:
+ handler = self._ua_handlers.get(name)
+ if handler:
+ try:
+ self.handlers.remove(handler)
+ except ValueError:
+ pass
+ # then add the replacement, if any
+ if newhandler is not None:
+ self.add_handler(newhandler)
+ self._ua_handlers[name] = newhandler
+
+
+class UserAgent(UserAgentBase):
+
+ def __init__(self):
+ UserAgentBase.__init__(self)
+ self._seekable = False
+
+ def set_seekable_responses(self, handle):
+ """Make response objects .seek()able."""
+ self._seekable = bool(handle)
+
+ def open(self, fullurl, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ if self._seekable:
+ def bound_open(fullurl, data=None,
+ timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
+ return UserAgentBase.open(self, fullurl, data, timeout)
+ response = _opener.wrapped_open(
+ bound_open, _response.seek_wrapped_response, fullurl, data,
+ timeout)
+ else:
+            response = UserAgentBase.open(self, fullurl, data, timeout)
+ return response
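+## Illustrative sketch:
+##   ua = UserAgent()
+##   ua.set_handle_robots(False)      # skip robots.txt handling
+##   ua.set_seekable_responses(True)  # add .seek() to responses
+##   response = ua.open("http://example.com/")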
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py
new file mode 100644
index 0000000..dcdefa9
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py
@@ -0,0 +1,291 @@
+"""Utility functions and date/time routines.
+
+ Copyright 2002-2006 John J Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import re, time, warnings
+
+
+class ExperimentalWarning(UserWarning):
+ pass
+
+def experimental(message):
+ warnings.warn(message, ExperimentalWarning, stacklevel=3)
+def hide_experimental_warnings():
+ warnings.filterwarnings("ignore", category=ExperimentalWarning)
+def reset_experimental_warnings():
+ warnings.filterwarnings("default", category=ExperimentalWarning)
+
+def deprecation(message):
+ warnings.warn(message, DeprecationWarning, stacklevel=3)
+def hide_deprecations():
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
+def reset_deprecations():
+ warnings.filterwarnings("default", category=DeprecationWarning)
+
+
+def isstringlike(x):
+ try: x+""
+ except: return False
+ else: return True
+
+## def caller():
+## try:
+## raise SyntaxError
+## except:
+## import sys
+## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
+
+
+from calendar import timegm
+
+# Date/time conversion routines for formats used by the HTTP protocol.
+
+EPOCH = 1970
+def my_timegm(tt):
+ year, month, mday, hour, min, sec = tt[:6]
+ if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
+ (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
+ return timegm(tt)
+ else:
+ return None
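+# For example (illustrative): my_timegm((1994, 2, 9, 22, 23, 32)) returns
+# 760832612, while an out-of-range tuple such as (1994, 13, 9, 22, 23, 32)
+# returns None.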
+
+days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+months_lower = []
+for month in months: months_lower.append(month.lower())
+
+
+def time2isoz(t=None):
+    """Return a string representing the time t, given in seconds since epoch.
+
+ If the function is called without an argument, it will use the current
+ time.
+
+ The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
+ representing Universal Time (UTC, aka GMT). An example of this format is:
+
+ 1994-11-24 08:49:37Z
+
+ """
+ if t is None: t = time.time()
+ year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
+ return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
+ year, mon, mday, hour, min, sec)
+
+def time2netscape(t=None):
+    """Return a string representing the time t, given in seconds since epoch.
+
+ If the function is called without an argument, it will use the current
+ time.
+
+ The format of the returned string is like this:
+
+ Wed, DD-Mon-YYYY HH:MM:SS GMT
+
+ """
+ if t is None: t = time.time()
+ year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
+ return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
+ days[wday], mday, months[mon-1], year, hour, min, sec)
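+# For example (illustrative): time2isoz(760832612) returns
+# "1994-02-09 22:23:32Z"; time2netscape formats the same instant in the
+# Netscape cookie style documented above.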
+
+
+UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
+
+timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
+def offset_from_tz_string(tz):
+ offset = None
+ if UTC_ZONES.has_key(tz):
+ offset = 0
+ else:
+ m = timezone_re.search(tz)
+ if m:
+ offset = 3600 * int(m.group(2))
+ if m.group(3):
+ offset = offset + 60 * int(m.group(3))
+ if m.group(1) == '-':
+ offset = -offset
+ return offset
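+# For example (illustrative): offset_from_tz_string("UTC") -> 0,
+# offset_from_tz_string("-0800") -> -28800 (seconds), and an unrecognised
+# name such as "EST" -> None.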
+
+def _str2time(day, mon, yr, hr, min, sec, tz):
+ # translate month name to number
+ # month numbers start with 1 (January)
+ try:
+ mon = months_lower.index(mon.lower())+1
+ except ValueError:
+ # maybe it's already a number
+ try:
+ imon = int(mon)
+ except ValueError:
+ return None
+ if 1 <= imon <= 12:
+ mon = imon
+ else:
+ return None
+
+ # make sure clock elements are defined
+ if hr is None: hr = 0
+ if min is None: min = 0
+ if sec is None: sec = 0
+
+ yr = int(yr)
+ day = int(day)
+ hr = int(hr)
+ min = int(min)
+ sec = int(sec)
+
+ if yr < 1000:
+ # find "obvious" year
+ cur_yr = time.localtime(time.time())[0]
+ m = cur_yr % 100
+ tmp = yr
+ yr = yr + cur_yr - m
+ m = m - tmp
+ if abs(m) > 50:
+ if m > 0: yr = yr + 100
+ else: yr = yr - 100
+
+ # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
+ t = my_timegm((yr, mon, day, hr, min, sec, tz))
+
+ if t is not None:
+ # adjust time using timezone string, to get absolute time since epoch
+ if tz is None:
+ tz = "UTC"
+ tz = tz.upper()
+ offset = offset_from_tz_string(tz)
+ if offset is None:
+ return None
+ t = t - offset
+
+ return t
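+# Two-digit years are resolved to the century that puts them within 50 years
+# of the current date (illustrative, assuming the current year is 2010):
+# yr=94 yields 1994 and yr=5 yields 2005.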
+
+
+strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
+ r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
+wkday_re = re.compile(
+ r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
+loose_http_re = re.compile(
+ r"""^
+ (\d\d?) # day
+ (?:\s+|[-\/])
+ (\w+) # month
+ (?:\s+|[-\/])
+ (\d+) # year
+ (?:
+ (?:\s+|:) # separator before clock
+ (\d\d?):(\d\d) # hour:min
+ (?::(\d\d))? # optional seconds
+ )? # optional clock
+ \s*
+ ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
+ \s*
+ (?:\(\w+\))? # ASCII representation of timezone in parens.
+ \s*$""", re.X)
+def http2time(text):
+    """Return, as seconds since the epoch, the time represented by a string.
+
+ Return value is an integer.
+
+    None is returned if the format of the string is unrecognized, the time
+    is outside the representable range, or the timezone string is not
+    recognized. If the string contains no timezone, UTC is assumed.
+
+ The timezone in the string may be numerical (like "-0800" or "+0100") or a
+ string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
+ timezone strings equivalent to UTC (zero offset) are known to the function.
+
+ The function loosely parses the following formats:
+
+ Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
+ Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
+ Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
+ 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
+ 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
+ 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
+
+ The parser ignores leading and trailing whitespace. The time may be
+ absent.
+
+ If the year is given with only 2 digits, the function will select the
+ century that makes the year closest to the current date.
+
+ """
+ # fast exit for strictly conforming string
+ m = strict_re.search(text)
+ if m:
+ g = m.groups()
+ mon = months_lower.index(g[1].lower()) + 1
+ tt = (int(g[2]), mon, int(g[0]),
+ int(g[3]), int(g[4]), float(g[5]))
+ return my_timegm(tt)
+
+ # No, we need some messy parsing...
+
+ # clean up
+ text = text.lstrip()
+ text = wkday_re.sub("", text, 1) # Useless weekday
+
+ # tz is time zone specifier string
+ day, mon, yr, hr, min, sec, tz = [None]*7
+
+ # loose regexp parse
+ m = loose_http_re.search(text)
+ if m is not None:
+ day, mon, yr, hr, min, sec, tz = m.groups()
+ else:
+ return None # bad format
+
+ return _str2time(day, mon, yr, hr, min, sec, tz)
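+# For example (illustrative), the formats listed above all parse to the same
+# timestamp:
+#     http2time("Wed, 09 Feb 1994 22:23:32 GMT")  -> 760832612
+#     http2time("09-Feb-1994 22:23:32 GMT")       -> 760832612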
+
+
+iso_re = re.compile(
+ """^
+ (\d{4}) # year
+ [-\/]?
+ (\d\d?) # numerical month
+ [-\/]?
+ (\d\d?) # day
+ (?:
+ (?:\s+|[-:Tt]) # separator before clock
+ (\d\d?):?(\d\d) # hour:min
+ (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
+ )? # optional clock
+ \s*
+ ([-+]?\d\d?:?(:?\d\d)?
+ |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
+ \s*$""", re.X)
+def iso2time(text):
+ """
+ As for http2time, but parses the ISO 8601 formats:
+
+ 1994-02-03 14:15:29 -0100 -- ISO 8601 format
+ 1994-02-03 14:15:29 -- zone is optional
+ 1994-02-03 -- only date
+ 1994-02-03T14:15:29 -- Use T as separator
+ 19940203T141529Z -- ISO 8601 compact format
+ 19940203 -- only date
+
+ """
+ # clean up
+ text = text.lstrip()
+
+ # tz is time zone specifier string
+ day, mon, yr, hr, min, sec, tz = [None]*7
+
+ # loose regexp parse
+ m = iso_re.search(text)
+ if m is not None:
+ # XXX there's an extra bit of the timezone I'm ignoring here: is
+ # this the right thing to do?
+ yr, mon, day, hr, min, sec, tz, _ = m.groups()
+ else:
+ return None # bad format
+
+ return _str2time(day, mon, yr, hr, min, sec, tz)
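+
+
+# For example (illustrative): iso2time("1994-02-03 14:15:29 -0100") and
+# iso2time("1994-02-03T15:15:29Z") return the same UTC timestamp, while
+# iso2time("not a date") returns None.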
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py
new file mode 100755
index 0000000..c319370
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py
@@ -0,0 +1,1254 @@
+#!/usr/bin/python
+# pep8.py - Check Python source code formatting, according to PEP 8
+# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""
+Check Python source code formatting, according to PEP 8:
+http://www.python.org/dev/peps/pep-0008/
+
+For usage and a list of options, try this:
+$ python pep8.py -h
+
+This program and its regression test suite live here:
+http://github.com/jcrocholl/pep8
+
+Groups of errors and warnings:
+E errors
+W warnings
+100 indentation
+200 whitespace
+300 blank lines
+400 imports
+500 line length
+600 deprecation
+700 statements
+
+You can add checks to this program by writing plugins. Each plugin is
+a simple function that is called for each line of source code, either
+physical or logical.
+
+Physical line:
+- Raw line of text from the input file.
+
+Logical line:
+- Multi-line statements converted to a single line.
+- Stripped left and right.
+- Contents of strings replaced with 'xxx' of same length.
+- Comments removed.
+
+The check function requests physical or logical lines by the name of
+the first argument:
+
+def maximum_line_length(physical_line)
+def extraneous_whitespace(logical_line)
+def blank_lines(logical_line, blank_lines, indent_level, line_number)
+
+The last example above demonstrates how check plugins can request
+additional information with extra arguments. All attributes of the
+Checker object are available. Some examples:
+
+lines: a list of the raw lines from the input file
+tokens: the tokens that contribute to this logical line
+line_number: line number in the input file
+blank_lines: blank lines before this one
+indent_char: first indentation character in this file (' ' or '\t')
+indent_level: indentation (with tabs expanded to multiples of 8)
+previous_indent_level: indentation on previous line
+previous_logical: previous logical line
+
+The docstring of each check function shall be the relevant part of
+text from PEP 8. It is printed if the user enables --show-pep8.
+Several docstrings contain examples directly from the PEP 8 document.
+
+Okay: spam(ham[1], {eggs: 2})
+E201: spam( ham[1], {eggs: 2})
+
+These examples are verified automatically when pep8.py is run with the
+--doctest option. You can add examples for your own check functions.
+The format is simple: "Okay" or error/warning code followed by colon
+and space, the rest of the line is example source code. If you put 'r'
+before the docstring, you can use \n for newline, \t for tab and \s
+for space.
+
+"""
+
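+# What a third-party check plugin could look like (an illustrative sketch,
+# not a check shipped in this file; the name and the code W999 are made up):
+#
+#     def no_fixme_comments(logical_line):
+#         pos = logical_line.find('FIXME')
+#         if pos > -1:
+#             return pos, "W999 FIXME comment found"
+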
+__version__ = '0.5.0'
+
+import os
+import sys
+import re
+import time
+import inspect
+import tokenize
+from optparse import OptionParser
+from keyword import iskeyword
+from fnmatch import fnmatch
+
+DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
+DEFAULT_IGNORE = ['E24']
+
+INDENT_REGEX = re.compile(r'([ \t]*)')
+RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
+SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
+ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
+E301NOT_REGEX = re.compile(r'class |def |u?r?["\']')
+
+WHITESPACE = ' \t'
+
+BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>',
+ '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=',
+ '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<']
+UNARY_OPERATORS = ['**', '*', '+', '-']
+OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS
+
+options = None
+args = None
+
+
+##############################################################################
+# Plugins (check functions) for physical lines
+##############################################################################
+
+
+def tabs_or_spaces(physical_line, indent_char):
+ r"""
+ Never mix tabs and spaces.
+
+ The most popular way of indenting Python is with spaces only. The
+ second-most popular way is with tabs only. Code indented with a mixture
+ of tabs and spaces should be converted to using spaces exclusively. When
+ invoking the Python command line interpreter with the -t option, it issues
+ warnings about code that illegally mixes tabs and spaces. When using -tt
+ these warnings become errors. These options are highly recommended!
+
+    Okay: if a == 0:\n        a = 1\n        b = 1
+    E101: if a == 0:\n        a = 1\n\tb = 1
+ """
+ indent = INDENT_REGEX.match(physical_line).group(1)
+ for offset, char in enumerate(indent):
+ if char != indent_char:
+ return offset, "E101 indentation contains mixed spaces and tabs"
+
+
+def tabs_obsolete(physical_line):
+ r"""
+ For new projects, spaces-only are strongly recommended over tabs. Most
+ editors have features that make this easy to do.
+
+    Okay: if True:\n    return
+ W191: if True:\n\treturn
+ """
+ indent = INDENT_REGEX.match(physical_line).group(1)
+ if indent.count('\t'):
+ return indent.index('\t'), "W191 indentation contains tabs"
+
+
+def trailing_whitespace(physical_line):
+ """
+ JCR: Trailing whitespace is superfluous.
+
+ Okay: spam(1)
+ W291: spam(1)\s
+ """
+ physical_line = physical_line.rstrip('\n') # chr(10), newline
+ physical_line = physical_line.rstrip('\r') # chr(13), carriage return
+ physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L
+ stripped = physical_line.rstrip()
+ if physical_line != stripped:
+ return len(stripped), "W291 trailing whitespace"
+
+
+def trailing_blank_lines(physical_line, lines, line_number):
+ r"""
+ JCR: Trailing blank lines are superfluous.
+
+ Okay: spam(1)
+ W391: spam(1)\n
+ """
+ if physical_line.strip() == '' and line_number == len(lines):
+ return 0, "W391 blank line at end of file"
+
+
+def missing_newline(physical_line):
+ """
+ JCR: The last line should have a newline.
+ """
+ if physical_line.rstrip() == physical_line:
+ return len(physical_line), "W292 no newline at end of file"
+
+
+def maximum_line_length(physical_line):
+ """
+ Limit all lines to a maximum of 79 characters.
+
+ There are still many devices around that are limited to 80 character
+ lines; plus, limiting windows to 80 characters makes it possible to have
+ several windows side-by-side. The default wrapping on such devices looks
+ ugly. Therefore, please limit all lines to a maximum of 79 characters.
+ For flowing long blocks of text (docstrings or comments), limiting the
+ length to 72 characters is recommended.
+ """
+ length = len(physical_line.rstrip())
+ if length > 79:
+ return 79, "E501 line too long (%d characters)" % length
+
+
+##############################################################################
+# Plugins (check functions) for logical lines
+##############################################################################
+
+
+def blank_lines(logical_line, blank_lines, indent_level, line_number,
+ previous_logical, blank_lines_before_comment):
+ r"""
+ Separate top-level function and class definitions with two blank lines.
+
+ Method definitions inside a class are separated by a single blank line.
+
+ Extra blank lines may be used (sparingly) to separate groups of related
+ functions. Blank lines may be omitted between a bunch of related
+ one-liners (e.g. a set of dummy implementations).
+
+ Use blank lines in functions, sparingly, to indicate logical sections.
+
+    Okay: def a():\n    pass\n\n\ndef b():\n    pass
+    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
+
+    E301: class Foo:\n    b = 0\n    def bar():\n        pass
+    E302: def a():\n    pass\n\ndef b(n):\n    pass
+    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
+    E303: def a():\n\n\n\n    pass
+    E304: @decorator\n\ndef a():\n    pass
+ """
+ if line_number == 1:
+ return # Don't expect blank lines before the first line
+ max_blank_lines = max(blank_lines, blank_lines_before_comment)
+ if previous_logical.startswith('@'):
+ if max_blank_lines:
+ return 0, "E304 blank lines found after function decorator"
+ elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
+ return 0, "E303 too many blank lines (%d)" % max_blank_lines
+ elif (logical_line.startswith('def ') or
+ logical_line.startswith('class ') or
+ logical_line.startswith('@')):
+ if indent_level:
+ if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)):
+ return 0, "E301 expected 1 blank line, found 0"
+ elif max_blank_lines != 2:
+ return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
+
+
+def extraneous_whitespace(logical_line):
+ """
+ Avoid extraneous whitespace in the following situations:
+
+ - Immediately inside parentheses, brackets or braces.
+
+ - Immediately before a comma, semicolon, or colon.
+
+ Okay: spam(ham[1], {eggs: 2})
+ E201: spam( ham[1], {eggs: 2})
+ E201: spam(ham[ 1], {eggs: 2})
+ E201: spam(ham[1], { eggs: 2})
+ E202: spam(ham[1], {eggs: 2} )
+ E202: spam(ham[1 ], {eggs: 2})
+ E202: spam(ham[1], {eggs: 2 })
+
+ E203: if x == 4: print x, y; x, y = y , x
+ E203: if x == 4: print x, y ; x, y = y, x
+ E203: if x == 4 : print x, y; x, y = y, x
+ """
+ line = logical_line
+ for char in '([{':
+ found = line.find(char + ' ')
+ if found > -1:
+ return found + 1, "E201 whitespace after '%s'" % char
+ for char in '}])':
+ found = line.find(' ' + char)
+ if found > -1 and line[found - 1] != ',':
+ return found, "E202 whitespace before '%s'" % char
+ for char in ',;:':
+ found = line.find(' ' + char)
+ if found > -1:
+ return found, "E203 whitespace before '%s'" % char
+
+
+def missing_whitespace(logical_line):
+ """
+ JCR: Each comma, semicolon or colon should be followed by whitespace.
+
+ Okay: [a, b]
+ Okay: (3,)
+ Okay: a[1:4]
+ Okay: a[:4]
+ Okay: a[1:]
+ Okay: a[1:4:2]
+ E231: ['a','b']
+ E231: foo(bar,baz)
+ """
+ line = logical_line
+ for index in range(len(line) - 1):
+ char = line[index]
+ if char in ',;:' and line[index + 1] not in WHITESPACE:
+ before = line[:index]
+ if char == ':' and before.count('[') > before.count(']'):
+ continue # Slice syntax, no space required
+ if char == ',' and line[index + 1] == ')':
+ continue # Allow tuple with only one element: (3,)
+ return index, "E231 missing whitespace after '%s'" % char
+
+
+def indentation(logical_line, previous_logical, indent_char,
+ indent_level, previous_indent_level):
+ r"""
+ Use 4 spaces per indentation level.
+
+ For really old code that you don't want to mess up, you can continue to
+ use 8-space tabs.
+
+    Okay: a = 1
+    Okay: if a == 0:\n        a = 1
+    E111:   a = 1
+
+    Okay: for item in items:\n    pass
+    E112: for item in items:\npass
+
+    Okay: a = 1\nb = 2
+    E113: a = 1\n    b = 2
+ """
+ if indent_char == ' ' and indent_level % 4:
+ return 0, "E111 indentation is not a multiple of four"
+ indent_expect = previous_logical.endswith(':')
+ if indent_expect and indent_level <= previous_indent_level:
+ return 0, "E112 expected an indented block"
+ if indent_level > previous_indent_level and not indent_expect:
+ return 0, "E113 unexpected indentation"
+
+
+def whitespace_before_parameters(logical_line, tokens):
+ """
+ Avoid extraneous whitespace in the following situations:
+
+ - Immediately before the open parenthesis that starts the argument
+ list of a function call.
+
+ - Immediately before the open parenthesis that starts an indexing or
+ slicing.
+
+ Okay: spam(1)
+ E211: spam (1)
+
+ Okay: dict['key'] = list[index]
+ E211: dict ['key'] = list[index]
+ E211: dict['key'] = list [index]
+ """
+ prev_type = tokens[0][0]
+ prev_text = tokens[0][1]
+ prev_end = tokens[0][3]
+ for index in range(1, len(tokens)):
+ token_type, text, start, end, line = tokens[index]
+ if (token_type == tokenize.OP and
+ text in '([' and
+ start != prev_end and
+ prev_type == tokenize.NAME and
+ (index < 2 or tokens[index - 2][1] != 'class') and
+ (not iskeyword(prev_text))):
+ return prev_end, "E211 whitespace before '%s'" % text
+ prev_type = token_type
+ prev_text = text
+ prev_end = end
+
+
+def whitespace_around_operator(logical_line):
+ """
+ Avoid extraneous whitespace in the following situations:
+
+ - More than one space around an assignment (or other) operator to
+ align it with another.
+
+ Okay: a = 12 + 3
+    E221: a = 4  + 5
+    E222: a = 4 +  5
+ E223: a = 4\t+ 5
+ E224: a = 4 +\t5
+ """
+ line = logical_line
+ for operator in OPERATORS:
+ found = line.find(' ' + operator)
+ if found > -1:
+ return found, "E221 multiple spaces before operator"
+ found = line.find(operator + ' ')
+ if found > -1:
+ return found, "E222 multiple spaces after operator"
+ found = line.find('\t' + operator)
+ if found > -1:
+ return found, "E223 tab before operator"
+ found = line.find(operator + '\t')
+ if found > -1:
+ return found, "E224 tab after operator"
+
+
+def missing_whitespace_around_operator(logical_line, tokens):
+ r"""
+ - Always surround these binary operators with a single space on
+ either side: assignment (=), augmented assignment (+=, -= etc.),
+ comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
+ Booleans (and, or, not).
+
+ - Use spaces around arithmetic operators.
+
+ Okay: i = i + 1
+ Okay: submitted += 1
+ Okay: x = x * 2 - 1
+ Okay: hypot2 = x * x + y * y
+ Okay: c = (a + b) * (a - b)
+ Okay: foo(bar, key='word', *args, **kwargs)
+ Okay: baz(**kwargs)
+ Okay: negative = -1
+ Okay: spam(-1)
+ Okay: alpha[:-i]
+    Okay: if not -5 < x < +5:\n    pass
+ Okay: lambda *args, **kw: (args, kw)
+
+ E225: i=i+1
+ E225: submitted +=1
+ E225: x = x*2 - 1
+ E225: hypot2 = x*x + y*y
+ E225: c = (a+b) * (a-b)
+ E225: c = alpha -4
+ E225: z = x **y
+ """
+ parens = 0
+ need_space = False
+ prev_type = tokenize.OP
+ prev_text = prev_end = None
+ for token_type, text, start, end, line in tokens:
+ if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
+ # ERRORTOKEN is triggered by backticks in Python 3000
+ continue
+ if text in ('(', 'lambda'):
+ parens += 1
+ elif text == ')':
+ parens -= 1
+ if need_space:
+ if start == prev_end:
+ return prev_end, "E225 missing whitespace around operator"
+ need_space = False
+ elif token_type == tokenize.OP:
+ if text == '=' and parens:
+ # Allow keyword args or defaults: foo(bar=None).
+ pass
+ elif text in BINARY_OPERATORS:
+ need_space = True
+ elif text in UNARY_OPERATORS:
+ if ((prev_type != tokenize.OP or prev_text in '}])') and not
+ (prev_type == tokenize.NAME and iskeyword(prev_text))):
+ # Allow unary operators: -123, -x, +1.
+ # Allow argument unpacking: foo(*args, **kwargs).
+ need_space = True
+ if need_space and start == prev_end:
+ return prev_end, "E225 missing whitespace around operator"
+ prev_type = token_type
+ prev_text = text
+ prev_end = end
+
+
+def whitespace_around_comma(logical_line):
+ """
+ Avoid extraneous whitespace in the following situations:
+
+ - More than one space around an assignment (or other) operator to
+ align it with another.
+
+ JCR: This should also be applied around comma etc.
+ Note: these checks are disabled by default
+
+ Okay: a = (1, 2)
+    E241: a = (1,  2)
+ E242: a = (1,\t2)
+ """
+ line = logical_line
+ for separator in ',;:':
+ found = line.find(separator + ' ')
+ if found > -1:
+ return found + 1, "E241 multiple spaces after '%s'" % separator
+ found = line.find(separator + '\t')
+ if found > -1:
+ return found + 1, "E242 tab after '%s'" % separator
+
+
+def whitespace_around_named_parameter_equals(logical_line):
+ """
+ Don't use spaces around the '=' sign when used to indicate a
+ keyword argument or a default parameter value.
+
+ Okay: def complex(real, imag=0.0):
+ Okay: return magic(r=real, i=imag)
+ Okay: boolean(a == b)
+ Okay: boolean(a != b)
+ Okay: boolean(a <= b)
+ Okay: boolean(a >= b)
+
+ E251: def complex(real, imag = 0.0):
+ E251: return magic(r = real, i = imag)
+ """
+ parens = 0
+ window = ' '
+ equal_ok = ['==', '!=', '<=', '>=']
+
+ for pos, c in enumerate(logical_line):
+ window = window[1:] + c
+ if parens:
+ if window[0] in WHITESPACE and window[1] == '=':
+ if window[1:] not in equal_ok:
+ issue = "E251 no spaces around keyword / parameter equals"
+ return pos, issue
+ if window[2] in WHITESPACE and window[1] == '=':
+ if window[:2] not in equal_ok:
+ issue = "E251 no spaces around keyword / parameter equals"
+ return pos, issue
+ if c == '(':
+ parens += 1
+ elif c == ')':
+ parens -= 1
+
+
+def whitespace_before_inline_comment(logical_line, tokens):
+ """
+ Separate inline comments by at least two spaces.
+
+ An inline comment is a comment on the same line as a statement. Inline
+ comments should be separated by at least two spaces from the statement.
+ They should start with a # and a single space.
+
+    Okay: x = x + 1  # Increment x
+    Okay: x = x + 1    # Increment x
+    E261: x = x + 1 # Increment x
+    E262: x = x + 1  #Increment x
+    E262: x = x + 1  #  Increment x
+ """
+ prev_end = (0, 0)
+ for token_type, text, start, end, line in tokens:
+ if token_type == tokenize.NL:
+ continue
+ if token_type == tokenize.COMMENT:
+ if not line[:start[1]].strip():
+ continue
+ if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
+ return (prev_end,
+ "E261 at least two spaces before inline comment")
+            if (len(text) > 1 and text.startswith('#  ')
+                    or not text.startswith('# ')):
+ return start, "E262 inline comment should start with '# '"
+ else:
+ prev_end = end
+
+
+def imports_on_separate_lines(logical_line):
+ r"""
+ Imports should usually be on separate lines.
+
+ Okay: import os\nimport sys
+ E401: import sys, os
+
+ Okay: from subprocess import Popen, PIPE
+    Okay: from myclass import MyClass
+ Okay: from foo.bar.yourclass import YourClass
+ Okay: import myclass
+ Okay: import foo.bar.yourclass
+ """
+ line = logical_line
+ if line.startswith('import '):
+ found = line.find(',')
+ if found > -1:
+ return found, "E401 multiple imports on one line"
+
+
+def compound_statements(logical_line):
+ r"""
+ Compound statements (multiple statements on the same line) are
+ generally discouraged.
+
+ While sometimes it's okay to put an if/for/while with a small body
+ on the same line, never do this for multi-clause statements. Also
+ avoid folding such long lines!
+
+    Okay: if foo == 'blah':\n    do_blah_thing()
+ Okay: do_one()
+ Okay: do_two()
+ Okay: do_three()
+
+ E701: if foo == 'blah': do_blah_thing()
+ E701: for x in lst: total += x
+ E701: while t < 10: t = delay()
+ E701: if foo == 'blah': do_blah_thing()
+ E701: else: do_non_blah_thing()
+ E701: try: something()
+ E701: finally: cleanup()
+ E701: if foo == 'blah': one(); two(); three()
+
+ E702: do_one(); do_two(); do_three()
+ """
+ line = logical_line
+ found = line.find(':')
+ if -1 < found < len(line) - 1:
+ before = line[:found]
+ if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
+ before.count('[') <= before.count(']') and # [1:2] (slice)
+ not re.search(r'\blambda\b', before)): # lambda x: x
+ return found, "E701 multiple statements on one line (colon)"
+ found = line.find(';')
+ if -1 < found:
+ return found, "E702 multiple statements on one line (semicolon)"
+
+
+def python_3000_has_key(logical_line):
+ """
+    The {}.has_key() method will be removed in a future version of
+    Python. Use the 'in' operator instead, for example:
+ d = {"a": 1, "b": 2}
+ if "b" in d:
+ print d["b"]
+ """
+ pos = logical_line.find('.has_key(')
+ if pos > -1:
+ return pos, "W601 .has_key() is deprecated, use 'in'"
+
+
+def python_3000_raise_comma(logical_line):
+ """
+ When raising an exception, use "raise ValueError('message')"
+ instead of the older form "raise ValueError, 'message'".
+
+ The paren-using form is preferred because when the exception arguments
+ are long or include string formatting, you don't need to use line
+ continuation characters thanks to the containing parentheses. The older
+ form will be removed in Python 3000.
+ """
+ match = RAISE_COMMA_REGEX.match(logical_line)
+ if match:
+ return match.start(1), "W602 deprecated form of raising exception"
+
+
+def python_3000_not_equal(logical_line):
+ """
+ != can also be written <>, but this is an obsolete usage kept for
+ backwards compatibility only. New code should always use !=.
+ The older syntax is removed in Python 3000.
+ """
+ pos = logical_line.find('<>')
+ if pos > -1:
+ return pos, "W603 '<>' is deprecated, use '!='"
+
+
+def python_3000_backticks(logical_line):
+ """
+ Backticks are removed in Python 3000.
+ Use repr() instead.
+ """
+ pos = logical_line.find('`')
+ if pos > -1:
+ return pos, "W604 backticks are deprecated, use 'repr()'"
+
+
+##############################################################################
+# Helper functions
+##############################################################################
+
+
+def expand_indent(line):
+ """
+ Return the amount of indentation.
+ Tabs are expanded to the next multiple of 8.
+
+    >>> expand_indent('    ')
+    4
+    >>> expand_indent('\\t')
+    8
+    >>> expand_indent('    \\t')
+    8
+    >>> expand_indent('       \\t')
+    8
+    >>> expand_indent('        \\t')
+    16
+ """
+ result = 0
+ for char in line:
+ if char == '\t':
+ result = result // 8 * 8 + 8
+ elif char == ' ':
+ result += 1
+ else:
+ break
+ return result
+
+
+def mute_string(text):
+ """
+ Replace contents with 'xxx' to prevent syntax matching.
+
+ >>> mute_string('"abc"')
+ '"xxx"'
+ >>> mute_string("'''abc'''")
+ "'''xxx'''"
+ >>> mute_string("r'abc'")
+ "r'xxx'"
+ """
+ start = 1
+ end = len(text) - 1
+ # String modifiers (e.g. u or r)
+ if text.endswith('"'):
+ start += text.index('"')
+ elif text.endswith("'"):
+ start += text.index("'")
+ # Triple quotes
+ if text.endswith('"""') or text.endswith("'''"):
+ start += 2
+ end -= 2
+ return text[:start] + 'x' * (end - start) + text[end:]
+
+
+def message(text):
+ """Print a message."""
+ # print >> sys.stderr, options.prog + ': ' + text
+ # print >> sys.stderr, text
+ print(text)
+
+
+##############################################################################
+# Framework to run all checks
+##############################################################################
+
+
+def find_checks(argument_name):
+ """
+ Find all globally visible functions where the first argument name
+ starts with argument_name.
+ """
+ checks = []
+ for name, function in globals().items():
+ if not inspect.isfunction(function):
+ continue
+ args = inspect.getargspec(function)[0]
+ if args and args[0].startswith(argument_name):
+ codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
+ for code in codes or ['']:
+ if not code or not ignore_code(code):
+ checks.append((name, function, args))
+ break
+ checks.sort()
+ return checks
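+# For example (illustrative): find_checks('physical_line') returns a sorted
+# list of (name, function, argument_names) tuples such as
+# ('maximum_line_length', <function>, ['physical_line']).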
+
+
+class Checker(object):
+ """
+ Load a Python source file, tokenize it, check coding style.
+ """
+
+ def __init__(self, filename):
+ if filename:
+ self.filename = filename
+ try:
+ self.lines = open(filename).readlines()
+ except UnicodeDecodeError:
+ # Errors may occur with non-UTF8 files in Python 3000
+ self.lines = open(filename, errors='replace').readlines()
+ else:
+ self.filename = 'stdin'
+ self.lines = []
+ options.counters['physical lines'] = \
+ options.counters.get('physical lines', 0) + len(self.lines)
+
+ def readline(self):
+ """
+ Get the next line from the input buffer.
+ """
+ self.line_number += 1
+ if self.line_number > len(self.lines):
+ return ''
+ return self.lines[self.line_number - 1]
+
+ def readline_check_physical(self):
+ """
+ Check and return the next physical line. This method can be
+ used to feed tokenize.generate_tokens.
+ """
+ line = self.readline()
+ if line:
+ self.check_physical(line)
+ return line
+
+ def run_check(self, check, argument_names):
+ """
+ Run a check plugin.
+ """
+ arguments = []
+ for name in argument_names:
+ arguments.append(getattr(self, name))
+ return check(*arguments)
+
+ def check_physical(self, line):
+ """
+ Run all physical checks on a raw input line.
+ """
+ self.physical_line = line
+ if self.indent_char is None and len(line) and line[0] in ' \t':
+ self.indent_char = line[0]
+ for name, check, argument_names in options.physical_checks:
+ result = self.run_check(check, argument_names)
+ if result is not None:
+ offset, text = result
+ self.report_error(self.line_number, offset, text, check)
+
+ def build_tokens_line(self):
+ """
+ Build a logical line from tokens.
+ """
+ self.mapping = []
+ logical = []
+ length = 0
+ previous = None
+ for token in self.tokens:
+ token_type, text = token[0:2]
+ if token_type in (tokenize.COMMENT, tokenize.NL,
+ tokenize.INDENT, tokenize.DEDENT,
+ tokenize.NEWLINE):
+ continue
+ if token_type == tokenize.STRING:
+ text = mute_string(text)
+ if previous:
+ end_line, end = previous[3]
+ start_line, start = token[2]
+ if end_line != start_line: # different row
+ if self.lines[end_line - 1][end - 1] not in '{[(':
+ logical.append(' ')
+ length += 1
+ elif end != start: # different column
+ fill = self.lines[end_line - 1][end:start]
+ logical.append(fill)
+ length += len(fill)
+ self.mapping.append((length, token))
+ logical.append(text)
+ length += len(text)
+ previous = token
+ self.logical_line = ''.join(logical)
+ assert self.logical_line.lstrip() == self.logical_line
+ assert self.logical_line.rstrip() == self.logical_line
+
+ def check_logical(self):
+ """
+ Build a line from tokens and run all logical checks on it.
+ """
+ options.counters['logical lines'] = \
+ options.counters.get('logical lines', 0) + 1
+ self.build_tokens_line()
+ first_line = self.lines[self.mapping[0][1][2][0] - 1]
+ indent = first_line[:self.mapping[0][1][2][1]]
+ self.previous_indent_level = self.indent_level
+ self.indent_level = expand_indent(indent)
+ if options.verbose >= 2:
+ print(self.logical_line[:80].rstrip())
+ for name, check, argument_names in options.logical_checks:
+ if options.verbose >= 3:
+ print(' ', name)
+ result = self.run_check(check, argument_names)
+ if result is not None:
+ offset, text = result
+ if isinstance(offset, tuple):
+ original_number, original_offset = offset
+ else:
+ for token_offset, token in self.mapping:
+ if offset >= token_offset:
+ original_number = token[2][0]
+ original_offset = (token[2][1]
+ + offset - token_offset)
+ self.report_error(original_number, original_offset,
+ text, check)
+ self.previous_logical = self.logical_line
+
+ def check_all(self):
+ """
+ Run all checks on the input file.
+ """
+ self.file_errors = 0
+ self.line_number = 0
+ self.indent_char = None
+ self.indent_level = 0
+ self.previous_logical = ''
+ self.blank_lines = 0
+ self.blank_lines_before_comment = 0
+ self.tokens = []
+ parens = 0
+ for token in tokenize.generate_tokens(self.readline_check_physical):
+ # print(tokenize.tok_name[token[0]], repr(token))
+ self.tokens.append(token)
+ token_type, text = token[0:2]
+ if token_type == tokenize.OP and text in '([{':
+ parens += 1
+ if token_type == tokenize.OP and text in '}])':
+ parens -= 1
+ if token_type == tokenize.NEWLINE and not parens:
+ self.check_logical()
+ self.blank_lines = 0
+ self.blank_lines_before_comment = 0
+ self.tokens = []
+ if token_type == tokenize.NL and not parens:
+ if len(self.tokens) <= 1:
+ # The physical line contains only this token.
+ self.blank_lines += 1
+ self.tokens = []
+ if token_type == tokenize.COMMENT:
+ source_line = token[4]
+ token_start = token[2][1]
+ if source_line[:token_start].strip() == '':
+ self.blank_lines_before_comment = max(self.blank_lines,
+ self.blank_lines_before_comment)
+ self.blank_lines = 0
+ if text.endswith('\n') and not parens:
+ # The comment also ends a physical line. This works around
+ # Python < 2.6 behaviour, which does not generate NL after
+ # a comment which is on a line by itself.
+ self.tokens = []
+ return self.file_errors
+
+ def report_error(self, line_number, offset, text, check):
+ """
+ Report an error, according to options.
+ """
+ if options.quiet == 1 and not self.file_errors:
+ message(self.filename)
+ self.file_errors += 1
+ code = text[:4]
+ options.counters[code] = options.counters.get(code, 0) + 1
+ options.messages[code] = text[5:]
+ if options.quiet:
+ return
+ if options.testsuite:
+ basename = os.path.basename(self.filename)
+ if basename[:4] != code:
+ return # Don't care about other errors or warnings
+ if 'not' not in basename:
+ return # Don't print the expected error message
+ if ignore_code(code):
+ return
+ if options.counters[code] == 1 or options.repeat:
+ message("%s:%s:%d: %s" %
+ (self.filename, line_number, offset + 1, text))
+ if options.show_source:
+ line = self.lines[line_number - 1]
+ message(line.rstrip())
+ message(' ' * offset + '^')
+ if options.show_pep8:
+ message(check.__doc__.lstrip('\n').rstrip())
+
+
+def input_file(filename):
+ """
+ Run all checks on a Python source file.
+ """
+ if excluded(filename):
+ return {}
+ if options.verbose:
+ message('checking ' + filename)
+ files_counter_before = options.counters.get('files', 0)
+ if options.testsuite: # Keep showing errors for multiple tests
+ options.counters = {}
+ options.counters['files'] = files_counter_before + 1
+ errors = Checker(filename).check_all()
+ if options.testsuite: # Check if the expected error was found
+ basename = os.path.basename(filename)
+ code = basename[:4]
+ count = options.counters.get(code, 0)
+ if count == 0 and 'not' not in basename:
+ message("%s: error %s not found" % (filename, code))
+
+
+def input_dir(dirname):
+ """
+ Check all Python source files in this directory and all subdirectories.
+ """
+ dirname = dirname.rstrip('/')
+ if excluded(dirname):
+ return
+ for root, dirs, files in os.walk(dirname):
+ if options.verbose:
+ message('directory ' + root)
+ options.counters['directories'] = \
+ options.counters.get('directories', 0) + 1
+ dirs.sort()
+ for subdir in dirs:
+ if excluded(subdir):
+ dirs.remove(subdir)
+ files.sort()
+ for filename in files:
+ if filename_match(filename):
+ input_file(os.path.join(root, filename))
+
+
+def excluded(filename):
+ """
+ Check if options.exclude contains a pattern that matches filename.
+ """
+ basename = os.path.basename(filename)
+ for pattern in options.exclude:
+ if fnmatch(basename, pattern):
+ # print basename, 'excluded because it matches', pattern
+ return True
+
+
+def filename_match(filename):
+ """
+ Check if options.filename contains a pattern that matches filename.
+ If options.filename is unspecified, this always returns True.
+ """
+ if not options.filename:
+ return True
+ for pattern in options.filename:
+ if fnmatch(filename, pattern):
+ return True
+
+
+def ignore_code(code):
+ """
+ Check if options.ignore contains a prefix of the error code.
+ If options.select contains a prefix of the error code, do not ignore it.
+ """
+ for select in options.select:
+ if code.startswith(select):
+ return False
+ for ignore in options.ignore:
+ if code.startswith(ignore):
+ return True
+
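+# For example (illustrative): with the default options.ignore == ['E24'],
+# ignore_code('E241') returns True, while ignore_code('E501') falls through
+# and returns None (falsy), so E501 is reported.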
+
+def get_error_statistics():
+ """Get error statistics."""
+ return get_statistics("E")
+
+
+def get_warning_statistics():
+ """Get warning statistics."""
+ return get_statistics("W")
+
+
+def get_statistics(prefix=''):
+ """
+ Get statistics for message codes that start with the prefix.
+
+ prefix='' matches all errors and warnings
+ prefix='E' matches all errors
+ prefix='W' matches all warnings
+ prefix='E4' matches all errors that have to do with imports
+ """
+ stats = []
+ keys = list(options.messages.keys())
+ keys.sort()
+ for key in keys:
+ if key.startswith(prefix):
+ stats.append('%-7s %s %s' %
+ (options.counters[key], key, options.messages[key]))
+ return stats
+
+
+def get_count(prefix=''):
+ """Return the total count of errors and warnings."""
+ keys = list(options.messages.keys())
+ count = 0
+ for key in keys:
+ if key.startswith(prefix):
+ count += options.counters[key]
+ return count
+
+
+def print_statistics(prefix=''):
+ """Print overall statistics (number of errors and warnings)."""
+ for line in get_statistics(prefix):
+ print(line)
+
+
+def print_benchmark(elapsed):
+ """
+ Print benchmark numbers.
+ """
+ print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
+ keys = ['directories', 'files',
+ 'logical lines', 'physical lines']
+ for key in keys:
+ if key in options.counters:
+ print('%-7d %s per second (%d total)' % (
+ options.counters[key] / elapsed, key,
+ options.counters[key]))
+
+
+def selftest():
+ """
+ Test all check functions with test cases in docstrings.
+ """
+ count_passed = 0
+ count_failed = 0
+ checks = options.physical_checks + options.logical_checks
+ for name, check, argument_names in checks:
+ for line in check.__doc__.splitlines():
+ line = line.lstrip()
+ match = SELFTEST_REGEX.match(line)
+ if match is None:
+ continue
+ code, source = match.groups()
+ checker = Checker(None)
+ for part in source.split(r'\n'):
+ part = part.replace(r'\t', '\t')
+ part = part.replace(r'\s', ' ')
+ checker.lines.append(part + '\n')
+ options.quiet = 2
+ options.counters = {}
+ checker.check_all()
+ error = None
+ if code == 'Okay':
+ if len(options.counters) > 1:
+ codes = [key for key in options.counters.keys()
+ if key != 'logical lines']
+ error = "incorrectly found %s" % ', '.join(codes)
+ elif options.counters.get(code, 0) == 0:
+ error = "failed to find %s" % code
+ if not error:
+ count_passed += 1
+ else:
+ count_failed += 1
+ if len(checker.lines) == 1:
+ print("pep8.py: %s: %s" %
+ (error, checker.lines[0].rstrip()))
+ else:
+ print("pep8.py: %s:" % error)
+ for line in checker.lines:
+ print(line.rstrip())
+ if options.verbose:
+ print("%d passed and %d failed." % (count_passed, count_failed))
+ if count_failed:
+ print("Test failed.")
+ else:
+ print("Test passed.")
+
+
+def process_options(arglist=None):
+ """
+ Process options passed either via arglist or via command line args.
+ """
+ global options, args
+ parser = OptionParser(version=__version__,
+ usage="%prog [options] input ...")
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help="print status messages, or debug with -vv")
+ parser.add_option('-q', '--quiet', default=0, action='count',
+ help="report only file names, or nothing with -qq")
+ parser.add_option('-r', '--repeat', action='store_true',
+ help="show all occurrences of the same error")
+ parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
+ help="exclude files or directories which match these "
+ "comma separated patterns (default: %s)" %
+ DEFAULT_EXCLUDE)
+ parser.add_option('--filename', metavar='patterns', default='*.py',
+ help="when parsing directories, only check filenames "
+ "matching these comma separated patterns (default: "
+ "*.py)")
+ parser.add_option('--select', metavar='errors', default='',
+ help="select errors and warnings (e.g. E,W6)")
+ parser.add_option('--ignore', metavar='errors', default='',
+ help="skip errors and warnings (e.g. E4,W)")
+ parser.add_option('--show-source', action='store_true',
+ help="show source code for each error")
+ parser.add_option('--show-pep8', action='store_true',
+ help="show text of PEP 8 for each error")
+ parser.add_option('--statistics', action='store_true',
+ help="count errors and warnings")
+ parser.add_option('--count', action='store_true',
+ help="print total number of errors and warnings "
+ "to standard error and set exit code to 1 if "
+                      "total is not zero")
+ parser.add_option('--benchmark', action='store_true',
+ help="measure processing speed")
+ parser.add_option('--testsuite', metavar='dir',
+ help="run regression tests from dir")
+ parser.add_option('--doctest', action='store_true',
+ help="run doctest on myself")
+ options, args = parser.parse_args(arglist)
+ if options.testsuite:
+ args.append(options.testsuite)
+ if len(args) == 0 and not options.doctest:
+ parser.error('input not specified')
+ options.prog = os.path.basename(sys.argv[0])
+ options.exclude = options.exclude.split(',')
+ for index in range(len(options.exclude)):
+ options.exclude[index] = options.exclude[index].rstrip('/')
+ if options.filename:
+ options.filename = options.filename.split(',')
+ if options.select:
+ options.select = options.select.split(',')
+ else:
+ options.select = []
+ if options.ignore:
+ options.ignore = options.ignore.split(',')
+ elif options.select:
+ # Ignore all checks which are not explicitly selected
+ options.ignore = ['']
+ elif options.testsuite or options.doctest:
+ # For doctest and testsuite, all checks are required
+ options.ignore = []
+ else:
+ # The default choice: ignore controversial checks
+ options.ignore = DEFAULT_IGNORE
+ options.physical_checks = find_checks('physical_line')
+ options.logical_checks = find_checks('logical_line')
+ options.counters = {}
+ options.messages = {}
+ return options, args
+
+
+def _main():
+ """
+ Parse options and run checks on Python source.
+ """
+ options, args = process_options()
+ if options.doctest:
+ import doctest
+ doctest.testmod(verbose=options.verbose)
+ selftest()
+ start_time = time.time()
+ for path in args:
+ if os.path.isdir(path):
+ input_dir(path)
+ else:
+ input_file(path)
+ elapsed = time.time() - start_time
+ if options.statistics:
+ print_statistics()
+ if options.benchmark:
+ print_benchmark(elapsed)
+ if options.count:
+ count = get_count()
+ if count:
+ sys.stderr.write(str(count) + '\n')
+ sys.exit(1)
+
+
+if __name__ == '__main__':
+ _main()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url
new file mode 100644
index 0000000..8098dbc
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url
@@ -0,0 +1 @@
+http://webkit-rietveld.googlecode.com/svn/trunk/static/upload.py \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py
new file mode 100644
index 0000000..c1e4c6d
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py
@@ -0,0 +1 @@
+# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py
new file mode 100755
index 0000000..e91060f
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py
@@ -0,0 +1,1702 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tool for uploading diffs from a version control system to the codereview app.
+
+Usage summary: upload.py [options] [-- diff_options]
+
+Diff options are passed to the diff command of the underlying system.
+
+Supported version control systems:
+ Git
+ Mercurial
+ Subversion
+
+It is important for Git/Mercurial users to specify a tree/node/branch to diff
+against by using the '--rev' option.
+"""
+# This code is derived from appcfg.py in the App Engine SDK (open source),
+# and from ASPN recipe #146306.
+
+import ConfigParser
+import cookielib
+import fnmatch
+import getpass
+import logging
+import mimetypes
+import optparse
+import os
+import re
+import socket
+import subprocess
+import sys
+import urllib
+import urllib2
+import urlparse
+
+# The md5 module was deprecated in Python 2.5.
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+
+try:
+ import readline
+except ImportError:
+ pass
+
+# The logging verbosity:
+# 0: Errors only.
+# 1: Status messages.
+# 2: Info logs.
+# 3: Debug logs.
+verbosity = 1
+
+# Max size of patch or base file.
+MAX_UPLOAD_SIZE = 900 * 1024
+
+# Constants for version control names. Used by GuessVCSName.
+VCS_GIT = "Git"
+VCS_MERCURIAL = "Mercurial"
+VCS_SUBVERSION = "Subversion"
+VCS_UNKNOWN = "Unknown"
+
+# whitelist for non-binary filetypes which do not start with "text/"
+# .mm (Objective-C) shows up as application/x-freemind on my Linux box.
+TEXT_MIMETYPES = ['application/javascript', 'application/x-javascript',
+ 'application/xml', 'application/x-freemind']
+
+VCS_ABBREVIATIONS = {
+ VCS_MERCURIAL.lower(): VCS_MERCURIAL,
+ "hg": VCS_MERCURIAL,
+ VCS_SUBVERSION.lower(): VCS_SUBVERSION,
+ "svn": VCS_SUBVERSION,
+ VCS_GIT.lower(): VCS_GIT,
+}
+
+# The result of parsing Subversion's [auto-props] setting.
+svn_auto_props_map = None
+
+def GetEmail(prompt):
+ """Prompts the user for their email address and returns it.
+
+ The last used email address is saved to a file and offered up as a suggestion
+ to the user. If the user presses enter without typing in anything the last
+ used email address is used. If the user enters a new address, it is saved
+ for next time we prompt.
+
+ """
+ last_email_file_name = os.path.expanduser("~/.last_codereview_email_address")
+ last_email = ""
+ if os.path.exists(last_email_file_name):
+ try:
+ last_email_file = open(last_email_file_name, "r")
+ last_email = last_email_file.readline().strip("\n")
+ last_email_file.close()
+ prompt += " [%s]" % last_email
+ except IOError, e:
+ pass
+ email = raw_input(prompt + ": ").strip()
+ if email:
+ try:
+ last_email_file = open(last_email_file_name, "w")
+ last_email_file.write(email)
+ last_email_file.close()
+ except IOError, e:
+ pass
+ else:
+ email = last_email
+ return email
+
+
+def StatusUpdate(msg):
+ """Print a status message to stdout.
+
+ If 'verbosity' is greater than 0, print the message.
+
+ Args:
+ msg: The string to print.
+ """
+ if verbosity > 0:
+ print msg
+
+
+def ErrorExit(msg):
+ """Print an error message to stderr and exit."""
+ print >>sys.stderr, msg
+ sys.exit(1)
+
+
+class ClientLoginError(urllib2.HTTPError):
+ """Raised to indicate there was an error authenticating with ClientLogin."""
+
+ def __init__(self, url, code, msg, headers, args):
+ urllib2.HTTPError.__init__(self, url, code, msg, headers, None)
+ self.args = args
+ self.reason = args["Error"]
+
+
+class AbstractRpcServer(object):
+ """Provides a common interface for a simple RPC server."""
+
+ def __init__(self, host, auth_function, host_override=None, extra_headers={},
+ save_cookies=False):
+ """Creates a new HttpRpcServer.
+
+ Args:
+ host: The host to send requests to.
+ auth_function: A function that takes no arguments and returns an
+ (email, password) tuple when called. Will be called if authentication
+ is required.
+ host_override: The host header to send to the server (defaults to host).
+ extra_headers: A dict of extra headers to append to every request.
+ save_cookies: If True, save the authentication cookies to local disk.
+ If False, use an in-memory cookiejar instead. Subclasses must
+ implement this functionality. Defaults to False.
+ """
+ self.host = host
+ self.host_override = host_override
+ self.auth_function = auth_function
+ self.authenticated = False
+ self.extra_headers = extra_headers
+ self.save_cookies = save_cookies
+ self.opener = self._GetOpener()
+ if self.host_override:
+ logging.info("Server: %s; Host: %s", self.host, self.host_override)
+ else:
+ logging.info("Server: %s", self.host)
+
+ def _GetOpener(self):
+ """Returns an OpenerDirector for making HTTP requests.
+
+ Returns:
+ A urllib2.OpenerDirector object.
+ """
+ raise NotImplementedError()
+
+ def _CreateRequest(self, url, data=None):
+ """Creates a new urllib request."""
+ logging.debug("Creating request for: '%s' with payload:\n%s", url, data)
+ req = urllib2.Request(url, data=data)
+ if self.host_override:
+ req.add_header("Host", self.host_override)
+ for key, value in self.extra_headers.iteritems():
+ req.add_header(key, value)
+ return req
+
+ def _GetAuthToken(self, email, password):
+ """Uses ClientLogin to authenticate the user, returning an auth token.
+
+ Args:
+ email: The user's email address
+ password: The user's password
+
+ Raises:
+ ClientLoginError: If there was an error authenticating with ClientLogin.
+ HTTPError: If there was some other form of HTTP error.
+
+ Returns:
+ The authentication token returned by ClientLogin.
+ """
+ account_type = "GOOGLE"
+ if self.host.endswith(".google.com"):
+ # Needed for use inside Google.
+ account_type = "HOSTED"
+ req = self._CreateRequest(
+ url="https://www.google.com/accounts/ClientLogin",
+ data=urllib.urlencode({
+ "Email": email,
+ "Passwd": password,
+ "service": "ah",
+ "source": "rietveld-codereview-upload",
+ "accountType": account_type,
+ }),
+ )
+ try:
+ response = self.opener.open(req)
+ response_body = response.read()
+ response_dict = dict(x.split("=")
+ for x in response_body.split("\n") if x)
+ return response_dict["Auth"]
+ except urllib2.HTTPError, e:
+ if e.code == 403:
+ body = e.read()
+ response_dict = dict(x.split("=", 1) for x in body.split("\n") if x)
+ raise ClientLoginError(req.get_full_url(), e.code, e.msg,
+ e.headers, response_dict)
+ else:
+ raise
+
+ def _GetAuthCookie(self, auth_token):
+ """Fetches authentication cookies for an authentication token.
+
+ Args:
+ auth_token: The authentication token returned by ClientLogin.
+
+ Raises:
+ HTTPError: If there was an error fetching the authentication cookies.
+ """
+ # This is a dummy value to allow us to identify when we're successful.
+ continue_location = "http://localhost/"
+ args = {"continue": continue_location, "auth": auth_token}
+ req = self._CreateRequest("http://%s/_ah/login?%s" %
+ (self.host, urllib.urlencode(args)))
+ try:
+ response = self.opener.open(req)
+ except urllib2.HTTPError, e:
+ response = e
+ if (response.code != 302 or
+ response.info()["location"] != continue_location):
+ raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg,
+ response.headers, response.fp)
+ self.authenticated = True
+
+ def _Authenticate(self):
+ """Authenticates the user.
+
+ The authentication process works as follows:
+ 1) We get a username and password from the user
+ 2) We use ClientLogin to obtain an AUTH token for the user
+ (see http://code.google.com/apis/accounts/AuthForInstalledApps.html).
+ 3) We pass the auth token to /_ah/login on the server to obtain an
+ authentication cookie. If login was successful, it tries to redirect
+ us to the URL we provided.
+
+ If we attempt to access the upload API without first obtaining an
+ authentication cookie, it returns a 401 response (or a 302) and
+ directs us to authenticate ourselves with ClientLogin.
+ """
+ for i in range(3):
+ credentials = self.auth_function()
+ try:
+ auth_token = self._GetAuthToken(credentials[0], credentials[1])
+ except ClientLoginError, e:
+ if e.reason == "BadAuthentication":
+ print >>sys.stderr, "Invalid username or password."
+ continue
+ if e.reason == "CaptchaRequired":
+ print >>sys.stderr, (
+ "Please go to\n"
+ "https://www.google.com/accounts/DisplayUnlockCaptcha\n"
+ "and verify you are a human. Then try again.")
+ break
+ if e.reason == "NotVerified":
+ print >>sys.stderr, "Account not verified."
+ break
+ if e.reason == "TermsNotAgreed":
+ print >>sys.stderr, "User has not agreed to TOS."
+ break
+ if e.reason == "AccountDeleted":
+ print >>sys.stderr, "The user account has been deleted."
+ break
+ if e.reason == "AccountDisabled":
+ print >>sys.stderr, "The user account has been disabled."
+ break
+ if e.reason == "ServiceDisabled":
+ print >>sys.stderr, ("The user's access to the service has been "
+ "disabled.")
+ break
+ if e.reason == "ServiceUnavailable":
+ print >>sys.stderr, "The service is not available; try again later."
+ break
+ raise
+ self._GetAuthCookie(auth_token)
+ return
+
+ def Send(self, request_path, payload=None,
+ content_type="application/octet-stream",
+ timeout=None,
+ **kwargs):
+ """Sends an RPC and returns the response.
+
+ Args:
+ request_path: The path to send the request to, eg /api/appversion/create.
+ payload: The body of the request, or None to send an empty request.
+ content_type: The Content-Type header to use.
+      timeout: Timeout in seconds; default None, i.e. no timeout.
+ (Note: for large requests on OS X, the timeout doesn't work right.)
+ kwargs: Any keyword arguments are converted into query string parameters.
+
+ Returns:
+ The response body, as a string.
+ """
+ # TODO: Don't require authentication. Let the server say
+ # whether it is necessary.
+ if not self.authenticated:
+ self._Authenticate()
+
+ old_timeout = socket.getdefaulttimeout()
+ socket.setdefaulttimeout(timeout)
+ try:
+ tries = 0
+ while True:
+ tries += 1
+ args = dict(kwargs)
+ url = "http://%s%s" % (self.host, request_path)
+ if args:
+ url += "?" + urllib.urlencode(args)
+ req = self._CreateRequest(url=url, data=payload)
+ req.add_header("Content-Type", content_type)
+ try:
+ f = self.opener.open(req)
+ response = f.read()
+ f.close()
+ return response
+ except urllib2.HTTPError, e:
+ if tries > 3:
+ raise
+ elif e.code == 401 or e.code == 302:
+ self._Authenticate()
+## elif e.code >= 500 and e.code < 600:
+## # Server Error - try again.
+## continue
+ else:
+ raise
+ finally:
+ socket.setdefaulttimeout(old_timeout)
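+
+  # A usage sketch (illustrative only; the path and parameter are made up):
+  # keyword arguments to Send become query-string parameters, so
+  #
+  #   server.Send("/42/description", payload=None, reviewers="joe")
+  #
+  # requests http://<host>/42/description?reviewers=joe and returns the
+  # response body as a string.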
+
+
+class HttpRpcServer(AbstractRpcServer):
+ """Provides a simplified RPC-style interface for HTTP requests."""
+
+ def _Authenticate(self):
+ """Save the cookie jar after authentication."""
+ super(HttpRpcServer, self)._Authenticate()
+ if self.save_cookies:
+ StatusUpdate("Saving authentication cookies to %s" % self.cookie_file)
+ self.cookie_jar.save()
+
+ def _GetOpener(self):
+ """Returns an OpenerDirector that supports cookies and ignores redirects.
+
+ Returns:
+ A urllib2.OpenerDirector object.
+ """
+ opener = urllib2.OpenerDirector()
+ opener.add_handler(urllib2.ProxyHandler())
+ opener.add_handler(urllib2.UnknownHandler())
+ opener.add_handler(urllib2.HTTPHandler())
+ opener.add_handler(urllib2.HTTPDefaultErrorHandler())
+ opener.add_handler(urllib2.HTTPSHandler())
+ opener.add_handler(urllib2.HTTPErrorProcessor())
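+    # Note: urllib2.HTTPRedirectHandler is deliberately left out, so
+    # redirects are not followed; _GetAuthCookie above depends on seeing
+    # the raw 302 from /_ah/login to know that authentication succeeded.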
+ if self.save_cookies:
+ self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies")
+ self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file)
+ if os.path.exists(self.cookie_file):
+ try:
+ self.cookie_jar.load()
+ self.authenticated = True
+ StatusUpdate("Loaded authentication cookies from %s" %
+ self.cookie_file)
+ except (cookielib.LoadError, IOError):
+ # Failed to load cookies - just ignore them.
+ pass
+ else:
+ # Create an empty cookie file with mode 600
+ fd = os.open(self.cookie_file, os.O_CREAT, 0600)
+ os.close(fd)
+ # Always chmod the cookie file
+ os.chmod(self.cookie_file, 0600)
+ else:
+ # Don't save cookies across runs of update.py.
+ self.cookie_jar = cookielib.CookieJar()
+ opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
+ return opener
+
+
+parser = optparse.OptionParser(usage="%prog [options] [-- diff_options]")
+parser.add_option("-y", "--assume_yes", action="store_true",
+ dest="assume_yes", default=False,
+ help="Assume that the answer to yes/no questions is 'yes'.")
+# Logging
+group = parser.add_option_group("Logging options")
+group.add_option("-q", "--quiet", action="store_const", const=0,
+ dest="verbose", help="Print errors only.")
+group.add_option("-v", "--verbose", action="store_const", const=2,
+ dest="verbose", default=1,
+ help="Print info level logs (default).")
+group.add_option("--noisy", action="store_const", const=3,
+ dest="verbose", help="Print all logs.")
+# Review server
+group = parser.add_option_group("Review server options")
+group.add_option("-s", "--server", action="store", dest="server",
+ default="codereview.appspot.com",
+ metavar="SERVER",
+ help=("The server to upload to. The format is host[:port]. "
+ "Defaults to '%default'."))
+group.add_option("-e", "--email", action="store", dest="email",
+ metavar="EMAIL", default=None,
+ help="The username to use. Will prompt if omitted.")
+group.add_option("-H", "--host", action="store", dest="host",
+ metavar="HOST", default=None,
+ help="Overrides the Host header sent with all RPCs.")
+group.add_option("--no_cookies", action="store_false",
+ dest="save_cookies", default=True,
+ help="Do not save authentication cookies to local disk.")
+# Issue
+group = parser.add_option_group("Issue options")
+group.add_option("-d", "--description", action="store", dest="description",
+ metavar="DESCRIPTION", default=None,
+ help="Optional description when creating an issue.")
+group.add_option("-f", "--description_file", action="store",
+ dest="description_file", metavar="DESCRIPTION_FILE",
+ default=None,
+ help="Optional path of a file that contains "
+ "the description when creating an issue.")
+group.add_option("-r", "--reviewers", action="store", dest="reviewers",
+ metavar="REVIEWERS", default=None,
+ help="Add reviewers (comma separated email addresses).")
+group.add_option("--cc", action="store", dest="cc",
+ metavar="CC", default=None,
+ help="Add CC (comma separated email addresses).")
+group.add_option("--private", action="store_true", dest="private",
+ default=False,
+                 help="Make the issue restricted to reviewers and those CCed.")
+# Upload options
+group = parser.add_option_group("Patch options")
+group.add_option("-m", "--message", action="store", dest="message",
+ metavar="MESSAGE", default=None,
+ help="A message to identify the patch. "
+ "Will prompt if omitted.")
+group.add_option("-i", "--issue", type="int", action="store",
+ metavar="ISSUE", default=None,
+ help="Issue number to which to add. Defaults to new issue.")
+group.add_option("--base_url", action="store", dest="base_url", default=None,
+ help="Base repository URL (listed as \"Base URL\" when "
+ "viewing issue). If omitted, will be guessed automatically "
+ "for SVN repos and left blank for others.")
+group.add_option("--download_base", action="store_true",
+ dest="download_base", default=False,
+ help="Base files will be downloaded by the server "
+ "(side-by-side diffs may not work on files with CRs).")
+group.add_option("--rev", action="store", dest="revision",
+ metavar="REV", default=None,
+ help="Base revision/branch/tree to diff against. Use "
+ "rev1:rev2 range to review already committed changeset.")
+group.add_option("--send_mail", action="store_true",
+ dest="send_mail", default=False,
+ help="Send notification email to reviewers.")
+group.add_option("--vcs", action="store", dest="vcs",
+ metavar="VCS", default=None,
+ help=("Version control system (optional, usually upload.py "
+ "already guesses the right VCS)."))
+group.add_option("--emulate_svn_auto_props", action="store_true",
+ dest="emulate_svn_auto_props", default=False,
+ help=("Emulate Subversion's auto properties feature."))
+
+
+def GetRpcServer(server, email=None, host_override=None, save_cookies=True):
+ """Returns an instance of an AbstractRpcServer.
+
+ Args:
+ server: String containing the review server URL.
+ email: String containing user's email address.
+ host_override: If not None, string containing an alternate hostname to use
+ in the host header.
+ save_cookies: Whether authentication cookies should be saved to disk.
+
+ Returns:
+ A new AbstractRpcServer, on which RPC calls can be made.
+ """
+
+ rpc_server_class = HttpRpcServer
+
+  def GetUserCredentials():
+    """Prompts the user for a username and password."""
+    # Use a local alias: assigning to "email" here would make it local to
+    # this function (Python 2 has no "nonlocal"), and the "is None" check
+    # below would then raise UnboundLocalError.
+    local_email = email
+    if local_email is None:
+      local_email = GetEmail("Email (login for uploading to %s)" % server)
+    password = getpass.getpass("Password for %s: " % local_email)
+    return (local_email, password)
+
+ # If this is the dev_appserver, use fake authentication.
+ host = (host_override or server).lower()
+ if host == "localhost" or host.startswith("localhost:"):
+ if email is None:
+ email = "test@example.com"
+ logging.info("Using debug user %s. Override with --email" % email)
+ server = rpc_server_class(
+ server,
+ lambda: (email, "password"),
+ host_override=host_override,
+ extra_headers={"Cookie":
+ 'dev_appserver_login="%s:False"' % email},
+ save_cookies=save_cookies)
+ # Don't try to talk to ClientLogin.
+ server.authenticated = True
+ return server
+
+ return rpc_server_class(server,
+ GetUserCredentials,
+ host_override=host_override,
+ save_cookies=save_cookies)
+
+
+def EncodeMultipartFormData(fields, files):
+ """Encode form fields for multipart/form-data.
+
+ Args:
+ fields: A sequence of (name, value) elements for regular form fields.
+ files: A sequence of (name, filename, value) elements for data to be
+ uploaded as files.
+ Returns:
+ (content_type, body) ready for httplib.HTTP instance.
+
+ Source:
+ http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
+ """
+ BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-'
+ CRLF = '\r\n'
+ lines = []
+ for (key, value) in fields:
+ lines.append('--' + BOUNDARY)
+ lines.append('Content-Disposition: form-data; name="%s"' % key)
+ lines.append('')
+ lines.append(value)
+ for (key, filename, value) in files:
+ lines.append('--' + BOUNDARY)
+ lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' %
+ (key, filename))
+ lines.append('Content-Type: %s' % GetContentType(filename))
+ lines.append('')
+ lines.append(value)
+ lines.append('--' + BOUNDARY + '--')
+ lines.append('')
+ body = CRLF.join(lines)
+ content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+ return content_type, body
+
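+# A usage sketch (illustrative only; field names and contents are made up):
+#
+#   ctype, body = EncodeMultipartFormData(
+#       [("subject", "Fix a typo")],
+#       [("data", "data.diff", "Index: foo.cc\n...")])
+#   # ctype == 'multipart/form-data; boundary=-M-A-G-I-C---B-O-U-N-D-A-R-Y-'
+#
+# The resulting ctype/body pair is what gets handed to
+# AbstractRpcServer.Send elsewhere in this script.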
+
+def GetContentType(filename):
+ """Helper to guess the content-type from the filename."""
+ return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+
+
+# Use a shell for subcommands on Windows to get a PATH search.
+use_shell = sys.platform.startswith("win")
+
+def RunShellWithReturnCode(command, print_output=False,
+ universal_newlines=True,
+ env=os.environ):
+ """Executes a command and returns the output from stdout and the return code.
+
+ Args:
+ command: Command to execute.
+    print_output: If True, the output is printed to stdout as the command
+      runs, and stderr is printed after it completes. If False, nothing is
+      printed (the output is still returned).
+ universal_newlines: Use universal_newlines flag (default: True).
+
+ Returns:
+ Tuple (output, return code)
+ """
+ logging.info("Running %s", command)
+ p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ shell=use_shell, universal_newlines=universal_newlines,
+ env=env)
+ if print_output:
+ output_array = []
+ while True:
+ line = p.stdout.readline()
+ if not line:
+ break
+ print line.strip("\n")
+ output_array.append(line)
+ output = "".join(output_array)
+ else:
+ output = p.stdout.read()
+ p.wait()
+ errout = p.stderr.read()
+ if print_output and errout:
+ print >>sys.stderr, errout
+ p.stdout.close()
+ p.stderr.close()
+ return output, p.returncode
+
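+# A usage sketch (illustrative; assumes the command exists on PATH):
+#
+#   out, code = RunShellWithReturnCode(["svn", "info"])
+#   # "out" holds captured stdout; a nonzero "code" signals failure.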
+
+def RunShell(command, silent_ok=False, universal_newlines=True,
+ print_output=False, env=os.environ):
+ data, retcode = RunShellWithReturnCode(command, print_output,
+ universal_newlines, env)
+ if retcode:
+ ErrorExit("Got error status from %s:\n%s" % (command, data))
+ if not silent_ok and not data:
+ ErrorExit("No output from %s" % command)
+ return data
+
+
+class VersionControlSystem(object):
+ """Abstract base class providing an interface to the VCS."""
+
+ def __init__(self, options):
+ """Constructor.
+
+ Args:
+ options: Command line options.
+ """
+ self.options = options
+
+ def GenerateDiff(self, args):
+ """Return the current diff as a string.
+
+ Args:
+ args: Extra arguments to pass to the diff command.
+ """
+ raise NotImplementedError(
+ "abstract method -- subclass %s must override" % self.__class__)
+
+ def GetUnknownFiles(self):
+ """Return a list of files unknown to the VCS."""
+ raise NotImplementedError(
+ "abstract method -- subclass %s must override" % self.__class__)
+
+ def CheckForUnknownFiles(self):
+ """Show an "are you sure?" prompt if there are unknown files."""
+ unknown_files = self.GetUnknownFiles()
+ if unknown_files:
+ print "The following files are not added to version control:"
+ for line in unknown_files:
+ print line
+      prompt = "Are you sure you want to continue? (y/N) "
+ answer = raw_input(prompt).strip()
+ if answer != "y":
+ ErrorExit("User aborted")
+
+ def GetBaseFile(self, filename):
+ """Get the content of the upstream version of a file.
+
+ Returns:
+ A tuple (base_content, new_content, is_binary, status)
+ base_content: The contents of the base file.
+      new_content: For text files, this is None. For binary files, this is
+ the contents of the new file, since the diff output won't contain
+ information to reconstruct the current file.
+ is_binary: True iff the file is binary.
+ status: The status of the file.
+ """
+
+ raise NotImplementedError(
+ "abstract method -- subclass %s must override" % self.__class__)
+
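+  # For example (values made up), a modified text file might come back as
+  # ("old contents\n", None, False, "M"): text files omit new_content
+  # because the server reconstructs it from the diff.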
+
+ def GetBaseFiles(self, diff):
+    """Helper that calls GetBaseFile for each file in the patch.
+
+ Returns:
+ A dictionary that maps from filename to GetBaseFile's tuple. Filenames
+ are retrieved based on lines that start with "Index:" or
+ "Property changes on:".
+ """
+ files = {}
+ for line in diff.splitlines(True):
+ if line.startswith('Index:') or line.startswith('Property changes on:'):
+ unused, filename = line.split(':', 1)
+ # On Windows if a file has property changes its filename uses '\'
+ # instead of '/'.
+ filename = filename.strip().replace('\\', '/')
+ files[filename] = self.GetBaseFile(filename)
+ return files
+
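+  # An illustrative example (filenames made up): a diff containing
+  #
+  #   Index: WebCore/foo.cc
+  #   ...
+  #   Property changes on: WebCore/bar.png
+  #   ...
+  #
+  # yields {"WebCore/foo.cc": ..., "WebCore/bar.png": ...}, each value being
+  # the 4-tuple documented in GetBaseFile above.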
+
+ def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options,
+ files):
+ """Uploads the base files (and if necessary, the current ones as well)."""
+
+ def UploadFile(filename, file_id, content, is_binary, status, is_base):
+ """Uploads a file to the server."""
+ file_too_large = False
+ if is_base:
+ type = "base"
+ else:
+ type = "current"
+ if len(content) > MAX_UPLOAD_SIZE:
+ print ("Not uploading the %s file for %s because it's too large." %
+ (type, filename))
+ file_too_large = True
+ content = ""
+ checksum = md5(content).hexdigest()
+ if options.verbose > 0 and not file_too_large:
+ print "Uploading %s file for %s" % (type, filename)
+ url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id)
+ form_fields = [("filename", filename),
+ ("status", status),
+ ("checksum", checksum),
+ ("is_binary", str(is_binary)),
+ ("is_current", str(not is_base)),
+ ]
+ if file_too_large:
+ form_fields.append(("file_too_large", "1"))
+ if options.email:
+ form_fields.append(("user", options.email))
+ ctype, body = EncodeMultipartFormData(form_fields,
+ [("data", filename, content)])
+ response_body = rpc_server.Send(url, body,
+ content_type=ctype)
+ if not response_body.startswith("OK"):
+ StatusUpdate(" --> %s" % response_body)
+ sys.exit(1)
+
+    patches = dict()
+    for patch_id, patch_filename in patch_list:
+      patches.setdefault(patch_filename, patch_id)
+ for filename in patches.keys():
+ base_content, new_content, is_binary, status = files[filename]
+ file_id_str = patches.get(filename)
+ if file_id_str.find("nobase") != -1:
+ base_content = None
+ file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
+ file_id = int(file_id_str)
+      if base_content is not None:
+        UploadFile(filename, file_id, base_content, is_binary, status, True)
+      if new_content is not None:
+        UploadFile(filename, file_id, new_content, is_binary, status, False)
+
+ def IsImage(self, filename):
+ """Returns true if the filename has an image extension."""
+ mimetype = mimetypes.guess_type(filename)[0]
+ if not mimetype:
+ return False
+ return mimetype.startswith("image/")
+
+ def IsBinary(self, filename):
+    """Returns true if the guessed mimetype isn't in the text group."""
+ mimetype = mimetypes.guess_type(filename)[0]
+ if not mimetype:
+ return False # e.g. README, "real" binaries usually have an extension
+ # special case for text files which don't start with text/
+ if mimetype in TEXT_MIMETYPES:
+ return False
+ return not mimetype.startswith("text/")
+
+
+class SubversionVCS(VersionControlSystem):
+ """Implementation of the VersionControlSystem interface for Subversion."""
+
+ def __init__(self, options):
+ super(SubversionVCS, self).__init__(options)
+ if self.options.revision:
+ match = re.match(r"(\d+)(:(\d+))?", self.options.revision)
+ if not match:
+ ErrorExit("Invalid Subversion revision %s." % self.options.revision)
+ self.rev_start = match.group(1)
+ self.rev_end = match.group(3)
+ else:
+ self.rev_start = self.rev_end = None
+ # Cache output from "svn list -r REVNO dirname".
+    # Keys: dirname, Values: 2-tuple (output for start rev and end rev).
+ self.svnls_cache = {}
+ # Base URL is required to fetch files deleted in an older revision.
+ # Result is cached to not guess it over and over again in GetBaseFile().
+ required = self.options.download_base or self.options.revision is not None
+ self.svn_base = self._GuessBase(required)
+
+ def GuessBase(self, required):
+    """Returns the base URL guessed (and cached) by _GuessBase at init time."""
+ return self.svn_base
+
+ def _GuessBase(self, required):
+ """Returns the SVN base URL.
+
+ Args:
+      required: If True, exits if the URL can't be guessed; otherwise None is
+        returned.
+ """
+ info = RunShell(["svn", "info"])
+ for line in info.splitlines():
+ words = line.split()
+ if len(words) == 2 and words[0] == "URL:":
+ url = words[1]
+ scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
+ username, netloc = urllib.splituser(netloc)
+ if username:
+ logging.info("Removed username from base URL")
+ if netloc.endswith("svn.python.org"):
+ if netloc == "svn.python.org":
+ if path.startswith("/projects/"):
+ path = path[9:]
+ elif netloc != "pythondev@svn.python.org":
+ ErrorExit("Unrecognized Python URL: %s" % url)
+ base = "http://svn.python.org/view/*checkout*%s/" % path
+ logging.info("Guessed Python base = %s", base)
+ elif netloc.endswith("svn.collab.net"):
+ if path.startswith("/repos/"):
+ path = path[6:]
+ base = "http://svn.collab.net/viewvc/*checkout*%s/" % path
+ logging.info("Guessed CollabNet base = %s", base)
+ elif netloc.endswith(".googlecode.com"):
+ path = path + "/"
+ base = urlparse.urlunparse(("http", netloc, path, params,
+ query, fragment))
+ logging.info("Guessed Google Code base = %s", base)
+ else:
+ path = path + "/"
+ base = urlparse.urlunparse((scheme, netloc, path, params,
+ query, fragment))
+ logging.info("Guessed base = %s", base)
+ return base
+ if required:
+ ErrorExit("Can't find URL in output from svn info")
+ return None
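+
+  # An illustrative example (repository name made up): if "svn info" prints
+  #
+  #   URL: https://example.googlecode.com/svn/trunk
+  #
+  # the googlecode branch above guesses the base
+  # "http://example.googlecode.com/svn/trunk/" (note the forced http scheme
+  # and the trailing slash).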
+
+ def GenerateDiff(self, args):
+ cmd = ["svn", "diff"]
+ if self.options.revision:
+ cmd += ["-r", self.options.revision]
+ cmd.extend(args)
+ data = RunShell(cmd)
+ count = 0
+ for line in data.splitlines():
+ if line.startswith("Index:") or line.startswith("Property changes on:"):
+ count += 1
+ logging.info(line)
+ if not count:
+ ErrorExit("No valid patches found in output from svn diff")
+ return data
+
+ def _CollapseKeywords(self, content, keyword_str):
+ """Collapses SVN keywords."""
+ # svn cat translates keywords but svn diff doesn't. As a result of this
+ # behavior patching.PatchChunks() fails with a chunk mismatch error.
+ # This part was originally written by the Review Board development team
+ # who had the same problem (http://reviews.review-board.org/r/276/).
+ # Mapping of keywords to known aliases
+ svn_keywords = {
+ # Standard keywords
+ 'Date': ['Date', 'LastChangedDate'],
+ 'Revision': ['Revision', 'LastChangedRevision', 'Rev'],
+ 'Author': ['Author', 'LastChangedBy'],
+ 'HeadURL': ['HeadURL', 'URL'],
+ 'Id': ['Id'],
+
+ # Aliases
+ 'LastChangedDate': ['LastChangedDate', 'Date'],
+ 'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'],
+ 'LastChangedBy': ['LastChangedBy', 'Author'],
+ 'URL': ['URL', 'HeadURL'],
+ }
+
+ def repl(m):
+ if m.group(2):
+ return "$%s::%s$" % (m.group(1), " " * len(m.group(3)))
+ return "$%s$" % m.group(1)
+ keywords = [keyword
+ for name in keyword_str.split(" ")
+ for keyword in svn_keywords.get(name, [])]
+ return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content)
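+
+  # An illustrative example (dates made up): with keyword_str "Date",
+  #
+  #   $Date: 2010-05-11 $   collapses to   $Date$
+  #   $Date:: 2010-05-11 $  collapses to   $Date::            $
+  #
+  # i.e. fixed-length keywords keep their width (padded with spaces), so the
+  # fetched base matches what "svn diff" produced.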
+
+ def GetUnknownFiles(self):
+ status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True)
+ unknown_files = []
+ for line in status.split("\n"):
+ if line and line[0] == "?":
+ unknown_files.append(line)
+ return unknown_files
+
+ def ReadFile(self, filename):
+ """Returns the contents of a file."""
+ file = open(filename, 'rb')
+ result = ""
+ try:
+ result = file.read()
+ finally:
+ file.close()
+ return result
+
+ def GetStatus(self, filename):
+ """Returns the status of a file."""
+ if not self.options.revision:
+ status = RunShell(["svn", "status", "--ignore-externals", filename])
+ if not status:
+ ErrorExit("svn status returned no output for %s" % filename)
+ status_lines = status.splitlines()
+ # If file is in a cl, the output will begin with
+ # "\n--- Changelist 'cl_name':\n". See
+ # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt
+ if (len(status_lines) == 3 and
+ not status_lines[0] and
+ status_lines[1].startswith("--- Changelist")):
+ status = status_lines[2]
+ else:
+ status = status_lines[0]
+ # If we have a revision to diff against we need to run "svn list"
+ # for the old and the new revision and compare the results to get
+ # the correct status for a file.
+ else:
+ dirname, relfilename = os.path.split(filename)
+ if dirname not in self.svnls_cache:
+ cmd = ["svn", "list", "-r", self.rev_start, dirname or "."]
+ out, returncode = RunShellWithReturnCode(cmd)
+ if returncode:
+ ErrorExit("Failed to get status for %s." % filename)
+ old_files = out.splitlines()
+ args = ["svn", "list"]
+ if self.rev_end:
+ args += ["-r", self.rev_end]
+ cmd = args + [dirname or "."]
+ out, returncode = RunShellWithReturnCode(cmd)
+ if returncode:
+ ErrorExit("Failed to run command %s" % cmd)
+ self.svnls_cache[dirname] = (old_files, out.splitlines())
+ old_files, new_files = self.svnls_cache[dirname]
+ if relfilename in old_files and relfilename not in new_files:
+ status = "D "
+ elif relfilename in old_files and relfilename in new_files:
+ status = "M "
+ else:
+ status = "A "
+ return status
+
+ def GetBaseFile(self, filename):
+ status = self.GetStatus(filename)
+ base_content = None
+ new_content = None
+
+ # If a file is copied its status will be "A +", which signifies
+ # "addition-with-history". See "svn st" for more information. We need to
+ # upload the original file or else diff parsing will fail if the file was
+ # edited.
+ if status[0] == "A" and status[3] != "+":
+ # We'll need to upload the new content if we're adding a binary file
+ # since diff's output won't contain it.
+ mimetype = RunShell(["svn", "propget", "svn:mime-type", filename],
+ silent_ok=True)
+ base_content = ""
+ is_binary = bool(mimetype) and not mimetype.startswith("text/")
+ if is_binary and self.IsImage(filename):
+ new_content = self.ReadFile(filename)
+ elif (status[0] in ("M", "D", "R") or
+ (status[0] == "A" and status[3] == "+") or # Copied file.
+ (status[0] == " " and status[1] == "M")): # Property change.
+ args = []
+ if self.options.revision:
+ url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
+ else:
+ # Don't change filename, it's needed later.
+ url = filename
+ args += ["-r", "BASE"]
+ cmd = ["svn"] + args + ["propget", "svn:mime-type", url]
+ mimetype, returncode = RunShellWithReturnCode(cmd)
+ if returncode:
+ # File does not exist in the requested revision.
+ # Reset mimetype, it contains an error message.
+ mimetype = ""
+ get_base = False
+ is_binary = bool(mimetype) and not mimetype.startswith("text/")
+ if status[0] == " ":
+ # Empty base content just to force an upload.
+ base_content = ""
+ elif is_binary:
+ if self.IsImage(filename):
+ get_base = True
+ if status[0] == "M":
+ if not self.rev_end:
+ new_content = self.ReadFile(filename)
+ else:
+ url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end)
+ new_content = RunShell(["svn", "cat", url],
+ universal_newlines=True, silent_ok=True)
+ else:
+ base_content = ""
+ else:
+ get_base = True
+
+ if get_base:
+ if is_binary:
+ universal_newlines = False
+ else:
+ universal_newlines = True
+ if self.rev_start:
+ # "svn cat -r REV delete_file.txt" doesn't work. cat requires
+ # the full URL with "@REV" appended instead of using "-r" option.
+ url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
+ base_content = RunShell(["svn", "cat", url],
+ universal_newlines=universal_newlines,
+ silent_ok=True)
+ else:
+ base_content = RunShell(["svn", "cat", filename],
+ universal_newlines=universal_newlines,
+ silent_ok=True)
+ if not is_binary:
+ args = []
+ if self.rev_start:
+ url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
+ else:
+ url = filename
+ args += ["-r", "BASE"]
+ cmd = ["svn"] + args + ["propget", "svn:keywords", url]
+ keywords, returncode = RunShellWithReturnCode(cmd)
+ if keywords and not returncode:
+ base_content = self._CollapseKeywords(base_content, keywords)
+ else:
+ StatusUpdate("svn status returned unexpected output: %s" % status)
+ sys.exit(1)
+ return base_content, new_content, is_binary, status[0:5]
+
+
+class GitVCS(VersionControlSystem):
+ """Implementation of the VersionControlSystem interface for Git."""
+
+ def __init__(self, options):
+ super(GitVCS, self).__init__(options)
+ # Map of filename -> (hash before, hash after) of base file.
+ # Hashes for "no such file" are represented as None.
+ self.hashes = {}
+ # Map of new filename -> old filename for renames.
+ self.renames = {}
+
+ def GenerateDiff(self, extra_args):
+ # This is more complicated than svn's GenerateDiff because we must convert
+ # the diff output to include an svn-style "Index:" line as well as record
+ # the hashes of the files, so we can upload them along with our diff.
+
+    # Special hash used by git to indicate "no such content".
+ NULL_HASH = "0"*40
+
+ extra_args = extra_args[:]
+ if self.options.revision:
+ extra_args = [self.options.revision] + extra_args
+
+ # --no-ext-diff is broken in some versions of Git, so try to work around
+ # this by overriding the environment (but there is still a problem if the
+ # git config key "diff.external" is used).
+ env = os.environ.copy()
+ if 'GIT_EXTERNAL_DIFF' in env: del env['GIT_EXTERNAL_DIFF']
+ gitdiff = RunShell(["git", "diff", "--no-ext-diff", "--full-index", "-M"]
+ + extra_args, env=env)
+
+ def IsFileNew(filename):
+ return filename in self.hashes and self.hashes[filename][0] is None
+
+ def AddSubversionPropertyChange(filename):
+      """Adds svn's property change information to the patch if the given
+      file is new.
+
+      We use Subversion's auto-props setting to retrieve the properties.
+ See http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2 for
+ Subversion's [auto-props] setting.
+ """
+ if self.options.emulate_svn_auto_props and IsFileNew(filename):
+ svnprops = GetSubversionPropertyChanges(filename)
+ if svnprops:
+ svndiff.append("\n" + svnprops + "\n")
+
+ svndiff = []
+ filecount = 0
+ filename = None
+ for line in gitdiff.splitlines():
+ match = re.match(r"diff --git a/(.*) b/(.*)$", line)
+ if match:
+ # Add auto property here for previously seen file.
+ if filename is not None:
+ AddSubversionPropertyChange(filename)
+ filecount += 1
+ # Intentionally use the "after" filename so we can show renames.
+ filename = match.group(2)
+ svndiff.append("Index: %s\n" % filename)
+ if match.group(1) != match.group(2):
+ self.renames[match.group(2)] = match.group(1)
+ else:
+ # The "index" line in a git diff looks like this (long hashes elided):
+ # index 82c0d44..b2cee3f 100755
+ # We want to save the left hash, as that identifies the base file.
+ match = re.match(r"index (\w+)\.\.(\w+)", line)
+ if match:
+ before, after = (match.group(1), match.group(2))
+ if before == NULL_HASH:
+ before = None
+ if after == NULL_HASH:
+ after = None
+ self.hashes[filename] = (before, after)
+ svndiff.append(line + "\n")
+ if not filecount:
+ ErrorExit("No valid patches found in output from git diff")
+ # Add auto property for the last seen file.
+ assert filename is not None
+ AddSubversionPropertyChange(filename)
+ return "".join(svndiff)
+
+ def GetUnknownFiles(self):
+ status = RunShell(["git", "ls-files", "--exclude-standard", "--others"],
+ silent_ok=True)
+ return status.splitlines()
+
+ def GetFileContent(self, file_hash, is_binary):
+ """Returns the content of a file identified by its git hash."""
+ data, retcode = RunShellWithReturnCode(["git", "show", file_hash],
+ universal_newlines=not is_binary)
+ if retcode:
+ ErrorExit("Got error status from 'git show %s'" % file_hash)
+ return data
+
+ def GetBaseFile(self, filename):
+ hash_before, hash_after = self.hashes.get(filename, (None,None))
+ base_content = None
+ new_content = None
+ is_binary = self.IsBinary(filename)
+ status = None
+
+ if filename in self.renames:
+ status = "A +" # Match svn attribute name for renames.
+ if filename not in self.hashes:
+ # If a rename doesn't change the content, we never get a hash.
+ base_content = RunShell(["git", "show", "HEAD:" + filename])
+ elif not hash_before:
+ status = "A"
+ base_content = ""
+ elif not hash_after:
+ status = "D"
+ else:
+ status = "M"
+
+ is_image = self.IsImage(filename)
+
+ # Grab the before/after content if we need it.
+ # We should include file contents if it's text or it's an image.
+ if not is_binary or is_image:
+ # Grab the base content if we don't have it already.
+ if base_content is None and hash_before:
+ base_content = self.GetFileContent(hash_before, is_binary)
+      # Only include the "after" file if it's an image; otherwise it is
+      # reconstructed from the diff.
+ if is_image and hash_after:
+ new_content = self.GetFileContent(hash_after, is_binary)
+
+ return (base_content, new_content, is_binary, status)
+
+
+class MercurialVCS(VersionControlSystem):
+ """Implementation of the VersionControlSystem interface for Mercurial."""
+
+ def __init__(self, options, repo_dir):
+ super(MercurialVCS, self).__init__(options)
+ # Absolute path to repository (we can be in a subdir)
+ self.repo_dir = os.path.normpath(repo_dir)
+ # Compute the subdir
+ cwd = os.path.normpath(os.getcwd())
+ assert cwd.startswith(self.repo_dir)
+ self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
+ if self.options.revision:
+ self.base_rev = self.options.revision
+ else:
+ self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()
+
+ def _GetRelPath(self, filename):
+    """Get the relative path of a file with respect to the current directory,
+    given its logical path in the repo."""
+ assert filename.startswith(self.subdir), (filename, self.subdir)
+ return filename[len(self.subdir):].lstrip(r"\/")
+
+ def GenerateDiff(self, extra_args):
+ # If no file specified, restrict to the current subdir
+ extra_args = extra_args or ["."]
+ cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
+ data = RunShell(cmd, silent_ok=True)
+ svndiff = []
+ filecount = 0
+ for line in data.splitlines():
+      m = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+ if m:
+        # Modify the line to make it look as if it comes from svn diff.
+ # With this modification no changes on the server side are required
+ # to make upload.py work with Mercurial repos.
+ # NOTE: for proper handling of moved/copied files, we have to use
+ # the second filename.
+ filename = m.group(2)
+ svndiff.append("Index: %s" % filename)
+ svndiff.append("=" * 67)
+ filecount += 1
+ logging.info(line)
+ else:
+ svndiff.append(line)
+ if not filecount:
+ ErrorExit("No valid patches found in output from hg diff")
+ return "\n".join(svndiff) + "\n"
+
+ def GetUnknownFiles(self):
+ """Return a list of files unknown to the VCS."""
+ status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
+ silent_ok=True)
+ unknown_files = []
+ for line in status.splitlines():
+ st, fn = line.split(" ", 1)
+ if st == "?":
+ unknown_files.append(fn)
+ return unknown_files
+
+ def GetBaseFile(self, filename):
+ # "hg status" and "hg cat" both take a path relative to the current subdir
+ # rather than to the repo root, but "hg diff" has given us the full path
+ # to the repo root.
+ base_content = ""
+ new_content = None
+ is_binary = False
+ oldrelpath = relpath = self._GetRelPath(filename)
+ # "hg status -C" returns two lines for moved/copied files, one otherwise
+ out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
+ out = out.splitlines()
+ # HACK: strip error message about missing file/directory if it isn't in
+ # the working copy
+ if out[0].startswith('%s: ' % relpath):
+ out = out[1:]
+ if len(out) > 1:
+ # Moved/copied => considered as modified, use old filename to
+ # retrieve base contents
+ oldrelpath = out[1].strip()
+ status = "M"
+ else:
+ status, _ = out[0].split(' ', 1)
+ if ":" in self.base_rev:
+ base_rev = self.base_rev.split(":", 1)[0]
+ else:
+ base_rev = self.base_rev
+ if status != "A":
+ base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
+ silent_ok=True)
+ is_binary = "\0" in base_content # Mercurial's heuristic
+ if status != "R":
+ new_content = open(relpath, "rb").read()
+ is_binary = is_binary or "\0" in new_content
+ if is_binary and base_content:
+ # Fetch again without converting newlines
+ base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
+ silent_ok=True, universal_newlines=False)
+ if not is_binary or not self.IsImage(relpath):
+ new_content = None
+ return base_content, new_content, is_binary, status
+
+
+# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync.
+def SplitPatch(data):
+ """Splits a patch into separate pieces for each file.
+
+ Args:
+ data: A string containing the output of svn diff.
+
+ Returns:
+ A list of 2-tuple (filename, text) where text is the svn diff output
+ pertaining to filename.
+ """
+ patches = []
+ filename = None
+ diff = []
+ for line in data.splitlines(True):
+ new_filename = None
+ if line.startswith('Index:'):
+ unused, new_filename = line.split(':', 1)
+ new_filename = new_filename.strip()
+ elif line.startswith('Property changes on:'):
+ unused, temp_filename = line.split(':', 1)
+      # When a file is modified, paths use '/' between directories; however,
+      # when a property is modified, '\' is used on Windows. Make them the
+      # same, otherwise the file shows up twice.
+ temp_filename = temp_filename.strip().replace('\\', '/')
+ if temp_filename != filename:
+ # File has property changes but no modifications, create a new diff.
+ new_filename = temp_filename
+ if new_filename:
+ if filename and diff:
+ patches.append((filename, ''.join(diff)))
+ filename = new_filename
+ diff = [line]
+ continue
+ if diff is not None:
+ diff.append(line)
+ if filename and diff:
+ patches.append((filename, ''.join(diff)))
+ return patches
+
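+# An illustrative example (filenames made up): for input of the form
+#
+#   Index: a.py
+#   <diff for a.py>
+#   Index: b.py
+#   <diff for b.py>
+#
+# SplitPatch returns [("a.py", "Index: a.py\n<diff for a.py>\n"),
+# ("b.py", "Index: b.py\n<diff for b.py>\n")].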
+
+def UploadSeparatePatches(issue, rpc_server, patchset, data, options):
+ """Uploads a separate patch for each file in the diff output.
+
+ Returns a list of [patch_key, filename] for each file.
+ """
+ patches = SplitPatch(data)
+ rv = []
+ for patch in patches:
+ if len(patch[1]) > MAX_UPLOAD_SIZE:
+ print ("Not uploading the patch for " + patch[0] +
+ " because the file is too large.")
+ continue
+ form_fields = [("filename", patch[0])]
+ if not options.download_base:
+ form_fields.append(("content_upload", "1"))
+ files = [("data", "data.diff", patch[1])]
+ ctype, body = EncodeMultipartFormData(form_fields, files)
+ url = "/%d/upload_patch/%d" % (int(issue), int(patchset))
+ print "Uploading patch for " + patch[0]
+ response_body = rpc_server.Send(url, body, content_type=ctype)
+ lines = response_body.splitlines()
+ if not lines or lines[0] != "OK":
+ StatusUpdate(" --> %s" % response_body)
+ sys.exit(1)
+ rv.append([lines[1], patch[0]])
+ return rv
+
+
+def GuessVCSName():
+ """Helper to guess the version control system.
+
+ This examines the current directory, guesses which VersionControlSystem
+  we're using, and returns a string indicating which VCS was detected.
+
+ Returns:
+ A pair (vcs, output). vcs is a string indicating which VCS was detected
+ and is one of VCS_GIT, VCS_MERCURIAL, VCS_SUBVERSION, or VCS_UNKNOWN.
+ output is a string containing any interesting output from the vcs
+ detection routine, or None if there is nothing interesting.
+ """
+ # Mercurial has a command to get the base directory of a repository
+ # Try running it, but don't die if we don't have hg installed.
+ # NOTE: we try Mercurial first as it can sit on top of an SVN working copy.
+ try:
+ out, returncode = RunShellWithReturnCode(["hg", "root"])
+ if returncode == 0:
+ return (VCS_MERCURIAL, out.strip())
+ except OSError, (errno, message):
+ if errno != 2: # ENOENT -- they don't have hg installed.
+ raise
+
+ # Subversion has a .svn in all working directories.
+ if os.path.isdir('.svn'):
+ logging.info("Guessed VCS = Subversion")
+ return (VCS_SUBVERSION, None)
+
+ # Git has a command to test if you're in a git tree.
+ # Try running it, but don't die if we don't have git installed.
+ try:
+ out, returncode = RunShellWithReturnCode(["git", "rev-parse",
+ "--is-inside-work-tree"])
+ if returncode == 0:
+ return (VCS_GIT, None)
+ except OSError, (errno, message):
+ if errno != 2: # ENOENT -- they don't have git installed.
+ raise
+
+ return (VCS_UNKNOWN, None)
+
+
+def GuessVCS(options):
+ """Helper to guess the version control system.
+
+ This verifies any user-specified VersionControlSystem (by command line
+ or environment variable). If the user didn't specify one, this examines
+ the current directory, guesses which VersionControlSystem we're using,
+ and returns an instance of the appropriate class. Exit with an error
+ if we can't figure it out.
+
+ Returns:
+ A VersionControlSystem instance. Exits if the VCS can't be guessed.
+ """
+ vcs = options.vcs
+ if not vcs:
+ vcs = os.environ.get("CODEREVIEW_VCS")
+ if vcs:
+ v = VCS_ABBREVIATIONS.get(vcs.lower())
+ if v is None:
+ ErrorExit("Unknown version control system %r specified." % vcs)
+ (vcs, extra_output) = (v, None)
+ else:
+ (vcs, extra_output) = GuessVCSName()
+
+ if vcs == VCS_MERCURIAL:
+ if extra_output is None:
+ extra_output = RunShell(["hg", "root"]).strip()
+ return MercurialVCS(options, extra_output)
+ elif vcs == VCS_SUBVERSION:
+ return SubversionVCS(options)
+ elif vcs == VCS_GIT:
+ return GitVCS(options)
+
+ ErrorExit(("Could not guess version control system. "
+ "Are you in a working copy directory?"))
+
+
+def CheckReviewer(reviewer):
+  """Validate a reviewer -- either a nickname or an email address.
+
+ Args:
+ reviewer: A nickname or an email address.
+
+ Calls ErrorExit() if it is an invalid email address.
+ """
+ if "@" not in reviewer:
+ return # Assume nickname
+ parts = reviewer.split("@")
+ if len(parts) > 2:
+ ErrorExit("Invalid email address: %r" % reviewer)
+ assert len(parts) == 2
+ if "." not in parts[1]:
+ ErrorExit("Invalid email address: %r" % reviewer)
+
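+# Illustrative examples (addresses made up): CheckReviewer("joe") and
+# CheckReviewer("joe@example.com") return quietly, while
+# CheckReviewer("joe@example") and CheckReviewer("a@b@c") call ErrorExit.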
+
+def LoadSubversionAutoProperties():
+  """Returns the content of the [auto-props] section of Subversion's config
+  file as a dictionary.
+
+  Returns:
+    A dictionary whose key-value pairs correspond to the [auto-props]
+    section's key-value pairs.
+    An empty dictionary is returned if:
+    - the config file doesn't exist, or
+    - 'enable-auto-props' is not set to a true-like value in [miscellany].
+  """
+  # TODO(hayato): Windows users might use a different path for the
+  # configuration file.
+ subversion_config = os.path.expanduser("~/.subversion/config")
+ if not os.path.exists(subversion_config):
+ return {}
+ config = ConfigParser.ConfigParser()
+ config.read(subversion_config)
+ if (config.has_section("miscellany") and
+ config.has_option("miscellany", "enable-auto-props") and
+ config.getboolean("miscellany", "enable-auto-props") and
+ config.has_section("auto-props")):
+ props = {}
+ for file_pattern in config.options("auto-props"):
+ props[file_pattern] = ParseSubversionPropertyValues(
+ config.get("auto-props", file_pattern))
+ return props
+ else:
+ return {}
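+
+# An illustrative ~/.subversion/config fragment (patterns made up) that this
+# function would pick up:
+#
+#   [miscellany]
+#   enable-auto-props = yes
+#
+#   [auto-props]
+#   *.cc = svn:eol-style=LF
+#   *.sh = svn:eol-style=LF;svn:executable
+#
+# giving {"*.cc": [("svn:eol-style", "LF")],
+#         "*.sh": [("svn:eol-style", "LF"), ("svn:executable", "*")]}.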
+
+def ParseSubversionPropertyValues(props):
+  """Parses the given property value, which comes from the [auto-props]
+  section, and returns a list of (svn_prop_key, svn_prop_value) pairs.
+
+ See the following doctest for example.
+
+ >>> ParseSubversionPropertyValues('svn:eol-style=LF')
+ [('svn:eol-style', 'LF')]
+ >>> ParseSubversionPropertyValues('svn:mime-type=image/jpeg')
+ [('svn:mime-type', 'image/jpeg')]
+ >>> ParseSubversionPropertyValues('svn:eol-style=LF;svn:executable')
+ [('svn:eol-style', 'LF'), ('svn:executable', '*')]
+ """
+ key_value_pairs = []
+ for prop in props.split(";"):
+ key_value = prop.split("=")
+ assert len(key_value) <= 2
+ if len(key_value) == 1:
+      # If the value is not given, use '*' per Subversion's convention.
+ key_value_pairs.append((key_value[0], "*"))
+ else:
+ key_value_pairs.append((key_value[0], key_value[1]))
+ return key_value_pairs
+
+
+def GetSubversionPropertyChanges(filename):
+  """Returns Subversion's 'Property changes on ...' string, which is used in
+ the patch file.
+
+ Args:
+ filename: filename whose property might be set by [auto-props] config.
+
+ Returns:
+    A string like 'Property changes on |filename| ...' if the given |filename|
+    matches any entry in the [auto-props] section; None otherwise.
+ """
+ global svn_auto_props_map
+ if svn_auto_props_map is None:
+ svn_auto_props_map = LoadSubversionAutoProperties()
+
+ all_props = []
+ for file_pattern, props in svn_auto_props_map.items():
+ if fnmatch.fnmatch(filename, file_pattern):
+ all_props.extend(props)
+ if all_props:
+ return FormatSubversionPropertyChanges(filename, all_props)
+ return None
+
+
+def FormatSubversionPropertyChanges(filename, props):
+  """Returns Subversion's 'Property changes on ...' string using the given
+  filename and properties.
+
+ Args:
+ filename: filename
+ props: A list whose element is a (svn_prop_key, svn_prop_value) pair.
+
+ Returns:
+ A string which can be used in the patch file for Subversion.
+
+ See the following doctest for example.
+
+ >>> print FormatSubversionPropertyChanges('foo.cc', [('svn:eol-style', 'LF')])
+ Property changes on: foo.cc
+ ___________________________________________________________________
+ Added: svn:eol-style
+ + LF
+ <BLANKLINE>
+ """
+ prop_changes_lines = [
+ "Property changes on: %s" % filename,
+ "___________________________________________________________________"]
+ for key, value in props:
+ prop_changes_lines.append("Added: " + key)
+ prop_changes_lines.append(" + " + value)
+ return "\n".join(prop_changes_lines) + "\n"
+
+
+def RealMain(argv, data=None):
+ """The real main function.
+
+ Args:
+ argv: Command line arguments.
+ data: Diff contents. If None (default) the diff is generated by
+ the VersionControlSystem implementation returned by GuessVCS().
+
+ Returns:
+ A 2-tuple (issue id, patchset id).
+ The patchset id is None if the base files are not uploaded by this
+ script (applies only to SVN checkouts).
+ """
+ logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:"
+ "%(lineno)s %(message)s "))
+ os.environ['LC_ALL'] = 'C'
+ options, args = parser.parse_args(argv[1:])
+ global verbosity
+ verbosity = options.verbose
+ if verbosity >= 3:
+ logging.getLogger().setLevel(logging.DEBUG)
+ elif verbosity >= 2:
+ logging.getLogger().setLevel(logging.INFO)
+
+ vcs = GuessVCS(options)
+
+ base = options.base_url
+ if isinstance(vcs, SubversionVCS):
+ # Guessing the base field is only supported for Subversion.
+ # Note: Fetching base files may become deprecated in future releases.
+ guessed_base = vcs.GuessBase(options.download_base)
+ if base:
+ if guessed_base and base != guessed_base:
+ print "Using base URL \"%s\" from --base_url instead of \"%s\"" % \
+ (base, guessed_base)
+ else:
+ base = guessed_base
+
+ if not base and options.download_base:
+ options.download_base = True
+ logging.info("Enabled upload of base file")
+ if not options.assume_yes:
+ vcs.CheckForUnknownFiles()
+ if data is None:
+ data = vcs.GenerateDiff(args)
+ files = vcs.GetBaseFiles(data)
+ if verbosity >= 1:
+ print "Upload server:", options.server, "(change with -s/--server)"
+ if options.issue:
+ prompt = "Message describing this patch set: "
+ else:
+ prompt = "New issue subject: "
+ message = options.message or raw_input(prompt).strip()
+ if not message:
+ ErrorExit("A non-empty message is required")
+ rpc_server = GetRpcServer(options.server,
+ options.email,
+ options.host,
+ options.save_cookies)
+ form_fields = [("subject", message)]
+ if base:
+ form_fields.append(("base", base))
+ if options.issue:
+ form_fields.append(("issue", str(options.issue)))
+ if options.email:
+ form_fields.append(("user", options.email))
+ if options.reviewers:
+ for reviewer in options.reviewers.split(','):
+ CheckReviewer(reviewer)
+ form_fields.append(("reviewers", options.reviewers))
+ if options.cc:
+ for cc in options.cc.split(','):
+ CheckReviewer(cc)
+ form_fields.append(("cc", options.cc))
+ description = options.description
+ if options.description_file:
+ if options.description:
+ ErrorExit("Can't specify description and description_file")
+ file = open(options.description_file, 'r')
+ description = file.read()
+ file.close()
+ if description:
+ form_fields.append(("description", description))
+  # Send a hash of all the base files so the server can determine if a copy
+ # already exists in an earlier patchset.
+ base_hashes = ""
+ for file, info in files.iteritems():
+ if not info[0] is None:
+ checksum = md5(info[0]).hexdigest()
+ if base_hashes:
+ base_hashes += "|"
+ base_hashes += checksum + ":" + file
+ form_fields.append(("base_hashes", base_hashes))
+ if options.private:
+ if options.issue:
+ print "Warning: Private flag ignored when updating an existing issue."
+ else:
+ form_fields.append(("private", "1"))
+ # If we're uploading base files, don't send the email before the uploads, so
+ # that it contains the file status.
+ if options.send_mail and options.download_base:
+ form_fields.append(("send_mail", "1"))
+ if not options.download_base:
+ form_fields.append(("content_upload", "1"))
+ if len(data) > MAX_UPLOAD_SIZE:
+ print "Patch is large, so uploading file patches separately."
+ uploaded_diff_file = []
+ form_fields.append(("separate_patches", "1"))
+ else:
+ uploaded_diff_file = [("data", "data.diff", data)]
+ ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file)
+ response_body = rpc_server.Send("/upload", body, content_type=ctype)
+ patchset = None
+ if not options.download_base or not uploaded_diff_file:
+ lines = response_body.splitlines()
+ if len(lines) >= 2:
+ msg = lines[0]
+ patchset = lines[1].strip()
+ patches = [x.split(" ", 1) for x in lines[2:]]
+ else:
+ msg = response_body
+ else:
+ msg = response_body
+ StatusUpdate(msg)
+ if not response_body.startswith("Issue created.") and \
+ not response_body.startswith("Issue updated."):
+ sys.exit(0)
+ issue = msg[msg.rfind("/")+1:]
+
+ if not uploaded_diff_file:
+ result = UploadSeparatePatches(issue, rpc_server, patchset, data, options)
+ if not options.download_base:
+ patches = result
+
+ if not options.download_base:
+ vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files)
+ if options.send_mail:
+ rpc_server.Send("/" + issue + "/mail", payload="")
+ return issue, patchset
+
+
+def main():
+ try:
+ RealMain(sys.argv)
+ except KeyboardInterrupt:
+ print
+ StatusUpdate("Interrupted.")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()