author     Ben Murdoch <benm@google.com>    2010-06-15 19:36:43 +0100
committer  Ben Murdoch <benm@google.com>    2010-06-16 14:52:28 +0100
commit     545e470e52f0ac6a3a072bf559c796b42c6066b6 (patch)
tree       c0c14763654d84d37577dde512c3d3b4699a9e86 /WebKitTools/Scripts/webkitpy
parent     719298a66237d38ea5c05f1547123ad8aacbc237 (diff)
Merge webkit.org at r61121: Initial merge by git.
Change-Id: Icd6db395c62285be384d137164d95d7466c98760
Diffstat (limited to 'WebKitTools/Scripts/webkitpy')
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/checkout/changelog.py | 10
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py | 14
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/checkout/scm.py | 55
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py | 60
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/config/committers.py | 7
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/net/bugzilla.py | 39
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py | 19
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/net/buildbot.py | 7
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py | 12
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/net/rietveld.py | 6
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/system/outputcapture.py | 7
-rw-r--r--  WebKitTools/Scripts/webkitpy/common/system/user.py | 9
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py | 28
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/port/base.py | 8
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py | 7
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py | 12
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py | 9
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py | 9
-rw-r--r--  WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py | 9
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/__init__.py | 7
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README | 2
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py | 3401
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py | 438
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py | 1560
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py | 140
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py | 522
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py | 1080
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py | 1707
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py | 28
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py | 60
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py | 249
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py | 103
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py | 232
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py | 631
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py | 758
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py | 185
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py | 676
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py | 161
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py | 388
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py | 436
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py | 390
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py | 87
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py | 527
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py | 241
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py | 16
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py | 6
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py | 73
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py | 40
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py | 55
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py | 352
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py | 291
-rwxr-xr-x  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py | 1254
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py | 1
-rwxr-xr-x  WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py | 1702
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py | 27
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py | 38
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py | 2
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py | 22
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py | 6
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/download.py | 12
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py | 4
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py | 13
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/queues.py | 48
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py | 27
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py | 67
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/upload.py | 2
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py | 4
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/mocktool.py | 35
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py | 7
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py | 3
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/steps/options.py | 1
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py | 42
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py | 16
-rw-r--r--  WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py | 55
84 files changed, 580 insertions, 17987 deletions
diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py b/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py
index 6220fbd..40657eb 100644
--- a/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py
+++ b/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py
@@ -36,6 +36,8 @@ import textwrap
from webkitpy.common.system.deprecated_logging import log
from webkitpy.common.config.committers import CommitterList
+from webkitpy.common.net.bugzilla import parse_bug_id
+
def view_source_url(revision_number):
# FIXME: This doesn't really belong in this file, but we don't have a
@@ -88,6 +90,9 @@ class ChangeLogEntry(object):
def contents(self):
return self._contents
+ def bug_id(self):
+ return parse_bug_id(self._contents)
+
# FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead.
class ChangeLog(object):
@@ -183,3 +188,8 @@ class ChangeLog(object):
for line in fileinput.FileInput(self.path, inplace=1):
# Trailing comma suppresses printing newline
print line.replace("NOBODY (OOPS!)", reviewer.encode("utf-8")),
+
+ def set_short_description_and_bug_url(self, short_description, bug_url):
+ message = "%s\n %s" % (short_description, bug_url)
+ for line in fileinput.FileInput(self.path, inplace=1):
+ print line.replace("Need a short description and bug URL (OOPS!)", message.encode("utf-8")),
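The set_short_description_and_bug_url() addition relies on the same fileinput in-place idiom as set_reviewer() above: with inplace=1, stdout is temporarily redirected into the file being read, so every printed line replaces the original one. A minimal standalone sketch of the idiom (Python 2; the file name and placeholder text here are hypothetical):

    import fileinput

    # Rewrite ChangeLog in place, substituting the OOPS placeholder.
    # The trailing comma suppresses print's newline, since each line
    # read from the file still carries its own newline.
    for line in fileinput.FileInput("ChangeLog", inplace=1):
        print line.replace("NOBODY (OOPS!)", "Sam Weinig"),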
diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py
index 864428a..6aeb1f8 100644
--- a/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py
@@ -38,7 +38,7 @@ from StringIO import StringIO
from webkitpy.common.checkout.changelog import *
-class ChangeLogsTest(unittest.TestCase):
+class ChangeLogTest(unittest.TestCase):
_example_entry = u'''2009-08-17 Peter Kasting <pkasting@google.com>
@@ -131,6 +131,18 @@ class ChangeLogsTest(unittest.TestCase):
os.remove(changelog_path)
self.assertEquals(actual_contents, expected_contents)
+ def test_set_short_description_and_bug_url(self):
+ changelog_contents = u"%s\n%s" % (self._new_entry_boilerplate, self._example_changelog)
+ changelog_path = self._write_tmp_file_with_contents(changelog_contents.encode("utf-8"))
+ short_description = "A short description"
+ bug_url = "http://example.com/b/2344"
+ ChangeLog(changelog_path).set_short_description_and_bug_url(short_description, bug_url)
+ actual_contents = self._read_file_contents(changelog_path, "utf-8")
+ expected_message = "%s\n %s" % (short_description, bug_url)
+ expected_contents = changelog_contents.replace("Need a short description and bug URL (OOPS!)", expected_message)
+ os.remove(changelog_path)
+ self.assertEquals(actual_contents, expected_contents)
+
_revert_message = """ Unreviewed, rolling out r12345.
http://trac.webkit.org/changeset/12345
http://example.com/123
diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/scm.py b/WebKitTools/Scripts/webkitpy/common/checkout/scm.py
index eea76be..fc4c6fd 100644
--- a/WebKitTools/Scripts/webkitpy/common/checkout/scm.py
+++ b/WebKitTools/Scripts/webkitpy/common/checkout/scm.py
@@ -240,7 +240,7 @@ class SCM:
def supports_local_commits():
raise NotImplementedError, "subclasses must implement"
- def svn_merge_base():
+ def remote_merge_base():
raise NotImplementedError, "subclasses must implement"
def commit_locally_with_message(self, message):
@@ -465,11 +465,11 @@ class Git(SCM):
def discard_local_commits(self):
# FIXME: This should probably use cwd=self.checkout_root
- self.run(['git', 'reset', '--hard', self.svn_branch_name()])
+ self.run(['git', 'reset', '--hard', self.remote_branch_ref()])
def local_commits(self):
# FIXME: This should probably use cwd=self.checkout_root
- return self.run(['git', 'log', '--pretty=oneline', 'HEAD...' + self.svn_branch_name()]).splitlines()
+ return self.run(['git', 'log', '--pretty=oneline', 'HEAD...' + self.remote_branch_ref()]).splitlines()
def rebase_in_progress(self):
return os.path.exists(os.path.join(self.checkout_root, '.git/rebase-apply'))
@@ -507,7 +507,7 @@ class Git(SCM):
return git_commit
if self.should_squash(squash):
- return self.svn_merge_base()
+ return self.remote_merge_base()
# FIXME: Non-squash behavior should match commit_with_message. It raises an error
# if there are working copy changes and --squash or --no-squash wasn't passed in.
@@ -602,14 +602,14 @@ class Git(SCM):
if num_local_commits > 1 or (num_local_commits > 0 and not self.working_directory_is_clean()):
raise ScriptError(message=self._get_squash_error_message(num_local_commits))
- if squash and self._svn_branch_has_extra_commits():
+ if squash and self._remote_branch_has_extra_commits():
raise ScriptError(message="Cannot use --squash when HEAD is not fully merged/rebased to %s. "
- "This branch needs to be synced first." % self.svn_branch_name())
+ "This branch needs to be synced first." % self.remote_branch_ref())
return squash
- def _svn_branch_has_extra_commits(self):
- return len(run_command(['git', 'rev-list', '--max-count=1', self.svn_branch_name(), '^HEAD']))
+ def _remote_branch_has_extra_commits(self):
+ return len(run_command(['git', 'rev-list', '--max-count=1', self.remote_branch_ref(), '^HEAD']))
def commit_with_message(self, message, username=None, git_commit=None, squash=None):
# Username is ignored during Git commits.
@@ -624,7 +624,7 @@ class Git(SCM):
squash = self.should_squash(squash)
if squash:
- self.run(['git', 'reset', '--soft', self.svn_branch_name()])
+ self.run(['git', 'reset', '--soft', self.remote_branch_ref()])
self.commit_locally_with_message(message)
elif not self.working_directory_is_clean():
if not len(self.local_commits()):
@@ -650,8 +650,8 @@ class Git(SCM):
# We want to squash all this branch's commits into one commit with the proper description.
# We do this by doing a "merge --squash" into a new commit branch, then dcommitting that.
- MERGE_BRANCH = 'webkit-patch-land'
- self.delete_branch(MERGE_BRANCH)
+ MERGE_BRANCH_NAME = 'webkit-patch-land'
+ self.delete_branch(MERGE_BRANCH_NAME)
# We might be in a directory that's present in this branch but not in the
# trunk. Move up to the top of the tree so that git commands that expect a
@@ -662,7 +662,7 @@ class Git(SCM):
# We wrap in a try...finally block so if anything goes wrong, we clean up the branches.
commit_succeeded = True
try:
- self.run(['git', 'checkout', '-q', '-b', MERGE_BRANCH, self.svn_branch_name()])
+ self.run(['git', 'checkout', '-q', '-b', MERGE_BRANCH_NAME, self.remote_branch_ref()])
for commit in commit_ids:
# We're on a different branch now, so convert "head" to the branch name.
@@ -681,7 +681,7 @@ class Git(SCM):
# And then swap back to the original branch and clean up.
self.clean_working_directory()
self.run(['git', 'checkout', '-q', branch_name])
- self.delete_branch(MERGE_BRANCH)
+ self.delete_branch(MERGE_BRANCH_NAME)
return output
@@ -693,18 +693,31 @@ class Git(SCM):
return self.run(['git', 'svn', 'log', '--limit=1'])
# Git-specific methods:
+ def _branch_ref_exists(self, branch_ref):
+ return self.run(['git', 'show-ref', '--quiet', '--verify', branch_ref], return_exit_code=True) == 0
- def delete_branch(self, branch):
- if self.run(['git', 'show-ref', '--quiet', '--verify', 'refs/heads/' + branch], return_exit_code=True) == 0:
- self.run(['git', 'branch', '-D', branch])
+ def delete_branch(self, branch_name):
+ if self._branch_ref_exists('refs/heads/' + branch_name):
+ self.run(['git', 'branch', '-D', branch_name])
- def svn_merge_base(self):
- return self.run(['git', 'merge-base', self.svn_branch_name(), 'HEAD']).strip()
+ def remote_merge_base(self):
+ return self.run(['git', 'merge-base', self.remote_branch_ref(), 'HEAD']).strip()
+
+ def remote_branch_ref(self):
+ # Use references so that we can avoid collisions, e.g. we don't want to operate on refs/heads/trunk if it exists.
- def svn_branch_name(self):
# FIXME: This should do something like: Git.read_git_config('svn-remote.svn.fetch').split(':')[1]
# but that doesn't work if the git repo is tracking multiple svn branches.
- return 'trunk'
+ remote_branch_refs = [
+ 'refs/remotes/trunk', # A git-svn checkout as per http://trac.webkit.org/wiki/UsingGitWithWebKit.
+ 'refs/remotes/origin/master', # A git clone of git://git.webkit.org/WebKit.git that is not tracking svn.
+ ]
+
+ for ref in remote_branch_refs:
+ if self._branch_ref_exists(ref):
+ return ref
+
+ raise ScriptError(message="Can't find a branch to diff against. %s branches do not exist." % " and ".join(remote_branch_refs))
def commit_locally_with_message(self, message):
self.run(['git', 'commit', '--all', '-F', '-'], input=message)
@@ -726,7 +739,7 @@ class Git(SCM):
# A B : [A, B] (different from git diff, which would use "rev-list A..B")
def commit_ids_from_commitish_arguments(self, args):
if not len(args):
- args.append('%s..HEAD' % self.svn_branch_name())
+ args.append('%s..HEAD' % self.remote_branch_ref())
commit_ids = []
for commitish in args:
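remote_branch_ref() probes a fixed list of refs with `git show-ref --quiet --verify`, which exits 0 only when the fully qualified ref exists; a git-svn checkout therefore resolves to refs/remotes/trunk, while a plain clone of WebKit.git resolves to refs/remotes/origin/master. A sketch of the same probe using subprocess directly rather than webkitpy's run() wrapper (assumes it runs inside some git checkout):

    import subprocess

    def branch_ref_exists(ref):
        # Exit code 0 means the fully qualified ref exists here.
        return subprocess.call(
            ['git', 'show-ref', '--quiet', '--verify', ref]) == 0

    for ref in ['refs/remotes/trunk', 'refs/remotes/origin/master']:
        if branch_ref_exists(ref):
            print "diffing against", ref
            break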
diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py
index 8eea4d8..36a1d1c 100644
--- a/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py
@@ -635,25 +635,63 @@ Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA==
class GitTest(SCMTest):
- def _setup_git_clone_of_svn_repository(self):
+ def setUp(self):
+ """Sets up fresh git repository with one commit. Then setups a second git
+ repo that tracks the first one."""
+ self.original_dir = os.getcwd()
+
+ self.untracking_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout2")
+ run_command(['git', 'init', self.untracking_checkout_path])
+
+ os.chdir(self.untracking_checkout_path)
+ write_into_file_at_path('foo_file', 'foo')
+ run_command(['git', 'add', 'foo_file'])
+ run_command(['git', 'commit', '-am', 'dummy commit'])
+ self.untracking_scm = detect_scm_system(self.untracking_checkout_path)
+
+ self.tracking_git_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout")
+ run_command(['git', 'clone', '--quiet', self.untracking_checkout_path, self.tracking_git_checkout_path])
+ os.chdir(self.tracking_git_checkout_path)
+ self.tracking_scm = detect_scm_system(self.tracking_git_checkout_path)
+
+ def tearDown(self):
+ # Change back to a valid directory so that later calls to os.getcwd() do not fail.
+ os.chdir(self.original_dir)
+ run_command(['rm', '-rf', self.tracking_git_checkout_path])
+ run_command(['rm', '-rf', self.untracking_checkout_path])
+
+ def test_remote_branch_ref(self):
+ self.assertEqual(self.tracking_scm.remote_branch_ref(), 'refs/remotes/origin/master')
+
+ os.chdir(self.untracking_checkout_path)
+ self.assertRaises(ScriptError, self.untracking_scm.remote_branch_ref)
+
+
+class GitSVNTest(SCMTest):
+
+ def _setup_git_checkout(self):
self.git_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout")
# --quiet doesn't make git svn silent, so we use run_silent to redirect output
run_silent(['git', 'svn', 'clone', '-T', 'trunk', self.svn_repo_url, self.git_checkout_path])
+ os.chdir(self.git_checkout_path)
- def _tear_down_git_clone_of_svn_repository(self):
+ def _tear_down_git_checkout(self):
+ # Change back to a valid directory so that later calls to os.getcwd() do not fail.
+ os.chdir(self.original_dir)
run_command(['rm', '-rf', self.git_checkout_path])
def setUp(self):
+ self.original_dir = os.getcwd()
+
SVNTestRepository.setup(self)
- self._setup_git_clone_of_svn_repository()
- os.chdir(self.git_checkout_path)
+ self._setup_git_checkout()
self.scm = detect_scm_system(self.git_checkout_path)
# For historical reasons, we test some checkout code here too.
self.checkout = Checkout(self.scm)
def tearDown(self):
SVNTestRepository.tear_down(self)
- self._tear_down_git_clone_of_svn_repository()
+ self._tear_down_git_checkout()
def test_detection(self):
scm = detect_scm_system(self.git_checkout_path)
@@ -683,25 +721,24 @@ class GitTest(SCMTest):
self.assertEqual(len(self.scm.local_commits()), 0)
def test_delete_branch(self):
- old_branch = run_command(['git', 'symbolic-ref', 'HEAD']).strip()
new_branch = 'foo'
run_command(['git', 'checkout', '-b', new_branch])
self.assertEqual(run_command(['git', 'symbolic-ref', 'HEAD']).strip(), 'refs/heads/' + new_branch)
- run_command(['git', 'checkout', old_branch])
+ run_command(['git', 'checkout', '-b', 'bar'])
self.scm.delete_branch(new_branch)
self.assertFalse(re.search(r'foo', run_command(['git', 'branch'])))
- def test_svn_merge_base(self):
+ def test_remote_merge_base(self):
# Diff to merge-base should include working-copy changes,
# which the diff to svn_branch.. doesn't.
test_file = os.path.join(self.git_checkout_path, 'test_file')
write_into_file_at_path(test_file, 'foo')
- diff_to_common_base = _git_diff(self.scm.svn_branch_name() + '..')
- diff_to_merge_base = _git_diff(self.scm.svn_merge_base())
+ diff_to_common_base = _git_diff(self.scm.remote_branch_ref() + '..')
+ diff_to_merge_base = _git_diff(self.scm.remote_merge_base())
self.assertFalse(re.search(r'foo', diff_to_common_base))
self.assertTrue(re.search(r'foo', diff_to_merge_base))
@@ -888,6 +925,9 @@ class GitTest(SCMTest):
scm = detect_scm_system(self.git_checkout_path)
self.assertRaises(ScriptError, scm.commit_with_message, "another test commit", squash=True)
+ def test_remote_branch_ref(self):
+ self.assertEqual(self.scm.remote_branch_ref(), 'refs/remotes/trunk')
+
def test_reverse_diff(self):
self._shared_test_reverse_diff()
diff --git a/WebKitTools/Scripts/webkitpy/common/config/committers.py b/WebKitTools/Scripts/webkitpy/common/config/committers.py
index d9c541f..37bd4eb 100644
--- a/WebKitTools/Scripts/webkitpy/common/config/committers.py
+++ b/WebKitTools/Scripts/webkitpy/common/config/committers.py
@@ -70,6 +70,7 @@ committers_unable_to_review = [
Committer("Alexander Kellett", ["lypanov@mac.com", "a-lists001@lypanov.net", "lypanov@kde.org"], "lypanov"),
Committer("Alexander Pavlov", "apavlov@chromium.org"),
Committer("Andre Boule", "aboule@apple.com"),
+ Committer("Andrei Popescu", "andreip@google.com", "andreip"),
Committer("Andrew Wellington", ["andrew@webkit.org", "proton@wiretapped.net"], "proton"),
Committer("Andras Becsi", "abecsi@webkit.org", "bbandix"),
Committer("Andy Estes", "aestes@apple.com", "estes"),
@@ -133,14 +134,15 @@ committers_unable_to_review = [
Committer("Krzysztof Kowalczyk", "kkowalczyk@gmail.com"),
Committer("Levi Weintraub", "lweintraub@apple.com"),
Committer("Mads Ager", "ager@chromium.org"),
+ Committer("Marcus Voltis Bulach", "bulach@chromium.org"),
Committer("Matt Lilek", ["webkit@mattlilek.com", "pewtermoose@webkit.org"]),
Committer("Matt Perry", "mpcomplete@chromium.org"),
Committer("Maxime Britto", ["maxime.britto@gmail.com", "britto@apple.com"]),
Committer("Maxime Simon", ["simon.maxime@gmail.com", "maxime.simon@webkit.org"], "maxime.simon"),
- Committer("Martin Robinson", ["mrobinson@webkit.org", "martin.james.robinson@gmail.com"]),
+ Committer("Martin Robinson", ["mrobinson@igalia.com", "mrobinson@webkit.org", "martin.james.robinson@gmail.com"], "mrobinson"),
Committer("Michelangelo De Simone", "michelangelo@webkit.org", "michelangelo"),
Committer("Mike Belshe", ["mbelshe@chromium.org", "mike@belshe.com"]),
- Committer("Mike Fenton", ["mike.fenton@torchmobile.com", "mifenton@rim.com"], "mfenton"),
+ Committer("Mike Fenton", ["mifenton@rim.com", "mike.fenton@torchmobile.com"], "mfenton"),
Committer("Mike Thole", ["mthole@mikethole.com", "mthole@apple.com"]),
Committer("Mikhail Naganov", "mnaganov@chromium.org"),
Committer("MORITA Hajime", "morrita@google.com", "morrita"),
@@ -166,6 +168,7 @@ committers_unable_to_review = [
Committer("Yong Li", ["yong.li.webkit@gmail.com", "yong.li@torchmobile.com"], "yong"),
Committer("Yongjun Zhang", "yongjun.zhang@nokia.com"),
Committer("Yuzo Fujishima", "yuzo@google.com", "yuzo"),
+ Committer("Zhenyao Mo", "zmo@google.com"),
Committer("Zoltan Herczeg", "zherczeg@webkit.org", "zherczeg"),
Committer("Zoltan Horvath", "zoltan@webkit.org", "zoltan"),
]
diff --git a/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py b/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py
index 26d3652..40db32c 100644
--- a/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py
+++ b/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py
@@ -113,6 +113,9 @@ class Attachment(object):
def commit_queue(self):
return self._attachment_dictionary.get("commit-queue")
+ def in_rietveld(self):
+ return self._attachment_dictionary.get("in-rietveld")
+
def url(self):
# FIXME: This should just return
# self._bugzilla().attachment_url_for_id(self.id()). scm_unittest.py
@@ -158,6 +161,9 @@ class Bug(object):
def id(self):
return self.bug_dictionary["id"]
+ def title(self):
+ return self.bug_dictionary["title"]
+
def assigned_to_email(self):
return self.bug_dictionary["assigned_to_email"]
@@ -201,6 +207,9 @@ class Bug(object):
# a valid committer.
return filter(lambda patch: patch.committer(), patches)
+ def in_rietveld_queue_patches(self):
+ return [patch for patch in self.patches() if patch.in_rietveld() == None]
+
# A container for all of the logic for making and parsing bugzilla queries.
class BugzillaQueries(object):
@@ -264,6 +273,16 @@ class BugzillaQueries(object):
return sum([self._fetch_bug(bug_id).commit_queued_patches()
for bug_id in self.fetch_bug_ids_from_commit_queue()], [])
+ def fetch_first_patch_from_rietveld_queue(self):
+ # rietveld-queue processes all patches that don't have in-rietveld set.
+ query_url = "buglist.cgi?query_format=advanced&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&field0-0-0=flagtypes.name&type0-0-0=notsubstring&value0-0-0=in-rietveld&field0-1-0=attachments.ispatch&type0-1-0=equals&value0-1-0=1&order=Last+Changed&field0-2-0=attachments.isobsolete&type0-2-0=equals&value0-2-0=0"
+ bugs = self._fetch_bug_ids_advanced_query(query_url)
+ if not len(bugs):
+ return None
+
+ patches = self._fetch_bug(bugs[0]).in_rietveld_queue_patches()
+ return patches[0] if len(patches) else None
+
def _fetch_bug_ids_from_review_queue(self):
review_queue_url = "buglist.cgi?query_format=advanced&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&field0-0-0=flagtypes.name&type0-0-0=equals&value0-0-0=review?"
return self._fetch_bug_ids_advanced_query(review_queue_url)
@@ -474,6 +493,8 @@ class Bugzilla(object):
self._parse_attachment_flag(
element, 'review', attachment, 'reviewer_email')
self._parse_attachment_flag(
+ element, 'in-rietveld', attachment, 'rietveld_uploader_email')
+ self._parse_attachment_flag(
element, 'commit-queue', attachment, 'committer_email')
return attachment
@@ -592,7 +613,8 @@ class Bugzilla(object):
comment_text=None,
mark_for_review=False,
mark_for_commit_queue=False,
- mark_for_landing=False, bug_id=None):
+ mark_for_landing=False,
+ bug_id=None):
self.browser['description'] = description
self.browser['ispatch'] = ("1",)
self.browser['flag_type-1'] = ('?',) if mark_for_review else ('X',)
@@ -703,7 +725,7 @@ class Bugzilla(object):
self.browser["blocked"] = unicode(blocked)
if assignee == None:
assignee = self.username
- if assignee:
+ if assignee and not self.browser.find_control("assigned_to").disabled:
self.browser["assigned_to"] = assignee
self.browser["short_desc"] = bug_title
self.browser["comment"] = bug_description
@@ -730,8 +752,10 @@ class Bugzilla(object):
# FIXME: This will break if we ever re-order attachment flags
if flag_name == "review":
return self.browser.find_control(type='select', nr=0)
- if flag_name == "commit-queue":
+ elif flag_name == "commit-queue":
return self.browser.find_control(type='select', nr=1)
+ elif flag_name == "in-rietveld":
+ return self.browser.find_control(type='select', nr=2)
raise Exception("Don't know how to find flag named \"%s\"" % flag_name)
def clear_attachment_flags(self,
@@ -758,8 +782,8 @@ class Bugzilla(object):
attachment_id,
flag_name,
flag_value,
- comment_text,
- additional_comment_text):
+ comment_text=None,
+ additional_comment_text=None):
# FIXME: We need a way to test this function on a live bugzilla
# instance.
@@ -774,7 +798,10 @@ class Bugzilla(object):
self.browser.open(self.attachment_url_for_id(attachment_id, 'edit'))
self.browser.select_form(nr=1)
- self.browser.set_value(comment_text, name='comment', nr=0)
+
+ if comment_text:
+ self.browser.set_value(comment_text, name='comment', nr=0)
+
self._find_select_element_for_flag(flag_name).value = (flag_value,)
self.browser.submit()
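The in-rietveld flag is effectively tri-state as consumed here: _attachment_dictionary.get() returns None when the flag was never set on the attachment, and a Bugzilla flag status string (such as "?" or "+") once it has been, which is why in_rietveld_queue_patches() compares against None to find patches the queue has not yet touched. A toy illustration of that selection over hypothetical attachment dictionaries:

    attachments = [
        {"id": 100, "in-rietveld": None},  # flag never set: needs upload
        {"id": 101, "in-rietveld": "?"},   # upload in flight: skip
        {"id": 102, "in-rietveld": "+"},   # already uploaded: skip
    ]
    queued = [a for a in attachments if a.get("in-rietveld") is None]
    print [a["id"] for a in queued]  # prints [100]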
diff --git a/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py b/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py
index ce992e7..3556121 100644
--- a/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py
@@ -96,6 +96,11 @@ class BugzillaTest(unittest.TestCase):
status="+"
setter="two@test.com"
/>
+ <flag name="in-rietveld"
+ id="17933"
+ status="+"
+ setter="three@test.com"
+ />
</attachment>
'''
_expected_example_attachment_parsing = {
@@ -111,6 +116,8 @@ class BugzillaTest(unittest.TestCase):
'reviewer_email' : 'one@test.com',
'commit-queue' : '+',
'committer_email' : 'two@test.com',
+ 'in-rietveld': '+',
+ 'rietveld_uploader_email': 'three@test.com',
'attacher_email' : 'christian.plesner.hansen@gmail.com',
}
@@ -191,12 +198,12 @@ removed-because-it-was-really-long
ZEZpbmlzaExvYWRXaXRoUmVhc29uOnJlYXNvbl07Cit9CisKIEBlbmQKIAogI2VuZGlmCg==
</data>
- <flag name="review"
- id="27602"
- status="?"
- setter="mjs@apple.com"
- />
- </attachment>
+ <flag name="review"
+ id="27602"
+ status="?"
+ setter="mjs@apple.com"
+ />
+ </attachment>
</bug>
</bugzilla>
"""
diff --git a/WebKitTools/Scripts/webkitpy/common/net/buildbot.py b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py
index 6c6ed43..c849ef1 100644
--- a/WebKitTools/Scripts/webkitpy/common/net/buildbot.py
+++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py
@@ -333,7 +333,12 @@ class BuildBot(object):
builder['built_revision'] = int(revision_string) \
if not re.match('\D', revision_string) \
else None
- builder['is_green'] = not re.search('fail', cell.renderContents())
+
+ # FIXME: We treat slave lost as green, even though it is not, to
+ # work around the Qt bot being on a broken internet connection.
+ # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
+ builder['is_green'] = not re.search('fail', cell.renderContents()) or \
+ not not re.search('lost', cell.renderContents())
status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
link_match = re.match(status_link_regexp, status_link['href'])
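The `not not` above is just a bool() coercion of the match object (or None), so a box is considered green when its cell contains no failure text, or when the failure mentions a lost slave. A standalone restatement of the predicate:

    import re

    def is_green(cell_text):
        # Green unless the cell reports a failure; a lost slave is
        # forgiven per the workaround for bug 37099.
        return (not re.search('fail', cell_text)
                or bool(re.search('lost', cell_text)))

    print is_green('47383 failed compile-webkit')     # False
    print is_green('60563 failed failed slave lost')  # True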
diff --git a/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py b/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py
index 5e04745..5384321 100644
--- a/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py
@@ -169,6 +169,10 @@ class BuildBotTest(unittest.TestCase):
<td class="box"><a href="builders/Qt%20Linux%20Release">Qt Linux Release</a></td>
<td align="center" class="LastBuild box failure"><a href="builders/Qt%20Linux%20Release/builds/654">47383</a><br />failed<br />compile-webkit</td>
<td align="center" class="Activity idle">idle<br />3 pending</td>
+ <tr>
+ <td class="box"><a href="builders/Qt%20Windows%2032-bit%20Debug">Qt Windows 32-bit Debug</a></td>
+ <td align="center" class="LastBuild box failure"><a href="builders/Qt%20Windows%2032-bit%20Debug/builds/2090">60563</a><br />failed<br />failed<br />slave<br />lost</td>
+ <td align="center" class="Activity building">building<br />ETA in<br />~ 5 mins<br />at 08:25</td>
</table>
'''
_expected_example_one_box_parsings = [
@@ -196,6 +200,14 @@ class BuildBotTest(unittest.TestCase):
'activity': 'idle',
'pending_builds': 3,
},
+ {
+ 'is_green': True,
+ 'build_number' : 2090,
+ 'name': u'Qt Windows 32-bit Debug',
+ 'built_revision': 60563,
+ 'activity': 'building',
+ 'pending_builds': 0,
+ },
]
def test_status_parsing(self):
diff --git a/WebKitTools/Scripts/webkitpy/common/net/rietveld.py b/WebKitTools/Scripts/webkitpy/common/net/rietveld.py
index 572d1fd..eccda3a 100644
--- a/WebKitTools/Scripts/webkitpy/common/net/rietveld.py
+++ b/WebKitTools/Scripts/webkitpy/common/net/rietveld.py
@@ -51,6 +51,10 @@ class Rietveld(object):
if not message:
raise ScriptError("Rietveld requires a message.")
+ # Rietveld has a 100 character limit on message length.
+ if len(message) > 100:
+ message = message[:100]
+
args = [
# First argument is empty string to mimic sys.argv.
"",
@@ -70,5 +74,5 @@ class Rietveld(object):
# Use RealMain instead of calling upload from the commandline so that
# we can pass in the diff ourselves. Otherwise, upload will just use
# git diff for git checkouts, which doesn't respect --squash and --git-commit.
- issue, patchset = upload.RealMain(args[1:], data=diff)
+ issue, patchset = upload.RealMain(args, data=diff)
return issue
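Truncating with a slice rather than checking the length first works because Python slices never raise on out-of-range bounds; short messages pass through unchanged:

    message = "x" * 240
    message = message[:100]          # clamp to Rietveld's 100-char limit
    assert len(message) == 100

    short = "fits already"
    assert short[:100] == short      # no-op for messages under the limit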
diff --git a/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py b/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py
index 592a669..68a3919 100644
--- a/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py
+++ b/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py
@@ -52,9 +52,12 @@ class OutputCapture(object):
def restore_output(self):
return (self._restore_output_with_name("stdout"), self._restore_output_with_name("stderr"))
- def assert_outputs(self, testcase, function, args=[], kwargs={}, expected_stdout="", expected_stderr=""):
+ def assert_outputs(self, testcase, function, args=[], kwargs={}, expected_stdout="", expected_stderr="", expected_exception=None):
self.capture_output()
- return_value = function(*args, **kwargs)
+ if expected_exception:
+ return_value = testcase.assertRaises(expected_exception, function, *args, **kwargs)
+ else:
+ return_value = function(*args, **kwargs)
(stdout_string, stderr_string) = self.restore_output()
testcase.assertEqual(stdout_string, expected_stdout)
testcase.assertEqual(stderr_string, expected_stderr)
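When expected_exception is given, assert_outputs() runs the function via testcase.assertRaises(), which calls it and absorbs the expected exception; anything the function wrote to the redirected stdout/stderr before raising is therefore still compared against the expected strings. A hedged usage sketch with a hypothetical test case (assumes webkitpy is importable):

    import unittest
    from webkitpy.common.system.outputcapture import OutputCapture

    class ExampleTest(unittest.TestCase):
        def test_failure_output(self):
            def boom():
                print "about to fail"   # captured before the raise
                raise ValueError("boom")
            OutputCapture().assert_outputs(
                self, boom,
                expected_stdout="about to fail\n",
                expected_exception=ValueError)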
diff --git a/WebKitTools/Scripts/webkitpy/common/system/user.py b/WebKitTools/Scripts/webkitpy/common/system/user.py
index 82fa0d3..b4df3cb 100644
--- a/WebKitTools/Scripts/webkitpy/common/system/user.py
+++ b/WebKitTools/Scripts/webkitpy/common/system/user.py
@@ -104,5 +104,14 @@ class User(object):
response = raw_input("%s [Y/n]: " % message)
return not response or response.lower() == "y"
+ def can_open_url(self):
+ try:
+ webbrowser.get()
+ return True
+ except webbrowser.Error, e:
+ return False
+
def open_url(self, url):
+ if not self.can_open_url():
+ _log.warn("Failed to open %s" % url)
webbrowser.open(url)
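can_open_url() leans on the fact that webbrowser.get() raises webbrowser.Error when it cannot find any usable browser controller (typical on headless build bots), while merely constructing the controller has no visible side effect. A standalone sketch of the same probe:

    import webbrowser

    def can_open_url():
        try:
            webbrowser.get()  # raises webbrowser.Error with no browser
            return True
        except webbrowser.Error:
            return False

    if can_open_url():
        webbrowser.open("https://bugs.webkit.org/")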
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py
index cee44ad..bb214f7 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py
@@ -77,6 +77,7 @@ class JSONLayoutResultsGenerator(json_results_generator.JSONResultsGenerator):
self._test_timings = dict(
(path_to_name(test_tuple.filename), test_tuple.test_run_time)
for test_tuple in test_timings)
+ self._svn_repositories = port.test_repository_paths()
self._generate_json_output()
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py
index 0993cbd..1cf1b95 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py
@@ -38,6 +38,8 @@ import time
import urllib2
import xml.dom.minidom
+from webkitpy.common.checkout import scm
+from webkitpy.common.system.executive import ScriptError
from webkitpy.layout_tests.layout_package import test_expectations
import webkitpy.thirdparty.simplejson as simplejson
@@ -46,6 +48,7 @@ _log = logging.getLogger("webkitpy.layout_tests.layout_package."
class JSONResultsGenerator(object):
+ """A JSON results generator for generic tests."""
MAX_NUMBER_OF_BUILD_RESULTS_TO_LOG = 750
# Min time (seconds) that will be added to the JSON.
@@ -60,8 +63,6 @@ class JSONResultsGenerator(object):
RESULTS = "results"
TIMES = "times"
BUILD_NUMBERS = "buildNumbers"
- WEBKIT_SVN = "webkitRevision"
- CHROME_SVN = "chromeRevision"
TIME = "secondsSinceEpoch"
TESTS = "tests"
@@ -102,7 +103,6 @@ class JSONResultsGenerator(object):
all_tests: List of all the tests that were run. This should not
include skipped tests.
"""
- self._port = port
self._builder_name = builder_name
self._build_name = build_name
self._build_number = build_number
@@ -114,6 +114,7 @@ class JSONResultsGenerator(object):
self._passed_tests = passed_tests
self._skipped_tests = skipped_tests
self._all_tests = all_tests
+ self._svn_repositories = port.test_repository_paths()
self._generate_json_output()
@@ -132,6 +133,7 @@ class JSONResultsGenerator(object):
Args:
in_directory: The directory where svn is to be run.
"""
+
if os.path.exists(os.path.join(in_directory, '.svn')):
# Note: Not thread safe: http://bugs.python.org/issue2320
output = subprocess.Popen(["svn", "info", "--xml"],
@@ -312,23 +314,11 @@ class JSONResultsGenerator(object):
self._insert_item_into_raw_list(results_for_builder,
self._build_number, self.BUILD_NUMBERS)
- # These next two branches test to see which source repos we can
- # pull revisions from.
- if hasattr(self._port, 'path_from_webkit_base'):
- path_to_webkit = self._port.path_from_webkit_base('WebCore')
+ # Include SVN revisions for the given repositories.
+ for (name, path) in self._svn_repositories:
self._insert_item_into_raw_list(results_for_builder,
- self._get_svn_revision(path_to_webkit),
- self.WEBKIT_SVN)
-
- if hasattr(self._port, 'path_from_chromium_base'):
- try:
- path_to_chrome = self._port.path_from_chromium_base()
- self._insert_item_into_raw_list(results_for_builder,
- self._get_svn_revision(path_to_chrome),
- self.CHROME_SVN)
- except AssertionError:
- # We're not in a Chromium checkout, that's ok.
- pass
+ self._get_svn_revision(path),
+ name + 'Revision')
self._insert_item_into_raw_list(results_for_builder,
int(time.time()),
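With the repository list generalized, the JSON keys are now derived from each repository's name, so the default ('webkit', ...) entry still yields the old 'webkitRevision' key and Chromium's extra ('chrome', ...) entry yields 'chromeRevision'. A sketch of the key construction, with a hypothetical stand-in for _get_svn_revision() (the real code shells out to `svn info --xml`):

    def fake_svn_revision(path):
        # Stand-in for _get_svn_revision(); values are made up.
        return {"/src/webkit": 61121, "/src/chrome": 48290}[path]

    results_for_builder = {}
    for (name, path) in [("webkit", "/src/webkit"),
                         ("chrome", "/src/chrome")]:
        results_for_builder[name + "Revision"] = fake_svn_revision(path)

    print results_for_builder
    # {'webkitRevision': 61121, 'chromeRevision': 48290}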
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py
index 782c87c..e73579f 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py
@@ -537,6 +537,14 @@ class Port(object):
expectations, determining search paths, and logging information."""
raise NotImplementedError('Port.version')
+ def test_repository_paths(self):
+ """Returns a list of (repository_name, repository_path) tuples
+ of its depending code base. By default it returns a list that only
+ contains a ('webkit', <webkitRepossitoryPath>) tuple.
+ """
+ return [('webkit', self.layout_tests_dir())]
+
+
_WDIFF_DEL = '##WDIFF_DEL##'
_WDIFF_ADD = '##WDIFF_ADD##'
_WDIFF_END = '##WDIFF_END##'
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py
index db23eb8..e7f9ac8 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py
@@ -232,6 +232,13 @@ class ChromiumPort(base.Port):
raise ValueError('Unsupported test_platform_name: %s' %
test_platform_name)
+ def test_repository_paths(self):
+ # Note: for JSON file's backward-compatibility we use 'chrome' rather
+ # than 'chromium' here.
+ repos = super(ChromiumPort, self).test_repository_paths()
+ repos.append(('chrome', self.path_from_chromium_base()))
+ return repos
+
#
# PROTECTED METHODS
#
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py
index 0818d51..4df43e0 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py
@@ -81,15 +81,15 @@ class ChromiumLinuxPort(chromium.ChromiumPort):
#
def _build_path(self, *comps):
- if self._options.use_drt:
- base = os.path.join(self.path_from_webkit_base(), 'WebKit',
- 'chromium')
- else:
- base = self.path_from_chromium_base()
+ base = self.path_from_chromium_base()
if os.path.exists(os.path.join(base, 'sconsbuild')):
return os.path.join(base, 'sconsbuild', *comps)
- else:
+ if os.path.exists(os.path.join(base, 'out', *comps)) or not self._options.use_drt:
return os.path.join(base, 'out', *comps)
+ base = self.path_from_webkit_base()
+ if os.path.exists(os.path.join(base, 'sconsbuild')):
+ return os.path.join(base, 'sconsbuild', *comps)
+ return os.path.join(base, 'out', *comps)
def _check_apache_install(self):
result = chromium.check_file_exists(self._path_to_apache(),
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py
index aa3ac8d..abd84ae 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py
@@ -102,10 +102,11 @@ class ChromiumMacPort(chromium.ChromiumPort):
#
def _build_path(self, *comps):
- if self._options.use_drt:
- return self.path_from_webkit_base('WebKit', 'chromium',
- 'xcodebuild', *comps)
- return self.path_from_chromium_base('xcodebuild', *comps)
+ path = self.path_from_chromium_base('xcodebuild', *comps)
+ if os.path.exists(path) or not self._options.use_drt:
+ return path
+ return self.path_from_webkit_base('WebKit', 'chromium', 'xcodebuild',
+ *comps)
def _check_wdiff_install(self):
try:
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py
index ec1c33c..8072bc0 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py
@@ -117,13 +117,14 @@ class ChromiumWinPort(chromium.ChromiumPort):
#
def _build_path(self, *comps):
- if self._options.use_drt:
- return os.path.join(self.path_from_webkit_base(), 'WebKit',
- 'chromium', *comps)
p = self.path_from_chromium_base('webkit', *comps)
if os.path.exists(p):
return p
- return self.path_from_chromium_base('chrome', *comps)
+ p = self.path_from_chromium_base('chrome', *comps)
+ if os.path.exists(p) or not self._options.use_drt:
+ return p
+ return os.path.join(self.path_from_webkit_base(), 'WebKit', 'chromium',
+ *comps)
def _lighttpd_path(self, *comps):
return self.path_from_chromium_base('third_party', 'lighttpd', 'win',
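All three Chromium ports now share the same probing order: prefer the Chromium checkout's build output when it exists (or whenever --use-drt is off), and only then fall back to the DumpRenderTree build inside the WebKit tree. A condensed sketch of the shared decision, modeled on the Mac port and using hypothetical directory arguments (Linux and Windows differ only in the directory names they probe):

    import os

    def build_path(chromium_base, webkit_base, use_drt, *comps):
        # Prefer the Chromium output directory if present.
        p = os.path.join(chromium_base, 'xcodebuild', *comps)
        if os.path.exists(p) or not use_drt:
            return p
        # Fall back to the WebKit-internal DumpRenderTree build.
        return os.path.join(webkit_base, 'WebKit', 'chromium',
                            'xcodebuild', *comps)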
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py
index 22ae780..81bf39e 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py
@@ -207,12 +207,13 @@ class PyWebSocket(http_server.Lighttpd):
url = 'http'
url = url + '://127.0.0.1:%d/' % self._port
if not url_is_alive(url):
- fp = codecs.open(output_log, "utf-8")
- try:
+ if self._process.returncode == None:
+ # FIXME: We should use a non-static Executive for easier
+ # testing.
+ Executive().kill_process(self._process.pid)
+ with codecs.open(output_log, "r", "utf-8") as fp:
for line in fp:
_log.error(line)
- finally:
- fp.close()
raise PyWebSocketNotStarted(
'Failed to start %s server on port %s.' %
(self._server_name, self._port))
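The old call passed "utf-8" where codecs.open() expects its mode argument; the fix supplies "r" explicitly and switches to a with-block so the log file is closed even if logging raises. The signature worth remembering (log file name here is illustrative):

    import codecs

    # codecs.open(filename, mode='rb', encoding=None, ...): the second
    # positional argument is the mode, so the encoding must come third
    # (or be passed by keyword).
    with codecs.open("pywebsocket.log", "r", "utf-8") as fp:
        for line in fp:
            print line.rstrip()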
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py
index 3642286..e1fa673 100644
--- a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py
@@ -71,7 +71,8 @@ installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechan
url_subpath="mechanize")
installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
url_subpath="pep8-0.5.0/pep8.py")
-
+installer.install(url="http://www.adambarth.com/webkit/eliza",
+ target_name="eliza.py")
rietveld_dir = os.path.join(autoinstalled_dir, "rietveld")
installer = AutoInstaller(target_dir=rietveld_dir)
@@ -84,9 +85,9 @@ installer.install(url="http://webkit-rietveld.googlecode.com/svn/trunk/static/up
# organization purposes.
irc_dir = os.path.join(autoinstalled_dir, "irc")
installer = AutoInstaller(target_dir=irc_dir)
-installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip",
+installer.install(url="http://hivelocity.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip",
url_subpath="irclib.py")
-installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip",
+installer.install(url="http://hivelocity.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip",
url_subpath="ircbot.py")
pywebsocket_dir = os.path.join(autoinstalled_dir, "pywebsocket")
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url
deleted file mode 100644
index 4186aee..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url
+++ /dev/null
@@ -1 +0,0 @@
-http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url
deleted file mode 100644
index 0fb1ef6..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url
+++ /dev/null
@@ -1 +0,0 @@
-http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README
deleted file mode 100644
index 1d68cf3..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory is auto-generated by WebKit and is safe to delete.
-It contains needed third-party Python packages automatically downloaded from the web. \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py
deleted file mode 100644
index c1e4c6d..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url
deleted file mode 100644
index c723abf..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url
+++ /dev/null
@@ -1 +0,0 @@
-http://pypi.python.org/packages/source/C/ClientForm/ClientForm-0.2.10.zip \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py
deleted file mode 100644
index a622de7..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py
+++ /dev/null
@@ -1,3401 +0,0 @@
-"""HTML form handling for web clients.
-
-ClientForm is a Python module for handling HTML forms on the client
-side, useful for parsing HTML forms, filling them in and returning the
-completed forms to the server. It has developed from a port of Gisle
-Aas' Perl module HTML::Form, from the libwww-perl library, but the
-interface is not the same.
-
-The most useful docstring is the one for HTMLForm.
-
-RFC 1866: HTML 2.0
-RFC 1867: Form-based File Upload in HTML
-RFC 2388: Returning Values from Forms: multipart/form-data
-HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
-HTML 4.01 Specification, W3C Recommendation 24 December 1999
-
-
-Copyright 2002-2007 John J. Lee <jjl@pobox.com>
-Copyright 2005 Gary Poster
-Copyright 2005 Zope Corporation
-Copyright 1998-2000 Gisle Aas.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-# XXX
-# Remove parser testing hack
-# safeUrl()-ize action
-# Switch to unicode throughout (would be 0.3.x)
-# See Wichert Akkerman's 2004-01-22 message to c.l.py.
-# Add charset parameter to Content-type headers? How to find value??
-# Add some more functional tests
-# Especially single and multiple file upload on the internet.
-# Does file upload work when name is missing? Sourceforge tracker form
-# doesn't like it. Check standards, and test with Apache. Test
-# binary upload with Apache.
-# mailto submission & enctype text/plain
-# I'm not going to fix this unless somebody tells me what real servers
-# that want this encoding actually expect: If enctype is
-# application/x-www-form-urlencoded and there's a FILE control present.
-# Strictly, it should be 'name=data' (see HTML 4.01 spec., section
-# 17.13.2), but I send "name=" ATM. What about multiple file upload??
-
-# Would be nice, but I'm not going to do it myself:
-# -------------------------------------------------
-# Maybe a 0.4.x?
-# Replace by_label etc. with moniker / selector concept. Allows, eg.,
-# a choice between selection by value / id / label / element
-# contents. Or choice between matching labels exactly or by
-# substring. Etc.
-# Remove deprecated methods.
-# ...what else?
-# Work on DOMForm.
-# XForms? Don't know if there's a need here.
-
-__all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
- 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
- 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
- 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
- 'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
- 'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl',
- 'RadioControl', 'ScalarControl', 'SelectControl',
- 'SubmitButtonControl', 'SubmitControl', 'TextControl',
- 'TextareaControl', 'XHTMLCompatibleFormParser']
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-try: bool
-except NameError:
- def bool(expr):
- if expr: return True
- else: return False
-
-try:
- import logging
- import inspect
-except ImportError:
- def debug(msg, *args, **kwds):
- pass
-else:
- _logger = logging.getLogger("ClientForm")
- OPTIMIZATION_HACK = True
-
- def debug(msg, *args, **kwds):
- if OPTIMIZATION_HACK:
- return
-
- caller_name = inspect.stack()[1][3]
- extended_msg = '%%s %s' % msg
- extended_args = (caller_name,)+args
- debug = _logger.debug(extended_msg, *extended_args, **kwds)
-
- def _show_debug_messages():
- global OPTIMIZATION_HACK
- OPTIMIZATION_HACK = False
- _logger.setLevel(logging.DEBUG)
- handler = logging.StreamHandler(sys.stdout)
- handler.setLevel(logging.DEBUG)
- _logger.addHandler(handler)
-
-import sys, urllib, urllib2, types, mimetools, copy, urlparse, \
- htmlentitydefs, re, random
-from cStringIO import StringIO
-
-import sgmllib
-# monkeypatch to fix http://www.python.org/sf/803422 :-(
-sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
-
-# HTMLParser.HTMLParser is recent, so live without it if it's not available
-# (also, sgmllib.SGMLParser is much more tolerant of bad HTML)
-try:
- import HTMLParser
-except ImportError:
- HAVE_MODULE_HTMLPARSER = False
-else:
- HAVE_MODULE_HTMLPARSER = True
-
-try:
- import warnings
-except ImportError:
- def deprecation(message, stack_offset=0):
- pass
-else:
- def deprecation(message, stack_offset=0):
- warnings.warn(message, DeprecationWarning, stacklevel=3+stack_offset)
-
-VERSION = "0.2.10"
-
-CHUNK = 1024 # size of chunks fed to parser, in bytes
-
-DEFAULT_ENCODING = "latin-1"
-
-class Missing: pass
-
-_compress_re = re.compile(r"\s+")
-def compress_text(text): return _compress_re.sub(" ", text.strip())
-
-def normalize_line_endings(text):
- return re.sub(r"(?:(?<!\r)\n)|(?:\r(?!\n))", "\r\n", text)
-
-
-# This version of urlencode is from my Python 1.5.2 back-port of the
-# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
-# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
-def urlencode(query,doseq=False,):
- """Encode a sequence of two-element tuples or dictionary into a URL query \
-string.
-
- If any values in the query arg are sequences and doseq is true, each
- sequence element is converted to a separate parameter.
-
- If the query arg is a sequence of two-element tuples, the order of the
- parameters in the output will match the order of parameters in the
- input.
- """
-
- if hasattr(query,"items"):
- # mapping objects
- query = query.items()
- else:
- # it's a bother at times that strings and string-like objects are
- # sequences...
- try:
- # non-sequence items should not work with len()
- x = len(query)
- # non-empty strings will fail this
- if len(query) and type(query[0]) != types.TupleType:
- raise TypeError()
- # zero-length sequences of all types will get here and succeed,
- # but that's a minor nit - since the original implementation
- # allowed empty dicts that type of behavior probably should be
- # preserved for consistency
- except TypeError:
- ty,va,tb = sys.exc_info()
- raise TypeError("not a valid non-string sequence or mapping "
- "object", tb)
-
- l = []
- if not doseq:
- # preserve old behavior
- for k, v in query:
- k = urllib.quote_plus(str(k))
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- for k, v in query:
- k = urllib.quote_plus(str(k))
- if type(v) == types.StringType:
- v = urllib.quote_plus(v)
- l.append(k + '=' + v)
- elif type(v) == types.UnicodeType:
- # is there a reasonable way to convert to ASCII?
- # encode generates a string, but "replace" or "ignore"
- # lose information and "strict" can raise UnicodeError
- v = urllib.quote_plus(v.encode("ASCII","replace"))
- l.append(k + '=' + v)
- else:
- try:
- # is this a sufficient test for sequence-ness?
- x = len(v)
- except TypeError:
- # not a sequence
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- # loop over the sequence
- for elt in v:
- l.append(k + '=' + urllib.quote_plus(str(elt)))
- return '&'.join(l)
-
-def unescape(data, entities, encoding=DEFAULT_ENCODING):
- if data is None or "&" not in data:
- return data
-
- def replace_entities(match, entities=entities, encoding=encoding):
- ent = match.group()
- if ent[1] == "#":
- return unescape_charref(ent[2:-1], encoding)
-
- repl = entities.get(ent)
- if repl is not None:
- if type(repl) != type(""):
- try:
- repl = repl.encode(encoding)
- except UnicodeError:
- repl = ent
- else:
- repl = ent
-
- return repl
-
- return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
-
-def unescape_charref(data, encoding):
- name, base = data, 10
- if name.startswith("x"):
- name, base= name[1:], 16
- uc = unichr(int(name, base))
- if encoding is None:
- return uc
- else:
- try:
- repl = uc.encode(encoding)
- except UnicodeError:
- repl = "&#%s;" % data
- return repl
-
-def get_entitydefs():
- import htmlentitydefs
- from codecs import latin_1_decode
- entitydefs = {}
- try:
- htmlentitydefs.name2codepoint
- except AttributeError:
- entitydefs = {}
- for name, char in htmlentitydefs.entitydefs.items():
- uc = latin_1_decode(char)[0]
- if uc.startswith("&#") and uc.endswith(";"):
- uc = unescape_charref(uc[2:-1], None)
- entitydefs["&%s;" % name] = uc
- else:
- for name, codepoint in htmlentitydefs.name2codepoint.items():
- entitydefs["&%s;" % name] = unichr(codepoint)
- return entitydefs
-
-
-def issequence(x):
- try:
- x[0]
- except (TypeError, KeyError):
- return False
- except IndexError:
- pass
- return True
-
-def isstringlike(x):
- try: x+""
- except: return False
- else: return True
-
-
-def choose_boundary():
- """Return a string usable as a multipart boundary."""
- # follow IE and firefox
- nonce = "".join([str(random.randint(0, sys.maxint-1)) for i in 0,1,2])
- return "-"*27 + nonce
-
-# This cut-n-pasted MimeWriter from standard library is here so can add
-# to HTTP headers rather than message body when appropriate. It also uses
-# \r\n in place of \n. This is a bit nasty.
-class MimeWriter:
-
- """Generic MIME writer.
-
- Methods:
-
- __init__()
- addheader()
- flushheaders()
- startbody()
- startmultipartbody()
- nextpart()
- lastpart()
-
- A MIME writer is much more primitive than a MIME parser. It
- doesn't seek around on the output file, and it doesn't use large
- amounts of buffer space, so you have to write the parts in the
- order they should occur on the output file. It does buffer the
- headers you add, allowing you to rearrange their order.
-
- General usage is:
-
- f = <open the output file>
- w = MimeWriter(f)
- ...call w.addheader(key, value) 0 or more times...
-
- followed by either:
-
- f = w.startbody(content_type)
- ...call f.write(data) for body data...
-
- or:
-
- w.startmultipartbody(subtype)
- for each part:
- subwriter = w.nextpart()
- ...use the subwriter's methods to create the subpart...
- w.lastpart()
-
- The subwriter is another MimeWriter instance, and should be
- treated in the same way as the toplevel MimeWriter. This way,
- writing recursive body parts is easy.
-
- Warning: don't forget to call lastpart()!
-
- XXX There should be more state so calls made in the wrong order
- are detected.
-
- Some special cases:
-
- - startbody() just returns the file passed to the constructor;
- but don't use this knowledge, as it may be changed.
-
- - startmultipartbody() actually returns a file as well;
- this can be used to write the initial 'if you can read this your
- mailer is not MIME-aware' message.
-
- - If you call flushheaders(), the headers accumulated so far are
- written out (and forgotten); this is useful if you don't need a
- body part at all, e.g. for a subpart of type message/rfc822
- that's (mis)used to store some header-like information.
-
- - Passing a keyword argument 'prefix=<flag>' to addheader(),
- start*body() affects where the header is inserted; 0 means
- append at the end, 1 means insert at the start; default is
- append for addheader(), but insert for start*body(), which use
- it to determine where the Content-type header goes.
-
- """
-
- def __init__(self, fp, http_hdrs=None):
- self._http_hdrs = http_hdrs
- self._fp = fp
- self._headers = []
- self._boundary = []
- self._first_part = True
-
- def addheader(self, key, value, prefix=0,
- add_to_http_hdrs=0):
- """
- prefix is ignored if add_to_http_hdrs is true.
- """
- lines = value.split("\r\n")
- while lines and not lines[-1]: del lines[-1]
- while lines and not lines[0]: del lines[0]
- if add_to_http_hdrs:
- value = "".join(lines)
- # 2.2 urllib2 doesn't normalize header case
- self._http_hdrs.append((key.capitalize(), value))
- else:
- for i in range(1, len(lines)):
- lines[i] = " " + lines[i].strip()
- value = "\r\n".join(lines) + "\r\n"
- line = key.title() + ": " + value
- if prefix:
- self._headers.insert(0, line)
- else:
- self._headers.append(line)
-
- def flushheaders(self):
- self._fp.writelines(self._headers)
- self._headers = []
-
- def startbody(self, ctype=None, plist=[], prefix=1,
- add_to_http_hdrs=0, content_type=1):
- """
- prefix is ignored if add_to_http_hdrs is true.
- """
- if content_type and ctype:
- for name, value in plist:
- ctype = ctype + ';\r\n %s=%s' % (name, value)
- self.addheader("Content-Type", ctype, prefix=prefix,
- add_to_http_hdrs=add_to_http_hdrs)
- self.flushheaders()
- if not add_to_http_hdrs: self._fp.write("\r\n")
- self._first_part = True
- return self._fp
-
- def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
- add_to_http_hdrs=0, content_type=1):
- boundary = boundary or choose_boundary()
- self._boundary.append(boundary)
- return self.startbody("multipart/" + subtype,
- [("boundary", boundary)] + plist,
- prefix=prefix,
- add_to_http_hdrs=add_to_http_hdrs,
- content_type=content_type)
-
- def nextpart(self):
- boundary = self._boundary[-1]
- if self._first_part:
- self._first_part = False
- else:
- self._fp.write("\r\n")
- self._fp.write("--" + boundary + "\r\n")
- return self.__class__(self._fp)
-
- def lastpart(self):
- if self._first_part:
- self.nextpart()
- boundary = self._boundary.pop()
- self._fp.write("\r\n--" + boundary + "--\r\n")
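-
-# A hedged sketch (not in the original file) of how the MimeWriter above is
-# driven when building a multipart/form-data request body; StringIO stands
-# in for the output file, and the control name "q" is made up.
-#
-#     from StringIO import StringIO
-#     http_hdrs = []
-#     buf = StringIO()
-#     mw = MimeWriter(buf, http_hdrs=http_hdrs)
-#     mw.startmultipartbody("form-data", add_to_http_hdrs=True, prefix=0)
-#     part = mw.nextpart()
-#     part.addheader("Content-Disposition", 'form-data; name="q"', 1)
-#     part.startbody(prefix=0).write("spam")
-#     mw.lastpart()
-#     # http_hdrs now holds the Content-Type (with boundary);
-#     # buf.getvalue() is the body, ending with the closing boundary line.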
-
-
-class LocateError(ValueError): pass
-class AmbiguityError(LocateError): pass
-class ControlNotFoundError(LocateError): pass
-class ItemNotFoundError(LocateError): pass
-
-class ItemCountError(ValueError): pass
-
-# for backwards compatibility, ParseError derives from exceptions that were
-# raised by versions of ClientForm <= 0.2.5
-if HAVE_MODULE_HTMLPARSER:
- SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
- class ParseError(sgmllib.SGMLParseError,
- HTMLParser.HTMLParseError,
- ):
- pass
-else:
- if hasattr(sgmllib, "SGMLParseError"):
- SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
- class ParseError(sgmllib.SGMLParseError):
- pass
- else:
- SGMLLIB_PARSEERROR = RuntimeError
- class ParseError(RuntimeError):
- pass
-
-
-class _AbstractFormParser:
- """forms attribute contains parsed form data (as tuples) on completion;
- the _ParseFileEx function turns these into HTMLForm instances."""
- # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
- def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
- if entitydefs is None:
- entitydefs = get_entitydefs()
- self._entitydefs = entitydefs
- self._encoding = encoding
-
- self.base = None
- self.forms = []
- self.labels = []
- self._current_label = None
- self._current_form = None
- self._select = None
- self._optgroup = None
- self._option = None
- self._textarea = None
-
- # forms[0] will contain all controls that are outside of any form
- # self._global_form is an alias for self.forms[0]
- self._global_form = None
- self.start_form([])
- self.end_form()
- self._current_form = self._global_form = self.forms[0]
-
- def do_base(self, attrs):
- debug("%s", attrs)
- for key, value in attrs:
- if key == "href":
- self.base = self.unescape_attr_if_required(value)
-
- def end_body(self):
- debug("")
- if self._current_label is not None:
- self.end_label()
- if self._current_form is not self._global_form:
- self.end_form()
-
- def start_form(self, attrs):
- debug("%s", attrs)
- if self._current_form is not self._global_form:
- raise ParseError("nested FORMs")
- name = None
- action = None
- enctype = "application/x-www-form-urlencoded"
- method = "GET"
- d = {}
- for key, value in attrs:
- if key == "name":
- name = self.unescape_attr_if_required(value)
- elif key == "action":
- action = self.unescape_attr_if_required(value)
- elif key == "method":
- method = self.unescape_attr_if_required(value.upper())
- elif key == "enctype":
- enctype = self.unescape_attr_if_required(value.lower())
- d[key] = self.unescape_attr_if_required(value)
- controls = []
- self._current_form = (name, action, method, enctype), d, controls
-
- def end_form(self):
- debug("")
- if self._current_label is not None:
- self.end_label()
- if self._current_form is self._global_form:
- raise ParseError("end of FORM before start")
- self.forms.append(self._current_form)
- self._current_form = self._global_form
-
- def start_select(self, attrs):
- debug("%s", attrs)
- if self._select is not None:
- raise ParseError("nested SELECTs")
- if self._textarea is not None:
- raise ParseError("SELECT inside TEXTAREA")
- d = {}
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
-
- self._select = d
- self._add_label(d)
-
- self._append_select_control({"__select": d})
-
- def end_select(self):
- debug("")
- if self._select is None:
- raise ParseError("end of SELECT before start")
-
- if self._option is not None:
- self._end_option()
-
- self._select = None
-
- def start_optgroup(self, attrs):
- debug("%s", attrs)
- if self._select is None:
- raise ParseError("OPTGROUP outside of SELECT")
- d = {}
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
-
- self._optgroup = d
-
- def end_optgroup(self):
- debug("")
- if self._optgroup is None:
- raise ParseError("end of OPTGROUP before start")
- self._optgroup = None
-
- def _start_option(self, attrs):
- debug("%s", attrs)
- if self._select is None:
- raise ParseError("OPTION outside of SELECT")
- if self._option is not None:
- self._end_option()
-
- d = {}
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
-
- self._option = {}
- self._option.update(d)
- if (self._optgroup and self._optgroup.has_key("disabled") and
- not self._option.has_key("disabled")):
- self._option["disabled"] = None
-
- def _end_option(self):
- debug("")
- if self._option is None:
- raise ParseError("end of OPTION before start")
-
- contents = self._option.get("contents", "").strip()
- self._option["contents"] = contents
- if not self._option.has_key("value"):
- self._option["value"] = contents
- if not self._option.has_key("label"):
- self._option["label"] = contents
- # stuff dict of SELECT HTML attrs into a special private key
- # (gets deleted again later)
- self._option["__select"] = self._select
- self._append_select_control(self._option)
- self._option = None
-
- def _append_select_control(self, attrs):
- debug("%s", attrs)
- controls = self._current_form[2]
- name = self._select.get("name")
- controls.append(("select", name, attrs))
-
- def start_textarea(self, attrs):
- debug("%s", attrs)
- if self._textarea is not None:
- raise ParseError("nested TEXTAREAs")
- if self._select is not None:
- raise ParseError("TEXTAREA inside SELECT")
- d = {}
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
- self._add_label(d)
-
- self._textarea = d
-
- def end_textarea(self):
- debug("")
- if self._textarea is None:
- raise ParseError("end of TEXTAREA before start")
- controls = self._current_form[2]
- name = self._textarea.get("name")
- controls.append(("textarea", name, self._textarea))
- self._textarea = None
-
- def start_label(self, attrs):
- debug("%s", attrs)
- if self._current_label:
- self.end_label()
- d = {}
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
- taken = bool(d.get("for")) # empty id is invalid
- d["__text"] = ""
- d["__taken"] = taken
- if taken:
- self.labels.append(d)
- self._current_label = d
-
- def end_label(self):
- debug("")
- label = self._current_label
- if label is None:
- # something is ugly in the HTML, but we're ignoring it
- return
- self._current_label = None
- # if it is staying around, it is True in all cases
- del label["__taken"]
-
- def _add_label(self, d):
- #debug("%s", d)
- if self._current_label is not None:
- if not self._current_label["__taken"]:
- self._current_label["__taken"] = True
- d["__label"] = self._current_label
-
- def handle_data(self, data):
- debug("%s", data)
-
- if self._option is not None:
- # self._option is a dictionary of the OPTION element's HTML
- # attributes, but it has two special keys: the "contents" key
- # holds the text between the OPTION tags, and the "__select" key
- # holds the enclosing SELECT's attributes (see _end_option)
- map = self._option
- key = "contents"
- elif self._textarea is not None:
- map = self._textarea
- key = "value"
- data = normalize_line_endings(data)
- # not if within option or textarea
- elif self._current_label is not None:
- map = self._current_label
- key = "__text"
- else:
- return
-
- if data and not map.has_key(key):
- # according to
- # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1, a line
- # break immediately after a start tag or immediately before an end
- # tag must be ignored, but real browsers only ignore a line break
- # after a start tag, so we'll do that.
- if data[0:2] == "\r\n":
- data = data[2:]
- elif data[0:1] in ["\n", "\r"]:
- data = data[1:]
- map[key] = data
- else:
- map[key] = map[key] + data
-
- def do_button(self, attrs):
- debug("%s", attrs)
- d = {}
- d["type"] = "submit" # default
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
- controls = self._current_form[2]
-
- type = d["type"]
- name = d.get("name")
- # we don't want to lose information, so use a type string that
- # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
- # e.g. type for BUTTON/RESET is "resetbutton"
- # (type for INPUT/RESET is "reset")
- type = type+"button"
- self._add_label(d)
- controls.append((type, name, d))
-
- def do_input(self, attrs):
- debug("%s", attrs)
- d = {}
- d["type"] = "text" # default
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
- controls = self._current_form[2]
-
- type = d["type"]
- name = d.get("name")
- self._add_label(d)
- controls.append((type, name, d))
-
- def do_isindex(self, attrs):
- debug("%s", attrs)
- d = {}
- for key, val in attrs:
- d[key] = self.unescape_attr_if_required(val)
- controls = self._current_form[2]
-
- self._add_label(d)
- # isindex doesn't have type or name HTML attributes
- controls.append(("isindex", None, d))
-
- def handle_entityref(self, name):
- #debug("%s", name)
- self.handle_data(unescape(
- '&%s;' % name, self._entitydefs, self._encoding))
-
- def handle_charref(self, name):
- #debug("%s", name)
- self.handle_data(unescape_charref(name, self._encoding))
-
- def unescape_attr(self, name):
- #debug("%s", name)
- return unescape(name, self._entitydefs, self._encoding)
-
- def unescape_attrs(self, attrs):
- #debug("%s", attrs)
- escaped_attrs = {}
- for key, val in attrs.items():
- try:
- val.items
- except AttributeError:
- escaped_attrs[key] = self.unescape_attr(val)
- else:
- # e.g. "__select" -- yuck!
- escaped_attrs[key] = self.unescape_attrs(val)
- return escaped_attrs
-
- def unknown_entityref(self, ref): self.handle_data("&%s;" % ref)
- def unknown_charref(self, ref): self.handle_data("&#%s;" % ref)
-
-
-if not HAVE_MODULE_HTMLPARSER:
- class XHTMLCompatibleFormParser:
- def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
- raise ValueError("HTMLParser could not be imported")
-else:
- class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
- """Good for XHTML, bad for tolerance of incorrect HTML."""
- # thanks to Michael Howitz for this!
- def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
- HTMLParser.HTMLParser.__init__(self)
- _AbstractFormParser.__init__(self, entitydefs, encoding)
-
- def feed(self, data):
- try:
- HTMLParser.HTMLParser.feed(self, data)
- except HTMLParser.HTMLParseError, exc:
- raise ParseError(exc)
-
- def start_option(self, attrs):
- _AbstractFormParser._start_option(self, attrs)
-
- def end_option(self):
- _AbstractFormParser._end_option(self)
-
- def handle_starttag(self, tag, attrs):
- try:
- method = getattr(self, "start_" + tag)
- except AttributeError:
- try:
- method = getattr(self, "do_" + tag)
- except AttributeError:
- pass # unknown tag
- else:
- method(attrs)
- else:
- method(attrs)
-
- def handle_endtag(self, tag):
- try:
- method = getattr(self, "end_" + tag)
- except AttributeError:
- pass # unknown tag
- else:
- method()
-
- def unescape(self, name):
- # Use the entitydefs passed into constructor, not
- # HTMLParser.HTMLParser's entitydefs.
- return self.unescape_attr(name)
-
- def unescape_attr_if_required(self, name):
- return name # HTMLParser.HTMLParser already did it
- def unescape_attrs_if_required(self, attrs):
- return attrs # ditto
-
- def close(self):
- HTMLParser.HTMLParser.close(self)
- self.end_body()
-
-
-class _AbstractSgmllibParser(_AbstractFormParser):
-
- def do_option(self, attrs):
- _AbstractFormParser._start_option(self, attrs)
-
- if sys.version_info[:2] >= (2,5):
- # we override this attr to decode hex charrefs
- entity_or_charref = re.compile(
- '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
- def convert_entityref(self, name):
- return unescape("&%s;" % name, self._entitydefs, self._encoding)
- def convert_charref(self, name):
- return unescape_charref("%s" % name, self._encoding)
- def unescape_attr_if_required(self, name):
- return name # sgmllib already did it
- def unescape_attrs_if_required(self, attrs):
- return attrs # ditto
- else:
- def unescape_attr_if_required(self, name):
- return self.unescape_attr(name)
- def unescape_attrs_if_required(self, attrs):
- return self.unescape_attrs(attrs)
-
-
-class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
- """Good for tolerance of incorrect HTML, bad for XHTML."""
- def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
- sgmllib.SGMLParser.__init__(self)
- _AbstractFormParser.__init__(self, entitydefs, encoding)
-
- def feed(self, data):
- try:
- sgmllib.SGMLParser.feed(self, data)
- except SGMLLIB_PARSEERROR, exc:
- raise ParseError(exc)
-
- def close(self):
- sgmllib.SGMLParser.close(self)
- self.end_body()
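-
-# Illustrative sketch (not part of the original source): both parser classes
-# expose the same feed()/close() interface, so either can be handed to the
-# ParseFile / ParseResponse functions below.  The file name and base URI are
-# made up.
-#
-#     forms = ParseFile(open("form.html"), "http://example.com/",
-#                       form_parser_class=XHTMLCompatibleFormParser,
-#                       backwards_compat=False)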
-
-
-# sigh, must support mechanize by allowing dynamic creation of classes based on
-# its bundled copy of BeautifulSoup (which was necessary because of dependency
-# problems)
-
-def _create_bs_classes(bs,
- icbinbs,
- ):
- class _AbstractBSFormParser(_AbstractSgmllibParser):
- bs_base_class = None
- def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
- _AbstractFormParser.__init__(self, entitydefs, encoding)
- self.bs_base_class.__init__(self)
- def handle_data(self, data):
- _AbstractFormParser.handle_data(self, data)
- self.bs_base_class.handle_data(self, data)
- def feed(self, data):
- try:
- self.bs_base_class.feed(self, data)
- except SGMLLIB_PARSEERROR, exc:
- raise ParseError(exc)
- def close(self):
- self.bs_base_class.close(self)
- self.end_body()
-
- class RobustFormParser(_AbstractBSFormParser, bs):
- """Tries to be highly tolerant of incorrect HTML."""
- pass
- RobustFormParser.bs_base_class = bs
- class NestingRobustFormParser(_AbstractBSFormParser, icbinbs):
- """Tries to be highly tolerant of incorrect HTML.
-
- Different from RobustFormParser in that it more often guesses nesting
- above missing end tags (see BeautifulSoup docs).
-
- """
- pass
- NestingRobustFormParser.bs_base_class = icbinbs
-
- return RobustFormParser, NestingRobustFormParser
-
-try:
- if sys.version_info[:2] < (2, 2):
- raise ImportError # BeautifulSoup uses generators
- import BeautifulSoup
-except ImportError:
- pass
-else:
- RobustFormParser, NestingRobustFormParser = _create_bs_classes(
- BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup
- )
- __all__ += ['RobustFormParser', 'NestingRobustFormParser']
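-
-# Sketch (not in the original file): when the optional BeautifulSoup import
-# above succeeds, the generated classes plug into the same hook; the file
-# name and base URI are made up.
-#
-#     forms = ParseFile(open("bad.html"), "http://example.com/",
-#                       form_parser_class=RobustFormParser,
-#                       backwards_compat=False)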
-
-
-#FormParser = XHTMLCompatibleFormParser # testing hack
-#FormParser = RobustFormParser # testing hack
-
-
-def ParseResponseEx(response,
- select_default=False,
- form_parser_class=FormParser,
- request_class=urllib2.Request,
- entitydefs=None,
- encoding=DEFAULT_ENCODING,
-
- # private
- _urljoin=urlparse.urljoin,
- _urlparse=urlparse.urlparse,
- _urlunparse=urlparse.urlunparse,
- ):
- """Identical to ParseResponse, except that:
-
- 1. The returned list contains an extra item. The first form in the list
- contains all controls not contained in any FORM element.
-
- 2. The arguments ignore_errors and backwards_compat have been removed.
-
- 3. Backwards-compatibility mode (backwards_compat=True) is not available.
- """
- return _ParseFileEx(response, response.geturl(),
- select_default,
- False,
- form_parser_class,
- request_class,
- entitydefs,
- False,
- encoding,
- _urljoin=_urljoin,
- _urlparse=_urlparse,
- _urlunparse=_urlunparse,
- )
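-
-# A minimal sketch (not part of the original module); the URL is made up.
-#
-#     import urllib2
-#     response = urllib2.urlopen("http://example.com/form.html")
-#     forms = ParseResponseEx(response)
-#     global_form = forms[0]  # controls outside any FORM element
-#     form = forms[1]         # first real FORM in the document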
-
-def ParseFileEx(file, base_uri,
- select_default=False,
- form_parser_class=FormParser,
- request_class=urllib2.Request,
- entitydefs=None,
- encoding=DEFAULT_ENCODING,
-
- # private
- _urljoin=urlparse.urljoin,
- _urlparse=urlparse.urlparse,
- _urlunparse=urlparse.urlunparse,
- ):
- """Identical to ParseFile, except that:
-
- 1. The returned list contains an extra item. The first form in the list
- contains all controls not contained in any FORM element.
-
- 2. The arguments ignore_errors and backwards_compat have been removed.
-
- 3. Backwards-compatibility mode (backwards_compat=True) is not available.
- """
- return _ParseFileEx(file, base_uri,
- select_default,
- False,
- form_parser_class,
- request_class,
- entitydefs,
- False,
- encoding,
- _urljoin=_urljoin,
- _urlparse=_urlparse,
- _urlunparse=_urlunparse,
- )
-
-def ParseResponse(response, *args, **kwds):
- """Parse HTTP response and return a list of HTMLForm instances.
-
- The return value of urllib2.urlopen can be conveniently passed to this
- function as the response parameter.
-
- ClientForm.ParseError is raised on parse errors.
-
- response: file-like object (supporting read() method) with a method
- geturl(), returning the URI of the HTTP response
- select_default: for multiple-selection SELECT controls and RADIO controls,
- pick the first item as the default if none are selected in the HTML
- form_parser_class: class to instantiate and use to parse the response
- request_class: class to return from .click() method (default is
- urllib2.Request)
- entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
- definitions (a sensible default is used)
- encoding: character encoding used for encoding numeric character references
- when matching link text. ClientForm does not attempt to find the encoding
- in a META HTTP-EQUIV attribute in the document itself (mechanize, for
- example, does do that and will pass the correct value to ClientForm using
- this parameter).
-
- backwards_compat: boolean that determines whether the returned HTMLForm
- objects are backwards-compatible with old code. If backwards_compat is
- true:
-
- - ClientForm 0.1 code will continue to work as before.
-
- - Label searches that do not specify a nr (number or count) will always
- get the first match, even if other controls match. If
- backwards_compat is False, label searches that have ambiguous results
- will raise an AmbiguityError.
-
- - Item label matching is done by strict string comparison rather than
- substring matching.
-
- - De-selecting individual list items is allowed even if the Item is
- disabled.
-
- The backwards_compat argument will be deprecated in a future release.
-
- Pass a true value for select_default if you want the behaviour specified by
- RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
- RADIO or multiple-selection SELECT control if none were selected in the
- HTML. Most browsers (including Microsoft Internet Explorer (IE) and
- Netscape Navigator) instead leave all items unselected in these cases. The
- W3C HTML 4.0 standard leaves this behaviour undefined in the case of
- multiple-selection SELECT controls, but insists that at least one RADIO
- button should be checked at all times, in contradiction to browser
- behaviour.
-
- There is a choice of parsers. ClientForm.XHTMLCompatibleFormParser (uses
- HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
- sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
- Note that HTMLParser is only available in Python 2.2 and later. You can
- pass your own class in here as a hack to work around bad HTML, but at your
- own risk: there is no well-defined interface.
-
- """
- return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:]
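-
-# Canonical usage, sketched here for illustration (not part of the original
-# file); the URL and the control name "q" are made up.
-#
-#     import urllib2
-#     response = urllib2.urlopen("http://example.com/search.html")
-#     forms = ParseResponse(response, backwards_compat=False)
-#     form = forms[0]
-#     form["q"] = "web forms"              # set the "q" text control
-#     reply = urllib2.urlopen(form.click())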
-
-def ParseFile(file, base_uri, *args, **kwds):
- """Parse HTML and return a list of HTMLForm instances.
-
- ClientForm.ParseError is raised on parse errors.
-
- file: file-like object (supporting read() method) containing HTML with zero
- or more forms to be parsed
- base_uri: the URI of the document (note that the base URI used to submit
- the form will be that given in the BASE element if present, not that of
- the document)
-
- For the other arguments and further details, see ParseResponse.__doc__.
-
- """
- return _ParseFileEx(file, base_uri, *args, **kwds)[1:]
-
-def _ParseFileEx(file, base_uri,
- select_default=False,
- ignore_errors=False,
- form_parser_class=FormParser,
- request_class=urllib2.Request,
- entitydefs=None,
- backwards_compat=True,
- encoding=DEFAULT_ENCODING,
- _urljoin=urlparse.urljoin,
- _urlparse=urlparse.urlparse,
- _urlunparse=urlparse.urlunparse,
- ):
- if backwards_compat:
- deprecation("operating in backwards-compatibility mode", 1)
- fp = form_parser_class(entitydefs, encoding)
- while 1:
- data = file.read(CHUNK)
- try:
- fp.feed(data)
- except ParseError, e:
- e.base_uri = base_uri
- raise
- if len(data) != CHUNK: break
- fp.close()
- if fp.base is not None:
- # HTML BASE element takes precedence over document URI
- base_uri = fp.base
- labels = []  # filled below with Label(l) for each l in fp.labels
- id_to_labels = {}
- for l in fp.labels:
- label = Label(l)
- labels.append(label)
- for_id = l["for"]
- coll = id_to_labels.get(for_id)
- if coll is None:
- id_to_labels[for_id] = [label]
- else:
- coll.append(label)
- forms = []
- for (name, action, method, enctype), attrs, controls in fp.forms:
- if action is None:
- action = base_uri
- else:
- action = _urljoin(base_uri, action)
- # would be nice to make HTMLForm class (form builder) pluggable
- form = HTMLForm(
- action, method, enctype, name, attrs, request_class,
- forms, labels, id_to_labels, backwards_compat)
- form._urlparse = _urlparse
- form._urlunparse = _urlunparse
- for ii in range(len(controls)):
- type, name, attrs = controls[ii]
- # index=ii*10 allows ImageControl to return multiple ordered pairs
- form.new_control(
- type, name, attrs, select_default=select_default, index=ii*10)
- forms.append(form)
- for form in forms:
- form.fixup()
- return forms
-
-
-class Label:
- def __init__(self, attrs):
- self.id = attrs.get("for")
- self._text = attrs.get("__text").strip()
- self._ctext = compress_text(self._text)
- self.attrs = attrs
- self._backwards_compat = False # maintained by HTMLForm
-
- def __getattr__(self, name):
- if name == "text":
- if self._backwards_compat:
- return self._text
- else:
- return self._ctext
- return getattr(Label, name)
-
- def __setattr__(self, name, value):
- if name == "text":
- # don't see any need for this, so make it read-only
- raise AttributeError("text attribute is read-only")
- self.__dict__[name] = value
-
- def __str__(self):
- return "<Label(id=%r, text=%r)>" % (self.id, self.text)
-
-
-def _get_label(attrs):
- text = attrs.get("__label")
- if text is not None:
- return Label(text)
- else:
- return None
-
-class Control:
- """An HTML form control.
-
- An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm
- are accessed using the HTMLForm.find_control method or the
- HTMLForm.controls attribute.
-
- Control instances are usually constructed using the ParseFile /
- ParseResponse functions. If you use those functions, you can ignore the
- rest of this paragraph. A Control is only properly initialised after the
- fixup method has been called. In fact, this is only strictly necessary for
- ListControl instances, because ListControls are built up from ListControls
- each containing only a single item, and their initial value(s) can only be
- known after the sequence is complete.
-
- The types and values that are acceptable for assignment to the value
- attribute are defined by subclasses.
-
- If the disabled attribute is true, this represents the state typically
- represented by browsers by 'greying out' a control. If the disabled
- attribute is true, the Control will raise AttributeError if an attempt is
- made to change its value. In addition, the control will not be considered
- 'successful' as defined by the W3C HTML 4 standard -- ie. it will
- contribute no data to the return value of the HTMLForm.click* methods. To
- enable a control, set the disabled attribute to a false value.
-
- If the readonly attribute is true, the Control will raise AttributeError if
- an attempt is made to change its value. To make a control writable, set
- the readonly attribute to a false value.
-
- All controls have the disabled and readonly attributes, not only those that
- may have the HTML attributes of the same names.
-
- On assignment to the value attribute, the following exceptions are raised:
- TypeError, AttributeError (if the value attribute should not be assigned
- to, because the control is disabled, for example) and ValueError.
-
- If the name or value attributes are None, or the value is an empty list, or
- if the control is disabled, the control is not successful.
-
- Public attributes:
-
- type: string describing type of control (see the keys of the
- HTMLForm.type2class dictionary for the allowable values) (readonly)
- name: name of control (readonly)
- value: current value of control (subclasses may allow a single value, a
- sequence of values, or either)
- disabled: disabled state
- readonly: readonly state
- id: value of id HTML attribute
-
- """
- def __init__(self, type, name, attrs, index=None):
- """
- type: string describing type of control (see the keys of the
- HTMLForm.type2class dictionary for the allowable values)
- name: control name
- attrs: HTML attributes of control's HTML element
-
- """
- raise NotImplementedError()
-
- def add_to_form(self, form):
- self._form = form
- form.controls.append(self)
-
- def fixup(self):
- pass
-
- def is_of_kind(self, kind):
- raise NotImplementedError()
-
- def clear(self):
- raise NotImplementedError()
-
- def __getattr__(self, name): raise NotImplementedError()
- def __setattr__(self, name, value): raise NotImplementedError()
-
- def pairs(self):
- """Return list of (key, value) pairs suitable for passing to urlencode.
- """
- return [(k, v) for (i, k, v) in self._totally_ordered_pairs()]
-
- def _totally_ordered_pairs(self):
- """Return list of (key, value, index) tuples.
-
- Like pairs, but allows preserving correct ordering even where several
- controls are involved.
-
- """
- raise NotImplementedError()
-
- def _write_mime_data(self, mw, name, value):
- """Write data for a subitem of this control to a MimeWriter."""
- # called by HTMLForm
- mw2 = mw.nextpart()
- mw2.addheader("Content-Disposition",
- 'form-data; name="%s"' % name, 1)
- f = mw2.startbody(prefix=0)
- f.write(value)
-
- def __str__(self):
- raise NotImplementedError()
-
- def get_labels(self):
- """Return all labels (Label instances) for this control.
-
- If the control was surrounded by a <label> tag, that will be the first
- label; all other labels, connected by 'for' and 'id', are in the order
- they appear in the HTML.
-
- """
- res = []
- if self._label:
- res.append(self._label)
- if self.id:
- res.extend(self._form._id_to_labels.get(self.id, ()))
- return res
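-
-# Sketch (not in the original source) of the disabled/readonly contract
-# described in the Control docstring; "form" and the control name "comment"
-# are made up, with form obtained from ParseResponse.
-#
-#     control = form.find_control("comment")
-#     control.readonly = False   # make a readonly control writable
-#     control.value = "hello"    # AttributeError if disabled or readonly
-#     control.disabled = True    # control now contributes no data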
-
-
-#---------------------------------------------------
-class ScalarControl(Control):
- """Control whose value is not restricted to one of a prescribed set.
-
- Some ScalarControls don't accept any value attribute. Otherwise, takes a
- single value, which must be string-like.
-
- Additional read-only public attribute:
-
- attrs: dictionary mapping the names of original HTML attributes of the
- control to their values
-
- """
- def __init__(self, type, name, attrs, index=None):
- self._index = index
- self._label = _get_label(attrs)
- self.__dict__["type"] = type.lower()
- self.__dict__["name"] = name
- self._value = attrs.get("value")
- self.disabled = attrs.has_key("disabled")
- self.readonly = attrs.has_key("readonly")
- self.id = attrs.get("id")
-
- self.attrs = attrs.copy()
-
- self._clicked = False
-
- self._urlparse = urlparse.urlparse
- self._urlunparse = urlparse.urlunparse
-
- def __getattr__(self, name):
- if name == "value":
- return self.__dict__["_value"]
- else:
- raise AttributeError("%s instance has no attribute '%s'" %
- (self.__class__.__name__, name))
-
- def __setattr__(self, name, value):
- if name == "value":
- if not isstringlike(value):
- raise TypeError("must assign a string")
- elif self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- elif self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- self.__dict__["_value"] = value
- elif name in ("name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def _totally_ordered_pairs(self):
- name = self.name
- value = self.value
- if name is None or value is None or self.disabled:
- return []
- return [(self._index, name, value)]
-
- def clear(self):
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- self.__dict__["_value"] = None
-
- def __str__(self):
- name = self.name
- value = self.value
- if name is None: name = "<None>"
- if value is None: value = "<None>"
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = ", ".join(infos)
- if info: info = " (%s)" % info
-
- return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
-
-
-#---------------------------------------------------
-class TextControl(ScalarControl):
- """Textual input control.
-
- Covers:
-
- INPUT/TEXT
- INPUT/PASSWORD
- INPUT/HIDDEN
- TEXTAREA
-
- """
- def __init__(self, type, name, attrs, index=None):
- ScalarControl.__init__(self, type, name, attrs, index)
- if self.type == "hidden": self.readonly = True
- if self._value is None:
- self._value = ""
-
- def is_of_kind(self, kind): return kind == "text"
-
-#---------------------------------------------------
-class FileControl(ScalarControl):
- """File upload with INPUT TYPE=FILE.
-
- The value attribute of a FileControl is always None. Use add_file instead.
-
- Additional public method: add_file
-
- """
-
- def __init__(self, type, name, attrs, index=None):
- ScalarControl.__init__(self, type, name, attrs, index)
- self._value = None
- self._upload_data = []
-
- def is_of_kind(self, kind): return kind == "file"
-
- def clear(self):
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- self._upload_data = []
-
- def __setattr__(self, name, value):
- if name in ("value", "name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def add_file(self, file_object, content_type=None, filename=None):
- if not hasattr(file_object, "read"):
- raise TypeError("file-like object must have read method")
- if content_type is not None and not isstringlike(content_type):
- raise TypeError("content type must be None or string-like")
- if filename is not None and not isstringlike(filename):
- raise TypeError("filename must be None or string-like")
- if content_type is None:
- content_type = "application/octet-stream"
- self._upload_data.append((file_object, content_type, filename))
-
- def _totally_ordered_pairs(self):
- # XXX should it be successful even if unnamed?
- if self.name is None or self.disabled:
- return []
- return [(self._index, self.name, "")]
-
- def _write_mime_data(self, mw, _name, _value):
- # called by HTMLForm
- # assert _name == self.name and _value == ''
- if len(self._upload_data) < 2:
- if len(self._upload_data) == 0:
- file_object = StringIO()
- content_type = "application/octet-stream"
- filename = ""
- else:
- file_object, content_type, filename = self._upload_data[0]
- if filename is None:
- filename = ""
- mw2 = mw.nextpart()
- fn_part = '; filename="%s"' % filename
- disp = 'form-data; name="%s"%s' % (self.name, fn_part)
- mw2.addheader("Content-Disposition", disp, prefix=1)
- fh = mw2.startbody(content_type, prefix=0)
- fh.write(file_object.read())
- else:
- # multiple files
- mw2 = mw.nextpart()
- disp = 'form-data; name="%s"' % self.name
- mw2.addheader("Content-Disposition", disp, prefix=1)
- fh = mw2.startmultipartbody("mixed", prefix=0)
- for file_object, content_type, filename in self._upload_data:
- mw3 = mw2.nextpart()
- if filename is None:
- filename = ""
- fn_part = '; filename="%s"' % filename
- disp = "file%s" % fn_part
- mw3.addheader("Content-Disposition", disp, prefix=1)
- fh2 = mw3.startbody(content_type, prefix=0)
- fh2.write(file_object.read())
- mw2.lastpart()
-
- def __str__(self):
- name = self.name
- if name is None: name = "<None>"
-
- if not self._upload_data:
- value = "<No files added>"
- else:
- value = []
- for file, ctype, filename in self._upload_data:
- if filename is None:
- value.append("<Unnamed file>")
- else:
- value.append(filename)
- value = ", ".join(value)
-
- info = []
- if self.disabled: info.append("disabled")
- if self.readonly: info.append("readonly")
- info = ", ".join(info)
- if info: info = " (%s)" % info
-
- return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
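-
-# A hedged sketch (not part of the original module) of attaching an upload
-# via the add_file method above; "form" and the file contents are made up.
-#
-#     from StringIO import StringIO
-#     upload = form.find_control(type="file")
-#     upload.add_file(StringIO("file contents"), "text/plain", "report.txt")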
-
-
-#---------------------------------------------------
-class IsindexControl(ScalarControl):
- """ISINDEX control.
-
- ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really
- part of regular HTML forms at all, and predates them. You're only allowed
- one ISINDEX per HTML document. ISINDEX and regular form submission are
- mutually exclusive -- either submit a form, or the ISINDEX.
-
- Having said this, since ISINDEX controls may appear in forms (which is
- probably bad HTML), ParseFile / ParseResponse will include them in the
- HTMLForm instances they return. You can set the ISINDEX's value, as with
- any other control (but note that ISINDEX controls have no name, so you'll
- need to use the type argument of set_value!). When you submit the form,
- the ISINDEX will not be successful (ie., no data will get returned to the
- server as a result of its presence), unless you click on the ISINDEX
- control, in which case the ISINDEX gets submitted instead of the form:
-
- form.set_value("my isindex value", type="isindex")
- urllib2.urlopen(form.click(type="isindex"))
-
- ISINDEX elements outside of FORMs are ignored. If you want to submit one
- by hand, do it like so:
-
- url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
- result = urllib2.urlopen(url)
-
- """
- def __init__(self, type, name, attrs, index=None):
- ScalarControl.__init__(self, type, name, attrs, index)
- if self._value is None:
- self._value = ""
-
- def is_of_kind(self, kind): return kind in ["text", "clickable"]
-
- def _totally_ordered_pairs(self):
- return []
-
- def _click(self, form, coord, return_type, request_class=urllib2.Request):
- # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
- # want "bar+baz".
- # This doesn't seem to be specified in the HTML 4.01 spec. (ISINDEX is
- # deprecated in 4.01, but the spec should still say how to submit it).
- # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
- parts = self._urlparse(form.action)
- rest, (query, frag) = parts[:-2], parts[-2:]
- parts = rest + (urllib.quote_plus(self.value), None)
- url = self._urlunparse(parts)
- req_data = url, None, []
-
- if return_type == "pairs":
- return []
- elif return_type == "request_data":
- return req_data
- else:
- return request_class(url)
-
- def __str__(self):
- value = self.value
- if value is None: value = "<None>"
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = ", ".join(infos)
- if info: info = " (%s)" % info
-
- return "<%s(%s)%s>" % (self.__class__.__name__, value, info)
-
-
-#---------------------------------------------------
-class IgnoreControl(ScalarControl):
- """Control that we're not interested in.
-
- Covers:
-
- INPUT/RESET
- BUTTON/RESET
- INPUT/BUTTON
- BUTTON/BUTTON
-
- These controls are always unsuccessful, in the terminology of HTML 4 (ie.
- they never require any information to be returned to the server).
-
- BUTTON/BUTTON is used to generate events for script embedded in HTML.
-
- The value attribute of IgnoreControl is always None.
-
- """
- def __init__(self, type, name, attrs, index=None):
- ScalarControl.__init__(self, type, name, attrs, index)
- self._value = None
-
- def is_of_kind(self, kind): return False
-
- def __setattr__(self, name, value):
- if name == "value":
- raise AttributeError(
- "control '%s' is ignored, hence read-only" % self.name)
- elif name in ("name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
-
-#---------------------------------------------------
-# ListControls
-
-# helpers and subsidiary classes
-
-class Item:
- def __init__(self, control, attrs, index=None):
- label = _get_label(attrs)
- self.__dict__.update({
- "name": attrs["value"],
- "_labels": label and [label] or [],
- "attrs": attrs,
- "_control": control,
- "disabled": attrs.has_key("disabled"),
- "_selected": False,
- "id": attrs.get("id"),
- "_index": index,
- })
- control.items.append(self)
-
- def get_labels(self):
- """Return all labels (Label instances) for this item.
-
- For items that represent radio buttons or checkboxes, if the item was
- surrounded by a <label> tag, that will be the first label; all other
- labels, connected by 'for' and 'id', are in the order they appear in
- the HTML.
-
- For items that represent select options, if the option had a label
- attribute, that will be the first label. If the option has contents
- (text within the option tags) and it is not the same as the label
- attribute (if any), that will be a label. There is nothing in the
- spec to my knowledge that makes an option with an id unable to be the
- target of a label's for attribute, so those are included, if any, for
- the sake of consistency and completeness.
-
- """
- res = []
- res.extend(self._labels)
- if self.id:
- res.extend(self._control._form._id_to_labels.get(self.id, ()))
- return res
-
- def __getattr__(self, name):
- if name=="selected":
- return self._selected
- raise AttributeError(name)
-
- def __setattr__(self, name, value):
- if name == "selected":
- self._control._set_selected_state(self, value)
- elif name == "disabled":
- self.__dict__["disabled"] = bool(value)
- else:
- raise AttributeError(name)
-
- def __str__(self):
- res = self.name
- if self.selected:
- res = "*" + res
- if self.disabled:
- res = "(%s)" % res
- return res
-
- def __repr__(self):
- # XXX appending the attrs without distinguishing them from name and id
- # is silly
- attrs = [("name", self.name), ("id", self.id)]+self.attrs.items()
- return "<%s %s>" % (
- self.__class__.__name__,
- " ".join(["%s=%r" % (k, v) for k, v in attrs])
- )
-
-def disambiguate(items, nr, **kwds):
- msgs = []
- for key, value in kwds.items():
- msgs.append("%s=%r" % (key, value))
- msg = " ".join(msgs)
- if not items:
- raise ItemNotFoundError(msg)
- if nr is None:
- if len(items) > 1:
- raise AmbiguityError(msg)
- nr = 0
- if len(items) <= nr:
- raise ItemNotFoundError(msg)
- return items[nr]
-
-class ListControl(Control):
- """Control representing a sequence of items.
-
- The value attribute of a ListControl represents the successful list items
- in the control. The successful list items are those that are selected and
- not disabled.
-
- ListControl implements both list controls that take a length-1 value
- (single-selection) and those that take length >1 values
- (multiple-selection).
-
- ListControls accept sequence values only. Some controls only accept
- sequences of length 0 or 1 (RADIO, and single-selection SELECT).
- In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
- and multiple-selection SELECTs (those having the "multiple" HTML attribute)
- accept sequences of any length.
-
- Note the following mistake:
-
- control.value = some_value
- assert control.value == some_value # not necessarily true
-
- The reason for this is that the value attribute always gives the list items
- in the order they were listed in the HTML.
-
- ListControl items can also be referred to by their labels instead of names.
- Use the label argument to .get(), and the .set_value_by_label(),
- .get_value_by_label() methods.
-
- Note that, rather confusingly, though SELECT controls are represented in
- HTML by SELECT elements (which contain OPTION elements, representing
- individual list items), CHECKBOXes and RADIOs are not represented by *any*
- element. Instead, those controls are represented by a collection of INPUT
- elements. For example, this is a SELECT control, named "control1":
-
- <select name="control1">
- <option>foo</option>
- <option value="1">bar</option>
- </select>
-
- and this is a CHECKBOX control, named "control2":
-
- <input type="checkbox" name="control2" value="foo" id="cbe1">
- <input type="checkbox" name="control2" value="bar" id="cbe2">
-
- The id attribute of a CHECKBOX or RADIO ListControl is always that of its
- first element (for example, "cbe1" above).
-
-
- Additional read-only public attribute: multiple.
-
- """
-
- # ListControls are built up by the parser from their component items by
- # creating one ListControl per item, consolidating them into a single
- # master ListControl held by the HTMLForm:
-
- # -User calls form.new_control(...)
- # -Form creates Control, and calls control.add_to_form(self).
- # -Control looks for a Control with the same name and type in the form,
- # and if it finds one, merges itself with that control by calling
- # control.merge_control(self). The first Control added to the form, of
- # a particular name and type, is the only one that survives in the
- # form.
- # -Form calls control.fixup for all its controls. ListControls in the
- # form know they can now safely pick their default values.
-
- # To create a ListControl without an HTMLForm, use:
-
- # control.merge_control(new_control)
-
- # (actually, it's much easier just to use ParseFile)
-
- _label = None
-
- def __init__(self, type, name, attrs={}, select_default=False,
- called_as_base_class=False, index=None):
- """
- select_default: for RADIO and multiple-selection SELECT controls, pick
- the first item as the default if no 'selected' HTML attribute is
- present
-
- """
- if not called_as_base_class:
- raise NotImplementedError()
-
- self.__dict__["type"] = type.lower()
- self.__dict__["name"] = name
- self._value = attrs.get("value")
- self.disabled = False
- self.readonly = False
- self.id = attrs.get("id")
- self._closed = False
-
- # As Controls are merged in with .merge_control(), self.attrs will
- # refer to each Control in turn -- always the most recently merged
- # control. Each merged-in Control instance corresponds to a single
- # list item: see ListControl.__doc__.
- self.items = []
- self._form = None
-
- self._select_default = select_default
- self._clicked = False
-
- def clear(self):
- self.value = []
-
- def is_of_kind(self, kind):
- if kind == "list":
- return True
- elif kind == "multilist":
- return bool(self.multiple)
- elif kind == "singlelist":
- return not self.multiple
- else:
- return False
-
- def get_items(self, name=None, label=None, id=None,
- exclude_disabled=False):
- """Return matching items by name or label.
-
- For argument docs, see the docstring for .get()
-
- """
- if name is not None and not isstringlike(name):
- raise TypeError("item name must be string-like")
- if label is not None and not isstringlike(label):
- raise TypeError("item label must be string-like")
- if id is not None and not isstringlike(id):
- raise TypeError("item id must be string-like")
- items = [] # order is important
- compat = self._form.backwards_compat
- for o in self.items:
- if exclude_disabled and o.disabled:
- continue
- if name is not None and o.name != name:
- continue
- if label is not None:
- for l in o.get_labels():
- if ((compat and l.text == label) or
- (not compat and l.text.find(label) > -1)):
- break
- else:
- continue
- if id is not None and o.id != id:
- continue
- items.append(o)
- return items
-
- def get(self, name=None, label=None, id=None, nr=None,
- exclude_disabled=False):
- """Return item by name or label, disambiguating if necessary with nr.
-
- All arguments must be passed by name, with the exception of 'name',
- which may be used as a positional argument.
-
- If name is specified, then the item must have the indicated name.
-
- If label is specified, then the item must have a label whose
- whitespace-compressed, stripped, text substring-matches the indicated
- label string (eg. label="please choose" will match
- " Do please choose an item ").
-
- If id is specified, then the item must have the indicated id.
-
- nr is an optional 0-based index of the items matching the query.
-
- If nr is the default None value and more than one item is found, raises
- AmbiguityError (unless the HTMLForm instance's backwards_compat
- attribute is true).
-
- If no item is found, or if items are found but nr is specified and not
- found, raises ItemNotFoundError.
-
- Optionally excludes disabled items.
-
- """
- if nr is None and self._form.backwards_compat:
- nr = 0 # :-/
- items = self.get_items(name, label, id, exclude_disabled)
- return disambiguate(items, nr, name=name, label=label, id=id)
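-
- # Illustrative sketch (not in the original source) of the get / get_items
- # API documented above; "form" and the names are made up.
- #
- #     control = form.find_control("flavour")
- #     item = control.get("vanilla")           # by item name
- #     item.selected = True
- #     item = control.get(label="choc", nr=0)  # by label substring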
-
- def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
- # strictly for use by deprecated methods
- if by_label:
- name, label = None, name
- else:
- name, label = name, None
- return self.get(name, label, nr=nr, exclude_disabled=exclude_disabled)
-
- def toggle(self, name, by_label=False, nr=None):
- """Deprecated: given a name or label and optional disambiguating index
- nr, toggle the matching item's selection.
-
- Selecting items follows the behavior described in the docstring of the
- 'get' method.
-
- If the item is disabled, or this control is disabled or readonly,
- AttributeError is raised.
-
- """
- deprecation(
- "item = control.get(...); item.selected = not item.selected")
- o = self._get(name, by_label, nr)
- self._set_selected_state(o, not o.selected)
-
- def set(self, selected, name, by_label=False, nr=None):
- """Deprecated: given a name or label and optional disambiguating index
- nr, set the matching item's selection to the bool value of selected.
-
- Selecting items follows the behavior described in the docstring of the
- 'get' method.
-
- If the item is disabled, or this control is disabled or readonly,
- AttributeError is raised.
-
- """
- deprecation(
- "control.get(...).selected = <boolean>")
- self._set_selected_state(self._get(name, by_label, nr), selected)
-
- def _set_selected_state(self, item, action):
- # action:
- # bool False: off
- # bool True: on
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- action = bool(action)
- compat = self._form.backwards_compat
- if not compat and item.disabled:
- raise AttributeError("item is disabled")
- else:
- if compat and item.disabled and action:
- raise AttributeError("item is disabled")
- if self.multiple:
- item.__dict__["_selected"] = action
- else:
- if not action:
- item.__dict__["_selected"] = False
- else:
- for o in self.items:
- o.__dict__["_selected"] = False
- item.__dict__["_selected"] = True
-
- def toggle_single(self, by_label=None):
- """Deprecated: toggle the selection of the single item in this control.
-
- Raises ItemCountError if the control does not contain only one item.
-
- by_label argument is ignored, and included only for backwards
- compatibility.
-
- """
- deprecation(
- "control.items[0].selected = not control.items[0].selected")
- if len(self.items) != 1:
- raise ItemCountError(
- "'%s' is not a single-item control" % self.name)
- item = self.items[0]
- self._set_selected_state(item, not item.selected)
-
- def set_single(self, selected, by_label=None):
- """Deprecated: set the selection of the single item in this control.
-
- Raises ItemCountError if the control does not contain only one item.
-
- by_label argument is ignored, and included only for backwards
- compatibility.
-
- """
- deprecation(
- "control.items[0].selected = <boolean>")
- if len(self.items) != 1:
- raise ItemCountError(
- "'%s' is not a single-item control" % self.name)
- self._set_selected_state(self.items[0], selected)
-
- def get_item_disabled(self, name, by_label=False, nr=None):
- """Get disabled state of named list item in a ListControl."""
- deprecation(
- "control.get(...).disabled")
- return self._get(name, by_label, nr).disabled
-
- def set_item_disabled(self, disabled, name, by_label=False, nr=None):
- """Set disabled state of named list item in a ListControl.
-
- disabled: boolean disabled state
-
- """
- deprecation(
- "control.get(...).disabled = <boolean>")
- self._get(name, by_label, nr).disabled = disabled
-
- def set_all_items_disabled(self, disabled):
- """Set disabled state of all list items in a ListControl.
-
- disabled: boolean disabled state
-
- """
- for o in self.items:
- o.disabled = disabled
-
- def get_item_attrs(self, name, by_label=False, nr=None):
- """Return dictionary of HTML attributes for a single ListControl item.
-
- The HTML element types that describe list items are: OPTION for SELECT
- controls, INPUT for the rest. These elements have HTML attributes that
- you may occasionally want to know about -- for example, the "alt" HTML
- attribute gives a text string describing the item (graphical browsers
- usually display this as a tooltip).
-
- The returned dictionary maps HTML attribute names to values. The names
- and values are taken from the original HTML.
-
- """
- deprecation(
- "control.get(...).attrs")
- return self._get(name, by_label, nr).attrs
-
- def close_control(self):
- self._closed = True
-
- def add_to_form(self, form):
- assert self._form is None or form == self._form, (
- "can't add control to more than one form")
- self._form = form
- if self.name is None:
- # always count nameless elements as separate controls
- Control.add_to_form(self, form)
- else:
- for ii in range(len(form.controls)-1, -1, -1):
- control = form.controls[ii]
- if control.name == self.name and control.type == self.type:
- if control._closed:
- Control.add_to_form(self, form)
- else:
- control.merge_control(self)
- break
- else:
- Control.add_to_form(self, form)
-
- def merge_control(self, control):
- assert bool(control.multiple) == bool(self.multiple)
- # usually, isinstance(control, self.__class__)
- self.items.extend(control.items)
-
- def fixup(self):
- """
- ListControls are built up from component list items (which are also
- ListControls) during parsing. This method should be called after all
- items have been added. See ListControl.__doc__ for the reason this is
- required.
-
- """
- # Need to set default selection where no item was indicated as being
- # selected by the HTML:
-
- # CHECKBOX:
- # Nothing should be selected.
- # SELECT/single, SELECT/multiple and RADIO:
- # RFC 1866 (HTML 2.0): says first item should be selected.
- # W3C HTML 4.01 Specification: says that client behaviour is
- # undefined in this case. For RADIO, exactly one must be selected,
- # though which one is undefined.
- # Both Netscape and Microsoft Internet Explorer (IE) choose first
- # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
- # and Firebird 0.6) leave all items unselected for RADIO and
- # SELECT/multiple.
-
- # Since both Netscape and IE choose the first item for
- # SELECT/single, we do the same. OTOH, both Netscape and IE
- # leave SELECT/multiple with nothing selected, in violation of RFC 1866
- # (but not in violation of the W3C HTML 4 standard); the same is true
- # of RADIO (which *is* in violation of the HTML 4 standard). We follow
- # RFC 1866 if the _select_default attribute is set, and Netscape and IE
- # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
- # can deselect all items in a RadioControl.
-
- for o in self.items:
- # set items' controls to self, now that we've merged
- o.__dict__["_control"] = self
-
- def __getattr__(self, name):
- if name == "value":
- compat = self._form.backwards_compat
- if self.name is None:
- return []
- return [o.name for o in self.items if o.selected and
- (not o.disabled or compat)]
- else:
- raise AttributeError("%s instance has no attribute '%s'" %
- (self.__class__.__name__, name))
-
- def __setattr__(self, name, value):
- if name == "value":
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- self._set_value(value)
- elif name in ("name", "type", "multiple"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def _set_value(self, value):
- if value is None or isstringlike(value):
- raise TypeError("ListControl, must set a sequence")
- if not value:
- compat = self._form.backwards_compat
- for o in self.items:
- if not o.disabled or compat:
- o.selected = False
- elif self.multiple:
- self._multiple_set_value(value)
- elif len(value) > 1:
- raise ItemCountError(
- "single selection list, must set sequence of "
- "length 0 or 1")
- else:
- self._single_set_value(value)
-
- def _get_items(self, name, target=1):
- all_items = self.get_items(name)
- items = [o for o in all_items if not o.disabled]
- if len(items) < target:
- if len(all_items) < target:
- raise ItemNotFoundError(
- "insufficient items with name %r" % name)
- else:
- raise AttributeError(
- "insufficient non-disabled items with name %s" % name)
- on = []
- off = []
- for o in items:
- if o.selected:
- on.append(o)
- else:
- off.append(o)
- return on, off
-
- def _single_set_value(self, value):
- assert len(value) == 1
- on, off = self._get_items(value[0])
- assert len(on) <= 1
- if not on:
- off[0].selected = True
-
- def _multiple_set_value(self, value):
- compat = self._form.backwards_compat
- turn_on = [] # transactional-ish
- turn_off = [item for item in self.items if
- item.selected and (not item.disabled or compat)]
- names = {}
- for nn in value:
- if nn in names:
- names[nn] += 1
- else:
- names[nn] = 1
- for name, count in names.items():
- on, off = self._get_items(name, count)
- for i in range(count):
- if on:
- item = on[0]
- del on[0]
- del turn_off[turn_off.index(item)]
- else:
- item = off[0]
- del off[0]
- turn_on.append(item)
- for item in turn_off:
- item.selected = False
- for item in turn_on:
- item.selected = True
-
- def set_value_by_label(self, value):
- """Set the value of control by item labels.
-
- value is expected to be an iterable of strings that are substrings of
- the item labels that should be selected. Before substring matching is
- performed, the original label text is whitespace-compressed
- (consecutive whitespace characters are converted to a single space
- character) and leading and trailing whitespace is stripped. Ambiguous
-        labels are accepted without complaint if the form's backwards_compat
-        attribute is True; otherwise, an ambiguous label is accepted only if
-        all the matching items share the same item name (e.g. OPTION value).
-
- """
- if isstringlike(value):
- raise TypeError(value)
- if not self.multiple and len(value) > 1:
- raise ItemCountError(
- "single selection list, must set sequence of "
- "length 0 or 1")
- items = []
- for nn in value:
- found = self.get_items(label=nn)
- if len(found) > 1:
- if not self._form.backwards_compat:
- # ambiguous labels are fine as long as item names (e.g.
- # OPTION values) are same
- opt_name = found[0].name
- if [o for o in found[1:] if o.name != opt_name]:
- raise AmbiguityError(nn)
- else:
- # OK, we'll guess :-( Assume first available item.
- found = found[:1]
- for o in found:
- # For the multiple-item case, we could try to be smarter,
- # saving them up and trying to resolve, but that's too much.
- if self._form.backwards_compat or o not in items:
- items.append(o)
- break
- else: # all of them are used
- raise ItemNotFoundError(nn)
- # now we have all the items that should be on
- # let's just turn everything off and then back on.
- self.value = []
- for o in items:
- o.selected = True
-
- def get_value_by_label(self):
- """Return the value of the control as given by normalized labels."""
- res = []
- compat = self._form.backwards_compat
- for o in self.items:
- if (not o.disabled or compat) and o.selected:
- for l in o.get_labels():
- if l.text:
- res.append(l.text)
- break
- else:
- res.append(None)
- return res
-
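-    # Minimal sketch (illustrative, not part of the original library text)
-    # pairing the two label-based methods; "form" and the SELECT named
-    # "year" are hypothetical:
-    #
-    #     control = form.find_control("year")
-    #     control.set_value_by_label(["2002"])
-    #     control.get_value_by_label()   # -> ["2002"]
-    #     control.value                  # -> the item names, e.g. ["0"]
-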
- def possible_items(self, by_label=False):
- """Deprecated: return the names or labels of all possible items.
-
- Includes disabled items, which may be misleading for some use cases.
-
- """
- deprecation(
- "[item.name for item in self.items]")
- if by_label:
- res = []
- for o in self.items:
- for l in o.get_labels():
- if l.text:
- res.append(l.text)
- break
- else:
- res.append(None)
- return res
- return [o.name for o in self.items]
-
- def _totally_ordered_pairs(self):
- if self.disabled or self.name is None:
- return []
- else:
- return [(o._index, self.name, o.name) for o in self.items
- if o.selected and not o.disabled]
-
- def __str__(self):
- name = self.name
- if name is None: name = "<None>"
-
- display = [str(o) for o in self.items]
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = ", ".join(infos)
- if info: info = " (%s)" % info
-
- return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
- name, ", ".join(display), info)
-
-
-class RadioControl(ListControl):
- """
- Covers:
-
- INPUT/RADIO
-
- """
- def __init__(self, type, name, attrs, select_default=False, index=None):
- attrs.setdefault("value", "on")
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True, index=index)
- self.__dict__["multiple"] = False
- o = Item(self, attrs, index)
- o.__dict__["_selected"] = attrs.has_key("checked")
-
- def fixup(self):
- ListControl.fixup(self)
- found = [o for o in self.items if o.selected and not o.disabled]
- if not found:
- if self._select_default:
- for o in self.items:
- if not o.disabled:
- o.selected = True
- break
- else:
- # Ensure only one item selected. Choose the last one,
- # following IE and Firefox.
- for o in found[:-1]:
- o.selected = False
-
- def get_labels(self):
- return []
-
-class CheckboxControl(ListControl):
- """
- Covers:
-
- INPUT/CHECKBOX
-
- """
- def __init__(self, type, name, attrs, select_default=False, index=None):
- attrs.setdefault("value", "on")
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True, index=index)
- self.__dict__["multiple"] = True
- o = Item(self, attrs, index)
- o.__dict__["_selected"] = attrs.has_key("checked")
-
- def get_labels(self):
- return []
-
-
-class SelectControl(ListControl):
- """
- Covers:
-
- SELECT (and OPTION)
-
-
- OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.
-
- SELECT control values and labels are subject to some messy defaulting
- rules. For example, if the HTML representation of the control is:
-
- <SELECT name=year>
- <OPTION value=0 label="2002">current year</OPTION>
- <OPTION value=1>2001</OPTION>
- <OPTION>2000</OPTION>
- </SELECT>
-
- The items, in order, have labels "2002", "2001" and "2000", whereas their
- names (the OPTION values) are "0", "1" and "2000" respectively. Note that
- the value of the last OPTION in this example defaults to its contents, as
- specified by RFC 1866, as do the labels of the second and third OPTIONs.
-
- The OPTION labels are sometimes more meaningful than the OPTION values,
- which can make for more maintainable code.
-
- Additional read-only public attribute: attrs
-
- The attrs attribute is a dictionary of the original HTML attributes of the
- SELECT element. Other ListControls do not have this attribute, because in
- other cases the control as a whole does not correspond to any single HTML
- element. control.get(...).attrs may be used as usual to get at the HTML
- attributes of the HTML elements corresponding to individual list items (for
- SELECT controls, these are OPTION elements).
-
- Another special case is that the Item.attrs dictionaries have a special key
- "contents" which does not correspond to any real HTML attribute, but rather
- contains the contents of the OPTION element:
-
- <OPTION>this bit</OPTION>
-
- """
- # HTML attributes here are treated slightly differently from other list
- # controls:
- # -The SELECT HTML attributes dictionary is stuffed into the OPTION
- # HTML attributes dictionary under the "__select" key.
- # -The content of each OPTION element is stored under the special
- # "contents" key of the dictionary.
- # After all this, the dictionary is passed to the SelectControl constructor
- # as the attrs argument, as usual. However:
- # -The first SelectControl constructed when building up a SELECT control
- # has a constructor attrs argument containing only the __select key -- so
- # this SelectControl represents an empty SELECT control.
-    # -Subsequent SelectControls have both the OPTION HTML attributes in
-    #  attrs and the __select dictionary containing the SELECT HTML
-    #  attributes.
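-    # For example (hypothetical markup, shown here for illustration), given
-    #     <SELECT name=year><OPTION value=0>2002</OPTION></SELECT>
-    # the parser first constructs a SelectControl whose attrs is just
-    #     {"__select": {"name": "year"}}
-    # (the 'select started' marker) and then a second one whose attrs is
-    #     {"__select": {"name": "year"}, "value": "0", "contents": "2002"}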
-
- def __init__(self, type, name, attrs, select_default=False, index=None):
- # fish out the SELECT HTML attributes from the OPTION HTML attributes
- # dictionary
- self.attrs = attrs["__select"].copy()
- self.__dict__["_label"] = _get_label(self.attrs)
- self.__dict__["id"] = self.attrs.get("id")
- self.__dict__["multiple"] = self.attrs.has_key("multiple")
- # the majority of the contents, label, and value dance already happened
- contents = attrs.get("contents")
- attrs = attrs.copy()
- del attrs["__select"]
-
- ListControl.__init__(self, type, name, self.attrs, select_default,
- called_as_base_class=True, index=index)
- self.disabled = self.attrs.has_key("disabled")
- self.readonly = self.attrs.has_key("readonly")
- if attrs.has_key("value"):
- # otherwise it is a marker 'select started' token
- o = Item(self, attrs, index)
- o.__dict__["_selected"] = attrs.has_key("selected")
- # add 'label' label and contents label, if different. If both are
- # provided, the 'label' label is used for display in HTML
- # 4.0-compliant browsers (and any lower spec? not sure) while the
- # contents are used for display in older or less-compliant
- # browsers. We make label objects for both, if the values are
- # different.
- label = attrs.get("label")
- if label:
- o._labels.append(Label({"__text": label}))
- if contents and contents != label:
- o._labels.append(Label({"__text": contents}))
- elif contents:
- o._labels.append(Label({"__text": contents}))
-
- def fixup(self):
- ListControl.fixup(self)
- # Firefox doesn't exclude disabled items from those considered here
- # (i.e. from 'found', for both branches of the if below). Note that
- # IE6 doesn't support the disabled attribute on OPTIONs at all.
- found = [o for o in self.items if o.selected]
- if not found:
- if not self.multiple or self._select_default:
- for o in self.items:
- if not o.disabled:
- was_disabled = self.disabled
- self.disabled = False
- try:
- o.selected = True
- finally:
- o.disabled = was_disabled
- break
- elif not self.multiple:
- # Ensure only one item selected. Choose the last one,
- # following IE and Firefox.
- for o in found[:-1]:
- o.selected = False
-
-
-#---------------------------------------------------
-class SubmitControl(ScalarControl):
- """
- Covers:
-
- INPUT/SUBMIT
- BUTTON/SUBMIT
-
- """
- def __init__(self, type, name, attrs, index=None):
- ScalarControl.__init__(self, type, name, attrs, index)
- # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
- # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
- # to define this.
- if self.value is None: self.value = ""
- self.readonly = True
-
- def get_labels(self):
- res = []
- if self.value:
- res.append(Label({"__text": self.value}))
- res.extend(ScalarControl.get_labels(self))
- return res
-
- def is_of_kind(self, kind): return kind == "clickable"
-
- def _click(self, form, coord, return_type, request_class=urllib2.Request):
- self._clicked = coord
- r = form._switch_click(return_type, request_class)
- self._clicked = False
- return r
-
- def _totally_ordered_pairs(self):
- if not self._clicked:
- return []
- return ScalarControl._totally_ordered_pairs(self)
-
-
-#---------------------------------------------------
-class ImageControl(SubmitControl):
- """
- Covers:
-
- INPUT/IMAGE
-
- Coordinates are specified using one of the HTMLForm.click* methods.
-
- """
- def __init__(self, type, name, attrs, index=None):
- SubmitControl.__init__(self, type, name, attrs, index)
- self.readonly = False
-
- def _totally_ordered_pairs(self):
- clicked = self._clicked
- if self.disabled or not clicked:
- return []
- name = self.name
- if name is None: return []
- pairs = [
- (self._index, "%s.x" % name, str(clicked[0])),
- (self._index+1, "%s.y" % name, str(clicked[1])),
- ]
- value = self._value
- if value:
- pairs.append((self._index+2, name, value))
- return pairs
-
- get_labels = ScalarControl.get_labels
-
-# aliases, just to make str(control) and str(form) clearer
-class PasswordControl(TextControl): pass
-class HiddenControl(TextControl): pass
-class TextareaControl(TextControl): pass
-class SubmitButtonControl(SubmitControl): pass
-
-
-def is_listcontrol(control): return control.is_of_kind("list")
-
-
-class HTMLForm:
- """Represents a single HTML <form> ... </form> element.
-
- A form consists of a sequence of controls that usually have names, and
- which can take on various values. The values of the various types of
- controls represent variously: text, zero-or-one-of-many or many-of-many
- choices, and files to be uploaded. Some controls can be clicked on to
- submit the form, and clickable controls' values sometimes include the
- coordinates of the click.
-
- Forms can be filled in with data to be returned to the server, and then
- submitted, using the click method to generate a request object suitable for
- passing to urllib2.urlopen (or the click_request_data or click_pairs
- methods if you're not using urllib2).
-
- import ClientForm
- forms = ClientForm.ParseFile(html, base_uri)
- form = forms[0]
-
- form["query"] = "Python"
- form.find_control("nr_results").get("lots").selected = True
-
- response = urllib2.urlopen(form.click())
-
- Usually, HTMLForm instances are not created directly. Instead, the
- ParseFile or ParseResponse factory functions are used. If you do construct
- HTMLForm objects yourself, however, note that an HTMLForm instance is only
- properly initialised after the fixup method has been called (ParseFile and
- ParseResponse do this for you). See ListControl.__doc__ for the reason
- this is required.
-
- Indexing a form (form["control_name"]) returns the named Control's value
- attribute. Assignment to a form index (form["control_name"] = something)
- is equivalent to assignment to the named Control's value attribute. If you
- need to be more specific than just supplying the control's name, use the
- set_value and get_value methods.
-
- ListControl values are lists of item names (specifically, the names of the
-    items that are selected and not disabled, and hence are "successful" --
-    i.e. cause data to be returned to the server). The list item's name is
-    the value of the corresponding HTML element's "value" attribute.
-
- Example:
-
- <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
- <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
-
- defines a CHECKBOX control with name "cheeses" which has two items, named
- "leicester" and "cheddar".
-
- Another example:
-
- <SELECT name="more_cheeses">
- <OPTION>1</OPTION>
- <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
- </SELECT>
-
- defines a SELECT control with name "more_cheeses" which has two items,
- named "1" and "2" (because the OPTION element's value HTML attribute
- defaults to the element contents -- see SelectControl.__doc__ for more on
- these defaulting rules).
-
- To select, deselect or otherwise manipulate individual list items, use the
- HTMLForm.find_control() and ListControl.get() methods. To set the whole
- value, do as for any other control: use indexing or the set_/get_value
- methods.
-
- Example:
-
- # select *only* the item named "cheddar"
- form["cheeses"] = ["cheddar"]
- # select "cheddar", leave other items unaffected
- form.find_control("cheeses").get("cheddar").selected = True
-
- Some controls (RADIO and SELECT without the multiple attribute) can only
- have zero or one items selected at a time. Some controls (CHECKBOX and
- SELECT with the multiple attribute) can have multiple items selected at a
- time. To set the whole value of a ListControl, assign a sequence to a form
- index:
-
- form["cheeses"] = ["cheddar", "leicester"]
-
- If the ListControl is not multiple-selection, the assigned list must be of
- length one.
-
- To check if a control has an item, if an item is selected, or if an item is
- successful (selected and not disabled), respectively:
-
- "cheddar" in [item.name for item in form.find_control("cheeses").items]
-    "cheddar" in [item.name for item in form.find_control("cheeses").items
-                  if item.selected]
- "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
-
- Note that some list items may be disabled (see below).
-
- Note the following mistake:
-
- form[control_name] = control_value
- assert form[control_name] == control_value # not necessarily true
-
- The reason for this is that form[control_name] always gives the list items
- in the order they were listed in the HTML.
-
- List items (hence list values, too) can be referred to in terms of list
- item labels rather than list item names using the appropriate label
- arguments. Note that each item may have several labels.
-
- The question of default values of OPTION contents, labels and values is
- somewhat complicated: see SelectControl.__doc__ and
- ListControl.get_item_attrs.__doc__ if you think you need to know.
-
- Controls can be disabled or readonly. In either case, the control's value
- cannot be changed until you clear those flags (see example below).
- Disabled is the state typically represented by browsers by 'greying out' a
- control. Disabled controls are not 'successful' -- they don't cause data
- to get returned to the server. Readonly controls usually appear in
- browsers as read-only text boxes. Readonly controls are successful. List
- items can also be disabled. Attempts to select or deselect disabled items
- fail with AttributeError.
-
- If a lot of controls are readonly, it can be useful to do this:
-
- form.set_all_readonly(False)
-
- To clear a control's value attribute, so that it is not successful (until a
- value is subsequently set):
-
- form.clear("cheeses")
-
- More examples:
-
- control = form.find_control("cheeses")
- control.disabled = False
- control.readonly = False
- control.get("gruyere").disabled = True
- control.items[0].selected = True
-
- See the various Control classes for further documentation. Many methods
- take name, type, kind, id, label and nr arguments to specify the control to
- be operated on: see HTMLForm.find_control.__doc__.
-
- ControlNotFoundError (subclass of ValueError) is raised if the specified
- control can't be found. This includes occasions where a non-ListControl
- is found, but the method (set, for example) requires a ListControl.
- ItemNotFoundError (subclass of ValueError) is raised if a list item can't
- be found. ItemCountError (subclass of ValueError) is raised if an attempt
- is made to select more than one item and the control doesn't allow that, or
- set/get_single are called and the control contains more than one item.
- AttributeError is raised if a control or item is readonly or disabled and
- an attempt is made to alter its value.
-
- Security note: Remember that any passwords you store in HTMLForm instances
- will be saved to disk in the clear if you pickle them (directly or
- indirectly). The simplest solution to this is to avoid pickling HTMLForm
- objects. You could also pickle before filling in any password, or just set
- the password to "" before pickling.
-
-
- Public attributes:
-
- action: full (absolute URI) form action
- method: "GET" or "POST"
- enctype: form transfer encoding MIME type
- name: name of form (None if no name was specified)
- attrs: dictionary mapping original HTML form attributes to their values
-
- controls: list of Control instances; do not alter this list
- (instead, call form.new_control to make a Control and add it to the
- form, or control.add_to_form if you already have a Control instance)
-
-
-
- Methods for form filling:
- -------------------------
-
-    Most of these methods have very similar arguments. See
- HTMLForm.find_control.__doc__ for details of the name, type, kind, label
- and nr arguments.
-
- def find_control(self,
- name=None, type=None, kind=None, id=None, predicate=None,
- nr=None, label=None)
-
- get_value(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False, # by_label is deprecated
- label=None)
- set_value(value,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False, # by_label is deprecated
- label=None)
-
- clear_all()
- clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
-
- set_all_readonly(readonly)
-
-
- Method applying only to FileControls:
-
- add_file(file_object,
- content_type="application/octet-stream", filename=None,
- name=None, id=None, nr=None, label=None)
-
-
- Methods applying only to clickable controls:
-
- click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
- click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
- label=None)
- click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
-
- """
-
- type2class = {
- "text": TextControl,
- "password": PasswordControl,
- "hidden": HiddenControl,
- "textarea": TextareaControl,
-
- "isindex": IsindexControl,
-
- "file": FileControl,
-
- "button": IgnoreControl,
- "buttonbutton": IgnoreControl,
- "reset": IgnoreControl,
- "resetbutton": IgnoreControl,
-
- "submit": SubmitControl,
- "submitbutton": SubmitButtonControl,
- "image": ImageControl,
-
- "radio": RadioControl,
- "checkbox": CheckboxControl,
- "select": SelectControl,
- }
-
-#---------------------------------------------------
-# Initialisation. Use ParseResponse / ParseFile instead.
-
- def __init__(self, action, method="GET",
- enctype="application/x-www-form-urlencoded",
- name=None, attrs=None,
- request_class=urllib2.Request,
- forms=None, labels=None, id_to_labels=None,
- backwards_compat=True):
- """
- In the usual case, use ParseResponse (or ParseFile) to create new
- HTMLForm objects.
-
- action: full (absolute URI) form action
- method: "GET" or "POST"
- enctype: form transfer encoding MIME type
- name: name of form
- attrs: dictionary mapping original HTML form attributes to their values
-
- """
- self.action = action
- self.method = method
- self.enctype = enctype
- self.name = name
- if attrs is not None:
- self.attrs = attrs.copy()
- else:
- self.attrs = {}
- self.controls = []
- self._request_class = request_class
-
- # these attributes are used by zope.testbrowser
- self._forms = forms # this is a semi-public API!
- self._labels = labels # this is a semi-public API!
- self._id_to_labels = id_to_labels # this is a semi-public API!
-
- self.backwards_compat = backwards_compat # note __setattr__
-
- self._urlunparse = urlparse.urlunparse
- self._urlparse = urlparse.urlparse
-
- def __getattr__(self, name):
- if name == "backwards_compat":
- return self._backwards_compat
- return getattr(HTMLForm, name)
-
- def __setattr__(self, name, value):
- # yuck
- if name == "backwards_compat":
- name = "_backwards_compat"
- value = bool(value)
- for cc in self.controls:
- try:
- items = cc.items
- except AttributeError:
- continue
- else:
- for ii in items:
- for ll in ii.get_labels():
- ll._backwards_compat = value
- self.__dict__[name] = value
-
- def new_control(self, type, name, attrs,
- ignore_unknown=False, select_default=False, index=None):
- """Adds a new control to the form.
-
- This is usually called by ParseFile and ParseResponse. Don't call it
-        yourself unless you're building your own Control instances.
-
- Note that controls representing lists of items are built up from
- controls holding only a single list item. See ListControl.__doc__ for
- further information.
-
- type: type of control (see Control.__doc__ for a list)
- attrs: HTML attributes of control
- ignore_unknown: if true, use a dummy Control instance for controls of
- unknown type; otherwise, use a TextControl
- select_default: for RADIO and multiple-selection SELECT controls, pick
- the first item as the default if no 'selected' HTML attribute is
- present (this defaulting happens when the HTMLForm.fixup method is
- called)
- index: index of corresponding element in HTML (see
- MoreFormTests.test_interspersed_controls for motivation)
-
- """
- type = type.lower()
- klass = self.type2class.get(type)
- if klass is None:
- if ignore_unknown:
- klass = IgnoreControl
- else:
- klass = TextControl
-
- a = attrs.copy()
- if issubclass(klass, ListControl):
- control = klass(type, name, a, select_default, index)
- else:
- control = klass(type, name, a, index)
-
- if type == "select" and len(attrs) == 1:
- for ii in range(len(self.controls)-1, -1, -1):
- ctl = self.controls[ii]
- if ctl.type == "select":
- ctl.close_control()
- break
-
- control.add_to_form(self)
- control._urlparse = self._urlparse
- control._urlunparse = self._urlunparse
-
- def fixup(self):
- """Normalise form after all controls have been added.
-
- This is usually called by ParseFile and ParseResponse. Don't call it
-        yourself unless you're building your own Control instances.
-
- This method should only be called once, after all controls have been
- added to the form.
-
- """
- for control in self.controls:
- control.fixup()
- self.backwards_compat = self._backwards_compat
-
-#---------------------------------------------------
- def __str__(self):
- header = "%s%s %s %s" % (
- (self.name and self.name+" " or ""),
- self.method, self.action, self.enctype)
- rep = [header]
- for control in self.controls:
- rep.append(" %s" % str(control))
- return "<%s>" % "\n".join(rep)
-
-#---------------------------------------------------
-# Form-filling methods.
-
- def __getitem__(self, name):
- return self.find_control(name).value
- def __contains__(self, name):
- return bool(self.find_control(name))
- def __setitem__(self, name, value):
- control = self.find_control(name)
- try:
- control.value = value
- except AttributeError, e:
- raise ValueError(str(e))
-
- def get_value(self,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False, # by_label is deprecated
- label=None):
- """Return value of control.
-
-        If only the name argument is supplied, this is equivalent to
-
- form[name]
-
- """
- if by_label:
- deprecation("form.get_value_by_label(...)")
- c = self.find_control(name, type, kind, id, label=label, nr=nr)
- if by_label:
- try:
- meth = c.get_value_by_label
- except AttributeError:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % c.name)
- else:
- return meth()
- else:
- return c.value
- def set_value(self, value,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False, # by_label is deprecated
- label=None):
- """Set value of control.
-
- If only name and value arguments are supplied, equivalent to
-
- form[name] = value
-
- """
- if by_label:
-            deprecation("form.set_value_by_label(...)")
- c = self.find_control(name, type, kind, id, label=label, nr=nr)
- if by_label:
- try:
- meth = c.set_value_by_label
- except AttributeError:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % c.name)
- else:
- meth(value)
- else:
- c.value = value
- def get_value_by_label(
- self, name=None, type=None, kind=None, id=None, label=None, nr=None):
- """
-
- All arguments should be passed by name.
-
- """
- c = self.find_control(name, type, kind, id, label=label, nr=nr)
- return c.get_value_by_label()
-
- def set_value_by_label(
- self, value,
- name=None, type=None, kind=None, id=None, label=None, nr=None):
- """
-
- All arguments should be passed by name.
-
- """
- c = self.find_control(name, type, kind, id, label=label, nr=nr)
- c.set_value_by_label(value)
-
- def set_all_readonly(self, readonly):
- for control in self.controls:
- control.readonly = bool(readonly)
-
- def clear_all(self):
- """Clear the value attributes of all controls in the form.
-
- See HTMLForm.clear.__doc__.
-
- """
- for control in self.controls:
- control.clear()
-
- def clear(self,
- name=None, type=None, kind=None, id=None, nr=None, label=None):
- """Clear the value attribute of a control.
-
- As a result, the affected control will not be successful until a value
- is subsequently set. AttributeError is raised on readonly controls.
-
- """
- c = self.find_control(name, type, kind, id, label=label, nr=nr)
- c.clear()
-
-
-#---------------------------------------------------
-# Form-filling methods applying only to ListControls.
-
- def possible_items(self, # deprecated
- name=None, type=None, kind=None, id=None,
- nr=None, by_label=False, label=None):
- """Return a list of all values that the specified control can take."""
- c = self._find_list_control(name, type, kind, id, label, nr)
- return c.possible_items(by_label)
-
- def set(self, selected, item_name, # deprecated
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False, label=None):
- """Select / deselect named list item.
-
- selected: boolean selected state
-
- """
- self._find_list_control(name, type, kind, id, label, nr).set(
- selected, item_name, by_label)
- def toggle(self, item_name, # deprecated
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False, label=None):
- """Toggle selected state of named list item."""
- self._find_list_control(name, type, kind, id, label, nr).toggle(
- item_name, by_label)
-
- def set_single(self, selected, # deprecated
- name=None, type=None, kind=None, id=None,
- nr=None, by_label=None, label=None):
- """Select / deselect list item in a control having only one item.
-
- If the control has multiple list items, ItemCountError is raised.
-
- This is just a convenience method, so you don't need to know the item's
- name -- the item name in these single-item controls is usually
- something meaningless like "1" or "on".
-
- For example, if a checkbox has a single item named "on", the following
- two calls are equivalent:
-
- control.toggle("on")
- control.toggle_single()
-
- """ # by_label ignored and deprecated
- self._find_list_control(
- name, type, kind, id, label, nr).set_single(selected)
- def toggle_single(self, name=None, type=None, kind=None, id=None,
- nr=None, by_label=None, label=None): # deprecated
- """Toggle selected state of list item in control having only one item.
-
- The rest is as for HTMLForm.set_single.__doc__.
-
- """ # by_label ignored and deprecated
- self._find_list_control(name, type, kind, id, label, nr).toggle_single()
-
-#---------------------------------------------------
-# Form-filling method applying only to FileControls.
-
- def add_file(self, file_object, content_type=None, filename=None,
- name=None, id=None, nr=None, label=None):
- """Add a file to be uploaded.
-
- file_object: file-like object (with read method) from which to read
- data to upload
- content_type: MIME content type of data to upload
- filename: filename to pass to server
-
- If filename is None, no filename is sent to the server.
-
- If content_type is None, the content type is guessed based on the
-        filename and the data read from the file object.
-
- XXX
- At the moment, guessed content type is always application/octet-stream.
- Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
- plain text.
-
- Note the following useful HTML attributes of file upload controls (see
- HTML 4.01 spec, section 17):
-
- accept: comma-separated list of content types that the server will
- handle correctly; you can use this to filter out non-conforming files
- size: XXX IIRC, this is indicative of whether form wants multiple or
- single files
- maxlength: XXX hint of max content length in bytes?
-
- """
- self.find_control(name, "file", id=id, label=label, nr=nr).add_file(
- file_object, content_type, filename)
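-
-    # Illustrative sketch (not part of the original docs); the file path
-    # and control name are hypothetical:
-    #
-    #     form.add_file(open("/tmp/photo.jpg", "rb"),
-    #                   content_type="image/jpeg",
-    #                   filename="photo.jpg",
-    #                   name="upload")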
-
-#---------------------------------------------------
-# Form submission methods, applying only to clickable controls.
-
- def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
- request_class=urllib2.Request,
- label=None):
- """Return request that would result from clicking on a control.
-
- The request object is a urllib2.Request instance, which you can pass to
- urllib2.urlopen (or ClientCookie.urlopen).
-
- Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
- IMAGEs) can be clicked.
-
- Will click on the first clickable control, subject to the name, type
- and nr arguments (as for find_control). If no name, type, id or number
- is specified and there are no clickable controls, a request will be
- returned for the form in its current, un-clicked, state.
-
- IndexError is raised if any of name, type, id or nr is specified but no
- matching control is found. ValueError is raised if the HTMLForm has an
- enctype attribute that is not recognised.
-
- You can optionally specify a coordinate to click at, which only makes a
- difference if you clicked on an image.
-
- """
- return self._click(name, type, id, label, nr, coord, "request",
- self._request_class)
-
- def click_request_data(self,
- name=None, type=None, id=None,
- nr=0, coord=(1,1),
- request_class=urllib2.Request,
- label=None):
- """As for click method, but return a tuple (url, data, headers).
-
- You can use this data to send a request to the server. This is useful
- if you're using httplib or urllib rather than urllib2. Otherwise, use
- the click method.
-
- # Untested. Have to subclass to add headers, I think -- so use urllib2
- # instead!
- import urllib
- url, data, hdrs = form.click_request_data()
- r = urllib.urlopen(url, data)
-
- # Untested. I don't know of any reason to use httplib -- you can get
- # just as much control with urllib2.
- import httplib, urlparse
- url, data, hdrs = form.click_request_data()
-        tup = urlparse.urlparse(url)
-        host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
-        conn = httplib.HTTPConnection(host)
-        if data:
-            conn.request("POST", path, data, dict(hdrs))
-        else:
-            conn.request("GET", path, headers=dict(hdrs))
- r = conn.getresponse()
-
- """
- return self._click(name, type, id, label, nr, coord, "request_data",
- self._request_class)
-
- def click_pairs(self, name=None, type=None, id=None,
- nr=0, coord=(1,1),
- label=None):
- """As for click_request_data, but returns a list of (key, value) pairs.
-
- You can use this list as an argument to ClientForm.urlencode. This is
- usually only useful if you're using httplib or urllib rather than
- urllib2 or ClientCookie. It may also be useful if you want to manually
- tweak the keys and/or values, but this should not be necessary.
- Otherwise, use the click method.
-
- Note that this method is only useful for forms of MIME type
- x-www-form-urlencoded. In particular, it does not return the
- information required for file upload. If you need file upload and are
- not using urllib2, use click_request_data.
-
- Also note that Python 2.0's urllib.urlencode is slightly broken: it
- only accepts a mapping, not a sequence of pairs, as an argument. This
- messes up any ordering in the argument. Use ClientForm.urlencode
- instead.
-
- """
- return self._click(name, type, id, label, nr, coord, "pairs",
- self._request_class)
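-
-    # Illustrative sketch (not part of the original docs): feeding
-    # click_pairs into the module-level urlencode helper mentioned above:
-    #
-    #     pairs = form.click_pairs()
-    #     body = urlencode(pairs)   # ClientForm.urlencode, order-preserving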
-
-#---------------------------------------------------
-
- def find_control(self,
- name=None, type=None, kind=None, id=None,
- predicate=None, nr=None,
- label=None):
- """Locate and return some specific control within the form.
-
- At least one of the name, type, kind, predicate and nr arguments must
- be supplied. If no matching control is found, ControlNotFoundError is
- raised.
-
- If name is specified, then the control must have the indicated name.
-
- If type is specified then the control must have the specified type (in
- addition to the types possible for <input> HTML tags: "text",
- "password", "hidden", "submit", "image", "button", "radio", "checkbox",
- "file" we also have "reset", "buttonbutton", "submitbutton",
- "resetbutton", "textarea", "select" and "isindex").
-
- If kind is specified, then the control must fall into the specified
-        group, each of which satisfies a particular interface. The kinds are
- "text", "list", "multilist", "singlelist", "clickable" and "file".
-
- If id is specified, then the control must have the indicated id.
-
- If predicate is specified, then the control must match that function.
- The predicate function is passed the control as its single argument,
- and should return a boolean value indicating whether the control
- matched.
-
- nr, if supplied, is the sequence number of the control (where 0 is the
- first). Note that control 0 is the first control matching all the
- other arguments (if supplied); it is not necessarily the first control
- in the form. If no nr is supplied, AmbiguityError is raised if
- multiple controls match the other arguments (unless the
-        .backwards_compat attribute is true).
-
- If label is specified, then the control must have this label. Note
- that radio controls and checkboxes never have labels: their items do.
-
- """
- if ((name is None) and (type is None) and (kind is None) and
- (id is None) and (label is None) and (predicate is None) and
- (nr is None)):
- raise ValueError(
- "at least one argument must be supplied to specify control")
- return self._find_control(name, type, kind, id, label, predicate, nr)
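-
-    # Illustrative sketch (not part of the original docs); the names,
-    # labels and indices are hypothetical:
-    #
-    #     form.find_control("q")                   # by name
-    #     form.find_control(type="submit", nr=1)   # second submit button
-    #     form.find_control(kind="list", label="Cheeses")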
-
-#---------------------------------------------------
-# Private methods.
-
- def _find_list_control(self,
- name=None, type=None, kind=None, id=None,
- label=None, nr=None):
- if ((name is None) and (type is None) and (kind is None) and
- (id is None) and (label is None) and (nr is None)):
- raise ValueError(
- "at least one argument must be supplied to specify control")
-
- return self._find_control(name, type, kind, id, label,
- is_listcontrol, nr)
-
- def _find_control(self, name, type, kind, id, label, predicate, nr):
- if ((name is not None) and (name is not Missing) and
- not isstringlike(name)):
- raise TypeError("control name must be string-like")
- if (type is not None) and not isstringlike(type):
- raise TypeError("control type must be string-like")
- if (kind is not None) and not isstringlike(kind):
- raise TypeError("control kind must be string-like")
- if (id is not None) and not isstringlike(id):
- raise TypeError("control id must be string-like")
- if (label is not None) and not isstringlike(label):
- raise TypeError("control label must be string-like")
- if (predicate is not None) and not callable(predicate):
- raise TypeError("control predicate must be callable")
-        if (nr is not None) and nr < 0:
-            raise ValueError("control number must be a non-negative integer")
-
- orig_nr = nr
- found = None
- ambiguous = False
- if nr is None and self.backwards_compat:
- nr = 0
-
- for control in self.controls:
- if ((name is not None and name != control.name) and
- (name is not Missing or control.name is not None)):
- continue
- if type is not None and type != control.type:
- continue
- if kind is not None and not control.is_of_kind(kind):
- continue
- if id is not None and id != control.id:
- continue
- if predicate and not predicate(control):
- continue
- if label:
- for l in control.get_labels():
- if l.text.find(label) > -1:
- break
- else:
- continue
- if nr is not None:
- if nr == 0:
- return control # early exit: unambiguous due to nr
- nr -= 1
- continue
- if found:
- ambiguous = True
- break
- found = control
-
- if found and not ambiguous:
- return found
-
- description = []
- if name is not None: description.append("name %s" % repr(name))
- if type is not None: description.append("type '%s'" % type)
- if kind is not None: description.append("kind '%s'" % kind)
- if id is not None: description.append("id '%s'" % id)
- if label is not None: description.append("label '%s'" % label)
- if predicate is not None:
- description.append("predicate %s" % predicate)
- if orig_nr: description.append("nr %d" % orig_nr)
- description = ", ".join(description)
-
- if ambiguous:
- raise AmbiguityError("more than one control matching "+description)
- elif not found:
- raise ControlNotFoundError("no control matching "+description)
- assert False
-
- def _click(self, name, type, id, label, nr, coord, return_type,
- request_class=urllib2.Request):
- try:
- control = self._find_control(
- name, type, "clickable", id, label, None, nr)
- except ControlNotFoundError:
- if ((name is not None) or (type is not None) or (id is not None) or
- (nr != 0)):
- raise
- # no clickable controls, but no control was explicitly requested,
- # so return state without clicking any control
- return self._switch_click(return_type, request_class)
- else:
- return control._click(self, coord, return_type, request_class)
-
- def _pairs(self):
- """Return sequence of (key, value) pairs suitable for urlencoding."""
- return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()]
-
-
- def _pairs_and_controls(self):
- """Return sequence of (index, key, value, control_index)
- of totally ordered pairs suitable for urlencoding.
-
- control_index is the index of the control in self.controls
- """
- pairs = []
- for control_index in range(len(self.controls)):
- control = self.controls[control_index]
- for ii, key, val in control._totally_ordered_pairs():
- pairs.append((ii, key, val, control_index))
-
- # stable sort by ONLY first item in tuple
- pairs.sort()
-
- return pairs
-
- def _request_data(self):
- """Return a tuple (url, data, headers)."""
- method = self.method.upper()
- #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action)
- parts = self._urlparse(self.action)
- rest, (query, frag) = parts[:-2], parts[-2:]
-
- if method == "GET":
- if self.enctype != "application/x-www-form-urlencoded":
- raise ValueError(
- "unknown GET form encoding type '%s'" % self.enctype)
- parts = rest + (urlencode(self._pairs()), None)
- uri = self._urlunparse(parts)
- return uri, None, []
- elif method == "POST":
- parts = rest + (query, None)
- uri = self._urlunparse(parts)
- if self.enctype == "application/x-www-form-urlencoded":
- return (uri, urlencode(self._pairs()),
- [("Content-Type", self.enctype)])
- elif self.enctype == "multipart/form-data":
- data = StringIO()
- http_hdrs = []
- mw = MimeWriter(data, http_hdrs)
- f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
- prefix=0)
- for ii, k, v, control_index in self._pairs_and_controls():
- self.controls[control_index]._write_mime_data(mw, k, v)
- mw.lastpart()
- return uri, data.getvalue(), http_hdrs
- else:
- raise ValueError(
- "unknown POST form encoding type '%s'" % self.enctype)
- else:
- raise ValueError("Unknown method '%s'" % method)
-
- def _switch_click(self, return_type, request_class=urllib2.Request):
- # This is called by HTMLForm and clickable Controls to hide switching
- # on return_type.
- if return_type == "pairs":
- return self._pairs()
- elif return_type == "request_data":
- return self._request_data()
- else:
- req_data = self._request_data()
- req = request_class(req_data[0], req_data[1])
- for key, val in req_data[2]:
- add_hdr = req.add_header
- if key.lower() == "content-type":
- try:
- add_hdr = req.add_unredirected_header
- except AttributeError:
- # pre-2.4 and not using ClientCookie
- pass
- add_hdr(key, val)
- return req
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py
deleted file mode 100644
index c1e4c6d..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url
deleted file mode 100644
index f34e243..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url
+++ /dev/null
@@ -1 +0,0 @@
-http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url
deleted file mode 100644
index f34e243..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url
+++ /dev/null
@@ -1 +0,0 @@
-http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py
deleted file mode 100644
index c1e4c6d..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py
deleted file mode 100644
index 6f29a65..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py
+++ /dev/null
@@ -1,438 +0,0 @@
-# Copyright (C) 1999--2002 Joel Rosdahl
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-# Joel Rosdahl <joel@rosdahl.net>
-#
-# $Id: ircbot.py,v 1.23 2008/09/11 07:38:30 keltus Exp $
-
-"""ircbot -- Simple IRC bot library.
-
-This module contains a single-server IRC bot class that can be used to
-write simpler bots.
-"""
-
-import sys
-from UserDict import UserDict
-
-from irclib import SimpleIRCClient
-from irclib import nm_to_n, irc_lower, all_events
-from irclib import parse_channel_modes, is_channel
-from irclib import ServerConnectionError
-
-class SingleServerIRCBot(SimpleIRCClient):
- """A single-server IRC bot class.
-
- The bot tries to reconnect if it is disconnected.
-
- The bot keeps track of the channels it has joined, the other
- clients that are present in the channels and which of those that
- have operator or voice modes. The "database" is kept in the
- self.channels attribute, which is an IRCDict of Channels.
- """
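-    # Minimal subclass sketch (illustrative, not part of the original
-    # library text); the server, channel and nickname are hypothetical:
-    #
-    #     class EchoBot(SingleServerIRCBot):
-    #         def on_welcome(self, c, e):
-    #             c.join("#test")
-    #         def on_pubmsg(self, c, e):
-    #             c.privmsg(e.target(), e.arguments()[0])
-    #
-    #     EchoBot([("irc.example.com", 6667)], "echobot", "Echo Bot").start()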
- def __init__(self, server_list, nickname, realname, reconnection_interval=60):
- """Constructor for SingleServerIRCBot objects.
-
- Arguments:
-
- server_list -- A list of tuples (server, port) that
- defines which servers the bot should try to
- connect to.
-
- nickname -- The bot's nickname.
-
- realname -- The bot's realname.
-
- reconnection_interval -- How long the bot should wait
- before trying to reconnect.
-
-            dcc_connections -- A list of initiated/accepted DCC
-                               connections. (This is an attribute
-                               inherited from SimpleIRCClient, not a
-                               constructor argument.)
- """
-
- SimpleIRCClient.__init__(self)
- self.channels = IRCDict()
- self.server_list = server_list
- if not reconnection_interval or reconnection_interval < 0:
- reconnection_interval = 2**31
- self.reconnection_interval = reconnection_interval
-
- self._nickname = nickname
- self._realname = realname
- for i in ["disconnect", "join", "kick", "mode",
- "namreply", "nick", "part", "quit"]:
- self.connection.add_global_handler(i,
- getattr(self, "_on_" + i),
- -10)
- def _connected_checker(self):
- """[Internal]"""
- if not self.connection.is_connected():
- self.connection.execute_delayed(self.reconnection_interval,
- self._connected_checker)
- self.jump_server()
-
- def _connect(self):
- """[Internal]"""
- password = None
- if len(self.server_list[0]) > 2:
- password = self.server_list[0][2]
- try:
- self.connect(self.server_list[0][0],
- self.server_list[0][1],
- self._nickname,
- password,
- ircname=self._realname)
- except ServerConnectionError:
- pass
-
- def _on_disconnect(self, c, e):
- """[Internal]"""
- self.channels = IRCDict()
- self.connection.execute_delayed(self.reconnection_interval,
- self._connected_checker)
-
- def _on_join(self, c, e):
- """[Internal]"""
- ch = e.target()
- nick = nm_to_n(e.source())
- if nick == c.get_nickname():
- self.channels[ch] = Channel()
- self.channels[ch].add_user(nick)
-
- def _on_kick(self, c, e):
- """[Internal]"""
- nick = e.arguments()[0]
- channel = e.target()
-
- if nick == c.get_nickname():
- del self.channels[channel]
- else:
- self.channels[channel].remove_user(nick)
-
- def _on_mode(self, c, e):
- """[Internal]"""
- modes = parse_channel_modes(" ".join(e.arguments()))
- t = e.target()
- if is_channel(t):
- ch = self.channels[t]
- for mode in modes:
- if mode[0] == "+":
- f = ch.set_mode
- else:
- f = ch.clear_mode
- f(mode[1], mode[2])
- else:
- # Mode on self... XXX
- pass
-
- def _on_namreply(self, c, e):
- """[Internal]"""
-
- # e.arguments()[0] == "@" for secret channels,
- # "*" for private channels,
- # "=" for others (public channels)
- # e.arguments()[1] == channel
- # e.arguments()[2] == nick list
-
- ch = e.arguments()[1]
- for nick in e.arguments()[2].split():
- if nick[0] == "@":
- nick = nick[1:]
- self.channels[ch].set_mode("o", nick)
- elif nick[0] == "+":
- nick = nick[1:]
- self.channels[ch].set_mode("v", nick)
- self.channels[ch].add_user(nick)
-
- def _on_nick(self, c, e):
- """[Internal]"""
- before = nm_to_n(e.source())
- after = e.target()
- for ch in self.channels.values():
- if ch.has_user(before):
- ch.change_nick(before, after)
-
- def _on_part(self, c, e):
- """[Internal]"""
- nick = nm_to_n(e.source())
- channel = e.target()
-
- if nick == c.get_nickname():
- del self.channels[channel]
- else:
- self.channels[channel].remove_user(nick)
-
- def _on_quit(self, c, e):
- """[Internal]"""
- nick = nm_to_n(e.source())
- for ch in self.channels.values():
- if ch.has_user(nick):
- ch.remove_user(nick)
-
- def die(self, msg="Bye, cruel world!"):
- """Let the bot die.
-
- Arguments:
-
- msg -- Quit message.
- """
-
- self.connection.disconnect(msg)
- sys.exit(0)
-
- def disconnect(self, msg="I'll be back!"):
- """Disconnect the bot.
-
- The bot will try to reconnect after a while.
-
- Arguments:
-
- msg -- Quit message.
- """
- self.connection.disconnect(msg)
-
- def get_version(self):
- """Returns the bot version.
-
- Used when answering a CTCP VERSION request.
- """
- return "ircbot.py by Joel Rosdahl <joel@rosdahl.net>"
-
- def jump_server(self, msg="Changing servers"):
- """Connect to a new server, possibly disconnecting from the current.
-
- The bot will skip to next server in the server_list each time
- jump_server is called.
- """
- if self.connection.is_connected():
- self.connection.disconnect(msg)
-
- self.server_list.append(self.server_list.pop(0))
- self._connect()
-
- def on_ctcp(self, c, e):
- """Default handler for ctcp events.
-
- Replies to VERSION and PING requests and relays DCC requests
- to the on_dccchat method.
- """
- if e.arguments()[0] == "VERSION":
- c.ctcp_reply(nm_to_n(e.source()),
- "VERSION " + self.get_version())
- elif e.arguments()[0] == "PING":
- if len(e.arguments()) > 1:
- c.ctcp_reply(nm_to_n(e.source()),
- "PING " + e.arguments()[1])
- elif e.arguments()[0] == "DCC" and e.arguments()[1].split(" ", 1)[0] == "CHAT":
- self.on_dccchat(c, e)
-
- def on_dccchat(self, c, e):
- pass
-
- def start(self):
- """Start the bot."""
- self._connect()
- SimpleIRCClient.start(self)
-
-
-class IRCDict:
- """A dictionary suitable for storing IRC-related things.
-
- Dictionary keys a and b are considered equal if and only if
- irc_lower(a) == irc_lower(b)
-
- Otherwise, it should behave exactly as a normal dictionary.
- """
-
- def __init__(self, dict=None):
- self.data = {}
- self.canon_keys = {} # Canonical keys
- if dict is not None:
- self.update(dict)
- def __repr__(self):
- return repr(self.data)
- def __cmp__(self, dict):
- if isinstance(dict, IRCDict):
- return cmp(self.data, dict.data)
- else:
- return cmp(self.data, dict)
- def __len__(self):
- return len(self.data)
- def __getitem__(self, key):
- return self.data[self.canon_keys[irc_lower(key)]]
- def __setitem__(self, key, item):
- if key in self:
- del self[key]
- self.data[key] = item
- self.canon_keys[irc_lower(key)] = key
- def __delitem__(self, key):
- ck = irc_lower(key)
- del self.data[self.canon_keys[ck]]
- del self.canon_keys[ck]
- def __iter__(self):
- return iter(self.data)
- def __contains__(self, key):
- return self.has_key(key)
- def clear(self):
- self.data.clear()
- self.canon_keys.clear()
- def copy(self):
- if self.__class__ is UserDict:
- return UserDict(self.data)
- import copy
- return copy.copy(self)
- def keys(self):
- return self.data.keys()
- def items(self):
- return self.data.items()
- def values(self):
- return self.data.values()
- def has_key(self, key):
- return irc_lower(key) in self.canon_keys
-    def update(self, dict):
-        for k, v in dict.items():
-            # go through __setitem__ so canon_keys stays in sync
-            self[k] = v
- def get(self, key, failobj=None):
- return self.data.get(key, failobj)
-
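-# Illustrative sketch (not from the original docs) of the case-insensitive
-# key behaviour described above:
-#
-#     d = IRCDict()
-#     d["#Foo"] = 1
-#     "#foo" in d   # -> True
-#     d["#FOO"]     # -> 1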
-
-class Channel:
- """A class for keeping information about an IRC channel.
-
- This class can be improved a lot.
- """
-
- def __init__(self):
- self.userdict = IRCDict()
- self.operdict = IRCDict()
- self.voiceddict = IRCDict()
- self.modes = {}
-
- def users(self):
- """Returns an unsorted list of the channel's users."""
- return self.userdict.keys()
-
- def opers(self):
- """Returns an unsorted list of the channel's operators."""
- return self.operdict.keys()
-
- def voiced(self):
- """Returns an unsorted list of the persons that have voice
- mode set in the channel."""
- return self.voiceddict.keys()
-
- def has_user(self, nick):
- """Check whether the channel has a user."""
- return nick in self.userdict
-
- def is_oper(self, nick):
- """Check whether a user has operator status in the channel."""
- return nick in self.operdict
-
- def is_voiced(self, nick):
- """Check whether a user has voice mode set in the channel."""
- return nick in self.voiceddict
-
- def add_user(self, nick):
- self.userdict[nick] = 1
-
- def remove_user(self, nick):
- for d in self.userdict, self.operdict, self.voiceddict:
- if nick in d:
- del d[nick]
-
- def change_nick(self, before, after):
- self.userdict[after] = 1
- del self.userdict[before]
- if before in self.operdict:
- self.operdict[after] = 1
- del self.operdict[before]
- if before in self.voiceddict:
- self.voiceddict[after] = 1
- del self.voiceddict[before]
-
- def set_mode(self, mode, value=None):
- """Set mode on the channel.
-
- Arguments:
-
- mode -- The mode (a single-character string).
-
- value -- Value
- """
- if mode == "o":
- self.operdict[value] = 1
- elif mode == "v":
- self.voiceddict[value] = 1
- else:
- self.modes[mode] = value
-
- def clear_mode(self, mode, value=None):
- """Clear mode on the channel.
-
- Arguments:
-
- mode -- The mode (a single-character string).
-
- value -- Value
- """
- try:
- if mode == "o":
- del self.operdict[value]
- elif mode == "v":
- del self.voiceddict[value]
- else:
- del self.modes[mode]
- except KeyError:
- pass
-
- def has_mode(self, mode):
- return mode in self.modes
-
- def is_moderated(self):
- return self.has_mode("m")
-
- def is_secret(self):
- return self.has_mode("s")
-
- def is_protected(self):
- return self.has_mode("p")
-
- def has_topic_lock(self):
- return self.has_mode("t")
-
- def is_invite_only(self):
- return self.has_mode("i")
-
- def has_allow_external_messages(self):
- return self.has_mode("n")
-
- def has_limit(self):
- return self.has_mode("l")
-
- def limit(self):
- if self.has_limit():
-            return self.modes["l"]
- else:
- return None
-
- def has_key(self):
- return self.has_mode("k")
-
- def key(self):
- if self.has_key():
- return self.modes["k"]
- else:
- return None
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py
deleted file mode 100644
index 5f7141c..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py
+++ /dev/null
@@ -1,1560 +0,0 @@
-# Copyright (C) 1999--2002 Joel Rosdahl
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-# keltus <keltus@users.sourceforge.net>
-#
-# $Id: irclib.py,v 1.47 2008/09/25 22:00:59 keltus Exp $
-
-"""irclib -- Internet Relay Chat (IRC) protocol client library.
-
-This library is intended to encapsulate the IRC protocol at a quite
-low level. It provides an event-driven IRC client framework. It has
-fairly thorough support for the basic IRC protocol, CTCP and DCC chat,
-but DCC file transfers are not yet supported.
-
-In order to understand how to make an IRC client, I'm afraid you more
-or less must understand the IRC specifications. They are available
-here: [IRC specifications].
-
-The main features of the IRC client framework are:
-
- * Abstraction of the IRC protocol.
- * Handles multiple simultaneous IRC server connections.
- * Handles server PONGing transparently.
- * Messages to the IRC server are done by calling methods on an IRC
- connection object.
- * Messages from an IRC server triggers events, which can be caught
- by event handlers.
- * Reading from and writing to IRC server sockets are normally done
- by an internal select() loop, but the select()ing may be done by
- an external main loop.
- * Functions can be registered to execute at specified times by the
- event-loop.
- * Decodes CTCP tagging correctly (hopefully); I haven't seen any
- other IRC client implementation that handles the CTCP
-    specification subtleties.
- * A kind of simple, single-server, object-oriented IRC client class
- that dispatches events to instance methods is included.
-
-Current limitations:
-
- * The IRC protocol shines through the abstraction a bit too much.
- * Data is not written asynchronously to the server, i.e. the write()
- may block if the TCP buffers are stuffed.
-  * There is no support for DCC file transfers.
-  * The author hasn't even read RFC 2810, 2811, 2812 and 2813.
- * Like most projects, documentation is lacking...
-
-.. [IRC specifications] http://www.irchelp.org/irchelp/rfc/
-"""
-
-import bisect
-import re
-import select
-import socket
-import string
-import sys
-import time
-import types
-
-VERSION = 0, 4, 8
-DEBUG = 0
-
-# TODO
-# ----
-# (maybe) thread safety
-# (maybe) color parser convenience functions
-# documentation (including all event types)
-# (maybe) add awareness of different types of ircds
-# send data asynchronously to the server (and DCC connections)
-# (maybe) automatically close unused, passive DCC connections after a while
-
-# NOTES
-# -----
-# connection.quit() only sends QUIT to the server.
-# ERROR from the server triggers the error event and the disconnect event.
-# dropping of the connection triggers the disconnect event.
-
-class IRCError(Exception):
- """Represents an IRC exception."""
- pass
-
-
-class IRC:
- """Class that handles one or several IRC server connections.
-
- When an IRC object has been instantiated, it can be used to create
- Connection objects that represent the IRC connections. The
- responsibility of the IRC object is to provide an event-driven
- framework for the connections and to keep the connections alive.
- It runs a select loop to poll each connection's TCP socket and
- hands over the sockets with incoming data for processing by the
- corresponding connection.
-
- The methods of most interest for an IRC client writer are server,
- add_global_handler, remove_global_handler, execute_at,
- execute_delayed, process_once and process_forever.
-
- Here is an example:
-
- irc = irclib.IRC()
- server = irc.server()
- server.connect(\"irc.some.where\", 6667, \"my_nickname\")
- server.privmsg(\"a_nickname\", \"Hi there!\")
- irc.process_forever()
-
- This will connect to the IRC server irc.some.where on port 6667
- using the nickname my_nickname and send the message \"Hi there!\"
- to the nickname a_nickname.
- """
-
- def __init__(self, fn_to_add_socket=None,
- fn_to_remove_socket=None,
- fn_to_add_timeout=None):
- """Constructor for IRC objects.
-
- Optional arguments are fn_to_add_socket, fn_to_remove_socket
- and fn_to_add_timeout. The first two specify functions that
- will be called with a socket object as argument when the IRC
- object wants to be notified (or stop being notified) of data
- coming on a new socket. When new data arrives, the method
- process_data should be called. Similarly, fn_to_add_timeout
- is called with a number of seconds (a floating point number)
- as first argument when the IRC object wants to receive a
- notification (by calling the process_timeout method). So, if
- e.g. the argument is 42.17, the object wants the
- process_timeout method to be called after 42 seconds and 170
- milliseconds.
-
- The three arguments mainly exist to be able to use an external
- main loop (for example Tkinter's or PyGTK's main app loop)
- instead of calling the process_forever method.
-
- An alternative is to just call ServerConnection.process_once()
- once in a while.
- """
-
- if fn_to_add_socket and fn_to_remove_socket:
- self.fn_to_add_socket = fn_to_add_socket
- self.fn_to_remove_socket = fn_to_remove_socket
- else:
- self.fn_to_add_socket = None
- self.fn_to_remove_socket = None
-
- self.fn_to_add_timeout = fn_to_add_timeout
- self.connections = []
- self.handlers = {}
- self.delayed_commands = [] # list of tuples in the format (time, function, arguments)
-
- self.add_global_handler("ping", _ping_ponger, -42)
-
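- # Illustrative sketch (hypothetical callback names, not part of the
- # original module): driving an IRC object from an external main loop
- # instead of process_forever(). `loop_add_reader`, `loop_remove_reader`
- # and `loop_call_later` stand in for whatever the host event loop
- # provides.
- #
- # irc = IRC(fn_to_add_socket=loop_add_reader,
- # fn_to_remove_socket=loop_remove_reader,
- # fn_to_add_timeout=loop_call_later)
- # # When the loop reports that socket s is readable:
- # irc.process_data([s])
- # # When a requested timeout fires:
- # irc.process_timeout()
-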
- def server(self):
- """Creates and returns a ServerConnection object."""
-
- c = ServerConnection(self)
- self.connections.append(c)
- return c
-
- def process_data(self, sockets):
- """Called when there is more data to read on connection sockets.
-
- Arguments:
-
- sockets -- A list of socket objects.
-
- See documentation for IRC.__init__.
- """
- for s in sockets:
- for c in self.connections:
- if s == c._get_socket():
- c.process_data()
-
- def process_timeout(self):
- """Called when a timeout notification is due.
-
- See documentation for IRC.__init__.
- """
- t = time.time()
- while self.delayed_commands:
- if t >= self.delayed_commands[0][0]:
- self.delayed_commands[0][1](*self.delayed_commands[0][2])
- del self.delayed_commands[0]
- else:
- break
-
- def process_once(self, timeout=0):
- """Process data from connections once.
-
- Arguments:
-
- timeout -- How long the select() call should wait if no
- data is available.
-
- This method should be called periodically to check and process
- incoming data, if there are any. If that seems boring, look
- at the process_forever method.
- """
- sockets = map(lambda x: x._get_socket(), self.connections)
- sockets = filter(lambda x: x != None, sockets)
- if sockets:
- (i, o, e) = select.select(sockets, [], [], timeout)
- self.process_data(i)
- else:
- time.sleep(timeout)
- self.process_timeout()
-
- def process_forever(self, timeout=0.2):
- """Run an infinite loop, processing data from connections.
-
- This method repeatedly calls process_once.
-
- Arguments:
-
- timeout -- Parameter to pass to process_once.
- """
- while 1:
- self.process_once(timeout)
-
- def disconnect_all(self, message=""):
- """Disconnects all connections."""
- for c in self.connections:
- c.disconnect(message)
-
- def add_global_handler(self, event, handler, priority=0):
- """Adds a global handler function for a specific event type.
-
- Arguments:
-
- event -- Event type (a string). Check the values of the
- numeric_events dictionary in irclib.py for possible event
- types.
-
- handler -- Callback function.
-
- priority -- A number (the lower number, the higher priority).
-
- The handler function is called whenever the specified event is
- triggered in any of the connections. See documentation for
- the Event class.
-
- The handler functions are called in priority order (lowest
- number is highest priority). If a handler function returns
- \"NO MORE\", no more handlers will be called.
- """
- if not event in self.handlers:
- self.handlers[event] = []
- bisect.insort(self.handlers[event], ((priority, handler)))
-
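- # Illustrative sketch (assumed handler name, not part of the original
- # docs): registering a global "pubmsg" handler on an IRC instance `irc`.
- #
- # def on_pubmsg(connection, event):
- # print event.source(), "->", event.arguments()[0]
- #
- # irc.add_global_handler("pubmsg", on_pubmsg)
-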
- def remove_global_handler(self, event, handler):
- """Removes a global handler function.
-
- Arguments:
-
- event -- Event type (a string).
-
- handler -- Callback function.
-
- Returns 1 on success, otherwise 0.
- """
- if not event in self.handlers:
- return 0
- for h in self.handlers[event]:
- if handler == h[1]:
- self.handlers[event].remove(h)
- return 1
-
- def execute_at(self, at, function, arguments=()):
- """Execute a function at a specified time.
-
- Arguments:
-
- at -- Execute at this time (standard \"time_t\" time).
-
- function -- Function to call.
-
- arguments -- Arguments to give the function.
- """
- self.execute_delayed(at-time.time(), function, arguments)
-
- def execute_delayed(self, delay, function, arguments=()):
- """Execute a function after a specified time.
-
- Arguments:
-
- delay -- How many seconds to wait.
-
- function -- Function to call.
-
- arguments -- Arguments to give the function.
- """
- bisect.insort(self.delayed_commands, (delay+time.time(), function, arguments))
- if self.fn_to_add_timeout:
- self.fn_to_add_timeout(delay)
-
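- # Illustrative sketch (assumed names): scheduling work through the
- # event loop; `say_hello` is a hypothetical zero-argument callable.
- #
- # irc.execute_delayed(30.0, say_hello)
- # irc.execute_at(time.time() + 60, say_hello)
-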
- def dcc(self, dcctype="chat"):
- """Creates and returns a DCCConnection object.
-
- Arguments:
-
- dcctype -- "chat" for DCC CHAT connections or "raw" for
- DCC SEND (or other DCC types). If "chat",
- incoming data will be split in newline-separated
- chunks. If "raw", incoming data is not touched.
- """
- c = DCCConnection(self, dcctype)
- self.connections.append(c)
- return c
-
- def _handle_event(self, connection, event):
- """[Internal]"""
- h = self.handlers
- for handler in h.get("all_events", []) + h.get(event.eventtype(), []):
- if handler[1](connection, event) == "NO MORE":
- return
-
- def _remove_connection(self, connection):
- """[Internal]"""
- self.connections.remove(connection)
- if self.fn_to_remove_socket:
- self.fn_to_remove_socket(connection._get_socket())
-
-_rfc_1459_command_regexp = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?")
-
-class Connection:
- """Base class for IRC connections.
-
- Must be overridden.
- """
- def __init__(self, irclibobj):
- self.irclibobj = irclibobj
-
- def _get_socket(self):
- raise IRCError, "Not overridden"
-
- ##############################
- ### Convenience wrappers.
-
- def execute_at(self, at, function, arguments=()):
- self.irclibobj.execute_at(at, function, arguments)
-
- def execute_delayed(self, delay, function, arguments=()):
- self.irclibobj.execute_delayed(delay, function, arguments)
-
-
-class ServerConnectionError(IRCError):
- pass
-
-class ServerNotConnectedError(ServerConnectionError):
- pass
-
-
-# Huh!? Crrrrazy EFNet doesn't follow the RFC: their ircd seems to
-# use \n as message separator! :P
-_linesep_regexp = re.compile("\r?\n")
-
-class ServerConnection(Connection):
- """This class represents an IRC server connection.
-
- ServerConnection objects are instantiated by calling the server
- method on an IRC object.
- """
-
- def __init__(self, irclibobj):
- Connection.__init__(self, irclibobj)
- self.connected = 0 # Not connected yet.
- self.socket = None
- self.ssl = None
-
- def connect(self, server, port, nickname, password=None, username=None,
- ircname=None, localaddress="", localport=0, ssl=False, ipv6=False):
- """Connect/reconnect to a server.
-
- Arguments:
-
- server -- Server name.
-
- port -- Port number.
-
- nickname -- The nickname.
-
- password -- Password (if any).
-
- username -- The username.
-
- ircname -- The IRC name ("realname").
-
- localaddress -- Bind the connection to a specific local IP address.
-
- localport -- Bind the connection to a specific local port.
-
- ssl -- Enable support for ssl.
-
- ipv6 -- Enable support for ipv6.
-
- This function can be called to reconnect a closed connection.
-
- Returns the ServerConnection object.
- """
- if self.connected:
- self.disconnect("Changing servers")
-
- self.previous_buffer = ""
- self.handlers = {}
- self.real_server_name = ""
- self.real_nickname = nickname
- self.server = server
- self.port = port
- self.nickname = nickname
- self.username = username or nickname
- self.ircname = ircname or nickname
- self.password = password
- self.localaddress = localaddress
- self.localport = localport
- self.localhost = socket.gethostname()
- if ipv6:
- self.socket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
- else:
- self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- try:
- self.socket.bind((self.localaddress, self.localport))
- self.socket.connect((self.server, self.port))
- if ssl:
- self.ssl = socket.ssl(self.socket)
- except socket.error, x:
- self.socket.close()
- self.socket = None
- raise ServerConnectionError, "Couldn't connect to socket: %s" % x
- self.connected = 1
- if self.irclibobj.fn_to_add_socket:
- self.irclibobj.fn_to_add_socket(self.socket)
-
- # Log on...
- if self.password:
- self.pass_(self.password)
- self.nick(self.nickname)
- self.user(self.username, self.ircname)
- return self
-
- def close(self):
- """Close the connection.
-
- This method closes the connection permanently; after it has
- been called, the object is unusable.
- """
-
- self.disconnect("Closing object")
- self.irclibobj._remove_connection(self)
-
- def _get_socket(self):
- """[Internal]"""
- return self.socket
-
- def get_server_name(self):
- """Get the (real) server name.
-
- This method returns the (real) server name, or, more
- specifically, what the server calls itself.
- """
-
- if self.real_server_name:
- return self.real_server_name
- else:
- return ""
-
- def get_nickname(self):
- """Get the (real) nick name.
-
- This method returns the (real) nickname. The library keeps
- track of nick changes, so it might not be the nick name that
- was passed to the connect() method. """
-
- return self.real_nickname
-
- def process_data(self):
- """[Internal]"""
-
- try:
- if self.ssl:
- new_data = self.ssl.read(2**14)
- else:
- new_data = self.socket.recv(2**14)
- except socket.error, x:
- # The server hung up.
- self.disconnect("Connection reset by peer")
- return
- if not new_data:
- # Read nothing: connection must be down.
- self.disconnect("Connection reset by peer")
- return
-
- lines = _linesep_regexp.split(self.previous_buffer + new_data)
-
- # Save the last, unfinished line.
- self.previous_buffer = lines.pop()
-
- for line in lines:
- if DEBUG:
- print "FROM SERVER:", line
-
- if not line:
- continue
-
- prefix = None
- command = None
- arguments = None
- self._handle_event(Event("all_raw_messages",
- self.get_server_name(),
- None,
- [line]))
-
- m = _rfc_1459_command_regexp.match(line)
- if m.group("prefix"):
- prefix = m.group("prefix")
- if not self.real_server_name:
- self.real_server_name = prefix
-
- if m.group("command"):
- command = m.group("command").lower()
-
- if m.group("argument"):
- a = m.group("argument").split(" :", 1)
- arguments = a[0].split()
- if len(a) == 2:
- arguments.append(a[1])
-
- # Translate numerics into more readable strings.
- if command in numeric_events:
- command = numeric_events[command]
-
- if command == "nick":
- if nm_to_n(prefix) == self.real_nickname:
- self.real_nickname = arguments[0]
- elif command == "welcome":
- # Record the nickname in case the client changed nick
- # in a nicknameinuse callback.
- self.real_nickname = arguments[0]
-
- if command in ["privmsg", "notice"]:
- target, message = arguments[0], arguments[1]
- messages = _ctcp_dequote(message)
-
- if command == "privmsg":
- if is_channel(target):
- command = "pubmsg"
- else:
- if is_channel(target):
- command = "pubnotice"
- else:
- command = "privnotice"
-
- for m in messages:
- if type(m) is types.TupleType:
- if command in ["privmsg", "pubmsg"]:
- command = "ctcp"
- else:
- command = "ctcpreply"
-
- m = list(m)
- if DEBUG:
- print "command: %s, source: %s, target: %s, arguments: %s" % (
- command, prefix, target, m)
- self._handle_event(Event(command, prefix, target, m))
- if command == "ctcp" and m[0] == "ACTION":
- self._handle_event(Event("action", prefix, target, m[1:]))
- else:
- if DEBUG:
- print "command: %s, source: %s, target: %s, arguments: %s" % (
- command, prefix, target, [m])
- self._handle_event(Event(command, prefix, target, [m]))
- else:
- target = None
-
- if command == "quit":
- arguments = [arguments[0]]
- elif command == "ping":
- target = arguments[0]
- else:
- target = arguments[0]
- arguments = arguments[1:]
-
- if command == "mode":
- if not is_channel(target):
- command = "umode"
-
- if DEBUG:
- print "command: %s, source: %s, target: %s, arguments: %s" % (
- command, prefix, target, arguments)
- self._handle_event(Event(command, prefix, target, arguments))
-
- def _handle_event(self, event):
- """[Internal]"""
- self.irclibobj._handle_event(self, event)
- if event.eventtype() in self.handlers:
- for fn in self.handlers[event.eventtype()]:
- fn(self, event)
-
- def is_connected(self):
- """Return connection status.
-
- Returns true if connected, otherwise false.
- """
- return self.connected
-
- def add_global_handler(self, *args):
- """Add global handler.
-
- See documentation for IRC.add_global_handler.
- """
- self.irclibobj.add_global_handler(*args)
-
- def remove_global_handler(self, *args):
- """Remove global handler.
-
- See documentation for IRC.remove_global_handler.
- """
- self.irclibobj.remove_global_handler(*args)
-
- def action(self, target, action):
- """Send a CTCP ACTION command."""
- self.ctcp("ACTION", target, action)
-
- def admin(self, server=""):
- """Send an ADMIN command."""
- self.send_raw(" ".join(["ADMIN", server]).strip())
-
- def ctcp(self, ctcptype, target, parameter=""):
- """Send a CTCP command."""
- ctcptype = ctcptype.upper()
- self.privmsg(target, "\001%s%s\001" % (ctcptype, parameter and (" " + parameter) or ""))
-
- def ctcp_reply(self, target, parameter):
- """Send a CTCP REPLY command."""
- self.notice(target, "\001%s\001" % parameter)
-
- def disconnect(self, message=""):
- """Hang up the connection.
-
- Arguments:
-
- message -- Quit message.
- """
- if not self.connected:
- return
-
- self.connected = 0
-
- self.quit(message)
-
- try:
- self.socket.close()
- except socket.error, x:
- pass
- self.socket = None
- self._handle_event(Event("disconnect", self.server, "", [message]))
-
- def globops(self, text):
- """Send a GLOBOPS command."""
- self.send_raw("GLOBOPS :" + text)
-
- def info(self, server=""):
- """Send an INFO command."""
- self.send_raw(" ".join(["INFO", server]).strip())
-
- def invite(self, nick, channel):
- """Send an INVITE command."""
- self.send_raw(" ".join(["INVITE", nick, channel]).strip())
-
- def ison(self, nicks):
- """Send an ISON command.
-
- Arguments:
-
- nicks -- List of nicks.
- """
- self.send_raw("ISON " + " ".join(nicks))
-
- def join(self, channel, key=""):
- """Send a JOIN command."""
- self.send_raw("JOIN %s%s" % (channel, (key and (" " + key))))
-
- def kick(self, channel, nick, comment=""):
- """Send a KICK command."""
- self.send_raw("KICK %s %s%s" % (channel, nick, (comment and (" :" + comment))))
-
- def links(self, remote_server="", server_mask=""):
- """Send a LINKS command."""
- command = "LINKS"
- if remote_server:
- command = command + " " + remote_server
- if server_mask:
- command = command + " " + server_mask
- self.send_raw(command)
-
- def list(self, channels=None, server=""):
- """Send a LIST command."""
- command = "LIST"
- if channels:
- command = command + " " + ",".join(channels)
- if server:
- command = command + " " + server
- self.send_raw(command)
-
- def lusers(self, server=""):
- """Send a LUSERS command."""
- self.send_raw("LUSERS" + (server and (" " + server)))
-
- def mode(self, target, command):
- """Send a MODE command."""
- self.send_raw("MODE %s %s" % (target, command))
-
- def motd(self, server=""):
- """Send an MOTD command."""
- self.send_raw("MOTD" + (server and (" " + server)))
-
- def names(self, channels=None):
- """Send a NAMES command."""
- self.send_raw("NAMES" + (channels and (" " + ",".join(channels)) or ""))
-
- def nick(self, newnick):
- """Send a NICK command."""
- self.send_raw("NICK " + newnick)
-
- def notice(self, target, text):
- """Send a NOTICE command."""
- # Should limit len(text) here!
- self.send_raw("NOTICE %s :%s" % (target, text))
-
- def oper(self, nick, password):
- """Send an OPER command."""
- self.send_raw("OPER %s %s" % (nick, password))
-
- def part(self, channels, message=""):
- """Send a PART command."""
- if type(channels) == types.StringType:
- self.send_raw("PART " + channels + (message and (" " + message)))
- else:
- self.send_raw("PART " + ",".join(channels) + (message and (" " + message)))
-
- def pass_(self, password):
- """Send a PASS command."""
- self.send_raw("PASS " + password)
-
- def ping(self, target, target2=""):
- """Send a PING command."""
- self.send_raw("PING %s%s" % (target, target2 and (" " + target2)))
-
- def pong(self, target, target2=""):
- """Send a PONG command."""
- self.send_raw("PONG %s%s" % (target, target2 and (" " + target2)))
-
- def privmsg(self, target, text):
- """Send a PRIVMSG command."""
- # Should limit len(text) here!
- self.send_raw("PRIVMSG %s :%s" % (target, text))
-
- def privmsg_many(self, targets, text):
- """Send a PRIVMSG command to multiple targets."""
- # Should limit len(text) here!
- self.send_raw("PRIVMSG %s :%s" % (",".join(targets), text))
-
- def quit(self, message=""):
- """Send a QUIT command."""
- # Note that many IRC servers don't use your QUIT message
- # unless you've been connected for at least 5 minutes!
- self.send_raw("QUIT" + (message and (" :" + message)))
-
- def send_raw(self, string):
- """Send raw string to the server.
-
- The string will be padded with appropriate CR LF.
- """
- if self.socket is None:
- raise ServerNotConnectedError, "Not connected."
- try:
- if self.ssl:
- self.ssl.write(string + "\r\n")
- else:
- self.socket.send(string + "\r\n")
- if DEBUG:
- print "TO SERVER:", string
- except socket.error, x:
- # Ouch!
- self.disconnect("Connection reset by peer.")
-
- def squit(self, server, comment=""):
- """Send an SQUIT command."""
- self.send_raw("SQUIT %s%s" % (server, comment and (" :" + comment)))
-
- def stats(self, statstype, server=""):
- """Send a STATS command."""
- self.send_raw("STATS %s%s" % (statstype, server and (" " + server)))
-
- def time(self, server=""):
- """Send a TIME command."""
- self.send_raw("TIME" + (server and (" " + server)))
-
- def topic(self, channel, new_topic=None):
- """Send a TOPIC command."""
- if new_topic is None:
- self.send_raw("TOPIC " + channel)
- else:
- self.send_raw("TOPIC %s :%s" % (channel, new_topic))
-
- def trace(self, target=""):
- """Send a TRACE command."""
- self.send_raw("TRACE" + (target and (" " + target)))
-
- def user(self, username, realname):
- """Send a USER command."""
- self.send_raw("USER %s 0 * :%s" % (username, realname))
-
- def userhost(self, nicks):
- """Send a USERHOST command."""
- self.send_raw("USERHOST " + ",".join(nicks))
-
- def users(self, server=""):
- """Send a USERS command."""
- self.send_raw("USERS" + (server and (" " + server)))
-
- def version(self, server=""):
- """Send a VERSION command."""
- self.send_raw("VERSION" + (server and (" " + server)))
-
- def wallops(self, text):
- """Send a WALLOPS command."""
- self.send_raw("WALLOPS :" + text)
-
- def who(self, target="", op=""):
- """Send a WHO command."""
- self.send_raw("WHO%s%s" % (target and (" " + target), op and (" o")))
-
- def whois(self, targets):
- """Send a WHOIS command."""
- self.send_raw("WHOIS " + ",".join(targets))
-
- def whowas(self, nick, max="", server=""):
- """Send a WHOWAS command."""
- self.send_raw("WHOWAS %s%s%s" % (nick,
- max and (" " + max),
- server and (" " + server)))
-
-class DCCConnectionError(IRCError):
- pass
-
-
-class DCCConnection(Connection):
- """This class represents a DCC connection.
-
- DCCConnection objects are instantiated by calling the dcc
- method on an IRC object.
- """
- def __init__(self, irclibobj, dcctype):
- Connection.__init__(self, irclibobj)
- self.connected = 0
- self.passive = 0
- self.dcctype = dcctype
- self.peeraddress = None
- self.peerport = None
-
- def connect(self, address, port):
- """Connect/reconnect to a DCC peer.
-
- Arguments:
- address -- Host/IP address of the peer.
-
- port -- The port number to connect to.
-
- Returns the DCCConnection object.
- """
- self.peeraddress = socket.gethostbyname(address)
- self.peerport = port
- self.socket = None
- self.previous_buffer = ""
- self.handlers = {}
- self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- self.passive = 0
- try:
- self.socket.connect((self.peeraddress, self.peerport))
- except socket.error, x:
- raise DCCConnectionError, "Couldn't connect to socket: %s" % x
- self.connected = 1
- if self.irclibobj.fn_to_add_socket:
- self.irclibobj.fn_to_add_socket(self.socket)
- return self
-
- def listen(self):
- """Wait for a connection/reconnection from a DCC peer.
-
- Returns the DCCConnection object.
-
- The local IP address and port are available as
- self.localaddress and self.localport. After connection from a
- peer, the peer address and port are available as
- self.peeraddress and self.peerport.
- """
- self.previous_buffer = ""
- self.handlers = {}
- self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- self.passive = 1
- try:
- self.socket.bind((socket.gethostbyname(socket.gethostname()), 0))
- self.localaddress, self.localport = self.socket.getsockname()
- self.socket.listen(10)
- except socket.error, x:
- raise DCCConnectionError, "Couldn't bind socket: %s" % x
- return self
-
- def disconnect(self, message=""):
- """Hang up the connection and close the object.
-
- Arguments:
-
- message -- Quit message.
- """
- if not self.connected:
- return
-
- self.connected = 0
- try:
- self.socket.close()
- except socket.error, x:
- pass
- self.socket = None
- self.irclibobj._handle_event(
- self,
- Event("dcc_disconnect", self.peeraddress, "", [message]))
- self.irclibobj._remove_connection(self)
-
- def process_data(self):
- """[Internal]"""
-
- if self.passive and not self.connected:
- conn, (self.peeraddress, self.peerport) = self.socket.accept()
- self.socket.close()
- self.socket = conn
- self.connected = 1
- if DEBUG:
- print "DCC connection from %s:%d" % (
- self.peeraddress, self.peerport)
- self.irclibobj._handle_event(
- self,
- Event("dcc_connect", self.peeraddress, None, None))
- return
-
- try:
- new_data = self.socket.recv(2**14)
- except socket.error, x:
- # The server hung up.
- self.disconnect("Connection reset by peer")
- return
- if not new_data:
- # Read nothing: connection must be down.
- self.disconnect("Connection reset by peer")
- return
-
- if self.dcctype == "chat":
- # The specification says lines are terminated with LF, but
- # it seems safer to handle CR LF terminations too.
- chunks = _linesep_regexp.split(self.previous_buffer + new_data)
-
- # Save the last, unfinished line.
- self.previous_buffer = chunks[-1]
- if len(self.previous_buffer) > 2**14:
- # Bad peer! Naughty peer!
- self.disconnect()
- return
- chunks = chunks[:-1]
- else:
- chunks = [new_data]
-
- command = "dccmsg"
- prefix = self.peeraddress
- target = None
- for chunk in chunks:
- if DEBUG:
- print "FROM PEER:", chunk
- arguments = [chunk]
- if DEBUG:
- print "command: %s, source: %s, target: %s, arguments: %s" % (
- command, prefix, target, arguments)
- self.irclibobj._handle_event(
- self,
- Event(command, prefix, target, arguments))
-
- def _get_socket(self):
- """[Internal]"""
- return self.socket
-
- def privmsg(self, string):
- """Send data to DCC peer.
-
- The string will be padded with appropriate LF if it's a DCC
- CHAT session.
- """
- try:
- self.socket.send(string)
- if self.dcctype == "chat":
- self.socket.send("\n")
- if DEBUG:
- print "TO PEER: %s\n" % string
- except socket.error, x:
- # Ouch!
- self.disconnect("Connection reset by peer.")
-
-class SimpleIRCClient:
- """A simple single-server IRC client class.
-
- This is an example of an object-oriented wrapper of the IRC
- framework. A real IRC client can be made by subclassing this
- class and adding appropriate methods.
-
- The method on_join will be called when a "join" event is created
- (which is done when the server sends a JOIN message/command),
- on_privmsg will be called for "privmsg" events, and so on. The
- handler methods get two arguments: the connection object (same as
- self.connection) and the event object.
-
- Instance attributes that can be used by subclasses:
-
- ircobj -- The IRC instance.
-
- connection -- The ServerConnection instance.
-
- dcc_connections -- A list of DCCConnection instances.
- """
- def __init__(self):
- self.ircobj = IRC()
- self.connection = self.ircobj.server()
- self.dcc_connections = []
- self.ircobj.add_global_handler("all_events", self._dispatcher, -10)
- self.ircobj.add_global_handler("dcc_disconnect", self._dcc_disconnect, -10)
-
- def _dispatcher(self, c, e):
- """[Internal]"""
- m = "on_" + e.eventtype()
- if hasattr(self, m):
- getattr(self, m)(c, e)
-
- def _dcc_disconnect(self, c, e):
- self.dcc_connections.remove(c)
-
- def connect(self, server, port, nickname, password=None, username=None,
- ircname=None, localaddress="", localport=0, ssl=False, ipv6=False):
- """Connect/reconnect to a server.
-
- Arguments:
-
- server -- Server name.
-
- port -- Port number.
-
- nickname -- The nickname.
-
- password -- Password (if any).
-
- username -- The username.
-
- ircname -- The IRC name.
-
- localaddress -- Bind the connection to a specific local IP address.
-
- localport -- Bind the connection to a specific local port.
-
- ssl -- Enable support for ssl.
-
- ipv6 -- Enable support for ipv6.
-
- This function can be called to reconnect a closed connection.
- """
- self.connection.connect(server, port, nickname,
- password, username, ircname,
- localaddress, localport, ssl, ipv6)
-
- def dcc_connect(self, address, port, dcctype="chat"):
- """Connect to a DCC peer.
-
- Arguments:
-
- address -- IP address of the peer.
-
- port -- Port to connect to.
-
- Returns a DCCConnection instance.
- """
- dcc = self.ircobj.dcc(dcctype)
- self.dcc_connections.append(dcc)
- dcc.connect(address, port)
- return dcc
-
- def dcc_listen(self, dcctype="chat"):
- """Listen for connections from a DCC peer.
-
- Returns a DCCConnection instance.
- """
- dcc = self.ircobj.dcc(dcctype)
- self.dcc_connections.append(dcc)
- dcc.listen()
- return dcc
-
- def start(self):
- """Start the IRC client."""
- self.ircobj.process_forever()
-
-
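-# Illustrative sketch (assumed channel/server names, not part of the
-# original module): a minimal echo bot built on SimpleIRCClient.
-# Handler methods are found by name ("on_" + event type) as described
-# in the class docstring above.
-#
-# class EchoBot(SimpleIRCClient):
-# def on_welcome(self, connection, event):
-# connection.join("#testchannel")
-# def on_pubmsg(self, connection, event):
-# connection.privmsg(event.target(), event.arguments()[0])
-#
-# bot = EchoBot()
-# bot.connect("irc.example.com", 6667, "echobot")
-# bot.start()
-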
-class Event:
- """Class representing an IRC event."""
- def __init__(self, eventtype, source, target, arguments=None):
- """Constructor of Event objects.
-
- Arguments:
-
- eventtype -- A string describing the event.
-
- source -- The originator of the event (a nick mask or a server).
-
- target -- The target of the event (a nick or a channel).
-
- arguments -- Any event specific arguments.
- """
- self._eventtype = eventtype
- self._source = source
- self._target = target
- if arguments:
- self._arguments = arguments
- else:
- self._arguments = []
-
- def eventtype(self):
- """Get the event type."""
- return self._eventtype
-
- def source(self):
- """Get the event source."""
- return self._source
-
- def target(self):
- """Get the event target."""
- return self._target
-
- def arguments(self):
- """Get the event arguments."""
- return self._arguments
-
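-# Illustrative example (assumed values, not part of the original
-# module): constructing and inspecting an Event by hand.
-#
-# e = Event("privmsg", "nick!user@host", "a_nickname", ["hello"])
-# assert e.eventtype() == "privmsg"
-# assert nm_to_n(e.source()) == "nick"
-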
-_LOW_LEVEL_QUOTE = "\020"
-_CTCP_LEVEL_QUOTE = "\134"
-_CTCP_DELIMITER = "\001"
-
-_low_level_mapping = {
- "0": "\000",
- "n": "\n",
- "r": "\r",
- _LOW_LEVEL_QUOTE: _LOW_LEVEL_QUOTE
-}
-
-_low_level_regexp = re.compile(_LOW_LEVEL_QUOTE + "(.)")
-
-def mask_matches(nick, mask):
- """Check if a nick matches a mask.
-
- Returns true if the nick matches, otherwise false.
- """
- nick = irc_lower(nick)
- mask = irc_lower(mask)
- mask = mask.replace("\\", "\\\\")
- for ch in ".$|[](){}+":
- mask = mask.replace(ch, "\\" + ch)
- mask = mask.replace("?", ".")
- mask = mask.replace("*", ".*")
- r = re.compile(mask, re.IGNORECASE)
- return r.match(nick)
-
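-# Illustrative example (assumed values): "*" and "?" in the mask act as
-# wildcards, and matching is case-insensitive per irc_lower().
-#
-# >>> bool(mask_matches("KelTus", "kel*"))
-# True
-# >>> bool(mask_matches("keltus", "x?ltus"))
-# False
-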
-_special = "-[]\\`^{}"
-nick_characters = string.ascii_letters + string.digits + _special
-_ircstring_translation = string.maketrans(string.ascii_uppercase + "[]\\^",
- string.ascii_lowercase + "{}|~")
-
-def irc_lower(s):
- """Returns a lowercased string.
-
- The definition of lowercased comes from the IRC specification (RFC
- 1459).
- """
- return s.translate(_ircstring_translation)
-
-def _ctcp_dequote(message):
- """[Internal] Dequote a message according to CTCP specifications.
-
- The function returns a list where each element can be either a
- string (normal message) or a tuple of one or two strings (tagged
- messages). If a tuple has only one element (ie is a singleton),
- that element is the tag; otherwise the tuple has two elements: the
- tag and the data.
-
- Arguments:
-
- message -- The message to be decoded.
- """
-
- def _low_level_replace(match_obj):
- ch = match_obj.group(1)
-
- # If low_level_mapping doesn't have the character as key, we
- # should just return the character.
- return _low_level_mapping.get(ch, ch)
-
- if _LOW_LEVEL_QUOTE in message:
- # Yup, there was a quote. Release the dequoter, man!
- message = _low_level_regexp.sub(_low_level_replace, message)
-
- if _CTCP_DELIMITER not in message:
- return [message]
- else:
- # Split it into parts. (Does any IRC client actually *use*
- # CTCP stacking like this?)
- chunks = message.split(_CTCP_DELIMITER)
-
- messages = []
- i = 0
- while i < len(chunks)-1:
- # Add message if it's non-empty.
- if len(chunks[i]) > 0:
- messages.append(chunks[i])
-
- if i < len(chunks)-2:
- # Aye! CTCP tagged data ahead!
- messages.append(tuple(chunks[i+1].split(" ", 1)))
-
- i = i + 2
-
- if len(chunks) % 2 == 0:
- # Hey, a lonely _CTCP_DELIMITER at the end! This means
- # that the last chunk, including the delimiter, is a
- # normal message! (This is according to the CTCP
- # specification.)
- messages.append(_CTCP_DELIMITER + chunks[-1])
-
- return messages
-
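-# Illustrative example (editor's sketch): a message carrying a CTCP
-# ACTION splits into the plain text and a (tag, data) tuple.
-#
-# >>> _ctcp_dequote("hi \001ACTION waves\001")
-# ['hi ', ('ACTION', 'waves')]
-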
-def is_channel(string):
- """Check if a string is a channel name.
-
- Returns true if the argument is a channel name, otherwise false.
- """
- return string and string[0] in "#&+!"
-
-def ip_numstr_to_quad(num):
- """Convert an IP number as an integer given in ASCII
- representation (e.g. '3232235521') to an IP address string
- (e.g. '192.168.0.1')."""
- n = long(num)
- p = map(str, map(int, [n >> 24 & 0xFF, n >> 16 & 0xFF,
- n >> 8 & 0xFF, n & 0xFF]))
- return ".".join(p)
-
-def ip_quad_to_numstr(quad):
- """Convert an IP address string (e.g. '192.168.0.1') to an IP
- number as an integer given in ASCII representation
- (e.g. '3232235521')."""
- p = map(long, quad.split("."))
- s = str((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
- if s[-1] == "L":
- s = s[:-1]
- return s
-
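-# Illustrative round-trip example for the two converters above:
-#
-# >>> ip_quad_to_numstr("192.168.0.1")
-# '3232235521'
-# >>> ip_numstr_to_quad("3232235521")
-# '192.168.0.1'
-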
-def nm_to_n(s):
- """Get the nick part of a nickmask.
-
- (The source of an Event is a nickmask.)
- """
- return s.split("!")[0]
-
-def nm_to_uh(s):
- """Get the userhost part of a nickmask.
-
- (The source of an Event is a nickmask.)
- """
- return s.split("!")[1]
-
-def nm_to_h(s):
- """Get the host part of a nickmask.
-
- (The source of an Event is a nickmask.)
- """
- return s.split("@")[1]
-
-def nm_to_u(s):
- """Get the user part of a nickmask.
-
- (The source of an Event is a nickmask.)
- """
- s = s.split("!")[1]
- return s.split("@")[0]
-
-def parse_nick_modes(mode_string):
- """Parse a nick mode string.
-
- The function returns a list of lists with three members: sign,
- mode and argument. The sign is \"+\" or \"-\". The argument is
- always None.
-
- Example:
-
- >>> irclib.parse_nick_modes(\"+ab-c\")
- [['+', 'a', None], ['+', 'b', None], ['-', 'c', None]]
- """
-
- return _parse_modes(mode_string, "")
-
-def parse_channel_modes(mode_string):
- """Parse a channel mode string.
-
- The function returns a list of lists with three members: sign,
- mode and argument. The sign is \"+\" or \"-\". The argument is
- None if mode isn't one of \"b\", \"k\", \"l\", \"v\" or \"o\".
-
- Example:
-
- >>> irclib.parse_channel_modes(\"+ab-c foo\")
- [['+', 'a', None], ['+', 'b', 'foo'], ['-', 'c', None]]
- """
-
- return _parse_modes(mode_string, "bklvo")
-
-def _parse_modes(mode_string, unary_modes=""):
- """[Internal]"""
- modes = []
- arg_count = 0
-
- # State variable.
- sign = ""
-
- a = mode_string.split()
- if len(a) == 0:
- return []
- else:
- mode_part, args = a[0], a[1:]
-
- if mode_part[0] not in "+-":
- return []
- for ch in mode_part:
- if ch in "+-":
- sign = ch
- elif ch == " ":
- collecting_arguments = 1
- elif ch in unary_modes:
- if len(args) >= arg_count + 1:
- modes.append([sign, ch, args[arg_count]])
- arg_count = arg_count + 1
- else:
- modes.append([sign, ch, None])
- else:
- modes.append([sign, ch, None])
- return modes
-
-def _ping_ponger(connection, event):
- """[Internal]"""
- connection.pong(event.target())
-
-# Numeric table mostly stolen from the Perl IRC module (Net::IRC).
-numeric_events = {
- "001": "welcome",
- "002": "yourhost",
- "003": "created",
- "004": "myinfo",
- "005": "featurelist", # XXX
- "200": "tracelink",
- "201": "traceconnecting",
- "202": "tracehandshake",
- "203": "traceunknown",
- "204": "traceoperator",
- "205": "traceuser",
- "206": "traceserver",
- "207": "traceservice",
- "208": "tracenewtype",
- "209": "traceclass",
- "210": "tracereconnect",
- "211": "statslinkinfo",
- "212": "statscommands",
- "213": "statscline",
- "214": "statsnline",
- "215": "statsiline",
- "216": "statskline",
- "217": "statsqline",
- "218": "statsyline",
- "219": "endofstats",
- "221": "umodeis",
- "231": "serviceinfo",
- "232": "endofservices",
- "233": "service",
- "234": "servlist",
- "235": "servlistend",
- "241": "statslline",
- "242": "statsuptime",
- "243": "statsoline",
- "244": "statshline",
- "250": "luserconns",
- "251": "luserclient",
- "252": "luserop",
- "253": "luserunknown",
- "254": "luserchannels",
- "255": "luserme",
- "256": "adminme",
- "257": "adminloc1",
- "258": "adminloc2",
- "259": "adminemail",
- "261": "tracelog",
- "262": "endoftrace",
- "263": "tryagain",
- "265": "n_local",
- "266": "n_global",
- "300": "none",
- "301": "away",
- "302": "userhost",
- "303": "ison",
- "305": "unaway",
- "306": "nowaway",
- "311": "whoisuser",
- "312": "whoisserver",
- "313": "whoisoperator",
- "314": "whowasuser",
- "315": "endofwho",
- "316": "whoischanop",
- "317": "whoisidle",
- "318": "endofwhois",
- "319": "whoischannels",
- "321": "liststart",
- "322": "list",
- "323": "listend",
- "324": "channelmodeis",
- "329": "channelcreate",
- "331": "notopic",
- "332": "currenttopic",
- "333": "topicinfo",
- "341": "inviting",
- "342": "summoning",
- "346": "invitelist",
- "347": "endofinvitelist",
- "348": "exceptlist",
- "349": "endofexceptlist",
- "351": "version",
- "352": "whoreply",
- "353": "namreply",
- "361": "killdone",
- "362": "closing",
- "363": "closeend",
- "364": "links",
- "365": "endoflinks",
- "366": "endofnames",
- "367": "banlist",
- "368": "endofbanlist",
- "369": "endofwhowas",
- "371": "info",
- "372": "motd",
- "373": "infostart",
- "374": "endofinfo",
- "375": "motdstart",
- "376": "endofmotd",
- "377": "motd2", # 1997-10-16 -- tkil
- "381": "youreoper",
- "382": "rehashing",
- "384": "myportis",
- "391": "time",
- "392": "usersstart",
- "393": "users",
- "394": "endofusers",
- "395": "nousers",
- "401": "nosuchnick",
- "402": "nosuchserver",
- "403": "nosuchchannel",
- "404": "cannotsendtochan",
- "405": "toomanychannels",
- "406": "wasnosuchnick",
- "407": "toomanytargets",
- "409": "noorigin",
- "411": "norecipient",
- "412": "notexttosend",
- "413": "notoplevel",
- "414": "wildtoplevel",
- "421": "unknowncommand",
- "422": "nomotd",
- "423": "noadmininfo",
- "424": "fileerror",
- "431": "nonicknamegiven",
- "432": "erroneusnickname", # Thiss iz how its speld in thee RFC.
- "433": "nicknameinuse",
- "436": "nickcollision",
- "437": "unavailresource", # "Nick temporally unavailable"
- "441": "usernotinchannel",
- "442": "notonchannel",
- "443": "useronchannel",
- "444": "nologin",
- "445": "summondisabled",
- "446": "usersdisabled",
- "451": "notregistered",
- "461": "needmoreparams",
- "462": "alreadyregistered",
- "463": "nopermforhost",
- "464": "passwdmismatch",
- "465": "yourebannedcreep", # I love this one...
- "466": "youwillbebanned",
- "467": "keyset",
- "471": "channelisfull",
- "472": "unknownmode",
- "473": "inviteonlychan",
- "474": "bannedfromchan",
- "475": "badchannelkey",
- "476": "badchanmask",
- "477": "nochanmodes", # "Channel doesn't support modes"
- "478": "banlistfull",
- "481": "noprivileges",
- "482": "chanoprivsneeded",
- "483": "cantkillserver",
- "484": "restricted", # Connection is restricted
- "485": "uniqopprivsneeded",
- "491": "nooperhost",
- "492": "noservicehost",
- "501": "umodeunknownflag",
- "502": "usersdontmatch",
-}
-
-generated_events = [
- # Generated events
- "dcc_connect",
- "dcc_disconnect",
- "dccmsg",
- "disconnect",
- "ctcp",
- "ctcpreply",
-]
-
-protocol_events = [
- # IRC protocol events
- "error",
- "join",
- "kick",
- "mode",
- "part",
- "ping",
- "privmsg",
- "privnotice",
- "pubmsg",
- "pubnotice",
- "quit",
- "invite",
- "pong",
-]
-
-all_events = generated_events + protocol_events + numeric_events.values()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py
deleted file mode 100644
index 4bb20aa..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py
+++ /dev/null
@@ -1,140 +0,0 @@
-__all__ = [
- 'AbstractBasicAuthHandler',
- 'AbstractDigestAuthHandler',
- 'BaseHandler',
- 'Browser',
- 'BrowserStateError',
- 'CacheFTPHandler',
- 'ContentTooShortError',
- 'Cookie',
- 'CookieJar',
- 'CookiePolicy',
- 'DefaultCookiePolicy',
- 'DefaultFactory',
- 'FTPHandler',
- 'Factory',
- 'FileCookieJar',
- 'FileHandler',
- 'FormNotFoundError',
- 'FormsFactory',
- 'HTTPBasicAuthHandler',
- 'HTTPCookieProcessor',
- 'HTTPDefaultErrorHandler',
- 'HTTPDigestAuthHandler',
- 'HTTPEquivProcessor',
- 'HTTPError',
- 'HTTPErrorProcessor',
- 'HTTPHandler',
- 'HTTPPasswordMgr',
- 'HTTPPasswordMgrWithDefaultRealm',
- 'HTTPProxyPasswordMgr',
- 'HTTPRedirectDebugProcessor',
- 'HTTPRedirectHandler',
- 'HTTPRefererProcessor',
- 'HTTPRefreshProcessor',
- 'HTTPRequestUpgradeProcessor',
- 'HTTPResponseDebugProcessor',
- 'HTTPRobotRulesProcessor',
- 'HTTPSClientCertMgr',
- 'HTTPSHandler',
- 'HeadParser',
- 'History',
- 'LWPCookieJar',
- 'Link',
- 'LinkNotFoundError',
- 'LinksFactory',
- 'LoadError',
- 'MSIECookieJar',
- 'MozillaCookieJar',
- 'OpenerDirector',
- 'OpenerFactory',
- 'ParseError',
- 'ProxyBasicAuthHandler',
- 'ProxyDigestAuthHandler',
- 'ProxyHandler',
- 'Request',
- 'ResponseUpgradeProcessor',
- 'RobotExclusionError',
- 'RobustFactory',
- 'RobustFormsFactory',
- 'RobustLinksFactory',
- 'RobustTitleFactory',
- 'SeekableProcessor',
- 'SeekableResponseOpener',
- 'TitleFactory',
- 'URLError',
- 'USE_BARE_EXCEPT',
- 'UnknownHandler',
- 'UserAgent',
- 'UserAgentBase',
- 'XHTMLCompatibleHeadParser',
- '__version__',
- 'build_opener',
- 'install_opener',
- 'lwp_cookie_str',
- 'make_response',
- 'request_host',
- 'response_seek_wrapper', # XXX deprecate in public interface?
- 'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper()
- 'str2time',
- 'urlopen',
- 'urlretrieve']
-
-import logging
-import sys
-
-from _mechanize import __version__
-
-# high-level stateful browser-style interface
-from _mechanize import \
- Browser, History, \
- BrowserStateError, LinkNotFoundError, FormNotFoundError
-
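-# Illustrative sketch (assumed URL, not part of the original module):
-# typical use of the stateful Browser interface imported above.
-#
-# import mechanize
-# br = mechanize.Browser()
-# response = br.open("http://example.com/")
-# html = response.read()
-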
-# configurable URL-opener interface
-from _useragent import UserAgentBase, UserAgent
-from _html import \
- ParseError, \
- Link, \
- Factory, DefaultFactory, RobustFactory, \
- FormsFactory, LinksFactory, TitleFactory, \
- RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
-
-# urllib2 work-alike interface (part from mechanize, part from urllib2)
-# This is a superset of the urllib2 interface.
-from _urllib2 import *
-
-# misc
-from _opener import ContentTooShortError, OpenerFactory, urlretrieve
-from _util import http2time as str2time
-from _response import \
- response_seek_wrapper, seek_wrapped_response, make_response
-from _http import HeadParser
-try:
- from _http import XHTMLCompatibleHeadParser
-except ImportError:
- pass
-
-# cookies
-from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
- CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \
- effective_request_host
-from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
-# 2.4 raises SyntaxError due to generator / try/finally use
-if sys.version_info[:2] > (2,4):
- try:
- import sqlite3
- except ImportError:
- pass
- else:
- from _firefox3cookiejar import Firefox3CookieJar
-from _mozillacookiejar import MozillaCookieJar
-from _msiecookiejar import MSIECookieJar
-
-# If you hate the idea of turning bugs into warnings, do:
-# import mechanize; mechanize.USE_BARE_EXCEPT = False
-USE_BARE_EXCEPT = True
-
-logger = logging.getLogger("mechanize")
-if logger.level is logging.NOTSET:
- logger.setLevel(logging.CRITICAL)
-del logger
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py
deleted file mode 100644
index 232f7d8..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py
+++ /dev/null
@@ -1,522 +0,0 @@
-"""HTTP Authentication and Proxy support.
-
-All but HTTPProxyPasswordMgr come from Python 2.5.
-
-
-Copyright 2006 John J. Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
-included with the distribution).
-
-"""
-
-import base64
-import copy
-import os
-import posixpath
-import random
-import re
-import time
-import urlparse
-
-try:
- import hashlib
-except ImportError:
- import md5
- import sha
- def sha1_digest(bytes):
- return sha.new(bytes).hexdigest()
- def md5_digest(bytes):
- return md5.new(bytes).hexdigest()
-else:
- def sha1_digest(bytes):
- return hashlib.sha1(bytes).hexdigest()
- def md5_digest(bytes):
- return hashlib.md5(bytes).hexdigest()
-
-from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
-from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
- splitport
-
-
-def _parse_proxy(proxy):
- """Return (scheme, user, password, host/port) given a URL or an authority.
-
- If a URL is supplied, it must have an authority (host:port) component.
- According to RFC 3986, having an authority component means the URL must
- have two slashes after the scheme:
-
- >>> _parse_proxy('file:/ftp.example.com/')
- Traceback (most recent call last):
- ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
-
- The first three items of the returned tuple may be None.
-
- Examples of authority parsing:
-
- >>> _parse_proxy('proxy.example.com')
- (None, None, None, 'proxy.example.com')
- >>> _parse_proxy('proxy.example.com:3128')
- (None, None, None, 'proxy.example.com:3128')
-
- The authority component may optionally include userinfo (assumed to be
- username:password):
-
- >>> _parse_proxy('joe:password@proxy.example.com')
- (None, 'joe', 'password', 'proxy.example.com')
- >>> _parse_proxy('joe:password@proxy.example.com:3128')
- (None, 'joe', 'password', 'proxy.example.com:3128')
-
- Same examples, but with URLs instead:
-
- >>> _parse_proxy('http://proxy.example.com/')
- ('http', None, None, 'proxy.example.com')
- >>> _parse_proxy('http://proxy.example.com:3128/')
- ('http', None, None, 'proxy.example.com:3128')
- >>> _parse_proxy('http://joe:password@proxy.example.com/')
- ('http', 'joe', 'password', 'proxy.example.com')
- >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
- ('http', 'joe', 'password', 'proxy.example.com:3128')
-
- Everything after the authority is ignored:
-
- >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
- ('ftp', 'joe', 'password', 'proxy.example.com')
-
- Test for no trailing '/' case:
-
- >>> _parse_proxy('http://joe:password@proxy.example.com')
- ('http', 'joe', 'password', 'proxy.example.com')
-
- """
- scheme, r_scheme = splittype(proxy)
- if not r_scheme.startswith("/"):
- # authority
- scheme = None
- authority = proxy
- else:
- # URL
- if not r_scheme.startswith("//"):
- raise ValueError("proxy URL with no authority: %r" % proxy)
- # We have an authority, so for RFC 3986-compliant URLs (by sections
- # 3.2 and 3.3), the path is empty or starts with '/'
- end = r_scheme.find("/", 2)
- if end == -1:
- end = None
- authority = r_scheme[2:end]
- userinfo, hostport = splituser(authority)
- if userinfo is not None:
- user, password = splitpasswd(userinfo)
- else:
- user = password = None
- return scheme, user, password, hostport
-
-class ProxyHandler(BaseHandler):
- # Proxies must be in front
- handler_order = 100
-
- def __init__(self, proxies=None):
- if proxies is None:
- proxies = getproxies()
- assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
- self.proxies = proxies
- for type, url in proxies.items():
- setattr(self, '%s_open' % type,
- lambda r, proxy=url, type=type, meth=self.proxy_open: \
- meth(r, proxy, type))
-
- def proxy_open(self, req, proxy, type):
- orig_type = req.get_type()
- proxy_type, user, password, hostport = _parse_proxy(proxy)
- if proxy_type is None:
- proxy_type = orig_type
- if user and password:
- user_pass = '%s:%s' % (unquote(user), unquote(password))
- creds = base64.encodestring(user_pass).strip()
- req.add_header('Proxy-authorization', 'Basic ' + creds)
- hostport = unquote(hostport)
- req.set_proxy(hostport, proxy_type)
- if orig_type == proxy_type:
- # let other handlers take care of it
- return None
- else:
- # need to start over, because the other handlers don't
- # grok the proxy's URL type
- # e.g. if we have a constructor arg proxies like so:
- # {'http': 'ftp://proxy.example.com'}, we may end up turning
- # a request for http://acme.example.com/a into one for
- # ftp://proxy.example.com/a
- return self.parent.open(req)
-
-class HTTPPasswordMgr:
-
- def __init__(self):
- self.passwd = {}
-
- def add_password(self, realm, uri, user, passwd):
- # uri could be a single URI or a sequence
- if isinstance(uri, basestring):
- uri = [uri]
- if not realm in self.passwd:
- self.passwd[realm] = {}
- for default_port in True, False:
- reduced_uri = tuple(
- [self.reduce_uri(u, default_port) for u in uri])
- self.passwd[realm][reduced_uri] = (user, passwd)
-
- def find_user_password(self, realm, authuri):
- domains = self.passwd.get(realm, {})
- for default_port in True, False:
- reduced_authuri = self.reduce_uri(authuri, default_port)
- for uris, authinfo in domains.iteritems():
- for uri in uris:
- if self.is_suburi(uri, reduced_authuri):
- return authinfo
- return None, None
-
- def reduce_uri(self, uri, default_port=True):
- """Accept authority or URI and extract only the authority and path."""
- # note HTTP URLs do not have a userinfo component
- parts = urlparse.urlsplit(uri)
- if parts[1]:
- # URI
- scheme = parts[0]
- authority = parts[1]
- path = parts[2] or '/'
- else:
- # host or host:port
- scheme = None
- authority = uri
- path = '/'
- host, port = splitport(authority)
- if default_port and port is None and scheme is not None:
- dport = {"http": 80,
- "https": 443,
- }.get(scheme)
- if dport is not None:
- authority = "%s:%d" % (host, dport)
- return authority, path
-
- def is_suburi(self, base, test):
- """Check if test is below base in a URI tree
-
- Both args must be URIs in reduced form.
- """
- if base == test:
- return True
- if base[0] != test[0]:
- return False
- common = posixpath.commonprefix((base[1], test[1]))
- if len(common) == len(base[1]):
- return True
- return False
-
-
-class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
-
- def find_user_password(self, realm, authuri):
- user, password = HTTPPasswordMgr.find_user_password(self, realm,
- authuri)
- if user is not None:
- return user, password
- return HTTPPasswordMgr.find_user_password(self, None, authuri)
-
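-# Illustrative sketch (assumed realm/URL/credentials): the default-realm
-# manager falls back to credentials registered under realm None.
-#
-# mgr = HTTPPasswordMgrWithDefaultRealm()
-# mgr.add_password(None, "http://example.com/protected/", "joe", "secret")
-# mgr.find_user_password("Some Realm", "http://example.com/protected/page")
-# # -> ('joe', 'secret')
-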
-
-class AbstractBasicAuthHandler:
-
- rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I)
-
- # XXX there can actually be multiple auth-schemes in a
- # www-authenticate header. should probably be a lot more careful
- # in parsing them to extract multiple alternatives
-
- def __init__(self, password_mgr=None):
- if password_mgr is None:
- password_mgr = HTTPPasswordMgr()
- self.passwd = password_mgr
- self.add_password = self.passwd.add_password
-
- def http_error_auth_reqed(self, authreq, host, req, headers):
- # host may be an authority (without userinfo) or a URL with an
- # authority
- # XXX could be multiple headers
- authreq = headers.get(authreq, None)
- if authreq:
- mo = AbstractBasicAuthHandler.rx.search(authreq)
- if mo:
- scheme, realm = mo.groups()
- if scheme.lower() == 'basic':
- return self.retry_http_basic_auth(host, req, realm)
-
- def retry_http_basic_auth(self, host, req, realm):
- user, pw = self.passwd.find_user_password(realm, host)
- if pw is not None:
- raw = "%s:%s" % (user, pw)
- auth = 'Basic %s' % base64.encodestring(raw).strip()
- if req.headers.get(self.auth_header, None) == auth:
- return None
- newreq = copy.copy(req)
- newreq.add_header(self.auth_header, auth)
- newreq.visit = False
- return self.parent.open(newreq)
- else:
- return None
-
-
-class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
-
- auth_header = 'Authorization'
-
- def http_error_401(self, req, fp, code, msg, headers):
- url = req.get_full_url()
- return self.http_error_auth_reqed('www-authenticate',
- url, req, headers)
-
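-# Illustrative sketch (assumed realm/URL/credentials, not part of the
-# original module): wiring the handler into a urllib2-style opener.
-#
-# import urllib2
-# handler = HTTPBasicAuthHandler()
-# handler.add_password("Some Realm", "http://example.com/", "joe", "secret")
-# opener = urllib2.build_opener(handler)
-# opener.open("http://example.com/protected/")
-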
-
-class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
-
- auth_header = 'Proxy-authorization'
-
- def http_error_407(self, req, fp, code, msg, headers):
- # http_error_auth_reqed requires that there is no userinfo component in
- # authority. Assume there isn't one, since urllib2 does not (and
- # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
- # userinfo.
- authority = req.get_host()
- return self.http_error_auth_reqed('proxy-authenticate',
- authority, req, headers)
-
-
-def randombytes(n):
- """Return n random bytes."""
- # Use /dev/urandom if it is available. Fall back to random module
- # if not. It might be worthwhile to extend this function to use
- # other platform-specific mechanisms for getting random bytes.
- if os.path.exists("/dev/urandom"):
- f = open("/dev/urandom")
- s = f.read(n)
- f.close()
- return s
- else:
- L = [chr(random.randrange(0, 256)) for i in range(n)]
- return "".join(L)
-
-class AbstractDigestAuthHandler:
- # Digest authentication is specified in RFC 2617.
-
- # XXX The client does not inspect the Authentication-Info header
- # in a successful response.
-
- # XXX It should be possible to test this implementation against
- # a mock server that just generates a static set of challenges.
-
- # XXX qop="auth-int" supports is shaky
-
- def __init__(self, passwd=None):
- if passwd is None:
- passwd = HTTPPasswordMgr()
- self.passwd = passwd
- self.add_password = self.passwd.add_password
- self.retried = 0
- self.nonce_count = 0
-
- def reset_retry_count(self):
- self.retried = 0
-
- def http_error_auth_reqed(self, auth_header, host, req, headers):
- authreq = headers.get(auth_header, None)
- if self.retried > 5:
- # Don't fail endlessly - if we failed once, we'll probably
- # fail a second time. Hm. Unless the Password Manager is
- # prompting for the information. Crap. This isn't great
- # but it's better than the current 'repeat until recursion
- # depth exceeded' approach <wink>
- raise HTTPError(req.get_full_url(), 401, "digest auth failed",
- headers, None)
- else:
- self.retried += 1
- if authreq:
- scheme = authreq.split()[0]
- if scheme.lower() == 'digest':
- return self.retry_http_digest_auth(req, authreq)
-
- def retry_http_digest_auth(self, req, auth):
- token, challenge = auth.split(' ', 1)
- chal = parse_keqv_list(parse_http_list(challenge))
- auth = self.get_authorization(req, chal)
- if auth:
- auth_val = 'Digest %s' % auth
- if req.headers.get(self.auth_header, None) == auth_val:
- return None
- newreq = copy.copy(req)
- newreq.add_unredirected_header(self.auth_header, auth_val)
- newreq.visit = False
- return self.parent.open(newreq)
-
- def get_cnonce(self, nonce):
- # The cnonce-value is an opaque
- # quoted string value provided by the client and used by both client
- # and server to avoid chosen plaintext attacks, to provide mutual
- # authentication, and to provide some message integrity protection.
- # This isn't a fabulous effort, but it's probably Good Enough.
- dig = sha1_digest("%s:%s:%s:%s" % (self.nonce_count, nonce,
- time.ctime(), randombytes(8)))
- return dig[:16]
-
- def get_authorization(self, req, chal):
- try:
- realm = chal['realm']
- nonce = chal['nonce']
- qop = chal.get('qop')
- algorithm = chal.get('algorithm', 'MD5')
- # mod_digest doesn't send an opaque, even though it isn't
- # supposed to be optional
- opaque = chal.get('opaque', None)
- except KeyError:
- return None
-
- H, KD = self.get_algorithm_impls(algorithm)
- if H is None:
- return None
-
- user, pw = self.passwd.find_user_password(realm, req.get_full_url())
- if user is None:
- return None
-
- # XXX not implemented yet
- if req.has_data():
- entdig = self.get_entity_digest(req.get_data(), chal)
- else:
- entdig = None
-
- A1 = "%s:%s:%s" % (user, realm, pw)
- A2 = "%s:%s" % (req.get_method(),
- # XXX selector: what about proxies and full urls
- req.get_selector())
- if qop == 'auth':
- self.nonce_count += 1
- ncvalue = '%08x' % self.nonce_count
- cnonce = self.get_cnonce(nonce)
- noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
- respdig = KD(H(A1), noncebit)
- elif qop is None:
- respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
- else:
- # XXX auth-int is not handled; bail out rather than build a
- # header below with respdig/ncvalue/cnonce undefined.
- return None
-
- # XXX should the partial digests be encoded too?
-
- base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
- 'response="%s"' % (user, realm, nonce, req.get_selector(),
- respdig)
- if opaque:
- base += ', opaque="%s"' % opaque
- if entdig:
- base += ', digest="%s"' % entdig
- base += ', algorithm="%s"' % algorithm
- if qop:
- base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
- return base
-
- def get_algorithm_impls(self, algorithm):
- # lambdas assume digest modules are imported at the top level
- if algorithm == 'MD5':
- H = md5_digest
- elif algorithm == 'SHA':
- H = sha1_digest
- # XXX MD5-sess
- KD = lambda s, d: H("%s:%s" % (s, d))
- return H, KD
-
- def get_entity_digest(self, data, chal):
- # XXX not implemented yet
- return None
-
-
-class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
- """An authentication protocol defined by RFC 2069
-
- Digest authentication improves on basic authentication because it
- does not transmit passwords in the clear.
- """
-
- auth_header = 'Authorization'
- handler_order = 490
-
- def http_error_401(self, req, fp, code, msg, headers):
- host = urlparse.urlparse(req.get_full_url())[1]
- retry = self.http_error_auth_reqed('www-authenticate',
- host, req, headers)
- self.reset_retry_count()
- return retry
-
-
-class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
-
- auth_header = 'Proxy-Authorization'
- handler_order = 490
-
- def http_error_407(self, req, fp, code, msg, headers):
- host = req.get_host()
- retry = self.http_error_auth_reqed('proxy-authenticate',
- host, req, headers)
- self.reset_retry_count()
- return retry
-
-
-# XXX ugly implementation, should probably not bother deriving
-class HTTPProxyPasswordMgr(HTTPPasswordMgr):
- # has default realm and host/port
- def add_password(self, realm, uri, user, passwd):
- # uri could be a single URI or a sequence
- if uri is None or isinstance(uri, basestring):
- uris = [uri]
- else:
- uris = uri
- passwd_by_domain = self.passwd.setdefault(realm, {})
- for uri in uris:
- for default_port in True, False:
- reduced_uri = self.reduce_uri(uri, default_port)
- passwd_by_domain[reduced_uri] = (user, passwd)
-
- def find_user_password(self, realm, authuri):
- attempts = [(realm, authuri), (None, authuri)]
- # bleh, want default realm to take precedence over default
- # URI/authority, hence this outer loop
- for default_uri in False, True:
- for realm, authuri in attempts:
- authinfo_by_domain = self.passwd.get(realm, {})
- for default_port in True, False:
- reduced_authuri = self.reduce_uri(authuri, default_port)
- for uri, authinfo in authinfo_by_domain.iteritems():
- if uri is None and not default_uri:
- continue
- if self.is_suburi(uri, reduced_authuri):
- return authinfo
- user, password = None, None
-
- if user is not None:
- break
- return user, password
-
- def reduce_uri(self, uri, default_port=True):
- if uri is None:
- return None
- return HTTPPasswordMgr.reduce_uri(self, uri, default_port)
-
- def is_suburi(self, base, test):
- if base is None:
- # default to the proxy's host/port
- hostport, path = test
- base = (hostport, "/")
- return HTTPPasswordMgr.is_suburi(self, base, test)
-
-
-class HTTPSClientCertMgr(HTTPPasswordMgr):
- # implementation inheritance: this is not a proper subclass
- def add_key_cert(self, uri, key_file, cert_file):
- self.add_password(None, uri, key_file, cert_file)
- def find_key_cert(self, authuri):
- return HTTPPasswordMgr.find_user_password(self, None, authuri)
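For reference, the get_authorization method deleted above implements the RFC 2617
digest computation respdig = KD(H(A1), nonce:nc:cnonce:qop:H(A2)). A minimal
standalone sketch of the qop="auth" case, using only the standard library and
hypothetical credentials and challenge values (nothing below is from the file):

    import hashlib

    def H(s):
        # MD5 hex digest, as in the md5_digest helper this file assumes.
        return hashlib.md5(s.encode("utf-8")).hexdigest()

    def KD(secret, data):
        return H("%s:%s" % (secret, data))

    # Hypothetical values, for illustration only.
    user, realm, pw = "alice", "example", "secret"
    method, uri = "GET", "/index.html"
    nonce, nc, cnonce, qop = "abc123", "00000001", "0a4f113b", "auth"

    A1 = "%s:%s:%s" % (user, realm, pw)
    A2 = "%s:%s" % (method, uri)
    respdig = KD(H(A1), "%s:%s:%s:%s:%s" % (nonce, nc, cnonce, qop, H(A2)))
    print(respdig)  # the value sent as response="..." in the Authorization header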
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py
deleted file mode 100644
index 268b305..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py
+++ /dev/null
@@ -1,1080 +0,0 @@
-"""Beautiful Soup
-Elixir and Tonic
-"The Screen-Scraper's Friend"
-v2.1.1
-http://www.crummy.com/software/BeautifulSoup/
-
-Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance
-into a tree representation. It provides methods and Pythonic idioms
-that make it easy to search and modify the tree.
-
-A well-formed XML/HTML document will yield a well-formed data
-structure. An ill-formed XML/HTML document will yield a
-correspondingly ill-formed data structure. If your document is only
-locally well-formed, you can use this library to find and process the
-well-formed part of it. The BeautifulSoup class has heuristics for
-obtaining a sensible parse tree in the face of common HTML errors.
-
-Beautiful Soup has no external dependencies. It works with Python 2.2
-and up.
-
-Beautiful Soup defines classes for four different parsing strategies:
-
- * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
- language that kind of looks like XML.
-
- * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
- or invalid.
-
- * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML
- that trips up BeautifulSoup.
-
- * BeautifulSOAP, for making it easier to parse XML documents that use
- lots of subelements containing a single string, where you'd prefer
- they put that string into an attribute (such as SOAP messages).
-
-You can subclass BeautifulStoneSoup or BeautifulSoup to create a
-parsing strategy specific to an XML schema or a particular bizarre
-HTML document. Typically your subclass would just override
-SELF_CLOSING_TAGS and/or NESTABLE_TAGS.
-""" #"
-from __future__ import generators
-
-__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "2.1.1"
-__date__ = "$Date: 2004/10/18 00:14:20 $"
-__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson"
-__license__ = "PSF"
-
-from sgmllib import SGMLParser, SGMLParseError
-import types
-import re
-import sgmllib
-
-#This code makes Beautiful Soup able to parse XML with namespaces
-sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
-
-class NullType(object):
-
- """Similar to NoneType with a corresponding singleton instance
- 'Null' that, unlike None, accepts any message and returns itself.
-
- Examples:
- >>> Null("send", "a", "message")("and one more",
- ... "and what you get still") is Null
- True
- """
-
- def __new__(cls): return Null
- def __call__(self, *args, **kwargs): return Null
-## def __getstate__(self, *args): return Null
- def __getattr__(self, attr): return Null
- def __getitem__(self, item): return Null
- def __setattr__(self, attr, value): pass
- def __setitem__(self, item, value): pass
- def __len__(self): return 0
- # FIXME: is this a python bug? otherwise ``for x in Null: pass``
- # never terminates...
- def __iter__(self): return iter([])
- def __contains__(self, item): return False
- def __repr__(self): return "Null"
-Null = object.__new__(NullType)
-
-class PageElement:
- """Contains the navigational information for some part of the page
- (either a tag or a piece of text)"""
-
- def setup(self, parent=Null, previous=Null):
- """Sets up the initial relations between this element and
- other elements."""
- self.parent = parent
- self.previous = previous
- self.next = Null
- self.previousSibling = Null
- self.nextSibling = Null
- if self.parent and self.parent.contents:
- self.previousSibling = self.parent.contents[-1]
- self.previousSibling.nextSibling = self
-
- def findNext(self, name=None, attrs={}, text=None):
- """Returns the first item that matches the given criteria and
- appears after this Tag in the document."""
- return self._first(self.fetchNext, name, attrs, text)
- firstNext = findNext
-
- def fetchNext(self, name=None, attrs={}, text=None, limit=None):
- """Returns all items that match the given criteria and appear
- after this Tag in the document."""
- return self._fetch(name, attrs, text, limit, self.nextGenerator)
-
- def findNextSibling(self, name=None, attrs={}, text=None):
- """Returns the closest sibling to this Tag that matches the
- given criteria and appears after this Tag in the document."""
- return self._first(self.fetchNextSiblings, name, attrs, text)
- firstNextSibling = findNextSibling
-
- def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None):
- """Returns the siblings of this Tag that match the given
- criteria and appear after this Tag in the document."""
- return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator)
-
- def findPrevious(self, name=None, attrs={}, text=None):
- """Returns the first item that matches the given criteria and
- appears before this Tag in the document."""
- return self._first(self.fetchPrevious, name, attrs, text)
-
- def fetchPrevious(self, name=None, attrs={}, text=None, limit=None):
- """Returns all items that match the given criteria and appear
- before this Tag in the document."""
- return self._fetch(name, attrs, text, limit, self.previousGenerator)
- firstPrevious = findPrevious
-
- def findPreviousSibling(self, name=None, attrs={}, text=None):
- """Returns the closest sibling to this Tag that matches the
- given criteria and appears before this Tag in the document."""
- return self._first(self.fetchPreviousSiblings, name, attrs, text)
- firstPreviousSibling = findPreviousSibling
-
- def fetchPreviousSiblings(self, name=None, attrs={}, text=None,
- limit=None):
- """Returns the siblings of this Tag that match the given
- criteria and appear before this Tag in the document."""
- return self._fetch(name, attrs, text, limit,
- self.previousSiblingGenerator)
-
- def findParent(self, name=None, attrs={}):
- """Returns the closest parent of this Tag that matches the given
- criteria."""
- r = Null
- l = self.fetchParents(name, attrs, 1)
- if l:
- r = l[0]
- return r
- firstParent = findParent
-
- def fetchParents(self, name=None, attrs={}, limit=None):
- """Returns the parents of this Tag that match the given
- criteria."""
- return self._fetch(name, attrs, None, limit, self.parentGenerator)
-
- #These methods do the real heavy lifting.
-
- def _first(self, method, name, attrs, text):
- r = Null
- l = method(name, attrs, text, 1)
- if l:
- r = l[0]
- return r
-
- def _fetch(self, name, attrs, text, limit, generator):
- "Iterates over a generator looking for things that match."
- if not hasattr(attrs, 'items'):
- attrs = {'class' : attrs}
-
- results = []
- g = generator()
- while True:
- try:
- i = g.next()
- except StopIteration:
- break
- found = None
- if isinstance(i, Tag):
- if not text:
- if not name or self._matches(i, name):
- match = True
- for attr, matchAgainst in attrs.items():
- check = i.get(attr)
- if not self._matches(check, matchAgainst):
- match = False
- break
- if match:
- found = i
- elif text:
- if self._matches(i, text):
- found = i
- if found:
- results.append(found)
- if limit and len(results) >= limit:
- break
- return results
-
- #Generators that can be used to navigate starting from both
- #NavigableTexts and Tags.
- def nextGenerator(self):
- i = self
- while i:
- i = i.next
- yield i
-
- def nextSiblingGenerator(self):
- i = self
- while i:
- i = i.nextSibling
- yield i
-
- def previousGenerator(self):
- i = self
- while i:
- i = i.previous
- yield i
-
- def previousSiblingGenerator(self):
- i = self
- while i:
- i = i.previousSibling
- yield i
-
- def parentGenerator(self):
- i = self
- while i:
- i = i.parent
- yield i
-
- def _matches(self, chunk, howToMatch):
- #print 'looking for %s in %s' % (howToMatch, chunk)
- #
- # If given a list of items, return true if the list contains a
- # text element that matches.
- if isList(chunk) and not isinstance(chunk, Tag):
- for tag in chunk:
- if isinstance(tag, NavigableText) and self._matches(tag, howToMatch):
- return True
- return False
- if callable(howToMatch):
- return howToMatch(chunk)
- if isinstance(chunk, Tag):
- #Custom match methods take the tag as an argument, but all other
- #ways of matching match the tag name as a string
- chunk = chunk.name
- #Now we know that chunk is a string
- if not isinstance(chunk, basestring):
- chunk = str(chunk)
- if hasattr(howToMatch, 'match'):
- # It's a regexp object.
- return howToMatch.search(chunk)
- if isList(howToMatch):
- return chunk in howToMatch
- if hasattr(howToMatch, 'items'):
- return howToMatch.has_key(chunk)
- #It's just a string
- return str(howToMatch) == chunk
-
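The _matches dispatch above is what lets first and fetch accept a plain string,
a compiled regexp, a list, a dict, or a callable anywhere a tag name or
attribute value is expected. A hypothetical sketch, assuming this file is
importable as _beautifulsoup (Python 2, like the file itself):

    import re
    from _beautifulsoup import BeautifulSoup

    soup = BeautifulSoup('<p class="a">x</p><p class="b">y</p>')
    soup.fetch('p')                          # name as plain string
    soup.fetch(re.compile('^p$'))            # name as regexp (matched via .search)
    soup.fetch(['p', 'div'])                 # name as list of candidates
    soup.fetch('p', {'class': 'a'})          # attribute criteria
    soup.first(lambda tag: tag.name == 'p')  # callable receives the Tag itself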
-class NavigableText(PageElement):
-
- def __getattr__(self, attr):
- "For backwards compatibility, text.string gives you text"
- if attr == 'string':
- return self
- else:
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
-
-class NavigableString(str, NavigableText):
- pass
-
-class NavigableUnicodeString(unicode, NavigableText):
- pass
-
-class Tag(PageElement):
-
- """Represents a found HTML tag with its attributes and contents."""
-
- def __init__(self, name, attrs=None, parent=Null, previous=Null):
- "Basic constructor."
- self.name = name
- if attrs == None:
- attrs = []
- self.attrs = attrs
- self.contents = []
- self.setup(parent, previous)
- self.hidden = False
-
- def get(self, key, default=None):
- """Returns the value of the 'key' attribute for the tag, or
- the value given for 'default' if it doesn't have that
- attribute."""
- return self._getAttrMap().get(key, default)
-
- def __getitem__(self, key):
- """tag[key] returns the value of the 'key' attribute for the tag,
- and throws an exception if it's not there."""
- return self._getAttrMap()[key]
-
- def __iter__(self):
- "Iterating over a tag iterates over its contents."
- return iter(self.contents)
-
- def __len__(self):
- "The length of a tag is the length of its list of contents."
- return len(self.contents)
-
- def __contains__(self, x):
- return x in self.contents
-
- def __nonzero__(self):
- "A tag is non-None even if it has no contents."
- return True
-
- def __setitem__(self, key, value):
- """Setting tag[key] sets the value of the 'key' attribute for the
- tag."""
- self._getAttrMap()
- self.attrMap[key] = value
- found = False
- for i in range(0, len(self.attrs)):
- if self.attrs[i][0] == key:
- self.attrs[i] = (key, value)
- found = True
- if not found:
- self.attrs.append((key, value))
- self._getAttrMap()[key] = value
-
- def __delitem__(self, key):
- "Deleting tag[key] deletes all 'key' attributes for the tag."
- for item in self.attrs:
- if item[0] == key:
- self.attrs.remove(item)
- #We don't break because bad HTML can define the same
- #attribute multiple times.
- self._getAttrMap()
- if self.attrMap.has_key(key):
- del self.attrMap[key]
-
- def __call__(self, *args, **kwargs):
- """Calling a tag like a function is the same as calling its
- fetch() method. Eg. tag('a') returns a list of all the A tags
- found within this tag."""
- return apply(self.fetch, args, kwargs)
-
- def __getattr__(self, tag):
- if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
- return self.first(tag[:-3])
- elif tag.find('__') != 0:
- return self.first(tag)
-
- def __eq__(self, other):
- """Returns true iff this tag has the same name, the same attributes,
- and the same contents (recursively) as the given tag.
-
- NOTE: right now this will return false if two tags have the
- same attributes in a different order. Should this be fixed?"""
- if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
- return False
- for i in range(0, len(self.contents)):
- if self.contents[i] != other.contents[i]:
- return False
- return True
-
- def __ne__(self, other):
- """Returns true iff this tag is not identical to the other tag,
- as defined in __eq__."""
- return not self == other
-
- def __repr__(self):
- """Renders this tag as a string."""
- return str(self)
-
- def __unicode__(self):
- return self.__str__(1)
-
- def __str__(self, needUnicode=None, showStructureIndent=None):
- """Returns a string or Unicode representation of this tag and
- its contents.
-
- NOTE: since Python's HTML parser consumes whitespace, this
- method is not certain to reproduce the whitespace present in
- the original string."""
-
- attrs = []
- if self.attrs:
- for key, val in self.attrs:
- attrs.append('%s="%s"' % (key, val))
- close = ''
- closeTag = ''
- if self.isSelfClosing():
- close = ' /'
- else:
- closeTag = '</%s>' % self.name
- indentIncrement = None
- if showStructureIndent != None:
- indentIncrement = showStructureIndent
- if not self.hidden:
- indentIncrement += 1
- contents = self.renderContents(indentIncrement, needUnicode=needUnicode)
- if showStructureIndent:
- space = '\n%s' % (' ' * showStructureIndent)
- if self.hidden:
- s = contents
- else:
- s = []
- attributeString = ''
- if attrs:
- attributeString = ' ' + ' '.join(attrs)
- if showStructureIndent:
- s.append(space)
- s.append('<%s%s%s>' % (self.name, attributeString, close))
- s.append(contents)
- if closeTag and showStructureIndent != None:
- s.append(space)
- s.append(closeTag)
- s = ''.join(s)
- isUnicode = type(s) == types.UnicodeType
- if needUnicode and not isUnicode:
- s = unicode(s)
- elif isUnicode and needUnicode==False:
- s = str(s)
- return s
-
- def prettify(self, needUnicode=None):
- return self.__str__(needUnicode, showStructureIndent=True)
-
- def renderContents(self, showStructureIndent=None, needUnicode=None):
- """Renders the contents of this tag as a (possibly Unicode)
- string."""
- s=[]
- for c in self:
- text = None
- if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType:
- text = unicode(c)
- elif isinstance(c, Tag):
- s.append(c.__str__(needUnicode, showStructureIndent))
- elif needUnicode:
- text = unicode(c)
- else:
- text = str(c)
- if text:
- if showStructureIndent != None:
- if text[-1] == '\n':
- text = text[:-1]
- s.append(text)
- return ''.join(s)
-
- #Soup methods
-
- def firstText(self, text, recursive=True):
- """Convenience method to retrieve the first piece of text matching the
- given criteria. 'text' can be a string, a regular expression object,
- a callable that takes a string and returns whether or not the
- string 'matches', etc."""
- return self.first(recursive=recursive, text=text)
-
- def fetchText(self, text, recursive=True, limit=None):
- """Convenience method to retrieve all pieces of text matching the
- given criteria. 'text' can be a string, a regular expression object,
- a callable that takes a string and returns whether or not the
- string 'matches', etc."""
- return self.fetch(recursive=recursive, text=text, limit=limit)
-
- def first(self, name=None, attrs={}, recursive=True, text=None):
- """Return only the first child of this
- Tag matching the given criteria."""
- r = Null
- l = self.fetch(name, attrs, recursive, text, 1)
- if l:
- r = l[0]
- return r
- findChild = first
-
- def fetch(self, name=None, attrs={}, recursive=True, text=None,
- limit=None):
- """Extracts a list of Tag objects that match the given
- criteria. You can specify the name of the Tag and any
- attributes you want the Tag to have.
-
- The value of a key-value pair in the 'attrs' map can be a
- string, a list of strings, a regular expression object, or a
- callable that takes a string and returns whether or not the
- string matches for some custom definition of 'matches'. The
- same is true of the tag name."""
- generator = self.recursiveChildGenerator
- if not recursive:
- generator = self.childGenerator
- return self._fetch(name, attrs, text, limit, generator)
- fetchChildren = fetch
-
- #Utility methods
-
- def isSelfClosing(self):
- """Returns true iff this is a self-closing tag as defined in the HTML
- standard.
-
- TODO: This is specific to BeautifulSoup and its subclasses, but it's
- used by __str__"""
- return self.name in BeautifulSoup.SELF_CLOSING_TAGS
-
- def append(self, tag):
- """Appends the given tag to the contents of this tag."""
- self.contents.append(tag)
-
- #Private methods
-
- def _getAttrMap(self):
- """Initializes a map representation of this tag's attributes,
- if not already initialized."""
- if not getattr(self, 'attrMap'):
- self.attrMap = {}
- for (key, value) in self.attrs:
- self.attrMap[key] = value
- return self.attrMap
-
- #Generator methods
- def childGenerator(self):
- for i in range(0, len(self.contents)):
- yield self.contents[i]
- raise StopIteration
-
- def recursiveChildGenerator(self):
- stack = [(self, 0)]
- while stack:
- tag, start = stack.pop()
- if isinstance(tag, Tag):
- for i in range(start, len(tag.contents)):
- a = tag.contents[i]
- yield a
- if isinstance(a, Tag) and tag.contents:
- if i < len(tag.contents) - 1:
- stack.append((tag, i+1))
- stack.append((a, 0))
- break
- raise StopIteration
-
-
-def isList(l):
- """Convenience method that works with all 2.x versions of Python
- to determine whether or not something is listlike."""
- return hasattr(l, '__iter__') \
- or (type(l) in (types.ListType, types.TupleType))
-
-def buildTagMap(default, *args):
- """Turns a list of maps, lists, or scalars into a single map.
- Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out
- of lists and partial maps."""
- built = {}
- for portion in args:
- if hasattr(portion, 'items'):
- #It's a map. Merge it.
- for k,v in portion.items():
- built[k] = v
- elif isList(portion):
- #It's a list. Map each item to the default.
- for k in portion:
- built[k] = default
- else:
- #It's a scalar. Map it to the default.
- built[portion] = default
- return built
-
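buildTagMap is how the SELF_CLOSING_TAGS, NESTABLE_TAGS and RESET_NESTING_TAGS
constants below are assembled. Two illustrative calls (a sketch; dict ordering
is arbitrary):

    buildTagMap(None, ['br', 'hr'])
    # -> {'br': None, 'hr': None}: list items map to the default

    buildTagMap([], {'li': ['ul', 'ol']}, 'p')
    # -> {'li': ['ul', 'ol'], 'p': []}: maps merge as-is, scalars get the default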
-class BeautifulStoneSoup(Tag, SGMLParser):
-
- """This class contains the basic parser and fetch code. It defines
- a parser that knows nothing about tag behavior except for the
- following:
-
- You can't close a tag without closing all the tags it encloses.
- That is, "<foo><bar></foo>" actually means
- "<foo><bar></bar></foo>".
-
- [Another possible explanation is "<foo><bar /></foo>", but since
- this class defines no SELF_CLOSING_TAGS, it will never use that
- explanation.]
-
- This class is useful for parsing XML or made-up markup languages,
- or when BeautifulSoup makes an assumption counter to what you were
- expecting."""
-
- SELF_CLOSING_TAGS = {}
- NESTABLE_TAGS = {}
- RESET_NESTING_TAGS = {}
- QUOTE_TAGS = {}
-
- #As a public service we will by default silently replace MS smart quotes
- #and similar characters with their HTML or ASCII equivalents.
- MS_CHARS = { '\x80' : '&euro;',
- '\x81' : ' ',
- '\x82' : '&sbquo;',
- '\x83' : '&fnof;',
- '\x84' : '&bdquo;',
- '\x85' : '&hellip;',
- '\x86' : '&dagger;',
- '\x87' : '&Dagger;',
- '\x88' : '&caret;',
- '\x89' : '%',
- '\x8A' : '&Scaron;',
- '\x8B' : '&lt;',
- '\x8C' : '&OElig;',
- '\x8D' : '?',
- '\x8E' : 'Z',
- '\x8F' : '?',
- '\x90' : '?',
- '\x91' : '&lsquo;',
- '\x92' : '&rsquo;',
- '\x93' : '&ldquo;',
- '\x94' : '&rdquo;',
- '\x95' : '&bull;',
- '\x96' : '&ndash;',
- '\x97' : '&mdash;',
- '\x98' : '&tilde;',
- '\x99' : '&trade;',
- '\x9a' : '&scaron;',
- '\x9b' : '&gt;',
- '\x9c' : '&oelig;',
- '\x9d' : '?',
- '\x9e' : 'z',
- '\x9f' : '&Yuml;',}
-
- PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
- lambda(x):x.group(1) + ' />'),
- (re.compile('<!\s+([^<>]*)>'),
- lambda(x):'<!' + x.group(1) + '>'),
- (re.compile("([\x80-\x9f])"),
- lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1)))
- ]
-
- ROOT_TAG_NAME = '[document]'
-
- def __init__(self, text=None, avoidParserProblems=True,
- initialTextIsEverything=True):
- """Initialize this as the 'root tag' and feed in any text to
- the parser.
-
- NOTE about avoidParserProblems: sgmllib will process most bad
- HTML, and BeautifulSoup has tricks for dealing with some HTML
- that kills sgmllib, but Beautiful Soup can nonetheless choke
- or lose data if your data uses self-closing tags or
- declarations incorrectly. By default, Beautiful Soup sanitizes
- its input to avoid the vast majority of these problems. The
- problems are relatively rare, even in bad HTML, so feel free
- to pass in False to avoidParserProblems if they don't apply to
- you, and you'll get better performance. The only reason I have
- this turned on by default is so I don't get so many tech
- support questions.
-
- The two most common instances of invalid HTML that will choke
- sgmllib are fixed by the default parser massage techniques:
-
- <br/> (No space between name of closing tag and tag close)
- <! --Comment--> (Extraneous whitespace in declaration)
-
- You can pass in a custom list of (RE object, replace method)
- tuples to get Beautiful Soup to scrub your input the way you
- want."""
- Tag.__init__(self, self.ROOT_TAG_NAME)
- if avoidParserProblems \
- and not isList(avoidParserProblems):
- avoidParserProblems = self.PARSER_MASSAGE
- self.avoidParserProblems = avoidParserProblems
- SGMLParser.__init__(self)
- self.quoteStack = []
- self.hidden = 1
- self.reset()
- if hasattr(text, 'read'):
- #It's a file-type object.
- text = text.read()
- if text:
- self.feed(text)
- if initialTextIsEverything:
- self.done()
-
- def __getattr__(self, methodName):
- """This method routes method call requests to either the SGMLParser
- superclass or the Tag superclass, depending on the method name."""
- if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
- or methodName.find('do_') == 0:
- return SGMLParser.__getattr__(self, methodName)
- elif methodName.find('__') != 0:
- return Tag.__getattr__(self, methodName)
- else:
- raise AttributeError
-
- def feed(self, text):
- if self.avoidParserProblems:
- for fix, m in self.avoidParserProblems:
- text = fix.sub(m, text)
- SGMLParser.feed(self, text)
-
- def done(self):
- """Called when you're done parsing, so that the unclosed tags can be
- correctly processed."""
- self.endData() #NEW
- while self.currentTag.name != self.ROOT_TAG_NAME:
- self.popTag()
-
- def reset(self):
- SGMLParser.reset(self)
- self.currentData = []
- self.currentTag = None
- self.tagStack = []
- self.pushTag(self)
-
- def popTag(self):
- tag = self.tagStack.pop()
- # Tags with just one string-owning child get the child as a
- # 'string' property, so that soup.tag.string is shorthand for
- # soup.tag.contents[0]
- if len(self.currentTag.contents) == 1 and \
- isinstance(self.currentTag.contents[0], NavigableText):
- self.currentTag.string = self.currentTag.contents[0]
-
- #print "Pop", tag.name
- if self.tagStack:
- self.currentTag = self.tagStack[-1]
- return self.currentTag
-
- def pushTag(self, tag):
- #print "Push", tag.name
- if self.currentTag:
- self.currentTag.append(tag)
- self.tagStack.append(tag)
- self.currentTag = self.tagStack[-1]
-
- def endData(self):
- currentData = ''.join(self.currentData)
- if currentData:
- if not currentData.strip():
- if '\n' in currentData:
- currentData = '\n'
- else:
- currentData = ' '
- c = NavigableString
- if type(currentData) == types.UnicodeType:
- c = NavigableUnicodeString
- o = c(currentData)
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
- self.currentTag.contents.append(o)
- self.currentData = []
-
- def _popToTag(self, name, inclusivePop=True):
- """Pops the tag stack up to and including the most recent
- instance of the given tag. If inclusivePop is false, pops the tag
- stack up to but *not* including the most recent instance of
- the given tag."""
- if name == self.ROOT_TAG_NAME:
- return
-
- numPops = 0
- mostRecentTag = None
- for i in range(len(self.tagStack)-1, 0, -1):
- if name == self.tagStack[i].name:
- numPops = len(self.tagStack)-i
- break
- if not inclusivePop:
- numPops = numPops - 1
-
- for i in range(0, numPops):
- mostRecentTag = self.popTag()
- return mostRecentTag
-
- def _smartPop(self, name):
-
- """We need to pop up to the previous tag of this type, unless
- one of this tag's nesting reset triggers comes between this
- tag and the previous tag of this type, OR unless this tag is a
- generic nesting trigger and another generic nesting trigger
- comes between this tag and the previous tag of this type.
-
- Examples:
- <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
- <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
- <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
- <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
-
- <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
- <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
- <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
- """
-
- nestingResetTriggers = self.NESTABLE_TAGS.get(name)
- isNestable = nestingResetTriggers != None
- isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
- popTo = None
- inclusive = True
- for i in range(len(self.tagStack)-1, 0, -1):
- p = self.tagStack[i]
- if (not p or p.name == name) and not isNestable:
- #Non-nestable tags get popped to the top or to their
- #last occurrence.
- popTo = name
- break
- if (nestingResetTriggers != None
- and p.name in nestingResetTriggers) \
- or (nestingResetTriggers == None and isResetNesting
- and self.RESET_NESTING_TAGS.has_key(p.name)):
-
- #If we encounter one of the nesting reset triggers
- #peculiar to this tag, or we encounter another tag
- #that causes nesting to reset, pop up to but not
- #including that tag.
-
- popTo = p.name
- inclusive = False
- break
- p = p.parent
- if popTo:
- self._popToTag(popTo, inclusive)
-
- def unknown_starttag(self, name, attrs, selfClosing=0):
- #print "Start tag %s" % name
- if self.quoteStack:
- #This is not a real tag.
- #print "<%s> is not real!" % name
- attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
- self.handle_data('<%s%s>' % (name, attrs))
- return
- self.endData()
- if not name in self.SELF_CLOSING_TAGS and not selfClosing:
- self._smartPop(name)
- tag = Tag(name, attrs, self.currentTag, self.previous)
- if self.previous:
- self.previous.next = tag
- self.previous = tag
- self.pushTag(tag)
- if selfClosing or name in self.SELF_CLOSING_TAGS:
- self.popTag()
- if name in self.QUOTE_TAGS:
- #print "Beginning quote (%s)" % name
- self.quoteStack.append(name)
- self.literal = 1
-
- def unknown_endtag(self, name):
- if self.quoteStack and self.quoteStack[-1] != name:
- #This is not a real end tag.
- #print "</%s> is not real!" % name
- self.handle_data('</%s>' % name)
- return
- self.endData()
- self._popToTag(name)
- if self.quoteStack and self.quoteStack[-1] == name:
- self.quoteStack.pop()
- self.literal = (len(self.quoteStack) > 0)
-
- def handle_data(self, data):
- self.currentData.append(data)
-
- def handle_pi(self, text):
- "Propagate processing instructions right through."
- self.handle_data("<?%s>" % text)
-
- def handle_comment(self, text):
- "Propagate comments right through."
- self.handle_data("<!--%s-->" % text)
-
- def handle_charref(self, ref):
- "Propagate char refs right through."
- self.handle_data('&#%s;' % ref)
-
- def handle_entityref(self, ref):
- "Propagate entity refs right through."
- self.handle_data('&%s;' % ref)
-
- def handle_decl(self, data):
- "Propagate DOCTYPEs and the like right through."
- self.handle_data('<!%s>' % data)
-
- def parse_declaration(self, i):
- """Treat a bogus SGML declaration as raw data. Treat a CDATA
- declaration as regular data."""
- j = None
- if self.rawdata[i:i+9] == '<![CDATA[':
- k = self.rawdata.find(']]>', i)
- if k == -1:
- k = len(self.rawdata)
- self.handle_data(self.rawdata[i+9:k])
- j = k+3
- else:
- try:
- j = SGMLParser.parse_declaration(self, i)
- except SGMLParseError:
- toHandle = self.rawdata[i:]
- self.handle_data(toHandle)
- j = i + len(toHandle)
- return j
-
-class BeautifulSoup(BeautifulStoneSoup):
-
- """This parser knows the following facts about HTML:
-
- * Some tags have no closing tag and should be interpreted as being
- closed as soon as they are encountered.
-
- * The text inside some tags (ie. 'script') may contain tags which
- are not really part of the document and which should be parsed
- as text, not tags. If you want to parse the text as tags, you can
- always fetch it and parse it explicitly.
-
- * Tag nesting rules:
-
- Most tags can't be nested at all. For instance, the occurrence of
- a <p> tag should implicitly close the previous <p> tag.
-
- <p>Para1<p>Para2
- should be transformed into:
- <p>Para1</p><p>Para2
-
- Some tags can be nested arbitrarily. For instance, the occurrence
- of a <blockquote> tag should _not_ implicitly close the previous
- <blockquote> tag.
-
- Alice said: <blockquote>Bob said: <blockquote>Blah
- should NOT be transformed into:
- Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
-
- Some tags can be nested, but the nesting is reset by the
- interposition of other tags. For instance, a <tr> tag should
- implicitly close the previous <tr> tag within the same <table>,
- but not close a <tr> tag in another table.
-
- <table><tr>Blah<tr>Blah
- should be transformed into:
- <table><tr>Blah</tr><tr>Blah
- but,
- <tr>Blah<table><tr>Blah
- should NOT be transformed into
- <tr>Blah<table></tr><tr>Blah
-
- Differing assumptions about tag nesting rules are a major source
- of problems with the BeautifulSoup class. If BeautifulSoup is not
- treating as nestable a tag your page author treats as nestable,
- try ICantBelieveItsBeautifulSoup before writing your own
- subclass."""
-
- SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta',
- 'spacer', 'link', 'frame', 'base'])
-
- QUOTE_TAGS = {'script': None}
-
- #According to the HTML standard, each of these inline tags can
- #contain another tag of the same type. Furthermore, it's common
- #to actually use these tags this way.
- NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
- 'center']
-
- #According to the HTML standard, these block tags can contain
- #another tag of the same type. Furthermore, it's common
- #to actually use these tags this way.
- NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']
-
- #Lists can contain other lists, but there are restrictions.
- NESTABLE_LIST_TAGS = { 'ol' : [],
- 'ul' : [],
- 'li' : ['ul', 'ol'],
- 'dl' : [],
- 'dd' : ['dl'],
- 'dt' : ['dl'] }
-
- #Tables can contain other tables, but there are restrictions.
- NESTABLE_TABLE_TAGS = {'table' : [],
- 'tr' : ['table', 'tbody', 'tfoot', 'thead'],
- 'td' : ['tr'],
- 'th' : ['tr'],
- }
-
- NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']
-
- #If one of these tags is encountered, all tags up to the next tag of
- #this type are popped.
- RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
- NON_NESTABLE_BLOCK_TAGS,
- NESTABLE_LIST_TAGS,
- NESTABLE_TABLE_TAGS)
-
- NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
- NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
-
-class ICantBelieveItsBeautifulSoup(BeautifulSoup):
-
- """The BeautifulSoup class is oriented towards skipping over
- common HTML errors like unclosed tags. However, sometimes it makes
- errors of its own. For instance, consider this fragment:
-
- <b>Foo<b>Bar</b></b>
-
- This is perfectly valid (if bizarre) HTML. However, the
- BeautifulSoup class will implicitly close the first b tag when it
- encounters the second 'b'. It will think the author wrote
- "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
- there's no real-world reason to bold something that's already
- bold. When it encounters '</b></b>' it will close two more 'b'
- tags, for a grand total of three tags closed instead of two. This
- can throw off the rest of your document structure. The same is
- true of a number of other tags, listed below.
-
- It's much more common for someone to forget to close (eg.) a 'b'
- tag than to actually use nested 'b' tags, and the BeautifulSoup
- class handles the common case. This class handles the
- not-so-common case: where you can't believe someone wrote what
- they did, but it's valid HTML and BeautifulSoup screwed up by
- assuming it wouldn't be.
-
- If this doesn't do what you need, try subclassing this class or
- BeautifulSoup, and providing your own list of NESTABLE_TAGS."""
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
- ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
- 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
- 'big']
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
-
- NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
-
-class BeautifulSOAP(BeautifulStoneSoup):
- """This class will push a tag with only a single string child into
- the tag's parent as an attribute. The attribute's name is the tag
- name, and the value is the string child. An example should give
- the flavor of the change:
-
- <foo><bar>baz</bar></foo>
- =>
- <foo bar="baz"><bar>baz</bar></foo>
-
- You can then access fooTag['bar'] instead of fooTag.barTag.string.
-
- This is, of course, useful for scraping structures that tend to
- use subelements instead of attributes, such as SOAP messages. Note
- that it modifies its input, so don't print the modified version
- out.
-
- I'm not sure how many people really want to use this class; let me
- know if you do. Mainly I like the name."""
-
- def popTag(self):
- if len(self.tagStack) > 1:
- tag = self.tagStack[-1]
- parent = self.tagStack[-2]
- parent._getAttrMap()
- if (isinstance(tag, Tag) and len(tag.contents) == 1 and
- isinstance(tag.contents[0], NavigableText) and
- not parent.attrMap.has_key(tag.name)):
- parent[tag.name] = tag.contents[0]
- BeautifulStoneSoup.popTag(self)
-
-#Enterprise class names! It has come to our attention that some people
-#think the names of the Beautiful Soup parser classes are too silly
-#and "unprofessional" for use in enterprise screen-scraping. We feel
-#your pain! For such-minded folk, the Beautiful Soup Consortium And
-#All-Night Kosher Bakery recommends renaming this file to
-#"RobustParser.py" (or, in cases of extreme enterprisitude,
-#"RobustParserBeanInterface.class") and using the following
-#enterprise-friendly class aliases:
-class RobustXMLParser(BeautifulStoneSoup):
- pass
-class RobustHTMLParser(BeautifulSoup):
- pass
-class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
- pass
-class SimplifyingSOAPParser(BeautifulSOAP):
- pass
-
-###
-
-
-#By default, act as an HTML pretty-printer.
-if __name__ == '__main__':
- import sys
- soup = BeautifulStoneSoup(sys.stdin.read())
- print soup.prettify()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py
deleted file mode 100644
index caeb82b..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py
+++ /dev/null
@@ -1,1707 +0,0 @@
-"""HTTP cookie handling for web clients.
-
-This module originally developed from my port of Gisle Aas' Perl module
-HTTP::Cookies, from the libwww-perl library.
-
-Docstrings, comments and debug strings in this code refer to the
-attributes of the HTTP cookie system as cookie-attributes, to distinguish
-them clearly from Python attributes.
-
- CookieJar____
- / \ \
- FileCookieJar \ \
- / | \ \ \
- MozillaCookieJar | LWPCookieJar \ \
- | | \
- | ---MSIEBase | \
- | / | | \
- | / MSIEDBCookieJar BSDDBCookieJar
- |/
- MSIECookieJar
-
-Comments to John J Lee <jjl@pobox.com>.
-
-
-Copyright 2002-2006 John J Lee <jjl@pobox.com>
-Copyright 1997-1999 Gisle Aas (original libwww-perl code)
-Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import sys, re, copy, time, urllib, types, logging
-try:
- import threading
- _threading = threading; del threading
-except ImportError:
- import dummy_threading
- _threading = dummy_threading; del dummy_threading
-
-MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
- "instance initialised with one)")
-DEFAULT_HTTP_PORT = "80"
-
-from _headersutil import split_header_words, parse_ns_headers
-from _util import isstringlike
-import _rfc3986
-
-debug = logging.getLogger("mechanize.cookies").debug
-
-
-def reraise_unmasked_exceptions(unmasked=()):
- # There are a few catch-all except: statements in this module, for
- # catching input that's bad in unexpected ways.
- # This function re-raises some exceptions we don't want to trap.
- import mechanize, warnings
- if not mechanize.USE_BARE_EXCEPT:
- raise
- unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
- etype = sys.exc_info()[0]
- if issubclass(etype, unmasked):
- raise
- # swallowed an exception
- import traceback, StringIO
- f = StringIO.StringIO()
- traceback.print_exc(None, f)
- msg = f.getvalue()
- warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
-
-
-IPV4_RE = re.compile(r"\.\d+$")
-def is_HDN(text):
- """Return True if text is a host domain name."""
- # XXX
- # This may well be wrong. Which RFC is HDN defined in, if any (for
- # the purposes of RFC 2965)?
- # For the current implementation, what about IPv6? Remember to look
- # at other uses of IPV4_RE also, if change this.
- return not (IPV4_RE.search(text) or
- text == "" or
- text[0] == "." or text[-1] == ".")
-
-def domain_match(A, B):
- """Return True if domain A domain-matches domain B, according to RFC 2965.
-
- A and B may be host domain names or IP addresses.
-
- RFC 2965, section 1:
-
- Host names can be specified either as an IP address or a HDN string.
- Sometimes we compare one host name with another. (Such comparisons SHALL
- be case-insensitive.) Host A's name domain-matches host B's if
-
- * their host name strings string-compare equal; or
-
- * A is a HDN string and has the form NB, where N is a non-empty
- name string, B has the form .B', and B' is a HDN string. (So,
- x.y.com domain-matches .Y.com but not Y.com.)
-
- Note that domain-match is not a commutative operation: a.b.c.com
- domain-matches .c.com, but not the reverse.
-
- """
- # Note that, if A or B are IP addresses, the only relevant part of the
- # definition of the domain-match algorithm is the direct string-compare.
- A = A.lower()
- B = B.lower()
- if A == B:
- return True
- if not is_HDN(A):
- return False
- i = A.rfind(B)
- has_form_nb = not (i == -1 or i == 0)
- return (
- has_form_nb and
- B.startswith(".") and
- is_HDN(B[1:])
- )
-
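A few concrete checks of the asymmetry documented above (hypothetical hosts):

    domain_match("x.y.com", ".y.com")           # True: N="x", B'="y.com"
    domain_match("y.com", ".y.com")             # False: no non-empty prefix N
    domain_match(".y.com", "x.y.com")           # False: not commutative
    domain_match("192.168.1.2", "192.168.1.2")  # True: IPs only string-compare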
-def liberal_is_HDN(text):
- """Return True if text is a sort-of-like a host domain name.
-
- For accepting/blocking domains.
-
- """
- return not IPV4_RE.search(text)
-
-def user_domain_match(A, B):
- """For blocking/accepting domains.
-
- A and B may be host domain names or IP addresses.
-
- """
- A = A.lower()
- B = B.lower()
- if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
- if A == B:
- # equal IP addresses
- return True
- return False
- initial_dot = B.startswith(".")
- if initial_dot and A.endswith(B):
- return True
- if not initial_dot and A == B:
- return True
- return False
-
-cut_port_re = re.compile(r":\d+$")
-def request_host(request):
- """Return request-host, as defined by RFC 2965.
-
- Variation from RFC: returned value is lowercased, for convenient
- comparison.
-
- """
- url = request.get_full_url()
- host = _rfc3986.urlsplit(url)[1]
- if host is None:
- host = request.get_header("Host", "")
- # remove port, if present
- return cut_port_re.sub("", host, 1)
-
-def request_host_lc(request):
- return request_host(request).lower()
-
-def eff_request_host(request):
- """Return a tuple (request-host, effective request-host name)."""
- erhn = req_host = request_host(request)
- if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
- erhn = req_host + ".local"
- return req_host, erhn
-
-def eff_request_host_lc(request):
- req_host, erhn = eff_request_host(request)
- return req_host.lower(), erhn.lower()
-
-def effective_request_host(request):
- """Return the effective request-host, as defined by RFC 2965."""
- return eff_request_host(request)[1]
-
-def request_path(request):
- """request-URI, as defined by RFC 2965."""
- url = request.get_full_url()
- path, query, frag = _rfc3986.urlsplit(url)[2:]
- path = escape_path(path)
- req_path = _rfc3986.urlunsplit((None, None, path, query, frag))
- if not req_path.startswith("/"):
- req_path = "/"+req_path
- return req_path
-
-def request_port(request):
- host = request.get_host()
- i = host.find(':')
- if i >= 0:
- port = host[i+1:]
- try:
- int(port)
- except ValueError:
- debug("nonnumeric port: '%s'", port)
- return None
- else:
- port = DEFAULT_HTTP_PORT
- return port
-
-def request_is_unverifiable(request):
- try:
- return request.is_unverifiable()
- except AttributeError:
- if hasattr(request, "unverifiable"):
- return request.unverifiable
- else:
- raise
-
-# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
-# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
-HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
-ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
-def uppercase_escaped_char(match):
- return "%%%s" % match.group(1).upper()
-def escape_path(path):
- """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
- # There's no knowing what character encoding was used to create URLs
- # containing %-escapes, but since we have to pick one to escape invalid
- # path characters, we pick UTF-8, as recommended in the HTML 4.0
- # specification:
- # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
- # And here, kind of: draft-fielding-uri-rfc2396bis-03
- # (And in draft IRI specification: draft-duerst-iri-05)
- # (And here, for new URI schemes: RFC 2718)
- if isinstance(path, types.UnicodeType):
- path = path.encode("utf-8")
- path = urllib.quote(path, HTTP_PATH_SAFE)
- path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
- return path
-
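For example, with a hypothetical path containing a space and a lowercase escape:

    escape_path("/a b/%7ejoe")  # -> "/a%20b/%7Ejoe"
    # The space is %-quoted; the existing %7e escape is uppercased to %7E.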
-def reach(h):
- """Return reach of host h, as defined by RFC 2965, section 1.
-
- The reach R of a host name H is defined as follows:
-
- * If
-
- - H is the host domain name of a host; and,
-
- - H has the form A.B; and
-
- - A has no embedded (that is, interior) dots; and
-
- - B has at least one embedded dot, or B is the string "local".
- then the reach of H is .B.
-
- * Otherwise, the reach of H is H.
-
- >>> reach("www.acme.com")
- '.acme.com'
- >>> reach("acme.com")
- 'acme.com'
- >>> reach("acme.local")
- '.local'
-
- """
- i = h.find(".")
- if i >= 0:
- #a = h[:i] # this line is only here to show what a is
- b = h[i+1:]
- i = b.find(".")
- if is_HDN(h) and (i >= 0 or b == "local"):
- return "."+b
- return h
-
-def is_third_party(request):
- """
-
- RFC 2965, section 3.3.6:
-
- An unverifiable transaction is to a third-party host if its request-
- host U does not domain-match the reach R of the request-host O in the
- origin transaction.
-
- """
- req_host = request_host_lc(request)
- # the origin request's request-host was stuffed into request by
- # _urllib2_support.AbstractHTTPHandler
- return not domain_match(req_host, reach(request.origin_req_host))
-
-
-class Cookie:
- """HTTP Cookie.
-
- This class represents both Netscape and RFC 2965 cookies.
-
- This is deliberately a very simple class. It just holds attributes. It's
- possible to construct Cookie instances that don't comply with the cookie
- standards. CookieJar.make_cookies is the factory function for Cookie
- objects -- it deals with cookie parsing, supplying defaults, and
- normalising to the representation used in this class. CookiePolicy is
- responsible for checking them to see whether they should be accepted from
- and returned to the server.
-
- version: integer;
- name: string;
- value: string (may be None);
- port: string; None indicates no attribute was supplied (eg. "Port", rather
- than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
- string (eg. "80,8080")
- port_specified: boolean; true if a value was supplied with the Port
- cookie-attribute
- domain: string;
- domain_specified: boolean; true if Domain was explicitly set
- domain_initial_dot: boolean; true if Domain as set in HTTP header by server
- started with a dot (yes, this really is necessary!)
- path: string;
- path_specified: boolean; true if Path was explicitly set
- secure: boolean; true if should only be returned over secure connection
- expires: integer; seconds since epoch (RFC 2965 cookies should calculate
- this value from the Max-Age attribute)
- discard: boolean, true if this is a session cookie; (if no expires value,
- this should be true)
- comment: string;
- comment_url: string;
- rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
- Set-Cookie2:) header, but had a version cookie-attribute of 1
- rest: mapping of other cookie-attributes
-
- Note that the port may be present in the headers, but unspecified ("Port"
- rather than"Port=80", for example); if this is the case, port is None.
-
- """
-
- def __init__(self, version, name, value,
- port, port_specified,
- domain, domain_specified, domain_initial_dot,
- path, path_specified,
- secure,
- expires,
- discard,
- comment,
- comment_url,
- rest,
- rfc2109=False,
- ):
-
- if version is not None: version = int(version)
- if expires is not None: expires = int(expires)
- if port is None and port_specified is True:
- raise ValueError("if port is None, port_specified must be false")
-
- self.version = version
- self.name = name
- self.value = value
- self.port = port
- self.port_specified = port_specified
- # normalise case, as per RFC 2965 section 3.3.3
- self.domain = domain.lower()
- self.domain_specified = domain_specified
- # Sigh. We need to know whether the domain given in the
- # cookie-attribute had an initial dot, in order to follow RFC 2965
- # (as clarified in draft errata). Needed for the returned $Domain
- # value.
- self.domain_initial_dot = domain_initial_dot
- self.path = path
- self.path_specified = path_specified
- self.secure = secure
- self.expires = expires
- self.discard = discard
- self.comment = comment
- self.comment_url = comment_url
- self.rfc2109 = rfc2109
-
- self._rest = copy.copy(rest)
-
- def has_nonstandard_attr(self, name):
- return self._rest.has_key(name)
- def get_nonstandard_attr(self, name, default=None):
- return self._rest.get(name, default)
- def set_nonstandard_attr(self, name, value):
- self._rest[name] = value
- def nonstandard_attr_keys(self):
- return self._rest.keys()
-
- def is_expired(self, now=None):
- if now is None: now = time.time()
- return (self.expires is not None) and (self.expires <= now)
-
- def __str__(self):
- if self.port is None: p = ""
- else: p = ":"+self.port
- limit = self.domain + p + self.path
- if self.value is not None:
- namevalue = "%s=%s" % (self.name, self.value)
- else:
- namevalue = self.name
- return "<Cookie %s for %s>" % (namevalue, limit)
-
- def __repr__(self):
- args = []
- for name in ["version", "name", "value",
- "port", "port_specified",
- "domain", "domain_specified", "domain_initial_dot",
- "path", "path_specified",
- "secure", "expires", "discard", "comment", "comment_url",
- ]:
- attr = getattr(self, name)
- args.append("%s=%s" % (name, repr(attr)))
- args.append("rest=%s" % repr(self._rest))
- args.append("rfc2109=%s" % repr(self.rfc2109))
- return "Cookie(%s)" % ", ".join(args)
-
-
-class CookiePolicy:
- """Defines which cookies get accepted from and returned to server.
-
- May also modify cookies.
-
- The subclass DefaultCookiePolicy defines the standard rules for Netscape
- and RFC 2965 cookies -- override that if you want a customised policy.
-
- As well as implementing set_ok and return_ok, implementations of this
- interface must also supply the following attributes, indicating which
- protocols should be used, and how. These can be read and set at any time,
- though whether that makes complete sense from the protocol point of view is
- doubtful.
-
- Public attributes:
-
- netscape: implement netscape protocol
- rfc2965: implement RFC 2965 protocol
- rfc2109_as_netscape:
- WARNING: This argument will change or go away if it is not accepted into
- the Python standard library in this form!
- If true, treat RFC 2109 cookies as though they were Netscape cookies. The
- default is for this attribute to be None, which means treat 2109 cookies
- as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
- by default), and as Netscape cookies otherwise.
- hide_cookie2: don't add Cookie2 header to requests (the presence of
- this header indicates to the server that we understand RFC 2965
- cookies)
-
- """
- def set_ok(self, cookie, request):
- """Return true if (and only if) cookie should be accepted from server.
-
- Currently, pre-expired cookies never get this far -- the CookieJar
- class deletes such cookies itself.
-
- cookie: mechanize.Cookie object
- request: object implementing the interface defined by
- CookieJar.extract_cookies.__doc__
-
- """
- raise NotImplementedError()
-
- def return_ok(self, cookie, request):
- """Return true if (and only if) cookie should be returned to server.
-
- cookie: mechanize.Cookie object
- request: object implementing the interface defined by
- CookieJar.add_cookie_header.__doc__
-
- """
- raise NotImplementedError()
-
- def domain_return_ok(self, domain, request):
- """Return false if cookies should not be returned, given cookie domain.
-
- This is here as an optimization, to remove the need for checking every
- cookie with a particular domain (which may involve reading many files).
- The default implementations of domain_return_ok and path_return_ok
- (return True) leave all the work to return_ok.
-
- If domain_return_ok returns true for the cookie domain, path_return_ok
- is called for the cookie path. Otherwise, path_return_ok and return_ok
- are never called for that cookie domain. If path_return_ok returns
- true, return_ok is called with the Cookie object itself for a full
- check. Otherwise, return_ok is never called for that cookie path.
-
- Note that domain_return_ok is called for every *cookie* domain, not
- just for the *request* domain. For example, the function might be
- called with both ".acme.com" and "www.acme.com" if the request domain
- is "www.acme.com". The same goes for path_return_ok.
-
- For argument documentation, see the docstring for return_ok.
-
- """
- return True
-
- def path_return_ok(self, path, request):
- """Return false if cookies should not be returned, given cookie path.
-
- See the docstring for domain_return_ok.
-
- """
- return True
-
-
-class DefaultCookiePolicy(CookiePolicy):
- """Implements the standard rules for accepting and returning cookies.
-
- Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
- switched off by default.
-
- The easiest way to provide your own policy is to override this class and
- call its methods in your overridden implementations before adding your own
- additional checks.
-
- import mechanize
- class MyCookiePolicy(mechanize.DefaultCookiePolicy):
- def set_ok(self, cookie, request):
- if not mechanize.DefaultCookiePolicy.set_ok(
- self, cookie, request):
- return False
- if i_dont_want_to_store_this_cookie():
- return False
- return True
-
- In addition to the features required to implement the CookiePolicy
- interface, this class allows you to block and allow domains from setting
- and receiving cookies. There are also some strictness switches that allow
- you to tighten up the rather loose Netscape protocol rules a little bit (at
- the cost of blocking some benign cookies).
-
- A domain blacklist and a whitelist are provided (both off by default). Only
- domains not in the blacklist and present in the whitelist (if the whitelist
- is active) participate in cookie setting and returning. Use the
- blocked_domains constructor argument, and blocked_domains and
- set_blocked_domains methods (and the corresponding argument and methods for
- allowed_domains). If you set a whitelist, you can turn it off again by
- setting it to None.
-
- Domains in block or allow lists that do not start with a dot must
- string-compare equal. For example, "acme.com" matches a blacklist entry of
- "acme.com", but "www.acme.com" does not. Domains that do start with a dot
- are matched by more specific domains too. For example, both "www.acme.com"
- and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
- not). IP addresses are an exception, and must match exactly. For example,
- if blocked_domains contains "192.168.1.2" and ".168.1.2", 192.168.1.2 is
- blocked, but 193.168.1.2 is not.
-
- Additional Public Attributes:
-
- General strictness switches
-
- strict_domain: don't allow sites to set two-component domains with
- country-code top-level domains like .co.uk, .gov.uk, .co.nz, etc.
- This is far from perfect and isn't guaranteed to work!
-
- RFC 2965 protocol strictness switches
-
- strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
- transactions (usually, an unverifiable transaction is one resulting from
- a redirect or an image hosted on another site); if this is false, cookies
- are NEVER blocked on the basis of verifiability
-
- Netscape protocol strictness switches
-
- strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
- even to Netscape cookies
- strict_ns_domain: flags indicating how strict to be with domain-matching
- rules for Netscape cookies:
- DomainStrictNoDots: when setting cookies, host prefix must not contain a
- dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
- www.foo contains a dot)
- DomainStrictNonDomain: cookies that did not explicitly specify a Domain
- cookie-attribute can only be returned to a domain that string-compares
- equal to the domain that set the cookie (eg. rockets.acme.com won't
- be returned cookies from acme.com that had no Domain cookie-attribute)
- DomainRFC2965Match: when setting cookies, require a full RFC 2965
- domain-match
- DomainLiberal and DomainStrict are the most useful combinations of the
- above flags, for convenience
- strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
- have names starting with '$'
- strict_ns_set_path: don't allow setting cookies whose path doesn't
- path-match request URI
-
- """
-
- DomainStrictNoDots = 1
- DomainStrictNonDomain = 2
- DomainRFC2965Match = 4
-
- DomainLiberal = 0
- DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
-
- def __init__(self,
- blocked_domains=None, allowed_domains=None,
- netscape=True, rfc2965=False,
- # WARNING: this argument will change or go away if it is not
- # accepted into the Python standard library in this form!
- # default, ie. treat 2109 as netscape iff not rfc2965
- rfc2109_as_netscape=None,
- hide_cookie2=False,
- strict_domain=False,
- strict_rfc2965_unverifiable=True,
- strict_ns_unverifiable=False,
- strict_ns_domain=DomainLiberal,
- strict_ns_set_initial_dollar=False,
- strict_ns_set_path=False,
- ):
- """
- Constructor arguments should be used as keyword arguments only.
-
- blocked_domains: sequence of domain names that we never accept cookies
- from, nor return cookies to
- allowed_domains: if not None, this is a sequence of the only domains
- for which we accept and return cookies
-
- For other arguments, see CookiePolicy.__doc__ and
- DefaultCookiePolicy.__doc__.
-
- """
- self.netscape = netscape
- self.rfc2965 = rfc2965
- self.rfc2109_as_netscape = rfc2109_as_netscape
- self.hide_cookie2 = hide_cookie2
- self.strict_domain = strict_domain
- self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
- self.strict_ns_unverifiable = strict_ns_unverifiable
- self.strict_ns_domain = strict_ns_domain
- self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
- self.strict_ns_set_path = strict_ns_set_path
-
- if blocked_domains is not None:
- self._blocked_domains = tuple(blocked_domains)
- else:
- self._blocked_domains = ()
-
- if allowed_domains is not None:
- allowed_domains = tuple(allowed_domains)
- self._allowed_domains = allowed_domains
-
- def blocked_domains(self):
- """Return the sequence of blocked domains (as a tuple)."""
- return self._blocked_domains
- def set_blocked_domains(self, blocked_domains):
- """Set the sequence of blocked domains."""
- self._blocked_domains = tuple(blocked_domains)
-
- def is_blocked(self, domain):
- for blocked_domain in self._blocked_domains:
- if user_domain_match(domain, blocked_domain):
- return True
- return False
-
- def allowed_domains(self):
- """Return None, or the sequence of allowed domains (as a tuple)."""
- return self._allowed_domains
- def set_allowed_domains(self, allowed_domains):
- """Set the sequence of allowed domains, or None."""
- if allowed_domains is not None:
- allowed_domains = tuple(allowed_domains)
- self._allowed_domains = allowed_domains
-
- def is_not_allowed(self, domain):
- if self._allowed_domains is None:
- return False
- for allowed_domain in self._allowed_domains:
- if user_domain_match(domain, allowed_domain):
- return False
- return True
-
- def set_ok(self, cookie, request):
- """
- If you override set_ok, be sure to call this method. If it returns
- false, so should your subclass (assuming your subclass wants to be more
- strict about which cookies to accept).
-
- """
- debug(" - checking cookie %s", cookie)
-
- assert cookie.name is not None
-
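- # Dispatch to the per-attribute checks (set_ok_version, set_ok_name,
- # set_ok_path, etc.); the cookie is accepted only if every check passes.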
- for n in "version", "verifiability", "name", "path", "domain", "port":
- fn_name = "set_ok_"+n
- fn = getattr(self, fn_name)
- if not fn(cookie, request):
- return False
-
- return True
-
- def set_ok_version(self, cookie, request):
- if cookie.version is None:
- # Version is always set to 0 by parse_ns_headers if it's a Netscape
- # cookie, so this must be an invalid RFC 2965 cookie.
- debug(" Set-Cookie2 without version attribute (%s)", cookie)
- return False
- if cookie.version > 0 and not self.rfc2965:
- debug(" RFC 2965 cookies are switched off")
- return False
- elif cookie.version == 0 and not self.netscape:
- debug(" Netscape cookies are switched off")
- return False
- return True
-
- def set_ok_verifiability(self, cookie, request):
- if request_is_unverifiable(request) and is_third_party(request):
- if cookie.version > 0 and self.strict_rfc2965_unverifiable:
- debug(" third-party RFC 2965 cookie during "
- "unverifiable transaction")
- return False
- elif cookie.version == 0 and self.strict_ns_unverifiable:
- debug(" third-party Netscape cookie during "
- "unverifiable transaction")
- return False
- return True
-
- def set_ok_name(self, cookie, request):
- # Try and stop servers setting V0 cookies designed to hack other
- # servers that know both V0 and V1 protocols.
- if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
- cookie.name.startswith("$")):
- debug(" illegal name (starts with '$'): '%s'", cookie.name)
- return False
- return True
-
- def set_ok_path(self, cookie, request):
- if cookie.path_specified:
- req_path = request_path(request)
- if ((cookie.version > 0 or
- (cookie.version == 0 and self.strict_ns_set_path)) and
- not req_path.startswith(cookie.path)):
- debug(" path attribute %s is not a prefix of request "
- "path %s", cookie.path, req_path)
- return False
- return True
-
- def set_ok_countrycode_domain(self, cookie, request):
- """Return False if explicit cookie domain is not acceptable.
-
- Called by set_ok_domain, for convenience of overriding by
- subclasses.
-
- """
- if cookie.domain_specified and self.strict_domain:
- domain = cookie.domain
- # since domain was specified, we know that:
- assert domain.startswith(".")
- if domain.count(".") == 2:
- # domain like .foo.bar
- i = domain.rfind(".")
- tld = domain[i+1:]
- sld = domain[1:i]
- if (sld.lower() in [
- "co", "ac",
- "com", "edu", "org", "net", "gov", "mil", "int",
- "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
- "museum", "name", "pro", "travel",
- ] and
- len(tld) == 2):
- # domain like .co.uk
- return False
- return True
-
- def set_ok_domain(self, cookie, request):
- if self.is_blocked(cookie.domain):
- debug(" domain %s is in user block-list", cookie.domain)
- return False
- if self.is_not_allowed(cookie.domain):
- debug(" domain %s is not in user allow-list", cookie.domain)
- return False
- if not self.set_ok_countrycode_domain(cookie, request):
- debug(" country-code second level domain %s", cookie.domain)
- return False
- if cookie.domain_specified:
- req_host, erhn = eff_request_host_lc(request)
- domain = cookie.domain
- if domain.startswith("."):
- undotted_domain = domain[1:]
- else:
- undotted_domain = domain
- embedded_dots = (undotted_domain.find(".") >= 0)
- if not embedded_dots and domain != ".local":
- debug(" non-local domain %s contains no embedded dot",
- domain)
- return False
- if cookie.version == 0:
- if (not erhn.endswith(domain) and
- (not erhn.startswith(".") and
- not ("."+erhn).endswith(domain))):
- debug(" effective request-host %s (even with added "
- "initial dot) does not end end with %s",
- erhn, domain)
- return False
- if (cookie.version > 0 or
- (self.strict_ns_domain & self.DomainRFC2965Match)):
- if not domain_match(erhn, domain):
- debug(" effective request-host %s does not domain-match "
- "%s", erhn, domain)
- return False
- if (cookie.version > 0 or
- (self.strict_ns_domain & self.DomainStrictNoDots)):
- host_prefix = req_host[:-len(domain)]
- if (host_prefix.find(".") >= 0 and
- not IPV4_RE.search(req_host)):
- debug(" host prefix %s for domain %s contains a dot",
- host_prefix, domain)
- return False
- return True
-
- def set_ok_port(self, cookie, request):
- if cookie.port_specified:
- req_port = request_port(request)
- if req_port is None:
- req_port = "80"
- else:
- req_port = str(req_port)
- for p in cookie.port.split(","):
- try:
- int(p)
- except ValueError:
- debug(" bad port %s (not numeric)", p)
- return False
- if p == req_port:
- break
- else:
- debug(" request port (%s) not found in %s",
- req_port, cookie.port)
- return False
- return True
-
- def return_ok(self, cookie, request):
- """
- If you override return_ok, be sure to call this method. If it returns
- false, so should your subclass (assuming your subclass wants to be more
- strict about which cookies to return).
-
- """
- # Path has already been checked by path_return_ok, and domain blocking
- # done by domain_return_ok.
- debug(" - checking cookie %s", cookie)
-
- for n in ("version", "verifiability", "secure", "expires", "port",
- "domain"):
- fn_name = "return_ok_"+n
- fn = getattr(self, fn_name)
- if not fn(cookie, request):
- return False
- return True
-
- def return_ok_version(self, cookie, request):
- if cookie.version > 0 and not self.rfc2965:
- debug(" RFC 2965 cookies are switched off")
- return False
- elif cookie.version == 0 and not self.netscape:
- debug(" Netscape cookies are switched off")
- return False
- return True
-
- def return_ok_verifiability(self, cookie, request):
- if request_is_unverifiable(request) and is_third_party(request):
- if cookie.version > 0 and self.strict_rfc2965_unverifiable:
- debug(" third-party RFC 2965 cookie during unverifiable "
- "transaction")
- return False
- elif cookie.version == 0 and self.strict_ns_unverifiable:
- debug(" third-party Netscape cookie during unverifiable "
- "transaction")
- return False
- return True
-
- def return_ok_secure(self, cookie, request):
- if cookie.secure and request.get_type() != "https":
- debug(" secure cookie with non-secure request")
- return False
- return True
-
- def return_ok_expires(self, cookie, request):
- if cookie.is_expired(self._now):
- debug(" cookie expired")
- return False
- return True
-
- def return_ok_port(self, cookie, request):
- if cookie.port:
- req_port = request_port(request)
- if req_port is None:
- req_port = "80"
- for p in cookie.port.split(","):
- if p == req_port:
- break
- else:
- debug(" request port %s does not match cookie port %s",
- req_port, cookie.port)
- return False
- return True
-
- def return_ok_domain(self, cookie, request):
- req_host, erhn = eff_request_host_lc(request)
- domain = cookie.domain
-
- # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
- if (cookie.version == 0 and
- (self.strict_ns_domain & self.DomainStrictNonDomain) and
- not cookie.domain_specified and domain != erhn):
- debug(" cookie with unspecified domain does not string-compare "
- "equal to request domain")
- return False
-
- if cookie.version > 0 and not domain_match(erhn, domain):
- debug(" effective request-host name %s does not domain-match "
- "RFC 2965 cookie domain %s", erhn, domain)
- return False
- if cookie.version == 0 and not ("."+erhn).endswith(domain):
- debug(" request-host %s does not match Netscape cookie domain "
- "%s", req_host, domain)
- return False
- return True
-
- def domain_return_ok(self, domain, request):
- # Liberal check of domain. This is here as an optimization to avoid
- # having to load lots of MSIE cookie files unless necessary.
-
- # Munge req_host and erhn to always start with a dot, so as to err on
- # the side of letting cookies through.
- dotted_req_host, dotted_erhn = eff_request_host_lc(request)
- if not dotted_req_host.startswith("."):
- dotted_req_host = "."+dotted_req_host
- if not dotted_erhn.startswith("."):
- dotted_erhn = "."+dotted_erhn
- if not (dotted_req_host.endswith(domain) or
- dotted_erhn.endswith(domain)):
- #debug(" request domain %s does not match cookie domain %s",
- # req_host, domain)
- return False
-
- if self.is_blocked(domain):
- debug(" domain %s is in user block-list", domain)
- return False
- if self.is_not_allowed(domain):
- debug(" domain %s is not in user allow-list", domain)
- return False
-
- return True
-
- def path_return_ok(self, path, request):
- debug("- checking cookie path=%s", path)
- req_path = request_path(request)
- if not req_path.startswith(path):
- debug(" %s does not path-match %s", req_path, path)
- return False
- return True
-
-
-def vals_sorted_by_key(adict):
- keys = adict.keys()
- keys.sort()
- return map(adict.get, keys)
-
-class MappingIterator:
- """Iterates over nested mapping, depth-first, in sorted order by key."""
- def __init__(self, mapping):
- self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack
-
- def __iter__(self): return self
-
- def next(self):
- # this is hairy because of lack of generators
- while 1:
- try:
- vals, i, prev_item = self._s.pop()
- except IndexError:
- raise StopIteration()
- if i < len(vals):
- item = vals[i]
- i = i + 1
- self._s.append((vals, i, prev_item))
- try:
- item.items
- except AttributeError:
- # non-mapping
- break
- else:
- # mapping
- self._s.append((vals_sorted_by_key(item), 0, item))
- continue
- return item
-
-
-# Used as second parameter to dict.get method, to distinguish absent
-# dict key from one with a None value.
-class Absent: pass
-
-class CookieJar:
- """Collection of HTTP cookies.
-
- You may not need to know about this class: try mechanize.urlopen().
-
- The major methods are extract_cookies and add_cookie_header; these are all
- you are likely to need.
-
- CookieJar supports the iterator protocol:
-
- for cookie in cookiejar:
- # do something with cookie
-
- Methods:
-
- add_cookie_header(request)
- extract_cookies(response, request)
- get_policy()
- set_policy(policy)
- cookies_for_request(request)
- make_cookies(response, request)
- set_cookie_if_ok(cookie, request)
- set_cookie(cookie)
- clear_session_cookies()
- clear_expired_cookies()
- clear(domain=None, path=None, name=None)
-
- Public attributes
-
- policy: CookiePolicy object
-
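- A minimal usage sketch (the URLs are placeholders; mechanize.Request and
- mechanize.urlopen are assumed, as in the package's public interface):
-
- jar = CookieJar()
- request = mechanize.Request("http://example.com/")
- response = mechanize.urlopen(request)
- jar.extract_cookies(response, request) # remember any Set-Cookie headers
- later = mechanize.Request("http://example.com/page2")
- jar.add_cookie_header(later) # send them back on a later request
-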
- """
-
- non_word_re = re.compile(r"\W")
- quote_re = re.compile(r"([\"\\])")
- strict_domain_re = re.compile(r"\.?[^.]*")
- domain_re = re.compile(r"[^.]*")
- dots_re = re.compile(r"^\.+")
-
- def __init__(self, policy=None):
- """
- See CookieJar.__doc__ for argument documentation.
-
- """
- if policy is None:
- policy = DefaultCookiePolicy()
- self._policy = policy
-
- self._cookies = {}
-
- # for __getitem__ iteration in pre-2.2 Pythons
- self._prev_getitem_index = 0
-
- def get_policy(self):
- return self._policy
-
- def set_policy(self, policy):
- self._policy = policy
-
- def _cookies_for_domain(self, domain, request):
- cookies = []
- if not self._policy.domain_return_ok(domain, request):
- return []
- debug("Checking %s for cookies to return", domain)
- cookies_by_path = self._cookies[domain]
- for path in cookies_by_path.keys():
- if not self._policy.path_return_ok(path, request):
- continue
- cookies_by_name = cookies_by_path[path]
- for cookie in cookies_by_name.values():
- if not self._policy.return_ok(cookie, request):
- debug(" not returning cookie")
- continue
- debug(" it's a match")
- cookies.append(cookie)
- return cookies
-
- def cookies_for_request(self, request):
- """Return a list of cookies to be returned to server.
-
- The returned list of cookie instances is sorted in the order they
- should appear in the Cookie: header for return to the server.
-
- See add_cookie_header.__doc__ for the interface required of the
- request argument.
-
- New in version 0.1.10
-
- """
- self._policy._now = self._now = int(time.time())
- cookies = self._cookies_for_request(request)
- # add cookies in order of most specific (i.e. longest) path first
- def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
- cookies.sort(decreasing_size)
- return cookies
-
- def _cookies_for_request(self, request):
- """Return a list of cookies to be returned to server."""
- # this method still exists (alongside cookies_for_request) because it
- # is part of an implied protected interface for subclasses of CookieJar
- # XXX document that implied interface, or provide another way of
- # implementing cookiejars than subclassing
- cookies = []
- for domain in self._cookies.keys():
- cookies.extend(self._cookies_for_domain(domain, request))
- return cookies
-
- def _cookie_attrs(self, cookies):
- """Return a list of cookie-attributes to be returned to server.
-
- The $Version attribute is also added when appropriate (currently only
- once per request).
-
- >>> jar = CookieJar()
- >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False,
- ... "example.com", False, False,
- ... "/", False, False, None, True,
- ... None, None, {})
- >>> jar._cookie_attrs([ns_cookie])
- ['foo="bar"']
- >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False,
- ... ".example.com", True, False,
- ... "/", False, False, None, True,
- ... None, None, {})
- >>> jar._cookie_attrs([rfc2965_cookie])
- ['$Version=1', 'foo=bar', '$Domain="example.com"']
-
- """
- version_set = False
-
- attrs = []
- for cookie in cookies:
- # set version of Cookie header
- # XXX
- # What should it be if multiple matching Set-Cookie headers have
- # different versions themselves?
- # Answer: there is no answer; was supposed to be settled by
- # RFC 2965 errata, but that may never appear...
- version = cookie.version
- if not version_set:
- version_set = True
- if version > 0:
- attrs.append("$Version=%s" % version)
-
- # quote cookie value if necessary
- # (not for Netscape protocol, which already has any quotes
- # intact, due to the poorly-specified Netscape Cookie: syntax)
- if ((cookie.value is not None) and
- self.non_word_re.search(cookie.value) and version > 0):
- value = self.quote_re.sub(r"\\\1", cookie.value)
- else:
- value = cookie.value
-
- # add cookie-attributes to be returned in Cookie header
- if cookie.value is None:
- attrs.append(cookie.name)
- else:
- attrs.append("%s=%s" % (cookie.name, value))
- if version > 0:
- if cookie.path_specified:
- attrs.append('$Path="%s"' % cookie.path)
- if cookie.domain.startswith("."):
- domain = cookie.domain
- if (not cookie.domain_initial_dot and
- domain.startswith(".")):
- domain = domain[1:]
- attrs.append('$Domain="%s"' % domain)
- if cookie.port is not None:
- p = "$Port"
- if cookie.port_specified:
- p = p + ('="%s"' % cookie.port)
- attrs.append(p)
-
- return attrs
-
- def add_cookie_header(self, request):
- """Add correct Cookie: header to request (urllib2.Request object).
-
- The Cookie2 header is also added unless policy.hide_cookie2 is true.
-
- The request object (usually a urllib2.Request instance) must support
- the methods get_full_url, get_host, is_unverifiable, get_type,
- has_header, get_header, header_items and add_unredirected_header, as
- documented by urllib2, and the port attribute (the port number).
- Actually, RequestUpgradeProcessor will automatically upgrade your
- Request object to one with has_header, get_header, header_items and
- add_unredirected_header, if it lacks those methods, for compatibility
- with pre-2.4 versions of urllib2.
-
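- A hedged sketch, given a CookieJar instance jar (urllib2.Request
- provides the interface described above):
-
- request = urllib2.Request("http://example.com/")
- jar.add_cookie_header(request) # may add Cookie: (and Cookie2:) headers
-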
- """
- debug("add_cookie_header")
- cookies = self.cookies_for_request(request)
-
- attrs = self._cookie_attrs(cookies)
- if attrs:
- if not request.has_header("Cookie"):
- request.add_unredirected_header("Cookie", "; ".join(attrs))
-
- # if necessary, advertise that we know RFC 2965
- if self._policy.rfc2965 and not self._policy.hide_cookie2:
- for cookie in cookies:
- if cookie.version != 1 and not request.has_header("Cookie2"):
- request.add_unredirected_header("Cookie2", '$Version="1"')
- break
-
- self.clear_expired_cookies()
-
- def _normalized_cookie_tuples(self, attrs_set):
- """Return list of tuples containing normalised cookie information.
-
- attrs_set is the list of lists of key,value pairs extracted from
- the Set-Cookie or Set-Cookie2 headers.
-
- Tuples are name, value, standard, rest, where name and value are the
- cookie name and value, standard is a dictionary containing the standard
- cookie-attributes (discard, secure, version, expires or max-age,
- domain, path and port) and rest is a dictionary containing the rest of
- the cookie-attributes.
-
- """
- cookie_tuples = []
-
- boolean_attrs = "discard", "secure"
- value_attrs = ("version",
- "expires", "max-age",
- "domain", "path", "port",
- "comment", "commenturl")
-
- for cookie_attrs in attrs_set:
- name, value = cookie_attrs[0]
-
- # Build dictionary of standard cookie-attributes (standard) and
- # dictionary of other cookie-attributes (rest).
-
- # Note: expiry time is normalised to seconds since epoch. V0
- # cookies should have the Expires cookie-attribute, and V1 cookies
- # should have Max-Age, but since V1 includes RFC 2109 cookies (and
- # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
- # accept either (but prefer Max-Age).
- max_age_set = False
-
- bad_cookie = False
-
- standard = {}
- rest = {}
- for k, v in cookie_attrs[1:]:
- lc = k.lower()
- # don't lose case distinction for unknown fields
- if lc in value_attrs or lc in boolean_attrs:
- k = lc
- if k in boolean_attrs and v is None:
- # boolean cookie-attribute is present, but has no value
- # (like "discard", rather than "port=80")
- v = True
- if standard.has_key(k):
- # only first value is significant
- continue
- if k == "domain":
- if v is None:
- debug(" missing value for domain attribute")
- bad_cookie = True
- break
- # RFC 2965 section 3.3.3
- v = v.lower()
- if k == "expires":
- if max_age_set:
- # Prefer max-age to expires (like Mozilla)
- continue
- if v is None:
- debug(" missing or invalid value for expires "
- "attribute: treating as session cookie")
- continue
- if k == "max-age":
- max_age_set = True
- if v is None:
- debug(" missing value for max-age attribute")
- bad_cookie = True
- break
- try:
- v = int(v)
- except ValueError:
- debug(" missing or invalid (non-numeric) value for "
- "max-age attribute")
- bad_cookie = True
- break
- # convert RFC 2965 Max-Age to seconds since epoch
- # XXX Strictly you're supposed to follow RFC 2616
- # age-calculation rules. Remember that zero Max-Age is a
- # request to discard the (old and new) cookie, though.
- k = "expires"
- v = self._now + v
- if (k in value_attrs) or (k in boolean_attrs):
- if (v is None and
- k not in ["port", "comment", "commenturl"]):
- debug(" missing value for %s attribute" % k)
- bad_cookie = True
- break
- standard[k] = v
- else:
- rest[k] = v
-
- if bad_cookie:
- continue
-
- cookie_tuples.append((name, value, standard, rest))
-
- return cookie_tuples
-
- def _cookie_from_cookie_tuple(self, tup, request):
- # standard is dict of standard cookie-attributes, rest is dict of the
- # rest of them
- name, value, standard, rest = tup
-
- domain = standard.get("domain", Absent)
- path = standard.get("path", Absent)
- port = standard.get("port", Absent)
- expires = standard.get("expires", Absent)
-
- # set the easy defaults
- version = standard.get("version", None)
- if version is not None:
- try:
- version = int(version)
- except ValueError:
- return None # invalid version, ignore cookie
- secure = standard.get("secure", False)
- # (discard is also set if expires is Absent)
- discard = standard.get("discard", False)
- comment = standard.get("comment", None)
- comment_url = standard.get("commenturl", None)
-
- # set default path
- if path is not Absent and path != "":
- path_specified = True
- path = escape_path(path)
- else:
- path_specified = False
- path = request_path(request)
- i = path.rfind("/")
- if i != -1:
- if version == 0:
- # Netscape spec parts company from reality here
- path = path[:i]
- else:
- path = path[:i+1]
- if len(path) == 0: path = "/"
-
- # set default domain
- domain_specified = domain is not Absent
- # but first we have to remember whether it starts with a dot
- domain_initial_dot = False
- if domain_specified:
- domain_initial_dot = bool(domain.startswith("."))
- if domain is Absent:
- req_host, erhn = eff_request_host_lc(request)
- domain = erhn
- elif not domain.startswith("."):
- domain = "."+domain
-
- # set default port
- port_specified = False
- if port is not Absent:
- if port is None:
- # Port attr present, but has no value: default to request port.
- # Cookie should then only be sent back on that port.
- port = request_port(request)
- else:
- port_specified = True
- port = re.sub(r"\s+", "", port)
- else:
- # No port attr present. Cookie can be sent back on any port.
- port = None
-
- # set default expires and discard
- if expires is Absent:
- expires = None
- discard = True
-
- return Cookie(version,
- name, value,
- port, port_specified,
- domain, domain_specified, domain_initial_dot,
- path, path_specified,
- secure,
- expires,
- discard,
- comment,
- comment_url,
- rest)
-
- def _cookies_from_attrs_set(self, attrs_set, request):
- cookie_tuples = self._normalized_cookie_tuples(attrs_set)
-
- cookies = []
- for tup in cookie_tuples:
- cookie = self._cookie_from_cookie_tuple(tup, request)
- if cookie: cookies.append(cookie)
- return cookies
-
- def _process_rfc2109_cookies(self, cookies):
- if self._policy.rfc2109_as_netscape is None:
- rfc2109_as_netscape = not self._policy.rfc2965
- else:
- rfc2109_as_netscape = self._policy.rfc2109_as_netscape
- for cookie in cookies:
- if cookie.version == 1:
- cookie.rfc2109 = True
- if rfc2109_as_netscape:
- # treat 2109 cookies as Netscape cookies rather than
- # as RFC2965 cookies
- cookie.version = 0
-
- def _make_cookies(self, response, request):
- # get cookie-attributes for RFC 2965 and Netscape protocols
- headers = response.info()
- rfc2965_hdrs = headers.getheaders("Set-Cookie2")
- ns_hdrs = headers.getheaders("Set-Cookie")
-
- rfc2965 = self._policy.rfc2965
- netscape = self._policy.netscape
-
- if ((not rfc2965_hdrs and not ns_hdrs) or
- (not ns_hdrs and not rfc2965) or
- (not rfc2965_hdrs and not netscape) or
- (not netscape and not rfc2965)):
- return [] # no relevant cookie headers: quick exit
-
- try:
- cookies = self._cookies_from_attrs_set(
- split_header_words(rfc2965_hdrs), request)
- except:
- reraise_unmasked_exceptions()
- cookies = []
-
- if ns_hdrs and netscape:
- try:
- # RFC 2109 and Netscape cookies
- ns_cookies = self._cookies_from_attrs_set(
- parse_ns_headers(ns_hdrs), request)
- except:
- reraise_unmasked_exceptions()
- ns_cookies = []
- self._process_rfc2109_cookies(ns_cookies)
-
- # Look for Netscape cookies (from Set-Cookie headers) that match
- # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
- # For each match, keep the RFC 2965 cookie and ignore the Netscape
- # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
- # bundled in with the Netscape cookies for this purpose, which is
- # reasonable behaviour.
- if rfc2965:
- lookup = {}
- for cookie in cookies:
- lookup[(cookie.domain, cookie.path, cookie.name)] = None
-
- def no_matching_rfc2965(ns_cookie, lookup=lookup):
- key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
- return not lookup.has_key(key)
- ns_cookies = filter(no_matching_rfc2965, ns_cookies)
-
- if ns_cookies:
- cookies.extend(ns_cookies)
-
- return cookies
-
- def make_cookies(self, response, request):
- """Return sequence of Cookie objects extracted from response object.
-
- See extract_cookies.__doc__ for the interface required of the
- response and request arguments.
-
- """
- self._policy._now = self._now = int(time.time())
- return [cookie for cookie in self._make_cookies(response, request)
- if cookie.expires is None or not cookie.expires <= self._now]
-
- def set_cookie_if_ok(self, cookie, request):
- """Set a cookie if policy says it's OK to do so.
-
- cookie: mechanize.Cookie instance
- request: see extract_cookies.__doc__ for the required interface
-
- """
- self._policy._now = self._now = int(time.time())
-
- if self._policy.set_ok(cookie, request):
- self.set_cookie(cookie)
-
- def set_cookie(self, cookie):
- """Set a cookie, without checking whether or not it should be set.
-
- cookie: mechanize.Cookie instance
- """
- c = self._cookies
- if not c.has_key(cookie.domain): c[cookie.domain] = {}
- c2 = c[cookie.domain]
- if not c2.has_key(cookie.path): c2[cookie.path] = {}
- c3 = c2[cookie.path]
- c3[cookie.name] = cookie
-
- def extract_cookies(self, response, request):
- """Extract cookies from response, where allowable given the request.
-
- Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
- object passed as argument. Any of these headers that are found are
- used to update the state of the object (subject to the policy.set_ok
- method's approval).
-
- The response object (usually the result of a call to
- mechanize.urlopen, or similar) should support an info method, which
- returns a mimetools.Message object (in fact, the 'mimetools.Message
- object' may be any object that provides a getheaders method).
-
- The request object (usually a urllib2.Request instance) must support
- the methods get_full_url, get_type, get_host, and is_unverifiable, as
- documented by urllib2, and the port attribute (the port number). The
- request is used to set default values for cookie-attributes as well as
- for checking that the cookie is OK to be set.
-
- """
- debug("extract_cookies: %s", response.info())
- self._policy._now = self._now = int(time.time())
-
- for cookie in self._make_cookies(response, request):
- if cookie.expires is not None and cookie.expires <= self._now:
- # An expiry date in the past is a request to delete the cookie. This
- # can't be handled in DefaultCookiePolicy, because cookies can't be
- # deleted there.
- try:
- self.clear(cookie.domain, cookie.path, cookie.name)
- except KeyError:
- pass
- debug("Expiring cookie, domain='%s', path='%s', name='%s'",
- cookie.domain, cookie.path, cookie.name)
- elif self._policy.set_ok(cookie, request):
- debug(" setting cookie: %s", cookie)
- self.set_cookie(cookie)
-
- def clear(self, domain=None, path=None, name=None):
- """Clear some cookies.
-
- Invoking this method without arguments will clear all cookies. If
- given a single argument, only cookies belonging to that domain will be
- removed. If given two arguments, cookies belonging to the specified
- path within that domain are removed. If given three arguments, then
- the cookie with the specified name, path and domain is removed.
-
- Raises KeyError if no matching cookie exists.
-
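- For example (domain, path and name are placeholders):
-
- jar.clear("example.com", "/", "foo") # one specific cookie
- jar.clear("example.com") # all cookies for that domain
- jar.clear() # all cookies
-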
- """
- if name is not None:
- if (domain is None) or (path is None):
- raise ValueError(
- "domain and path must be given to remove a cookie by name")
- del self._cookies[domain][path][name]
- elif path is not None:
- if domain is None:
- raise ValueError(
- "domain must be given to remove cookies by path")
- del self._cookies[domain][path]
- elif domain is not None:
- del self._cookies[domain]
- else:
- self._cookies = {}
-
- def clear_session_cookies(self):
- """Discard all session cookies.
-
- Discards all cookies held by object which had either no Max-Age or
- Expires cookie-attribute or an explicit Discard cookie-attribute, or
- which otherwise have ended up with a true discard attribute. For
- interactive browsers, the end of a session usually corresponds to
- closing the browser window.
-
- Note that the save method won't save session cookies anyway, unless you
- ask otherwise by passing a true ignore_discard argument.
-
- """
- for cookie in self:
- if cookie.discard:
- self.clear(cookie.domain, cookie.path, cookie.name)
-
- def clear_expired_cookies(self):
- """Discard all expired cookies.
-
- You probably don't need to call this method: expired cookies are never
- sent back to the server (provided you're using DefaultCookiePolicy),
- this method is called by CookieJar itself every so often, and the save
- method won't save expired cookies anyway (unless you ask otherwise by
- passing a true ignore_expires argument).
-
- """
- now = time.time()
- for cookie in self:
- if cookie.is_expired(now):
- self.clear(cookie.domain, cookie.path, cookie.name)
-
- def __getitem__(self, i):
- if i == 0:
- self._getitem_iterator = self.__iter__()
- elif self._prev_getitem_index != i-1: raise IndexError(
- "CookieJar.__getitem__ only supports sequential iteration")
- self._prev_getitem_index = i
- try:
- return self._getitem_iterator.next()
- except StopIteration:
- raise IndexError()
-
- def __iter__(self):
- return MappingIterator(self._cookies)
-
- def __len__(self):
- """Return number of contained cookies."""
- i = 0
- for cookie in self: i = i + 1
- return i
-
- def __repr__(self):
- r = []
- for cookie in self: r.append(repr(cookie))
- return "<%s[%s]>" % (self.__class__, ", ".join(r))
-
- def __str__(self):
- r = []
- for cookie in self: r.append(str(cookie))
- return "<%s[%s]>" % (self.__class__, ", ".join(r))
-
-
-class LoadError(Exception): pass
-
-class FileCookieJar(CookieJar):
- """CookieJar that can be loaded from and saved to a file.
-
- Additional methods
-
- save(filename=None, ignore_discard=False, ignore_expires=False)
- load(filename=None, ignore_discard=False, ignore_expires=False)
- revert(filename=None, ignore_discard=False, ignore_expires=False)
-
- Additional public attributes
-
- filename: filename for loading and saving cookies
-
- Additional public readable attributes
-
- delayload: request that cookies are lazily loaded from disk; this is only
- a hint, since it affects performance rather than behaviour (unless the
- cookies on disk are changing); a CookieJar object may ignore it (in fact,
- only MSIECookieJar lazily loads cookies at the moment)
-
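- A hedged sketch using a concrete subclass (save itself is abstract
- here; mechanize provides e.g. LWPCookieJar):
-
- jar = LWPCookieJar("cookies.txt")
- jar.save(ignore_discard=True) # keep session cookies too
- jar.load() # restore them later
-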
- """
-
- def __init__(self, filename=None, delayload=False, policy=None):
- """
- See FileCookieJar.__doc__ for argument documentation.
-
- Cookies are NOT loaded from the named file until either the load or
- revert method is called.
-
- """
- CookieJar.__init__(self, policy)
- if filename is not None and not isstringlike(filename):
- raise ValueError("filename must be string-like")
- self.filename = filename
- self.delayload = bool(delayload)
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Save cookies to a file.
-
- filename: name of file in which to save cookies
- ignore_discard: save even cookies set to be discarded
- ignore_expires: save even cookies that have expired
-
- The file is overwritten if it already exists, thus wiping all its
- cookies. Saved cookies can be restored later using the load or revert
- methods. If filename is not specified, self.filename is used; if
- self.filename is None, ValueError is raised.
-
- """
- raise NotImplementedError()
-
- def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Load cookies from a file.
-
- Old cookies are kept unless overwritten by newly loaded ones.
-
- Arguments are as for .save().
-
- If filename is not specified, self.filename is used; if self.filename
- is None, ValueError is raised. The named file must be in the format
- understood by the class, or LoadError will be raised. This format will
- be identical to that written by the save method, unless the load format
- is not sufficiently well understood (as is the case for MSIECookieJar).
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename)
- try:
- self._really_load(f, filename, ignore_discard, ignore_expires)
- finally:
- f.close()
-
- def revert(self, filename=None,
- ignore_discard=False, ignore_expires=False):
- """Clear all cookies and reload cookies from a saved file.
-
- Raises LoadError (or IOError) if reversion is not successful; the
- object's state will not be altered if this happens.
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- old_state = copy.deepcopy(self._cookies)
- self._cookies = {}
- try:
- self.load(filename, ignore_discard, ignore_expires)
- except (LoadError, IOError):
- self._cookies = old_state
- raise
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py
deleted file mode 100644
index 596b114..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import logging
-
-from urllib2 import BaseHandler
-from _response import response_seek_wrapper
-
-
-class HTTPResponseDebugProcessor(BaseHandler):
- handler_order = 900 # before redirections, after everything else
-
- def http_response(self, request, response):
- if not hasattr(response, "seek"):
- response = response_seek_wrapper(response)
- info = logging.getLogger("mechanize.http_responses").info
- try:
- info(response.read())
- finally:
- response.seek(0)
- info("*****************************************************")
- return response
-
- https_response = http_response
-
-class HTTPRedirectDebugProcessor(BaseHandler):
- def http_request(self, request):
- if hasattr(request, "redirect_dict"):
- info = logging.getLogger("mechanize.http_redirects").info
- info("redirecting to %s", request.get_full_url())
- return request
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py
deleted file mode 100644
index db662a8..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py
+++ /dev/null
@@ -1,60 +0,0 @@
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
-import mimetools
-import os
-import socket
-import urllib
-from urllib2 import BaseHandler, URLError
-
-
-class FileHandler(BaseHandler):
- # Use local file or FTP depending on form of URL
- def file_open(self, req):
- url = req.get_selector()
- if url[:2] == '//' and url[2:3] != '/':
- req.type = 'ftp'
- return self.parent.open(req)
- else:
- return self.open_local_file(req)
-
- # names for the localhost
- names = None
- def get_names(self):
- if FileHandler.names is None:
- try:
- FileHandler.names = (socket.gethostbyname('localhost'),
- socket.gethostbyname(socket.gethostname()))
- except socket.gaierror:
- FileHandler.names = (socket.gethostbyname('localhost'),)
- return FileHandler.names
-
- # not entirely sure what the rules are here
- def open_local_file(self, req):
- try:
- import email.utils as emailutils
- except ImportError:
- import email.Utils as emailutils
- import mimetypes
- host = req.get_host()
- file = req.get_selector()
- localfile = urllib.url2pathname(file)
- try:
- stats = os.stat(localfile)
- size = stats.st_size
- modified = emailutils.formatdate(stats.st_mtime, usegmt=True)
- mtype = mimetypes.guess_type(file)[0]
- headers = mimetools.Message(StringIO(
- 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
- (mtype or 'text/plain', size, modified)))
- if host:
- host, port = urllib.splitport(host)
- if not host or \
- (not port and socket.gethostbyname(host) in self.get_names()):
- return urllib.addinfourl(open(localfile, 'rb'),
- headers, 'file:'+file)
- except OSError, msg:
- # urllib2 users shouldn't expect OSErrors coming from urlopen()
- raise URLError(msg)
- raise URLError('file not on local host')
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py
deleted file mode 100644
index 34fe979..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py
+++ /dev/null
@@ -1,249 +0,0 @@
-"""Firefox 3 "cookies.sqlite" cookie persistence.
-
-Copyright 2008 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import logging
-import time
-import sqlite3
-
-from _clientcookie import CookieJar, Cookie, MappingIterator
-from _util import isstringlike, experimental
-debug = logging.getLogger("mechanize.cookies").debug
-
-
-class Firefox3CookieJar(CookieJar):
-
- """Firefox 3 cookie jar.
-
- The cookies are stored in Firefox 3's "cookies.sqlite" format.
-
- Constructor arguments:
-
- filename: filename of cookies.sqlite (typically found at the top level
- of a firefox profile directory)
- autoconnect: as a convenience, connect to the SQLite cookies database at
- Firefox3CookieJar construction time (default True)
- policy: an object satisfying the mechanize.CookiePolicy interface
-
- Note that this is NOT a FileCookieJar, and there are no .load(),
- .save() or .revert() methods. The database is in sync with the
- cookiejar object's state after each public method call.
-
- Following Firefox's own behaviour, session cookies are never saved to
- the database.
-
- The file is created, and an sqlite database written to it, if it does
- not already exist. The moz_cookies database table is created if it does
- not already exist.
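-
- A minimal sketch (the filename is a placeholder for a real Firefox 3
- profile database):
-
- jar = Firefox3CookieJar("cookies.sqlite") # connects immediately
- for cookie in jar:
- print cookie.domain, cookie.name
-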
- """
-
- # XXX
- # handle DatabaseError exceptions
- # add a FileCookieJar (explicit .save() / .revert() / .load() methods)
-
- def __init__(self, filename, autoconnect=True, policy=None):
- experimental("Firefox3CookieJar is experimental code")
- CookieJar.__init__(self, policy)
- if filename is not None and not isstringlike(filename):
- raise ValueError("filename must be string-like")
- self.filename = filename
- self._conn = None
- if autoconnect:
- self.connect()
-
- def connect(self):
- self._conn = sqlite3.connect(self.filename)
- self._conn.isolation_level = "DEFERRED"
- self._create_table_if_necessary()
-
- def close(self):
- self._conn.close()
-
- def _transaction(self, func):
- try:
- cur = self._conn.cursor()
- try:
- result = func(cur)
- finally:
- cur.close()
- except:
- self._conn.rollback()
- raise
- else:
- self._conn.commit()
- return result
-
- def _execute(self, query, params=()):
- return self._transaction(lambda cur: cur.execute(query, params))
-
- def _query(self, query, params=()):
- # XXX should we bother with a transaction?
- cur = self._conn.cursor()
- try:
- cur.execute(query, params)
- for row in cur.fetchall():
- yield row
- finally:
- cur.close()
-
- def _create_table_if_necessary(self):
- self._execute("""\
-CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT,
- value TEXT, host TEXT, path TEXT, expiry INTEGER,
- lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""")
-
- def _cookie_from_row(self, row):
- (pk, name, value, domain, path, expires,
- last_accessed, secure, http_only) = row
-
- version = 0
- domain = domain.encode("ascii", "ignore")
- path = path.encode("ascii", "ignore")
- name = name.encode("ascii", "ignore")
- value = value.encode("ascii", "ignore")
- secure = bool(secure)
-
- # last_accessed isn't a cookie attribute, so isn't added to rest
- rest = {}
- if http_only:
- rest["HttpOnly"] = None
-
- if name == "":
- name = value
- value = None
-
- initial_dot = domain.startswith(".")
- domain_specified = initial_dot
-
- discard = False
- if expires == "":
- expires = None
- discard = True
-
- return Cookie(version, name, value,
- None, False,
- domain, domain_specified, initial_dot,
- path, False,
- secure,
- expires,
- discard,
- None,
- None,
- rest)
-
- def clear(self, domain=None, path=None, name=None):
- CookieJar.clear(self, domain, path, name)
- where_parts = []
- sql_params = []
- if domain is not None:
- where_parts.append("host = ?")
- sql_params.append(domain)
- if path is not None:
- where_parts.append("path = ?")
- sql_params.append(path)
- if name is not None:
- where_parts.append("name = ?")
- sql_params.append(name)
- where = " AND ".join(where_parts)
- if where:
- where = " WHERE " + where
- def clear(cur):
- cur.execute("DELETE FROM moz_cookies%s" % where,
- tuple(sql_params))
- self._transaction(clear)
-
- def _row_from_cookie(self, cookie, cur):
- expires = cookie.expires
- if cookie.discard:
- expires = ""
-
- domain = unicode(cookie.domain)
- path = unicode(cookie.path)
- name = unicode(cookie.name)
- # unicode(None) would give u'None', so only convert a value that is
- # actually present
- value = cookie.value
- if value is not None:
- value = unicode(value)
- secure = bool(int(cookie.secure))
-
- if value is None:
- # cookies without a value are stored with the name in the value
- # column (see _cookie_from_row, which reverses this)
- value = name
- name = ""
-
- last_accessed = int(time.time())
- http_only = cookie.has_nonstandard_attr("HttpOnly")
-
- query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""")
- pk = query.fetchone()[0]
- if pk is None:
- pk = 1
-
- return (pk, name, value, domain, path, expires,
- last_accessed, secure, http_only)
-
- def set_cookie(self, cookie):
- if cookie.discard:
- CookieJar.set_cookie(self, cookie)
- return
-
- def set_cookie(cur):
- # XXX
- # is this RFC 2965-correct?
- # could this do an UPDATE instead?
- row = self._row_from_cookie(cookie, cur)
- name, unused, domain, path = row[1:5]
- cur.execute("""\
-DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
- (domain, path, name))
- cur.execute("""\
-INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-""", row)
- self._transaction(set_cookie)
-
- def __iter__(self):
- # session (non-persistent) cookies
- for cookie in MappingIterator(self._cookies):
- yield cookie
- # persistent cookies
- for row in self._query("""\
-SELECT * FROM moz_cookies ORDER BY name, path, host"""):
- yield self._cookie_from_row(row)
-
- def _cookies_for_request(self, request):
- session_cookies = CookieJar._cookies_for_request(self, request)
- def get_cookies(cur):
- query = cur.execute("SELECT host from moz_cookies")
- domains = [row[0] for row in query.fetchmany()]
- cookies = []
- for domain in domains:
- cookies += self._persistent_cookies_for_domain(domain,
- request, cur)
- return cookies
- persistent_cookies = self._transaction(get_cookies)
- return session_cookies + persistent_cookies
-
- def _persistent_cookies_for_domain(self, domain, request, cur):
- cookies = []
- if not self._policy.domain_return_ok(domain, request):
- return []
- debug("Checking %s for cookies to return", domain)
- query = cur.execute("""\
-SELECT * from moz_cookies WHERE host = ? ORDER BY path""",
- (domain,))
- cookies = [self._cookie_from_row(row) for row in query.fetchmany()]
- last_path = None
- r = []
- for cookie in cookies:
- if (cookie.path != last_path and
- not self._policy.path_return_ok(cookie.path, request)):
- last_path = cookie.path
- continue
- if not self._policy.return_ok(cookie, request):
- debug(" not returning cookie")
- continue
- debug(" it's a match")
- r.append(cookie)
- return r
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py
deleted file mode 100644
index 26c2743..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import urllib2
-from cStringIO import StringIO
-import _response
-
-# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
-class GzipConsumer:
-
- def __init__(self, consumer):
- self.__consumer = consumer
- self.__decoder = None
- self.__data = ""
-
- def __getattr__(self, key):
- return getattr(self.__consumer, key)
-
- def feed(self, data):
- if self.__decoder is None:
- # check if we have a full gzip header
- data = self.__data + data
- try:
- i = 10
- flag = ord(data[3])
- if flag & 4: # extra
- x = ord(data[i]) + 256*ord(data[i+1])
- i = i + 2 + x
- if flag & 8: # filename
- while ord(data[i]):
- i = i + 1
- i = i + 1
- if flag & 16: # comment
- while ord(data[i]):
- i = i + 1
- i = i + 1
- if flag & 2: # crc
- i = i + 2
- if len(data) < i:
- raise IndexError("not enough data")
- if data[:3] != "\x1f\x8b\x08":
- raise IOError("invalid gzip data")
- data = data[i:]
- except IndexError:
- self.__data = data
- return # need more data
- import zlib
- self.__data = ""
- self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
- data = self.__decoder.decompress(data)
- if data:
- self.__consumer.feed(data)
-
- def close(self):
- if self.__decoder:
- data = self.__decoder.flush()
- if data:
- self.__consumer.feed(data)
- self.__consumer.close()
-
-
-# --------------------------------------------------------------------
-
-# the rest of this module is John Lee's stupid code, not
-# Fredrik's nice code :-)
-
-class stupid_gzip_consumer:
- def __init__(self): self.data = []
- def feed(self, data): self.data.append(data)
-
-class stupid_gzip_wrapper(_response.closeable_response):
- def __init__(self, response):
- self._response = response
-
- c = stupid_gzip_consumer()
- gzc = GzipConsumer(c)
- gzc.feed(response.read())
- self.__data = StringIO("".join(c.data))
-
- def read(self, size=-1):
- return self.__data.read(size)
- def readline(self, size=-1):
- return self.__data.readline(size)
- def readlines(self, sizehint=-1):
- return self.__data.readlines(sizehint)
-
- def __getattr__(self, name):
- # delegate unknown methods/attributes
- return getattr(self._response, name)
-
-class HTTPGzipProcessor(urllib2.BaseHandler):
- handler_order = 200 # response processing before HTTPEquivProcessor
-
- def http_request(self, request):
- request.add_header("Accept-Encoding", "gzip")
- return request
-
- def http_response(self, request, response):
- # post-process response
- enc_hdrs = response.info().getheaders("Content-encoding")
- for enc_hdr in enc_hdrs:
- if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
- return stupid_gzip_wrapper(response)
- return response
-
- https_response = http_response
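-
-# A hedged usage sketch: install the processor in an opener chain so
-# gzip-encoded responses are decompressed transparently (build_opener
-# is urllib2's standard helper):
-#
-# opener = urllib2.build_opener(HTTPGzipProcessor)
-# response = opener.open("http://example.com/")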
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py
deleted file mode 100644
index 49ba5de..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py
+++ /dev/null
@@ -1,232 +0,0 @@
-"""Utility functions for HTTP header value parsing and construction.
-
-Copyright 1997-1998, Gisle Aas
-Copyright 2002-2006, John J. Lee
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import os, re
-from types import StringType
-from types import UnicodeType
-STRING_TYPES = StringType, UnicodeType
-
-from _util import http2time
-import _rfc3986
-
-def is_html(ct_headers, url, allow_xhtml=False):
- """
- ct_headers: Sequence of Content-Type headers
- url: Response URL
-
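- For example (the URL is a placeholder):
-
- >>> is_html(["text/html; charset=utf-8"], "http://example.com/")
- True
-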
- """
- if not ct_headers:
- # guess
- ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
- html_exts = [".htm", ".html"]
- if allow_xhtml:
- html_exts += [".xhtml"]
- return ext in html_exts
- # use first header
- ct = split_header_words(ct_headers)[0][0][0]
- html_types = ["text/html"]
- if allow_xhtml:
- html_types += [
- "text/xhtml", "text/xml",
- "application/xml", "application/xhtml+xml",
- ]
- return ct in html_types
-
-def unmatched(match):
- """Return unmatched part of re.Match object."""
- start, end = match.span(0)
- return match.string[:start]+match.string[end:]
-
-token_re = re.compile(r"^\s*([^=\s;,]+)")
-quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
-value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
-escape_re = re.compile(r"\\(.)")
-def split_header_words(header_values):
- r"""Parse header values into a list of lists containing key,value pairs.
-
- The function knows how to deal with ",", ";" and "=" as well as quoted
- values after "=". A list of space separated tokens are parsed as if they
- were separated by ";".
-
- If the header_values passed as argument contains multiple values, then they
- are treated as if they were a single value separated by comma ",".
-
- This means that this function is useful for parsing header fields that
- follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
- the requirement for tokens).
-
- headers = #header
- header = (token | parameter) *( [";"] (token | parameter))
-
- token = 1*<any CHAR except CTLs or separators>
- separators = "(" | ")" | "<" | ">" | "@"
- | "," | ";" | ":" | "\" | <">
- | "/" | "[" | "]" | "?" | "="
- | "{" | "}" | SP | HT
-
- quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
- qdtext = <any TEXT except <">>
- quoted-pair = "\" CHAR
-
- parameter = attribute "=" value
- attribute = token
- value = token | quoted-string
-
- Each header is represented by a list of key/value pairs. The value for a
- simple token (not part of a parameter) is None. Syntactically incorrect
- headers will not necessarily be parsed as you would want.
-
- This is easier to describe with some examples:
-
- >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
- [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
- >>> split_header_words(['text/html; charset="iso-8859-1"'])
- [[('text/html', None), ('charset', 'iso-8859-1')]]
- >>> split_header_words([r'Basic realm="\"foo\bar\""'])
- [[('Basic', None), ('realm', '"foobar"')]]
-
- """
- assert type(header_values) not in STRING_TYPES
- result = []
- for text in header_values:
- orig_text = text
- pairs = []
- while text:
- m = token_re.search(text)
- if m:
- text = unmatched(m)
- name = m.group(1)
- m = quoted_value_re.search(text)
- if m: # quoted value
- text = unmatched(m)
- value = m.group(1)
- value = escape_re.sub(r"\1", value)
- else:
- m = value_re.search(text)
- if m: # unquoted value
- text = unmatched(m)
- value = m.group(1)
- value = value.rstrip()
- else:
- # no value, a lone token
- value = None
- pairs.append((name, value))
- elif text.lstrip().startswith(","):
- # concatenated headers, as per RFC 2616 section 4.2
- text = text.lstrip()[1:]
- if pairs: result.append(pairs)
- pairs = []
- else:
- # skip junk
- non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
- assert nr_junk_chars > 0, (
- "split_header_words bug: '%s', '%s', %s" %
- (orig_text, text, pairs))
- text = non_junk
- if pairs: result.append(pairs)
- return result
-
-join_escape_re = re.compile(r"([\"\\])")
-def join_header_words(lists):
- """Do the inverse of the conversion done by split_header_words.
-
- Takes a list of lists of (key, value) pairs and produces a single header
- value. Attribute values are quoted if needed.
-
- >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
- 'text/plain; charset="iso-8859/1"'
- >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
- 'text/plain, charset="iso-8859/1"'
-
- """
- headers = []
- for pairs in lists:
- attr = []
- for k, v in pairs:
- if v is not None:
- if not re.search(r"^\w+$", v):
- v = join_escape_re.sub(r"\\\1", v) # escape " and \
- v = '"%s"' % v
- if k is None: # Netscape cookies may have no name
- k = v
- else:
- k = "%s=%s" % (k, v)
- attr.append(k)
- if attr: headers.append("; ".join(attr))
- return ", ".join(headers)
-
-def strip_quotes(text):
- if text.startswith('"'):
- text = text[1:]
- if text.endswith('"'):
- text = text[:-1]
- return text
-
-def parse_ns_headers(ns_headers):
- """Ad-hoc parser for Netscape protocol cookie-attributes.
-
- The old Netscape cookie format for Set-Cookie can for instance contain
- an unquoted "," in the expires field, so we have to use this ad-hoc
- parser instead of split_header_words.
-
- XXX This may not make the best possible effort to parse all the crap
- that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
- parser is probably better, so we could do worse than following that if
- this ever gives any trouble.
-
- Currently, this is also used for parsing RFC 2109 cookies.
-
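- For example (the header value is made up):
-
- >>> parse_ns_headers(["foo=bar; path=/; secure"])
- [[('foo', 'bar'), ('path', '/'), ('secure', None), ('version', '0')]]
-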
- """
- known_attrs = ("expires", "domain", "path", "secure",
- # RFC 2109 attrs (may turn up in Netscape cookies, too)
- "version", "port", "max-age")
-
- result = []
- for ns_header in ns_headers:
- pairs = []
- version_set = False
- params = re.split(r";\s*", ns_header)
- for ii in range(len(params)):
- param = params[ii]
- param = param.rstrip()
- if param == "": continue
- if "=" not in param:
- k, v = param, None
- else:
- k, v = re.split(r"\s*=\s*", param, 1)
- k = k.lstrip()
- if ii != 0:
- lc = k.lower()
- if lc in known_attrs:
- k = lc
- if k == "version":
- # This is an RFC 2109 cookie.
- v = strip_quotes(v)
- version_set = True
- if k == "expires":
- # convert expires date to seconds since epoch
- v = http2time(strip_quotes(v)) # None if invalid
- pairs.append((k, v))
-
- if pairs:
- if not version_set:
- pairs.append(("version", "0"))
- result.append(pairs)
-
- return result
-
-
-def _test():
- import doctest, _headersutil
- return doctest.testmod(_headersutil)
-
-if __name__ == "__main__":
- _test()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py
deleted file mode 100644
index 5da0815..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py
+++ /dev/null
@@ -1,631 +0,0 @@
-"""HTML handling.
-
-Copyright 2003-2006 John J. Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
-included with the distribution).
-
-"""
-
-import re, copy, htmlentitydefs
-import sgmllib, ClientForm
-
-import _request
-from _headersutil import split_header_words, is_html as _is_html
-import _rfc3986
-
-DEFAULT_ENCODING = "latin-1"
-
-COMPRESS_RE = re.compile(r"\s+")
-
-
-# the base class is purely for backwards compatibility
-class ParseError(ClientForm.ParseError): pass
-
-
-class CachingGeneratorFunction(object):
- """Caching wrapper around a no-arguments iterable."""
-
- def __init__(self, iterable):
- self._cache = []
- # wrap iterable to make it non-restartable (otherwise, repeated
- # __call__ would give incorrect results)
- self._iterator = iter(iterable)
-
- def __call__(self):
- cache = self._cache
- for item in cache:
- yield item
- for item in self._iterator:
- cache.append(item)
- yield item
-
-
-class EncodingFinder:
- def __init__(self, default_encoding):
- self._default_encoding = default_encoding
- def encoding(self, response):
- # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
- # headers may be in the response. HTTP-EQUIV headers come last,
- # so try in order from first to last.
- for ct in response.info().getheaders("content-type"):
- for k, v in split_header_words([ct])[0]:
- if k == "charset":
- return v
- return self._default_encoding
-
-class ResponseTypeFinder:
- def __init__(self, allow_xhtml):
- self._allow_xhtml = allow_xhtml
- def is_html(self, response, encoding):
- ct_hdrs = response.info().getheaders("content-type")
- url = response.geturl()
- # XXX encoding
- return _is_html(ct_hdrs, url, self._allow_xhtml)
-
-
-# idea for this argument-processing trick is from Peter Otten
-class Args:
- def __init__(self, args_map):
- self.dictionary = dict(args_map)
- def __getattr__(self, key):
- try:
- return self.dictionary[key]
- except KeyError:
- return getattr(self.__class__, key)
-
-def form_parser_args(
- select_default=False,
- form_parser_class=None,
- request_class=None,
- backwards_compat=False,
- ):
- return Args(locals())
-
-
-class Link:
- def __init__(self, base_url, url, text, tag, attrs):
- assert None not in [url, tag, attrs]
- self.base_url = base_url
- self.absolute_url = _rfc3986.urljoin(base_url, url)
- self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
- def __cmp__(self, other):
- try:
- for name in "url", "text", "tag", "attrs":
- if getattr(self, name) != getattr(other, name):
- return -1
- except AttributeError:
- return -1
- return 0
- def __repr__(self):
- return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
- self.base_url, self.url, self.text, self.tag, self.attrs)
-
-
-class LinksFactory:
-
- def __init__(self,
- link_parser_class=None,
- link_class=Link,
- urltags=None,
- ):
- import _pullparser
- if link_parser_class is None:
- link_parser_class = _pullparser.TolerantPullParser
- self.link_parser_class = link_parser_class
- self.link_class = link_class
- if urltags is None:
- urltags = {
- "a": "href",
- "area": "href",
- "frame": "src",
- "iframe": "src",
- }
- self.urltags = urltags
- self._response = None
- self._encoding = None
-
- def set_response(self, response, base_url, encoding):
- self._response = response
- self._encoding = encoding
- self._base_url = base_url
-
- def links(self):
- """Return an iterator that provides links of the document."""
- response = self._response
- encoding = self._encoding
- base_url = self._base_url
- p = self.link_parser_class(response, encoding=encoding)
-
- try:
- for token in p.tags(*(self.urltags.keys()+["base"])):
- if token.type == "endtag":
- continue
- if token.data == "base":
- base_href = dict(token.attrs).get("href")
- if base_href is not None:
- base_url = base_href
- continue
- attrs = dict(token.attrs)
- tag = token.data
- name = attrs.get("name")
- text = None
- # XXX use attr_encoding for ref'd doc if that doc does not
- # provide one by other means
- #attr_encoding = attrs.get("charset")
- url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL?
- if not url:
- # Probably an <A NAME="blah"> link or <AREA NOHREF...>.
- # For our purposes a link is something with a URL, so
- # ignore this.
- continue
-
- url = _rfc3986.clean_url(url, encoding)
- if tag == "a":
- if token.type != "startendtag":
- # hmm, this'd break if end tag is missing
- text = p.get_compressed_text(("endtag", tag))
- # but this doesn't work for e.g.
- # <a href="blah"><b>Andy</b></a>
- #text = p.get_compressed_text()
-
- yield Link(base_url, url, text, tag, token.attrs)
- except sgmllib.SGMLParseError, exc:
- raise ParseError(exc)
-
-class FormsFactory:
-
- """Makes a sequence of objects satisfying ClientForm.HTMLForm interface.
-
- After calling .forms(), the .global_form attribute is a form object
- containing all controls not a descendant of any FORM element.
-
- For constructor argument docs, see ClientForm.ParseResponse
- argument docs.
-
- """
-
- def __init__(self,
- select_default=False,
- form_parser_class=None,
- request_class=None,
- backwards_compat=False,
- ):
- import ClientForm
- self.select_default = select_default
- if form_parser_class is None:
- form_parser_class = ClientForm.FormParser
- self.form_parser_class = form_parser_class
- if request_class is None:
- request_class = _request.Request
- self.request_class = request_class
- self.backwards_compat = backwards_compat
- self._response = None
- self.encoding = None
- self.global_form = None
-
- def set_response(self, response, encoding):
- self._response = response
- self.encoding = encoding
- self.global_form = None
-
- def forms(self):
- import ClientForm
- encoding = self.encoding
- try:
- forms = ClientForm.ParseResponseEx(
- self._response,
- select_default=self.select_default,
- form_parser_class=self.form_parser_class,
- request_class=self.request_class,
- encoding=encoding,
- _urljoin=_rfc3986.urljoin,
- _urlparse=_rfc3986.urlsplit,
- _urlunparse=_rfc3986.urlunsplit,
- )
- except ClientForm.ParseError, exc:
- raise ParseError(exc)
- self.global_form = forms[0]
- return forms[1:]
-
-class TitleFactory:
- def __init__(self):
- self._response = self._encoding = None
-
- def set_response(self, response, encoding):
- self._response = response
- self._encoding = encoding
-
- def _get_title_text(self, parser):
- import _pullparser
- text = []
- tok = None
- while 1:
- try:
- tok = parser.get_token()
- except _pullparser.NoMoreTokensError:
- break
- if tok.type == "data":
- text.append(str(tok))
- elif tok.type == "entityref":
- t = unescape("&%s;" % tok.data,
- parser._entitydefs, parser.encoding)
- text.append(t)
- elif tok.type == "charref":
- t = unescape_charref(tok.data, parser.encoding)
- text.append(t)
- elif tok.type in ["starttag", "endtag", "startendtag"]:
- tag_name = tok.data
- if tok.type == "endtag" and tag_name == "title":
- break
- text.append(str(tok))
- return COMPRESS_RE.sub(" ", "".join(text).strip())
-
- def title(self):
- import _pullparser
- p = _pullparser.TolerantPullParser(
- self._response, encoding=self._encoding)
- try:
- try:
- p.get_tag("title")
- except _pullparser.NoMoreTokensError:
- return None
- else:
- return self._get_title_text(p)
- except sgmllib.SGMLParseError, exc:
- raise ParseError(exc)
-
-
-def unescape(data, entities, encoding):
- if data is None or "&" not in data:
- return data
-
- def replace_entities(match):
- ent = match.group()
- if ent[1] == "#":
- return unescape_charref(ent[2:-1], encoding)
-
- repl = entities.get(ent[1:-1])
- if repl is not None:
- repl = unichr(repl)
- if type(repl) != type(""):
- try:
- repl = repl.encode(encoding)
- except UnicodeError:
- repl = ent
- else:
- repl = ent
- return repl
-
- return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
-
-def unescape_charref(data, encoding):
- name, base = data, 10
- if name.startswith("x"):
- name, base = name[1:], 16
- uc = unichr(int(name, base))
- if encoding is None:
- return uc
- else:
- try:
- repl = uc.encode(encoding)
- except UnicodeError:
- repl = "&#%s;" % data
- return repl
-
-
-# bizarre import gymnastics for bundled BeautifulSoup
-import _beautifulsoup
-import ClientForm
-RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes(
- _beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup
- )
-# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-(
-sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
-
-class MechanizeBs(_beautifulsoup.BeautifulSoup):
- _entitydefs = htmlentitydefs.name2codepoint
- # don't want the magic Microsoft-char workaround
- PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
- lambda x: x.group(1) + ' />'),
- (re.compile('<!\s+([^<>]*)>'),
- lambda x: '<!' + x.group(1) + '>')
- ]
-
- def __init__(self, encoding, text=None, avoidParserProblems=True,
- initialTextIsEverything=True):
- self._encoding = encoding
- _beautifulsoup.BeautifulSoup.__init__(
- self, text, avoidParserProblems, initialTextIsEverything)
-
- def handle_charref(self, ref):
- t = unescape("&#%s;"%ref, self._entitydefs, self._encoding)
- self.handle_data(t)
- def handle_entityref(self, ref):
- t = unescape("&%s;"%ref, self._entitydefs, self._encoding)
- self.handle_data(t)
- def unescape_attrs(self, attrs):
- escaped_attrs = []
- for key, val in attrs:
- val = unescape(val, self._entitydefs, self._encoding)
- escaped_attrs.append((key, val))
- return escaped_attrs
-
-class RobustLinksFactory:
-
- compress_re = COMPRESS_RE
-
- def __init__(self,
- link_parser_class=None,
- link_class=Link,
- urltags=None,
- ):
- if link_parser_class is None:
- link_parser_class = MechanizeBs
- self.link_parser_class = link_parser_class
- self.link_class = link_class
- if urltags is None:
- urltags = {
- "a": "href",
- "area": "href",
- "frame": "src",
- "iframe": "src",
- }
- self.urltags = urltags
- self._bs = None
- self._encoding = None
- self._base_url = None
-
- def set_soup(self, soup, base_url, encoding):
- self._bs = soup
- self._base_url = base_url
- self._encoding = encoding
-
- def links(self):
- import _beautifulsoup
- bs = self._bs
- base_url = self._base_url
- encoding = self._encoding
- for ch in bs.recursiveChildGenerator():
- if (isinstance(ch, _beautifulsoup.Tag) and
- ch.name in self.urltags.keys()+["base"]):
- link = ch
- attrs = bs.unescape_attrs(link.attrs)
- attrs_dict = dict(attrs)
- if link.name == "base":
- base_href = attrs_dict.get("href")
- if base_href is not None:
- base_url = base_href
- continue
- url_attr = self.urltags[link.name]
- url = attrs_dict.get(url_attr)
- if not url:
- continue
- url = _rfc3986.clean_url(url, encoding)
- text = link.fetchText(lambda t: True)
- if not text:
- # follow _pullparser's weird behaviour rigidly
- if link.name == "a":
- text = ""
- else:
- text = None
- else:
- text = self.compress_re.sub(" ", " ".join(text).strip())
- yield Link(base_url, url, text, link.name, attrs)
-
-
-class RobustFormsFactory(FormsFactory):
- def __init__(self, *args, **kwds):
- args = form_parser_args(*args, **kwds)
- if args.form_parser_class is None:
- args.form_parser_class = RobustFormParser
- FormsFactory.__init__(self, **args.dictionary)
-
- def set_response(self, response, encoding):
- self._response = response
- self.encoding = encoding
-
-
-class RobustTitleFactory:
- def __init__(self):
- self._bs = self._encoding = None
-
- def set_soup(self, soup, encoding):
- self._bs = soup
- self._encoding = encoding
-
- def title(self):
- import _beautifulsoup
- title = self._bs.first("title")
- if title == _beautifulsoup.Null:
- return None
- else:
- inner_html = "".join([str(node) for node in title.contents])
- return COMPRESS_RE.sub(" ", inner_html.strip())
-
-
-class Factory:
- """Factory for forms, links, etc.
-
- This interface may expand in future.
-
- Public methods:
-
- set_request_class(request_class)
- set_response(response)
- forms()
- links()
-
- Public attributes:
-
- Note that accessing these attributes may raise ParseError.
-
- encoding: string specifying the encoding of response if it contains a text
- document (this value is left unspecified for documents that do not have
- an encoding, e.g. an image file)
- is_html: true if response contains an HTML document (XHTML may be
- regarded as HTML too)
- title: page title, or None if no title or not HTML
- global_form: form object containing all controls that are not descendants
- of any FORM element, or None if the forms_factory does not support
- supplying a global form
-
- """
-
- LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"]
-
- def __init__(self, forms_factory, links_factory, title_factory,
- encoding_finder=EncodingFinder(DEFAULT_ENCODING),
- response_type_finder=ResponseTypeFinder(allow_xhtml=False),
- ):
- """
-
- Pass keyword arguments only.
-
- default_encoding: character encoding to use if encoding cannot be
- determined (or guessed) from the response. You should turn on
- HTTP-EQUIV handling if you want the best chance of getting this right
- without resorting to this default. The default value of this
- parameter (currently latin-1) may change in future.
-
- """
- self._forms_factory = forms_factory
- self._links_factory = links_factory
- self._title_factory = title_factory
- self._encoding_finder = encoding_finder
- self._response_type_finder = response_type_finder
-
- self.set_response(None)
-
- def set_request_class(self, request_class):
- """Set urllib2.Request class.
-
- ClientForm.HTMLForm instances returned by .forms() will return
- instances of this class when .click()ed.
-
- """
- self._forms_factory.request_class = request_class
-
- def set_response(self, response):
- """Set response.
-
- The response must either be None or implement the same interface as
- objects returned by urllib2.urlopen().
-
- """
- self._response = response
- self._forms_genf = self._links_genf = None
- self._get_title = None
- for name in self.LAZY_ATTRS:
- try:
- delattr(self, name)
- except AttributeError:
- pass
-
- def __getattr__(self, name):
- if name not in self.LAZY_ATTRS:
- return getattr(self.__class__, name)
-
- if name == "encoding":
- self.encoding = self._encoding_finder.encoding(
- copy.copy(self._response))
- return self.encoding
- elif name == "is_html":
- self.is_html = self._response_type_finder.is_html(
- copy.copy(self._response), self.encoding)
- return self.is_html
- elif name == "title":
- if self.is_html:
- self.title = self._title_factory.title()
- else:
- self.title = None
- return self.title
- elif name == "global_form":
- self.forms()
- return self.global_form
-
- def forms(self):
- """Return iterable over ClientForm.HTMLForm-like objects.
-
- Raises mechanize.ParseError on failure.
- """
- # this implementation sets .global_form as a side-effect, for benefit
- # of __getattr__ impl
- if self._forms_genf is None:
- try:
- self._forms_genf = CachingGeneratorFunction(
- self._forms_factory.forms())
- except: # XXXX define exception!
- self.set_response(self._response)
- raise
- self.global_form = getattr(
- self._forms_factory, "global_form", None)
- return self._forms_genf()
-
- def links(self):
- """Return iterable over mechanize.Link-like objects.
-
- Raises mechanize.ParseError on failure.
- """
- if self._links_genf is None:
- try:
- self._links_genf = CachingGeneratorFunction(
- self._links_factory.links())
- except: # XXXX define exception!
- self.set_response(self._response)
- raise
- return self._links_genf()
-
-class DefaultFactory(Factory):
- """Based on sgmllib."""
- def __init__(self, i_want_broken_xhtml_support=False):
- Factory.__init__(
- self,
- forms_factory=FormsFactory(),
- links_factory=LinksFactory(),
- title_factory=TitleFactory(),
- response_type_finder=ResponseTypeFinder(
- allow_xhtml=i_want_broken_xhtml_support),
- )
-
- def set_response(self, response):
- Factory.set_response(self, response)
- if response is not None:
- self._forms_factory.set_response(
- copy.copy(response), self.encoding)
- self._links_factory.set_response(
- copy.copy(response), response.geturl(), self.encoding)
- self._title_factory.set_response(
- copy.copy(response), self.encoding)
-
-class RobustFactory(Factory):
- """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is
- DefaultFactory.
-
- """
- def __init__(self, i_want_broken_xhtml_support=False,
- soup_class=None):
- Factory.__init__(
- self,
- forms_factory=RobustFormsFactory(),
- links_factory=RobustLinksFactory(),
- title_factory=RobustTitleFactory(),
- response_type_finder=ResponseTypeFinder(
- allow_xhtml=i_want_broken_xhtml_support),
- )
- if soup_class is None:
- soup_class = MechanizeBs
- self._soup_class = soup_class
-
- def set_response(self, response):
- Factory.set_response(self, response)
- if response is not None:
- data = response.read()
- soup = self._soup_class(self.encoding, data)
- self._forms_factory.set_response(
- copy.copy(response), self.encoding)
- self._links_factory.set_soup(
- soup, response.geturl(), self.encoding)
- self._title_factory.set_soup(soup, self.encoding)
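A minimal sketch of the entity/charref helpers defined in the deleted _html.py above, assuming Python 2 and an importable module (values invented):

    from _html import unescape, unescape_charref

    print unescape_charref("65", None)      # u'A'  -- decimal reference
    print unescape_charref("x41", "ascii")  # 'A'   -- hex reference, re-encoded
    # the entities mapping is entity name -> code point, as in
    # htmlentitydefs.name2codepoint
    print unescape("&amp; &#65;", {"amp": 38}, "ascii")  # '& A'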
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py
deleted file mode 100644
index 1b80e2b..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py
+++ /dev/null
@@ -1,758 +0,0 @@
-"""HTTP related handlers.
-
-Note that some other HTTP handlers live in more specific modules: _auth.py,
-_gzip.py, etc.
-
-
-Copyright 2002-2006 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import time, htmlentitydefs, logging, socket, \
- urllib2, urllib, httplib, sgmllib
-from urllib2 import URLError, HTTPError, BaseHandler
-from cStringIO import StringIO
-
-from _clientcookie import CookieJar
-from _headersutil import is_html
-from _html import unescape, unescape_charref
-from _request import Request
-from _response import closeable_response, response_seek_wrapper
-import _rfc3986
-import _sockettimeout
-
-debug = logging.getLogger("mechanize").debug
-debug_robots = logging.getLogger("mechanize.robots").debug
-
-# monkeypatch urllib2.HTTPError to show URL
-## def urllib2_str(self):
-## return 'HTTP Error %s: %s (%s)' % (
-## self.code, self.msg, self.geturl())
-## urllib2.HTTPError.__str__ = urllib2_str
-
-
-CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
-DEFAULT_ENCODING = 'latin-1'
-
-
-try:
- socket._fileobject("fake socket", close=True)
-except TypeError:
- # python <= 2.4
- create_readline_wrapper = socket._fileobject
-else:
- def create_readline_wrapper(fh):
- return socket._fileobject(fh, close=True)
-
-
-# This adds "refresh" to the list of redirectables and provides a redirection
-# algorithm that doesn't go into a loop in the presence of cookies
-# (Python 2.4 has this new algorithm, 2.3 doesn't).
-class HTTPRedirectHandler(BaseHandler):
- # maximum number of redirections to any single URL
- # this is needed because of the state that cookies introduce
- max_repeats = 4
- # maximum total number of redirections (regardless of URL) before
- # assuming we're in a loop
- max_redirections = 10
-
- # Implementation notes:
-
- # To avoid the server sending us into an infinite loop, the request
- # object needs to track what URLs we have already seen. Do this by
- # adding a handler-specific attribute to the Request object. The value
- # of the dict is used to count the number of times the same URL has
- # been visited. This is needed because visiting the same URL twice
- # does not necessarily imply a loop, thanks to state introduced by
- # cookies.
-
- # Always unhandled redirection codes:
- # 300 Multiple Choices: should not handle this here.
- # 304 Not Modified: no need to handle here: only of interest to caches
- # that do conditional GETs
- # 305 Use Proxy: probably not worth dealing with here
- # 306 Unused: what was this for in previous versions of the protocol?
-
- def redirect_request(self, newurl, req, fp, code, msg, headers):
- """Return a Request or None in response to a redirect.
-
- This is called by the http_error_30x methods when a redirection
- response is received. If a redirection should take place, return a
- new Request to allow http_error_30x to perform the redirect;
- otherwise, return None to indicate that an HTTPError should be
- raised.
-
- """
- if code in (301, 302, 303, "refresh") or \
- (code == 307 and not req.has_data()):
- # Strictly (according to RFC 2616), 301 or 302 in response to
- # a POST MUST NOT cause a redirection without confirmation
- # from the user (of urllib2, in this case). In practice,
- # essentially all clients do redirect in this case, so we do
- # the same.
- # XXX really refresh redirections should be visiting; tricky to
- # fix, so this will wait until post-stable release
- new = Request(newurl,
- headers=req.headers,
- origin_req_host=req.get_origin_req_host(),
- unverifiable=True,
- visit=False,
- )
- new._origin_req = getattr(req, "_origin_req", req)
- return new
- else:
- raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
- def http_error_302(self, req, fp, code, msg, headers):
- # Some servers (incorrectly) return multiple Location headers
- # (so probably same goes for URI). Use first header.
- if headers.has_key('location'):
- newurl = headers.getheaders('location')[0]
- elif headers.has_key('uri'):
- newurl = headers.getheaders('uri')[0]
- else:
- return
- newurl = _rfc3986.clean_url(newurl, "latin-1")
- newurl = _rfc3986.urljoin(req.get_full_url(), newurl)
-
- # XXX Probably want to forget about the state of the current
- # request, although that might interact poorly with other
- # handlers that also use handler-specific request attributes
- new = self.redirect_request(newurl, req, fp, code, msg, headers)
- if new is None:
- return
-
- # loop detection
- # .redirect_dict has a key url if url was previously visited.
- if hasattr(req, 'redirect_dict'):
- visited = new.redirect_dict = req.redirect_dict
- if (visited.get(newurl, 0) >= self.max_repeats or
- len(visited) >= self.max_redirections):
- raise HTTPError(req.get_full_url(), code,
- self.inf_msg + msg, headers, fp)
- else:
- visited = new.redirect_dict = req.redirect_dict = {}
- visited[newurl] = visited.get(newurl, 0) + 1
-
- # Don't close the fp until we are sure that we won't use it
- # with HTTPError.
- fp.read()
- fp.close()
-
- return self.parent.open(new)
-
- http_error_301 = http_error_303 = http_error_307 = http_error_302
- http_error_refresh = http_error_302
-
- inf_msg = "The HTTP server returned a redirect error that would " \
- "lead to an infinite loop.\n" \
- "The last 30x error message was:\n"
-
-
-# XXX would self.reset() work, instead of raising this exception?
-class EndOfHeadError(Exception): pass
-class AbstractHeadParser:
- # only these elements are allowed in or before HEAD of document
- head_elems = ("html", "head",
- "title", "base",
- "script", "style", "meta", "link", "object")
- _entitydefs = htmlentitydefs.name2codepoint
- _encoding = DEFAULT_ENCODING
-
- def __init__(self):
- self.http_equiv = []
-
- def start_meta(self, attrs):
- http_equiv = content = None
- for key, value in attrs:
- if key == "http-equiv":
- http_equiv = self.unescape_attr_if_required(value)
- elif key == "content":
- content = self.unescape_attr_if_required(value)
- if http_equiv is not None and content is not None:
- self.http_equiv.append((http_equiv, content))
-
- def end_head(self):
- raise EndOfHeadError()
-
- def handle_entityref(self, name):
- #debug("%s", name)
- self.handle_data(unescape(
- '&%s;' % name, self._entitydefs, self._encoding))
-
- def handle_charref(self, name):
- #debug("%s", name)
- self.handle_data(unescape_charref(name, self._encoding))
-
- def unescape_attr(self, name):
- #debug("%s", name)
- return unescape(name, self._entitydefs, self._encoding)
-
- def unescape_attrs(self, attrs):
- #debug("%s", attrs)
- escaped_attrs = {}
- for key, val in attrs.items():
- escaped_attrs[key] = self.unescape_attr(val)
- return escaped_attrs
-
- def unknown_entityref(self, ref):
- self.handle_data("&%s;" % ref)
-
- def unknown_charref(self, ref):
- self.handle_data("&#%s;" % ref)
-
-
-try:
- import HTMLParser
-except ImportError:
- pass
-else:
- class XHTMLCompatibleHeadParser(AbstractHeadParser,
- HTMLParser.HTMLParser):
- def __init__(self):
- HTMLParser.HTMLParser.__init__(self)
- AbstractHeadParser.__init__(self)
-
- def handle_starttag(self, tag, attrs):
- if tag not in self.head_elems:
- raise EndOfHeadError()
- try:
- method = getattr(self, 'start_' + tag)
- except AttributeError:
- try:
- method = getattr(self, 'do_' + tag)
- except AttributeError:
- pass # unknown tag
- else:
- method(attrs)
- else:
- method(attrs)
-
- def handle_endtag(self, tag):
- if tag not in self.head_elems:
- raise EndOfHeadError()
- try:
- method = getattr(self, 'end_' + tag)
- except AttributeError:
- pass # unknown tag
- else:
- method()
-
- def unescape(self, name):
- # Use the entitydefs passed into constructor, not
- # HTMLParser.HTMLParser's entitydefs.
- return self.unescape_attr(name)
-
- def unescape_attr_if_required(self, name):
- return name # HTMLParser.HTMLParser already did it
-
-class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
-
- def _not_called(self):
- assert False
-
- def __init__(self):
- sgmllib.SGMLParser.__init__(self)
- AbstractHeadParser.__init__(self)
-
- def handle_starttag(self, tag, method, attrs):
- if tag not in self.head_elems:
- raise EndOfHeadError()
- if tag == "meta":
- method(attrs)
-
- def unknown_starttag(self, tag, attrs):
- self.handle_starttag(tag, self._not_called, attrs)
-
- def handle_endtag(self, tag, method):
- if tag in self.head_elems:
- method()
- else:
- raise EndOfHeadError()
-
- def unescape_attr_if_required(self, name):
- return self.unescape_attr(name)
-
-def parse_head(fileobj, parser):
- """Return a list of key, value pairs."""
- while 1:
- data = fileobj.read(CHUNK)
- try:
- parser.feed(data)
- except EndOfHeadError:
- break
- if len(data) != CHUNK:
- # this should only happen if there is no HTML body, or if
- # CHUNK is big
- break
- return parser.http_equiv
-
-class HTTPEquivProcessor(BaseHandler):
- """Append META HTTP-EQUIV headers to regular HTTP headers."""
-
- handler_order = 300 # before handlers that look at HTTP headers
-
- def __init__(self, head_parser_class=HeadParser,
- i_want_broken_xhtml_support=False,
- ):
- self.head_parser_class = head_parser_class
- self._allow_xhtml = i_want_broken_xhtml_support
-
- def http_response(self, request, response):
- if not hasattr(response, "seek"):
- response = response_seek_wrapper(response)
- http_message = response.info()
- url = response.geturl()
- ct_hdrs = http_message.getheaders("content-type")
- if is_html(ct_hdrs, url, self._allow_xhtml):
- try:
- try:
- html_headers = parse_head(response,
- self.head_parser_class())
- finally:
- response.seek(0)
- except (HTMLParser.HTMLParseError,
- sgmllib.SGMLParseError):
- pass
- else:
- for hdr, val in html_headers:
- # add a header
- http_message.dict[hdr.lower()] = val
- text = hdr + ": " + val
- for line in text.split("\n"):
- http_message.headers.append(line + "\n")
- return response
-
- https_response = http_response
-
-class HTTPCookieProcessor(BaseHandler):
- """Handle HTTP cookies.
-
- Public attributes:
-
- cookiejar: CookieJar instance
-
- """
- def __init__(self, cookiejar=None):
- if cookiejar is None:
- cookiejar = CookieJar()
- self.cookiejar = cookiejar
-
- def http_request(self, request):
- self.cookiejar.add_cookie_header(request)
- return request
-
- def http_response(self, request, response):
- self.cookiejar.extract_cookies(response, request)
- return response
-
- https_request = http_request
- https_response = http_response
-
-try:
- import robotparser
-except ImportError:
- pass
-else:
- class MechanizeRobotFileParser(robotparser.RobotFileParser):
-
- def __init__(self, url='', opener=None):
- robotparser.RobotFileParser.__init__(self, url)
- self._opener = opener
- self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT
-
- def set_opener(self, opener=None):
- import _opener
- if opener is None:
- opener = _opener.OpenerDirector()
- self._opener = opener
-
- def set_timeout(self, timeout):
- self._timeout = timeout
-
- def read(self):
- """Reads the robots.txt URL and feeds it to the parser."""
- if self._opener is None:
- self.set_opener()
- req = Request(self.url, unverifiable=True, visit=False,
- timeout=self._timeout)
- try:
- f = self._opener.open(req)
- except HTTPError, f:
- pass
- except (IOError, socket.error, OSError), exc:
- debug_robots("ignoring error opening %r: %s" %
- (self.url, exc))
- return
- lines = []
- line = f.readline()
- while line:
- lines.append(line.strip())
- line = f.readline()
- status = f.code
- if status == 401 or status == 403:
- self.disallow_all = True
- debug_robots("disallow all")
- elif status >= 400:
- self.allow_all = True
- debug_robots("allow all")
- elif status == 200 and lines:
- debug_robots("parse lines")
- self.parse(lines)
-
- class RobotExclusionError(urllib2.HTTPError):
- def __init__(self, request, *args):
- urllib2.HTTPError.__init__(self, *args)
- self.request = request
-
- class HTTPRobotRulesProcessor(BaseHandler):
- # before redirections, after everything else
- handler_order = 800
-
- try:
- from httplib import HTTPMessage
- except:
- from mimetools import Message
- http_response_class = Message
- else:
- http_response_class = HTTPMessage
-
- def __init__(self, rfp_class=MechanizeRobotFileParser):
- self.rfp_class = rfp_class
- self.rfp = None
- self._host = None
-
- def http_request(self, request):
- scheme = request.get_type()
- if scheme not in ["http", "https"]:
- # robots exclusion only applies to HTTP
- return request
-
- if request.get_selector() == "/robots.txt":
- # /robots.txt is always OK to fetch
- return request
-
- host = request.get_host()
-
- # robots.txt requests don't need to be allowed by robots.txt :-)
- origin_req = getattr(request, "_origin_req", None)
- if (origin_req is not None and
- origin_req.get_selector() == "/robots.txt" and
- origin_req.get_host() == host
- ):
- return request
-
- if host != self._host:
- self.rfp = self.rfp_class()
- try:
- self.rfp.set_opener(self.parent)
- except AttributeError:
- debug("%r instance does not support set_opener" %
- self.rfp.__class__)
- self.rfp.set_url(scheme+"://"+host+"/robots.txt")
- self.rfp.set_timeout(request.timeout)
- self.rfp.read()
- self._host = host
-
- ua = request.get_header("User-agent", "")
- if self.rfp.can_fetch(ua, request.get_full_url()):
- return request
- else:
- # XXX This should really have raised URLError. Too late now...
- msg = "request disallowed by robots.txt"
- raise RobotExclusionError(
- request,
- request.get_full_url(),
- 403, msg,
- self.http_response_class(StringIO()), StringIO(msg))
-
- https_request = http_request
-
-class HTTPRefererProcessor(BaseHandler):
- """Add Referer header to requests.
-
- This only makes sense if you use each RefererProcessor for a single
- chain of requests (so, for example, if you use a single
- HTTPRefererProcessor to fetch a series of URLs extracted from a single
- page, this will break).
-
- There's a proper implementation of this in mechanize.Browser.
-
- """
- def __init__(self):
- self.referer = None
-
- def http_request(self, request):
- if ((self.referer is not None) and
- not request.has_header("Referer")):
- request.add_unredirected_header("Referer", self.referer)
- return request
-
- def http_response(self, request, response):
- self.referer = response.geturl()
- return response
-
- https_request = http_request
- https_response = http_response
-
-
-def clean_refresh_url(url):
- # e.g. Firefox 1.5 does (something like) this
- if ((url.startswith('"') and url.endswith('"')) or
- (url.startswith("'") and url.endswith("'"))):
- url = url[1:-1]
- return _rfc3986.clean_url(url, "latin-1") # XXX encoding
-
-def parse_refresh_header(refresh):
- """
- >>> parse_refresh_header("1; url=http://example.com/")
- (1.0, 'http://example.com/')
- >>> parse_refresh_header("1; url='http://example.com/'")
- (1.0, 'http://example.com/')
- >>> parse_refresh_header("1")
- (1.0, None)
- >>> parse_refresh_header("blah")
- Traceback (most recent call last):
- ValueError: invalid literal for float(): blah
-
- """
-
- ii = refresh.find(";")
- if ii != -1:
- pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
- jj = newurl_spec.find("=")
- key = None
- if jj != -1:
- key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
- newurl = clean_refresh_url(newurl)
- if key is None or key.strip().lower() != "url":
- raise ValueError()
- else:
- pause, newurl = float(refresh), None
- return pause, newurl
-
-class HTTPRefreshProcessor(BaseHandler):
- """Perform HTTP Refresh redirections.
-
- Note that if a non-200 HTTP code has occurred (for example, a 30x
- redirect), this processor will do nothing.
-
- By default, only zero-time Refresh headers are redirected. Use the
- max_time attribute / constructor argument to allow Refresh with longer
- pauses. Use the honor_time attribute / constructor argument to control
- whether the requested pause is honoured (with a time.sleep()) or
- skipped in favour of immediate redirection.
-
- Public attributes:
-
- max_time: see above
- honor_time: see above
-
- """
- handler_order = 1000
-
- def __init__(self, max_time=0, honor_time=True):
- self.max_time = max_time
- self.honor_time = honor_time
- self._sleep = time.sleep
-
- def http_response(self, request, response):
- code, msg, hdrs = response.code, response.msg, response.info()
-
- if code == 200 and hdrs.has_key("refresh"):
- refresh = hdrs.getheaders("refresh")[0]
- try:
- pause, newurl = parse_refresh_header(refresh)
- except ValueError:
- debug("bad Refresh header: %r" % refresh)
- return response
-
- if newurl is None:
- newurl = response.geturl()
- if (self.max_time is None) or (pause <= self.max_time):
- if pause > 1E-3 and self.honor_time:
- self._sleep(pause)
- hdrs["location"] = newurl
- # hardcoded http is NOT a bug
- response = self.parent.error(
- "http", request, response,
- "refresh", msg, hdrs)
- else:
- debug("Refresh header ignored: %r" % refresh)
-
- return response
-
- https_response = http_response
-
-class HTTPErrorProcessor(BaseHandler):
- """Process HTTP error responses.
-
- The purpose of this handler is to allow other response processors a
- look-in by removing the call to parent.error() from
- AbstractHTTPHandler.
-
- For non-200 error codes, this just passes the job on to the
- Handler.<proto>_error_<code> methods, via the OpenerDirector.error
- method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an
- HTTPError if no other handler handles the error.
-
- """
- handler_order = 1000 # after all other processors
-
- def http_response(self, request, response):
- code, msg, hdrs = response.code, response.msg, response.info()
-
- if code != 200:
- # hardcoded http is NOT a bug
- response = self.parent.error(
- "http", request, response, code, msg, hdrs)
-
- return response
-
- https_response = http_response
-
-
-class HTTPDefaultErrorHandler(BaseHandler):
- def http_error_default(self, req, fp, code, msg, hdrs):
- # why these error methods took the code, msg, headers args in the first
- # place rather than a response object, I don't know, but to avoid
- # multiple wrapping, we're discarding them
-
- if isinstance(fp, urllib2.HTTPError):
- response = fp
- else:
- response = urllib2.HTTPError(
- req.get_full_url(), code, msg, hdrs, fp)
- assert code == response.code
- assert msg == response.msg
- assert hdrs == response.hdrs
- raise response
-
-
-class AbstractHTTPHandler(BaseHandler):
-
- def __init__(self, debuglevel=0):
- self._debuglevel = debuglevel
-
- def set_http_debuglevel(self, level):
- self._debuglevel = level
-
- def do_request_(self, request):
- host = request.get_host()
- if not host:
- raise URLError('no host given')
-
- if request.has_data(): # POST
- data = request.get_data()
- if not request.has_header('Content-type'):
- request.add_unredirected_header(
- 'Content-type',
- 'application/x-www-form-urlencoded')
- if not request.has_header('Content-length'):
- request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
-
- scheme, sel = urllib.splittype(request.get_selector())
- sel_host, sel_path = urllib.splithost(sel)
- if not request.has_header('Host'):
- request.add_unredirected_header('Host', sel_host or host)
- for name, value in self.parent.addheaders:
- name = name.capitalize()
- if not request.has_header(name):
- request.add_unredirected_header(name, value)
-
- return request
-
- def do_open(self, http_class, req):
- """Return an addinfourl object for the request, using http_class.
-
- http_class must implement the HTTPConnection API from httplib.
- The addinfourl return value is a file-like object. It also
- has methods and attributes including:
- - info(): return a mimetools.Message object for the headers
- - geturl(): return the original request URL
- - code: HTTP status code
- """
- host_port = req.get_host()
- if not host_port:
- raise URLError('no host given')
-
- try:
- h = http_class(host_port, timeout=req.timeout)
- except TypeError:
- # Python < 2.6, no per-connection timeout support
- h = http_class(host_port)
- h.set_debuglevel(self._debuglevel)
-
- headers = dict(req.headers)
- headers.update(req.unredirected_hdrs)
- # We want to make an HTTP/1.1 request, but the addinfourl
- # class isn't prepared to deal with a persistent connection.
- # It will try to read all remaining data from the socket,
- # which will block while the server waits for the next request.
- # So make sure the connection gets closed after the (only)
- # request.
- headers["Connection"] = "close"
- headers = dict(
- [(name.title(), val) for name, val in headers.items()])
- try:
- h.request(req.get_method(), req.get_selector(), req.data, headers)
- r = h.getresponse()
- except socket.error, err: # XXX what error?
- raise URLError(err)
-
- # Pick apart the HTTPResponse object to get the addinfourl
- # object initialized properly.
-
- # Wrap the HTTPResponse object in socket's file object adapter
- # for Windows. That adapter calls recv(), so delegate recv()
- # to read(). This weird wrapping allows the returned object to
- # have readline() and readlines() methods.
-
- # XXX It might be better to extract the read buffering code
- # out of socket._fileobject() and into a base class.
-
- r.recv = r.read
- fp = create_readline_wrapper(r)
-
- resp = closeable_response(fp, r.msg, req.get_full_url(),
- r.status, r.reason)
- return resp
-
-
-class HTTPHandler(AbstractHTTPHandler):
- def http_open(self, req):
- return self.do_open(httplib.HTTPConnection, req)
-
- http_request = AbstractHTTPHandler.do_request_
-
-if hasattr(httplib, 'HTTPS'):
-
- class HTTPSConnectionFactory:
- def __init__(self, key_file, cert_file):
- self._key_file = key_file
- self._cert_file = cert_file
- def __call__(self, hostport):
- return httplib.HTTPSConnection(
- hostport,
- key_file=self._key_file, cert_file=self._cert_file)
-
- class HTTPSHandler(AbstractHTTPHandler):
- def __init__(self, client_cert_manager=None):
- AbstractHTTPHandler.__init__(self)
- self.client_cert_manager = client_cert_manager
-
- def https_open(self, req):
- if self.client_cert_manager is not None:
- key_file, cert_file = self.client_cert_manager.find_key_cert(
- req.get_full_url())
- conn_factory = HTTPSConnectionFactory(key_file, cert_file)
- else:
- conn_factory = httplib.HTTPSConnection
- return self.do_open(conn_factory, req)
-
- https_request = AbstractHTTPHandler.do_request_
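A minimal sketch of the Refresh-header parsing that the deleted HTTPRefreshProcessor above relies on, consistent with the doctests in parse_refresh_header (URL invented), assuming Python 2 and an importable module:

    from _http import parse_refresh_header

    print parse_refresh_header("5; url='http://example.com/next'")
    # -> (5.0, 'http://example.com/next')  -- quotes stripped by clean_refresh_url
    print parse_refresh_header("0")
    # -> (0.0, None)  -- None means refresh to the current URL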
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py
deleted file mode 100644
index f8d49cf..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py
+++ /dev/null
@@ -1,185 +0,0 @@
-"""Load / save to libwww-perl (LWP) format files.
-
-Actually, the format is slightly extended from that used by LWP's
-(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
-not recorded by LWP.
-
-It uses the version string "2.0", though really there isn't an LWP Cookies
-2.0 format. This indicates that there is extra information in here
-(domain_dot and port_spec) while still being compatible with libwww-perl,
-I hope.
-
-Copyright 2002-2006 John J Lee <jjl@pobox.com>
-Copyright 1997-1999 Gisle Aas (original libwww-perl code)
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import time, re, logging
-
-from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
- MISSING_FILENAME_TEXT, LoadError
-from _headersutil import join_header_words, split_header_words
-from _util import iso2time, time2isoz
-
-debug = logging.getLogger("mechanize").debug
-
-
-def lwp_cookie_str(cookie):
- """Return string representation of Cookie in an the LWP cookie file format.
-
- Actually, the format is extended a bit -- see module docstring.
-
- """
- h = [(cookie.name, cookie.value),
- ("path", cookie.path),
- ("domain", cookie.domain)]
- if cookie.port is not None: h.append(("port", cookie.port))
- if cookie.path_specified: h.append(("path_spec", None))
- if cookie.port_specified: h.append(("port_spec", None))
- if cookie.domain_initial_dot: h.append(("domain_dot", None))
- if cookie.secure: h.append(("secure", None))
- if cookie.expires: h.append(("expires",
- time2isoz(float(cookie.expires))))
- if cookie.discard: h.append(("discard", None))
- if cookie.comment: h.append(("comment", cookie.comment))
- if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
- if cookie.rfc2109: h.append(("rfc2109", None))
-
- keys = cookie.nonstandard_attr_keys()
- keys.sort()
- for k in keys:
- h.append((k, str(cookie.get_nonstandard_attr(k))))
-
- h.append(("version", str(cookie.version)))
-
- return join_header_words([h])
-
-class LWPCookieJar(FileCookieJar):
- """
- The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
- "Set-Cookie3" is the format used by the libwww-perl library, not known
- to be compatible with any browser, but which is easy to read and
- doesn't lose information about RFC 2965 cookies.
-
- Additional methods
-
- as_lwp_str(ignore_discard=True, ignore_expires=True)
-
- """
-
- magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
-
- def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
- """Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
-
- ignore_discard and ignore_expires: see docstring for FileCookieJar.save
-
- """
- now = time.time()
- r = []
- for cookie in self:
- if not ignore_discard and cookie.discard:
- debug(" Not saving %s: marked for discard", cookie.name)
- continue
- if not ignore_expires and cookie.is_expired(now):
- debug(" Not saving %s: expired", cookie.name)
- continue
- r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
- return "\n".join(r+[""])
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename, "w")
- try:
- debug("Saving LWP cookies file")
- # There really isn't an LWP Cookies 2.0 format, but this indicates
- # that there is extra information in here (domain_dot and
- # port_spec) while still being compatible with libwww-perl, I hope.
- f.write("#LWP-Cookies-2.0\n")
- f.write(self.as_lwp_str(ignore_discard, ignore_expires))
- finally:
- f.close()
-
- def _really_load(self, f, filename, ignore_discard, ignore_expires):
- magic = f.readline()
- if not re.search(self.magic_re, magic):
- msg = "%s does not seem to contain cookies" % filename
- raise LoadError(msg)
-
- now = time.time()
-
- header = "Set-Cookie3:"
- boolean_attrs = ("port_spec", "path_spec", "domain_dot",
- "secure", "discard", "rfc2109")
- value_attrs = ("version",
- "port", "path", "domain",
- "expires",
- "comment", "commenturl")
-
- try:
- while 1:
- line = f.readline()
- if line == "": break
- if not line.startswith(header):
- continue
- line = line[len(header):].strip()
-
- for data in split_header_words([line]):
- name, value = data[0]
- standard = {}
- rest = {}
- for k in boolean_attrs:
- standard[k] = False
- for k, v in data[1:]:
- if k is not None:
- lc = k.lower()
- else:
- lc = None
- # don't lose case distinction for unknown fields
- if (lc in value_attrs) or (lc in boolean_attrs):
- k = lc
- if k in boolean_attrs:
- if v is None: v = True
- standard[k] = v
- elif k in value_attrs:
- standard[k] = v
- else:
- rest[k] = v
-
- h = standard.get
- expires = h("expires")
- discard = h("discard")
- if expires is not None:
- expires = iso2time(expires)
- if expires is None:
- discard = True
- domain = h("domain")
- domain_specified = domain.startswith(".")
- c = Cookie(h("version"), name, value,
- h("port"), h("port_spec"),
- domain, domain_specified, h("domain_dot"),
- h("path"), h("path_spec"),
- h("secure"),
- expires,
- discard,
- h("comment"),
- h("commenturl"),
- rest,
- h("rfc2109"),
- )
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
- except:
- reraise_unmasked_exceptions((IOError,))
- raise LoadError("invalid Set-Cookie3 format file %s" % filename)
-
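For illustration, a file written by the deleted LWPCookieJar above consists of the magic line followed by one Set-Cookie3 header per cookie; boolean attributes appear as bare names and non-word values are quoted by join_header_words (cookie values invented):

    #LWP-Cookies-2.0
    Set-Cookie3: sid=abcdef; path="/"; domain=".example.com"; path_spec; domain_dot; expires="2006-11-09 23:12:40Z"; version=0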
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py
deleted file mode 100644
index ad729c9..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py
+++ /dev/null
@@ -1,676 +0,0 @@
-"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
-
-Copyright 2003-2006 John J. Lee <jjl@pobox.com>
-Copyright 2003 Andy Lester (original Perl code)
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
-included with the distribution).
-
-"""
-
-import urllib2, copy, re, os, urllib
-
-
-from _html import DefaultFactory
-import _response
-import _request
-import _rfc3986
-import _sockettimeout
-from _useragent import UserAgentBase
-
-__version__ = (0, 1, 11, None, None) # 0.1.11
-
-class BrowserStateError(Exception): pass
-class LinkNotFoundError(Exception): pass
-class FormNotFoundError(Exception): pass
-
-
-def sanepathname2url(path):
- urlpath = urllib.pathname2url(path)
- if os.name == "nt" and urlpath.startswith("///"):
- urlpath = urlpath[2:]
- # XXX don't ask me about the mac...
- return urlpath
-
-
-class History:
- """
-
- Though this will become public, the implied interface is not yet stable.
-
- """
- def __init__(self):
- self._history = [] # LIFO
- def add(self, request, response):
- self._history.append((request, response))
- def back(self, n, _response):
- response = _response # XXX move Browser._response into this class?
- while n > 0 or response is None:
- try:
- request, response = self._history.pop()
- except IndexError:
- raise BrowserStateError("already at start of history")
- n -= 1
- return request, response
- def clear(self):
- del self._history[:]
- def close(self):
- for request, response in self._history:
- if response is not None:
- response.close()
- del self._history[:]
-
-
-class HTTPRefererProcessor(urllib2.BaseHandler):
- def http_request(self, request):
- # See RFC 2616 14.36. The only times we know the source of the
- # request URI has a URI associated with it are redirect, and
- # Browser.click() / Browser.submit() / Browser.follow_link().
- # Otherwise, it's the user's job to add any Referer header before
- # .open()ing.
- if hasattr(request, "redirect_dict"):
- request = self.parent._add_referer_header(
- request, origin_request=False)
- return request
-
- https_request = http_request
-
-
-class Browser(UserAgentBase):
- """Browser-like class with support for history, forms and links.
-
- BrowserStateError is raised whenever the browser is in the wrong state to
- complete the requested operation - e.g., when .back() is called when the
- browser history is empty, or when .follow_link() is called when the current
- response does not contain HTML data.
-
- Public attributes:
-
- request: current request (mechanize.Request or urllib2.Request)
- form: currently selected form (see .select_form())
-
- """
-
- handler_classes = copy.copy(UserAgentBase.handler_classes)
- handler_classes["_referer"] = HTTPRefererProcessor
- default_features = copy.copy(UserAgentBase.default_features)
- default_features.append("_referer")
-
- def __init__(self,
- factory=None,
- history=None,
- request_class=None,
- ):
- """
-
- Only named arguments should be passed to this constructor.
-
- factory: object implementing the mechanize.Factory interface.
- history: object implementing the mechanize.History interface. Note
- this interface is still experimental and may change in future.
- request_class: Request class to use. Defaults to mechanize.Request
- for Pythons older than 2.4, and to urllib2.Request otherwise.
-
- The Factory and History objects passed in are 'owned' by the Browser,
- so they should not be shared across Browsers. In particular,
- factory.set_response() should not be called except by the owning
- Browser itself.
-
- Note that the supplied factory's request_class is overridden by this
- constructor, to ensure only one Request class is used.
-
- """
- self._handle_referer = True
-
- if history is None:
- history = History()
- self._history = history
-
- if request_class is None:
- if not hasattr(urllib2.Request, "add_unredirected_header"):
- request_class = _request.Request
- else:
- request_class = urllib2.Request # Python >= 2.4
-
- if factory is None:
- factory = DefaultFactory()
- factory.set_request_class(request_class)
- self._factory = factory
- self.request_class = request_class
-
- self.request = None
- self._set_response(None, False)
-
- # do this last to avoid __getattr__ problems
- UserAgentBase.__init__(self)
-
- def close(self):
- UserAgentBase.close(self)
- if self._response is not None:
- self._response.close()
- if self._history is not None:
- self._history.close()
- self._history = None
-
- # make use after .close easy to spot
- self.form = None
- self.request = self._response = None
- self.request = self.response = self.set_response = None
- self.geturl = self.reload = self.back = None
- self.clear_history = self.set_cookie = self.links = self.forms = None
- self.viewing_html = self.encoding = self.title = None
- self.select_form = self.click = self.submit = self.click_link = None
- self.follow_link = self.find_link = None
-
- def set_handle_referer(self, handle):
- """Set whether to add Referer header to each request."""
- self._set_handler("_referer", handle)
- self._handle_referer = bool(handle)
-
- def _add_referer_header(self, request, origin_request=True):
- if self.request is None:
- return request
- scheme = request.get_type()
- original_scheme = self.request.get_type()
- if scheme not in ["http", "https"]:
- return request
- if not origin_request and not self.request.has_header("Referer"):
- return request
-
- if (self._handle_referer and
- original_scheme in ["http", "https"] and
- not (original_scheme == "https" and scheme != "https")):
- # strip URL fragment (RFC 2616 14.36)
- parts = _rfc3986.urlsplit(self.request.get_full_url())
- parts = parts[:-1]+(None,)
- referer = _rfc3986.urlunsplit(parts)
- request.add_unredirected_header("Referer", referer)
- return request
-
- def open_novisit(self, url, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- """Open a URL without visiting it.
-
- Browser state (including request, response, history, forms and links)
- is left unchanged by calling this function.
-
- The interface is the same as for .open().
-
- This is useful for things like fetching images.
-
- See also .retrieve().
-
- """
- return self._mech_open(url, data, visit=False, timeout=timeout)
-
- def open(self, url, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- return self._mech_open(url, data, timeout=timeout)
-
- def _mech_open(self, url, data=None, update_history=True, visit=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- try:
- url.get_full_url
- except AttributeError:
- # string URL -- convert to absolute URL if required
- scheme, authority = _rfc3986.urlsplit(url)[:2]
- if scheme is None:
- # relative URL
- if self._response is None:
- raise BrowserStateError(
- "can't fetch relative reference: "
- "not viewing any document")
- url = _rfc3986.urljoin(self._response.geturl(), url)
-
- request = self._request(url, data, visit, timeout)
- visit = request.visit
- if visit is None:
- visit = True
-
- if visit:
- self._visit_request(request, update_history)
-
- success = True
- try:
- response = UserAgentBase.open(self, request, data)
- except urllib2.HTTPError, error:
- success = False
- if error.fp is None: # not a response
- raise
- response = error
-## except (IOError, socket.error, OSError), error:
-## # Yes, urllib2 really does raise all these :-((
-## # See test_urllib2.py for examples of socket.gaierror and OSError,
-## # plus note that FTPHandler raises IOError.
-## # XXX I don't seem to have an example of exactly socket.error being
-## # raised, only socket.gaierror...
-## # I don't want to start fixing these here, though, since this is a
-## # subclass of OpenerDirector, and it would break old code. Even in
-## # Python core, a fix would need some backwards-compat. hack to be
-## # acceptable.
-## raise
-
- if visit:
- self._set_response(response, False)
- response = copy.copy(self._response)
- elif response is not None:
- response = _response.upgrade_response(response)
-
- if not success:
- raise response
- return response
-
- def __str__(self):
- text = []
- text.append("<%s " % self.__class__.__name__)
- if self._response:
- text.append("visiting %s" % self._response.geturl())
- else:
- text.append("(not visiting a URL)")
- if self.form:
- text.append("\n selected form:\n %s\n" % str(self.form))
- text.append(">")
- return "".join(text)
-
- def response(self):
- """Return a copy of the current response.
-
- The returned object has the same interface as the object returned by
- .open() (or urllib2.urlopen()).
-
- """
- return copy.copy(self._response)
-
- def open_local_file(self, filename):
- path = sanepathname2url(os.path.abspath(filename))
- url = 'file://'+path
- return self.open(url)
-
- def set_response(self, response):
- """Replace current response with (a copy of) response.
-
- response may be None.
-
- This is intended mostly for HTML-preprocessing.
- """
- self._set_response(response, True)
-
- def _set_response(self, response, close_current):
- # sanity check, necessary but far from sufficient
- if not (response is None or
- (hasattr(response, "info") and hasattr(response, "geturl") and
- hasattr(response, "read")
- )
- ):
- raise ValueError("not a response object")
-
- self.form = None
- if response is not None:
- response = _response.upgrade_response(response)
- if close_current and self._response is not None:
- self._response.close()
- self._response = response
- self._factory.set_response(response)
-
- def visit_response(self, response, request=None):
- """Visit the response, as if it had been .open()ed.
-
- Unlike .set_response(), this updates history rather than replacing the
- current response.
- """
- if request is None:
- request = _request.Request(response.geturl())
- self._visit_request(request, True)
- self._set_response(response, False)
-
- def _visit_request(self, request, update_history):
- if self._response is not None:
- self._response.close()
- if self.request is not None and update_history:
- self._history.add(self.request, self._response)
- self._response = None
- # we want self.request to be assigned even if UserAgentBase.open
- # fails
- self.request = request
-
- def geturl(self):
- """Get URL of current document."""
- if self._response is None:
- raise BrowserStateError("not viewing any document")
- return self._response.geturl()
-
- def reload(self):
- """Reload current document, and return response object."""
- if self.request is None:
- raise BrowserStateError("no URL has yet been .open()ed")
- if self._response is not None:
- self._response.close()
- return self._mech_open(self.request, update_history=False)
-
- def back(self, n=1):
- """Go back n steps in history, and return response object.
-
- n: go back this number of steps (default 1 step)
-
- """
- if self._response is not None:
- self._response.close()
- self.request, response = self._history.back(n, self._response)
- self.set_response(response)
- if not response.read_complete:
- return self.reload()
- return copy.copy(response)
-
- def clear_history(self):
- self._history.clear()
-
- def set_cookie(self, cookie_string):
- """Request to set a cookie.
-
- Note that it is NOT necessary to call this method under ordinary
- circumstances: cookie handling is normally entirely automatic. The
- intended use case is rather to simulate the setting of a cookie by
- client script in a web page (e.g. JavaScript). In that case, use of
- this method is necessary because mechanize currently does not support
- JavaScript, VBScript, etc.
-
- The cookie is added in the same way as if it had arrived with the
- current response, as a result of the current request. This means that,
- for example, if it is not appropriate to set the cookie based on the
- current request, no cookie will be set.
-
- The cookie will be returned automatically with subsequent responses
- made by the Browser instance whenever that's appropriate.
-
- cookie_string should be a valid value of the Set-Cookie header.
-
- For example:
-
- browser.set_cookie(
- "sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT")
-
- Currently, this method does not allow for adding RFC 2965 cookies.
- This limitation will be lifted if anybody requests it.
-
- """
- if self._response is None:
- raise BrowserStateError("not viewing any document")
- if self.request.get_type() not in ["http", "https"]:
- raise BrowserStateError("can't set cookie for non-HTTP/HTTPS "
- "transactions")
- cookiejar = self._ua_handlers["_cookies"].cookiejar
- response = self.response() # copy
- headers = response.info()
- headers["Set-cookie"] = cookie_string
- cookiejar.extract_cookies(response, self.request)
-
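
    Roughly, simulating a script-set cookie as described above (the URL and
    cookie value are placeholders); if the cookie is not appropriate for the
    current request, it is silently dropped:

        br = mechanize.Browser()
        br.open("http://example.com/")         # must be viewing an HTTP(S) page
        br.set_cookie("sid=abcdef")            # as if set by the current response
        br.open("http://example.com/account")  # cookie now sent automatically
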
- def links(self, **kwds):
- """Return iterable over links (mechanize.Link objects)."""
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- links = self._factory.links()
- if kwds:
- return self._filter_links(links, **kwds)
- else:
- return links
-
- def forms(self):
- """Return iterable over forms.
-
- The returned form objects implement the ClientForm.HTMLForm interface.
-
- """
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- return self._factory.forms()
-
- def global_form(self):
- """Return the global form object, or None if the factory implementation
- did not supply one.
-
- The "global" form object contains all controls that are not descendants
- of any FORM element.
-
- The returned form object implements the ClientForm.HTMLForm interface.
-
- This is a separate method since the global form is not regarded as part
- of the sequence of forms in the document -- mostly for
- backwards-compatibility.
-
- """
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- return self._factory.global_form
-
- def viewing_html(self):
- """Return whether the current response contains HTML data."""
- if self._response is None:
- raise BrowserStateError("not viewing any document")
- return self._factory.is_html
-
- def encoding(self):
- if self._response is None:
- raise BrowserStateError("not viewing any document")
- return self._factory.encoding
-
- def title(self):
- r"""Return title, or None if there is no title element in the document.
-
- Treatment of any tag children of <title> attempts to follow Firefox and IE
- (currently, tags are preserved).
-
- """
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- return self._factory.title
-
- def select_form(self, name=None, predicate=None, nr=None):
- """Select an HTML form for input.
-
- This is a bit like giving a form the "input focus" in a browser.
-
- If a form is selected, the Browser object supports the HTMLForm
- interface, so you can call methods like .set_value(), .set(), and
- .click().
-
- Another way to select a form is to assign to the .form attribute. The
- form assigned should be one of the objects returned by the .forms()
- method.
-
- At least one of the name, predicate and nr arguments must be supplied.
- If no matching form is found, mechanize.FormNotFoundError is raised.
-
- If name is specified, then the form must have the indicated name.
-
- If predicate is specified, then the form must match that function. The
- predicate function is passed the HTMLForm as its single argument, and
- should return a boolean value indicating whether the form matched.
-
- nr, if supplied, is the sequence number of the form (where 0 is the
- first). Note that form 0 is the first form matching all the other
- arguments (if supplied); it is not necessarily the first form in the
- document. The "global form" (consisting of all form controls not contained
- in any FORM element) is considered not to be part of this sequence and
- to have no name, so will not be matched unless both name and nr are
- None.
-
- """
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- if (name is None) and (predicate is None) and (nr is None):
- raise ValueError(
- "at least one argument must be supplied to specify form")
-
- global_form = self._factory.global_form
- if nr is None and name is None and \
- predicate is not None and predicate(global_form):
- self.form = global_form
- return
-
- orig_nr = nr
- for form in self.forms():
- if name is not None and name != form.name:
- continue
- if predicate is not None and not predicate(form):
- continue
- if nr:
- nr -= 1
- continue
- self.form = form
- break # success
- else:
- # failure
- description = []
- if name is not None: description.append("name '%s'" % name)
- if predicate is not None:
- description.append("predicate %s" % predicate)
- if orig_nr is not None: description.append("nr %d" % orig_nr)
- description = ", ".join(description)
- raise FormNotFoundError("no form matching "+description)
-
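
    A sketch of the three selection styles (the form name and field names
    are hypothetical); once a form is selected, attribute and item access
    are delegated to it:

        br.select_form(name="login")                             # by name
        br.select_form(predicate=lambda f: f.method == "POST")   # by predicate
        br.select_form(nr=0)                                     # by position
        br["username"] = "alice"             # delegated to the selected form
        response = br.submit()
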
- def click(self, *args, **kwds):
- """See ClientForm.HTMLForm.click for documentation."""
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- request = self.form.click(*args, **kwds)
- return self._add_referer_header(request)
-
- def submit(self, *args, **kwds):
- """Submit current form.
-
- Arguments are as for ClientForm.HTMLForm.click().
-
- Return value is same as for Browser.open().
-
- """
- return self.open(self.click(*args, **kwds))
-
- def click_link(self, link=None, **kwds):
- """Find a link and return a Request object for it.
-
- Arguments are as for .find_link(), except that a link may be supplied
- as the first argument.
-
- """
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
- if not link:
- link = self.find_link(**kwds)
- else:
- if kwds:
- raise ValueError(
- "either pass a Link, or keyword arguments, not both")
- request = self.request_class(link.absolute_url)
- return self._add_referer_header(request)
-
- def follow_link(self, link=None, **kwds):
- """Find a link and .open() it.
-
- Arguments are as for .click_link().
-
- Return value is same as for Browser.open().
-
- """
- return self.open(self.click_link(link, **kwds))
-
- def find_link(self, **kwds):
- """Find a link in current page.
-
- Links are returned as mechanize.Link objects.
-
- # Return third link that .search()-matches the regexp "python"
- # (by ".search()-matches", I mean that the regular expression method
- # .search() is used, rather than .match()).
- find_link(text_regex=re.compile("python"), nr=2)
-
- # Return first http link in the current page that points to somewhere
- # on python.org whose link text (after tags have been removed) is
- # exactly "monty python".
- find_link(text="monty python",
- url_regex=re.compile("http.*python.org"))
-
- # Return first link with exactly three HTML attributes.
- find_link(predicate=lambda link: len(link.attrs) == 3)
-
- Links include anchors (<a>), image maps (<area>), and frames (<frame>,
- <iframe>).
-
- All arguments must be passed by keyword, not position. Zero or more
- arguments may be supplied. In order to find a link, all arguments
- supplied must match.
-
- If a matching link is not found, mechanize.LinkNotFoundError is raised.
-
- text: link text between link tags: eg. <a href="blah">this bit</a> (as
- returned by pullparser.get_compressed_text(), ie. without tags but
- with opening tags "textified" as per the pullparser docs) must compare
- equal to this argument, if supplied
- text_regex: link text between tags (as defined above) must match the
- regular expression object or regular expression string passed as this
- argument, if supplied
- name, name_regex: as for text and text_regex, but matched against the
- name HTML attribute of the link tag
- url, url_regex: as for text and text_regex, but matched against the
- URL of the link tag (note this matches against Link.url, which is a
- relative or absolute URL according to how it was written in the HTML)
- tag: element name of opening tag, eg. "a"
- predicate: a function taking a Link object as its single argument,
- returning a boolean result, indicating whether the link matched
- nr: matches the nth link that matches all other criteria (default 0)
-
- """
- try:
- return self._filter_links(self._factory.links(), **kwds).next()
- except StopIteration:
- raise LinkNotFoundError()
-
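
    Handling the lookup failure mode described above, as a sketch (the
    regular expression is arbitrary):

        import re

        try:
            link = br.find_link(url_regex=re.compile(r"\.pdf$"), nr=1)
        except mechanize.LinkNotFoundError:
            print "no second matching link on this page"
        else:
            br.follow_link(link)
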
- def __getattr__(self, name):
- # pass through ClientForm / DOMForm methods and attributes
- form = self.__dict__.get("form")
- if form is None:
- raise AttributeError(
- "%s instance has no attribute %s (perhaps you forgot to "
- ".select_form()?)" % (self.__class__, name))
- return getattr(form, name)
-
- def _filter_links(self, links,
- text=None, text_regex=None,
- name=None, name_regex=None,
- url=None, url_regex=None,
- tag=None,
- predicate=None,
- nr=0
- ):
- if not self.viewing_html():
- raise BrowserStateError("not viewing HTML")
-
- found_links = []
- orig_nr = nr
-
- for link in links:
- if url is not None and url != link.url:
- continue
- if url_regex is not None and not re.search(url_regex, link.url):
- continue
- if (text is not None and
- (link.text is None or text != link.text)):
- continue
- if (text_regex is not None and
- (link.text is None or not re.search(text_regex, link.text))):
- continue
- if name is not None and name != dict(link.attrs).get("name"):
- continue
- if name_regex is not None:
- link_name = dict(link.attrs).get("name")
- if link_name is None or not re.search(name_regex, link_name):
- continue
- if tag is not None and tag != link.tag:
- continue
- if predicate is not None and not predicate(link):
- continue
- if nr:
- nr -= 1
- continue
- yield link
- nr = orig_nr
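
    The same keyword arguments drive .links(), which yields every matching
    link rather than the nth one; a sketch:

        for link in br.links(tag="a", url_regex="python"):
            print link.text, link.absolute_url
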
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py
deleted file mode 100644
index 51e81bb..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""Mozilla / Netscape cookie loading / saving.
-
-Copyright 2002-2006 John J Lee <jjl@pobox.com>
-Copyright 1997-1999 Gisle Aas (original libwww-perl code)
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import re, time, logging
-
-from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
- MISSING_FILENAME_TEXT, LoadError
-debug = logging.getLogger("ClientCookie").debug
-
-
-class MozillaCookieJar(FileCookieJar):
- """
-
- WARNING: you may want to back up your browser's cookies file if you use
- this class to save cookies. I *think* it works, but there have been
- bugs in the past!
-
- This class differs from CookieJar only in the format it uses to save and
- load cookies to and from a file. This class uses the Mozilla/Netscape
- `cookies.txt' format. lynx uses this file format, too.
-
- Don't expect cookies saved while the browser is running to be noticed by
- the browser (in fact, Mozilla on unix will overwrite your saved cookies if
- you change them on disk while it's running; on Windows, you probably can't
- save at all while the browser is running).
-
- Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
- Netscape cookies on saving.
-
- In particular, the cookie version and port number information is lost,
- together with information about whether or not Path, Port and Discard were
- specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
- domain as set in the HTTP header started with a dot (yes, I'm aware some
- domains in Netscape files start with a dot and some don't -- trust me, you
- really don't want to know any more about this).
-
- Note that though Mozilla and Netscape use the same format, they use
- slightly different headers. The class saves cookies using the Netscape
- header by default (Mozilla can cope with that).
-
- """
- magic_re = "#( Netscape)? HTTP Cookie File"
- header = """\
- # Netscape HTTP Cookie File
- # http://www.netscape.com/newsref/std/cookie_spec.html
- # This is a generated file! Do not edit.
-
-"""
-
- def _really_load(self, f, filename, ignore_discard, ignore_expires):
- now = time.time()
-
- magic = f.readline()
- if not re.search(self.magic_re, magic):
- f.close()
- raise LoadError(
- "%s does not look like a Netscape format cookies file" %
- filename)
-
- try:
- while 1:
- line = f.readline()
- if line == "": break
-
- # last field may be absent, so keep any trailing tab
- if line.endswith("\n"): line = line[:-1]
-
- # skip comments and blank lines XXX what is $ for?
- if (line.strip().startswith("#") or
- line.strip().startswith("$") or
- line.strip() == ""):
- continue
-
- domain, domain_specified, path, secure, expires, name, value = \
- line.split("\t", 6)
- secure = (secure == "TRUE")
- domain_specified = (domain_specified == "TRUE")
- if name == "":
- name = value
- value = None
-
- initial_dot = domain.startswith(".")
- if domain_specified != initial_dot:
- raise LoadError("domain and domain specified flag don't "
- "match in %s: %s" % (filename, line))
-
- discard = False
- if expires == "":
- expires = None
- discard = True
-
- # assume path_specified is false
- c = Cookie(0, name, value,
- None, False,
- domain, domain_specified, initial_dot,
- path, False,
- secure,
- expires,
- discard,
- None,
- None,
- {})
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
-
- except:
- reraise_unmasked_exceptions((IOError, LoadError))
- raise LoadError("invalid Netscape format file %s: %s" %
- (filename, line))
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename, "w")
- try:
- debug("Saving Netscape cookies.txt file")
- f.write(self.header)
- now = time.time()
- for cookie in self:
- if not ignore_discard and cookie.discard:
- debug(" Not saving %s: marked for discard", cookie.name)
- continue
- if not ignore_expires and cookie.is_expired(now):
- debug(" Not saving %s: expired", cookie.name)
- continue
- if cookie.secure: secure = "TRUE"
- else: secure = "FALSE"
- if cookie.domain.startswith("."): initial_dot = "TRUE"
- else: initial_dot = "FALSE"
- if cookie.expires is not None:
- expires = str(cookie.expires)
- else:
- expires = ""
- if cookie.value is None:
- # cookies.txt regards 'Set-Cookie: foo' as a cookie
- # with no name, whereas cookielib regards it as a
- # cookie with no value.
- name = ""
- value = cookie.name
- else:
- name = cookie.name
- value = cookie.value
- f.write(
- "\t".join([cookie.domain, initial_dot, cookie.path,
- secure, expires, name, value])+
- "\n")
- finally:
- f.close()
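
    A load/save round trip for the class above, as a sketch; the filename is
    hypothetical, and ignore_discard=True keeps session cookies that the
    format would otherwise drop:

        import mechanize

        cj = mechanize.MozillaCookieJar()
        cj.load("cookies.txt", ignore_discard=True, ignore_expires=True)
        br = mechanize.Browser()
        br.set_cookiejar(cj)
        br.open("http://example.com/")           # placeholder URL
        cj.save("cookies.txt", ignore_discard=True)
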
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py
deleted file mode 100644
index 1057811..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py
+++ /dev/null
@@ -1,388 +0,0 @@
-"""Microsoft Internet Explorer cookie loading on Windows.
-
-Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
-Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-# XXX names and comments are not great here
-
-import os, re, time, struct, logging
-if os.name == "nt":
- import _winreg
-
-from _clientcookie import FileCookieJar, CookieJar, Cookie, \
- MISSING_FILENAME_TEXT, LoadError
-
-debug = logging.getLogger("mechanize").debug
-
-
-def regload(path, leaf):
- key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
- _winreg.KEY_ALL_ACCESS)
- try:
- value = _winreg.QueryValueEx(key, leaf)[0]
- except WindowsError:
- value = None
- return value
-
-WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
-
-def epoch_time_offset_from_win32_filetime(filetime):
- """Convert from win32 filetime to seconds-since-epoch value.
-
- MSIE stores create and expire times as Win32 FILETIME, which is 64
- bits of 100 nanosecond intervals since Jan 01 1601.
-
- mechanize expects time as a 32-bit value expressed in seconds since the
- epoch (Jan 01 1970).
-
- """
- if filetime < WIN32_EPOCH:
- raise ValueError("filetime (%d) is before epoch (%d)" %
- (filetime, WIN32_EPOCH))
-
- return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
-
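
    A worked example of the conversion: the Win32 FILETIME for
    2000-01-01 00:00:00 UTC is 125911584000000000 (100 ns units since 1601),
    and subtracting WIN32_EPOCH then dividing by 10**7 recovers the familiar
    Unix timestamp:

        >>> (125911584000000000L - 0x019db1ded53e8000L) // 10 ** 7
        946684800L
        >>> import time; time.gmtime(946684800)[:6]
        (2000, 1, 1, 0, 0, 0)
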
-def binary_to_char(c): return "%02X" % ord(c)
-def binary_to_str(d): return "".join(map(binary_to_char, list(d)))
-
-class MSIEBase:
- magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
- padding = "\x0d\xf0\xad\x0b"
-
- msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
- cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
- "(.+\@[\x21-\xFF]+\.txt)")
-
- # path under HKEY_CURRENT_USER from which to get location of index.dat
- reg_path = r"software\microsoft\windows" \
- r"\currentversion\explorer\shell folders"
- reg_key = "Cookies"
-
- def __init__(self):
- self._delayload_domains = {}
-
- def _delayload_domain(self, domain):
- # if necessary, lazily load cookies for this domain
- delayload_info = self._delayload_domains.get(domain)
- if delayload_info is not None:
- cookie_file, ignore_discard, ignore_expires = delayload_info
- try:
- self.load_cookie_data(cookie_file,
- ignore_discard, ignore_expires)
- except (LoadError, IOError):
- debug("error reading cookie file, skipping: %s", cookie_file)
- else:
- del self._delayload_domains[domain]
-
- def _load_cookies_from_file(self, filename):
- debug("Loading MSIE cookies file: %s", filename)
- cookies = []
-
- cookies_fh = open(filename)
-
- try:
- while 1:
- key = cookies_fh.readline()
- if key == "": break
-
- rl = cookies_fh.readline
- def getlong(rl=rl): return long(rl().rstrip())
- def getstr(rl=rl): return rl().rstrip()
-
- key = key.rstrip()
- value = getstr()
- domain_path = getstr()
- flags = getlong() # 0x2000 bit is for secure I think
- lo_expire = getlong()
- hi_expire = getlong()
- lo_create = getlong()
- hi_create = getlong()
- sep = getstr()
-
- if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
- hi_create, lo_create, sep) or (sep != "*"):
- break
-
- m = self.msie_domain_re.search(domain_path)
- if m:
- domain = m.group(1)
- path = m.group(2)
-
- cookies.append({"KEY": key, "VALUE": value,
- "DOMAIN": domain, "PATH": path,
- "FLAGS": flags, "HIXP": hi_expire,
- "LOXP": lo_expire, "HICREATE": hi_create,
- "LOCREATE": lo_create})
- finally:
- cookies_fh.close()
-
- return cookies
-
- def load_cookie_data(self, filename,
- ignore_discard=False, ignore_expires=False):
- """Load cookies from file containing actual cookie data.
-
- Old cookies are kept unless overwritten by newly loaded ones.
-
- You should not call this method if the delayload attribute is set.
-
- I think each of these files contains all cookies for one user, domain,
- and path.
-
- filename: file containing cookies -- usually found in a file like
- C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
-
- """
- now = int(time.time())
-
- cookie_data = self._load_cookies_from_file(filename)
-
- for cookie in cookie_data:
- flags = cookie["FLAGS"]
- secure = ((flags & 0x2000) != 0)
- filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
- expires = epoch_time_offset_from_win32_filetime(filetime)
- if expires < now:
- discard = True
- else:
- discard = False
- domain = cookie["DOMAIN"]
- initial_dot = domain.startswith(".")
- if initial_dot:
- domain_specified = True
- else:
- # MSIE 5 does not record whether the domain cookie-attribute
- # was specified.
- # Assuming it wasn't is conservative, because with strict
- # domain matching this will match less frequently; with regular
- # Netscape tail-matching, this will match at exactly the same
- # times that domain_specified = True would. It also means we
- # don't have to prepend a dot to achieve consistency with our
- # own & Mozilla's domain-munging scheme.
- domain_specified = False
-
- # assume path_specified is false
- # XXX is there other stuff in here? -- eg. comment, commentURL?
- c = Cookie(0,
- cookie["KEY"], cookie["VALUE"],
- None, False,
- domain, domain_specified, initial_dot,
- cookie["PATH"], False,
- secure,
- expires,
- discard,
- None,
- None,
- {"flags": flags})
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- CookieJar.set_cookie(self, c)
-
- def load_from_registry(self, ignore_discard=False, ignore_expires=False,
- username=None):
- """
- username: only required on win9x
-
- """
- cookies_dir = regload(self.reg_path, self.reg_key)
- filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
- self.load(filename, ignore_discard, ignore_expires, username)
-
- def _really_load(self, index, filename, ignore_discard, ignore_expires,
- username):
- now = int(time.time())
-
- if username is None:
- username = os.environ['USERNAME'].lower()
-
- cookie_dir = os.path.dirname(filename)
-
- data = index.read(256)
- if len(data) != 256:
- raise LoadError("%s file is too short" % filename)
-
- # Cookies' index.dat file starts with 32 bytes of signature
- # followed by an offset to the first record, stored as a little-
- # endian DWORD.
- sig, size, data = data[:32], data[32:36], data[36:]
- size = struct.unpack("<L", size)[0]
-
- # check that sig is valid
- if not self.magic_re.match(sig) or size != 0x4000:
- raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
- (str(filename), sig, size))
-
- # skip to start of first record
- index.seek(size, 0)
-
- sector = 128 # size of sector in bytes
-
- while 1:
- data = ""
-
- # Cookies are usually in two contiguous sectors, so read in two
- # sectors and adjust if not a Cookie.
- to_read = 2 * sector
- d = index.read(to_read)
- if len(d) != to_read:
- break
- data = data + d
-
- # Each record starts with a 4-byte signature and a count
- # (little-endian DWORD) of sectors for the record.
- sig, size, data = data[:4], data[4:8], data[8:]
- size = struct.unpack("<L", size)[0]
-
- to_read = (size - 2) * sector
-
-## from urllib import quote
-## print "data", quote(data)
-## print "sig", quote(sig)
-## print "size in sectors", size
-## print "size in bytes", size*sector
-## print "size in units of 16 bytes", (size*sector) / 16
-## print "size to read in bytes", to_read
-## print
-
- if sig != "URL ":
- assert sig in ("HASH", "LEAK", \
- self.padding, "\x00\x00\x00\x00"), \
- "unrecognized MSIE index.dat record: %s" % \
- binary_to_str(sig)
- if sig == "\x00\x00\x00\x00":
- # assume we've got all the cookies, and stop
- break
- if sig == self.padding:
- continue
- # skip the rest of this record
- assert to_read >= 0
- if size != 2:
- assert to_read != 0
- index.seek(to_read, 1)
- continue
-
- # read in rest of record if necessary
- if size > 2:
- more_data = index.read(to_read)
- if len(more_data) != to_read: break
- data = data + more_data
-
- cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
- "(%s\@[\x21-\xFF]+\.txt)" % username)
- m = re.search(cookie_re, data, re.I)
- if m:
- cookie_file = os.path.join(cookie_dir, m.group(2))
- if not self.delayload:
- try:
- self.load_cookie_data(cookie_file,
- ignore_discard, ignore_expires)
- except (LoadError, IOError):
- debug("error reading cookie file, skipping: %s",
- cookie_file)
- else:
- domain = m.group(1)
- i = domain.find("/")
- if i != -1:
- domain = domain[:i]
-
- self._delayload_domains[domain] = (
- cookie_file, ignore_discard, ignore_expires)
-
-
-class MSIECookieJar(MSIEBase, FileCookieJar):
- """FileCookieJar that reads from the Windows MSIE cookies database.
-
- MSIECookieJar can read the cookie files of Microsoft Internet Explorer
- (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
- Windows 98. Other configurations may also work, but are untested. Saving
- cookies in MSIE format is NOT supported. If you save cookies, they'll be
- in the usual Set-Cookie3 format, which you can read back in using an
- instance of the plain old CookieJar class. Don't save using the same
- filename that you loaded cookies from, because you may succeed in
- clobbering your MSIE cookies index file!
-
- You should be able to have LWP share Internet Explorer's cookies like
- this (note you need to supply a username to load_from_registry if you're on
- Windows 9x or Windows ME):
-
- cj = MSIECookieJar(delayload=1)
- # find cookies index file in registry and load cookies from it
- cj.load_from_registry()
- opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
- response = opener.open("http://example.com/")
-
- Iterating over a delayloaded MSIECookieJar instance will not cause any
- cookies to be read from disk. To force reading of all cookies from disk,
- call read_all_cookies. Note that the following methods iterate over self:
- clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
- and as_string.
-
- Additional methods:
-
- load_from_registry(ignore_discard=False, ignore_expires=False,
- username=None)
- load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
- read_all_cookies()
-
- """
- def __init__(self, filename=None, delayload=False, policy=None):
- MSIEBase.__init__(self)
- FileCookieJar.__init__(self, filename, delayload, policy)
-
- def set_cookie(self, cookie):
- if self.delayload:
- self._delayload_domain(cookie.domain)
- CookieJar.set_cookie(self, cookie)
-
- def _cookies_for_request(self, request):
- """Return a list of cookies to be returned to server."""
- domains = self._cookies.copy()
- domains.update(self._delayload_domains)
- domains = domains.keys()
-
- cookies = []
- for domain in domains:
- cookies.extend(self._cookies_for_domain(domain, request))
- return cookies
-
- def _cookies_for_domain(self, domain, request):
- if not self._policy.domain_return_ok(domain, request):
- return []
- debug("Checking %s for cookies to return", domain)
- if self.delayload:
- self._delayload_domain(domain)
- return CookieJar._cookies_for_domain(self, domain, request)
-
- def read_all_cookies(self):
- """Eagerly read in all cookies."""
- if self.delayload:
- for domain in self._delayload_domains.keys():
- self._delayload_domain(domain)
-
- def load(self, filename, ignore_discard=False, ignore_expires=False,
- username=None):
- """Load cookies from an MSIE 'index.dat' cookies index file.
-
- filename: full path to cookie index file
- username: only required on win9x
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- index = open(filename, "rb")
-
- try:
- self._really_load(index, filename, ignore_discard, ignore_expires,
- username)
- finally:
- index.close()
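
    Building on the class docstring's example, a sketch that forces eager
    reads on a delay-loaded jar; it only makes sense on Windows with MSIE
    cookie files present:

        cj = MSIECookieJar(delayload=1)
        cj.load_from_registry()        # locate index.dat via the registry
        cj.read_all_cookies()          # read every per-domain cookie file now
        print len(cj), "cookies loaded"
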
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py
deleted file mode 100644
index d94eacf..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py
+++ /dev/null
@@ -1,436 +0,0 @@
-"""Integration with Python standard library module urllib2: OpenerDirector
-class.
-
-Copyright 2004-2006 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import os, urllib2, bisect, httplib, types, tempfile
-try:
- import threading as _threading
-except ImportError:
- import dummy_threading as _threading
-try:
- set
-except NameError:
- import sets
- set = sets.Set
-
-import _file
-import _http
-from _request import Request
-import _response
-import _rfc3986
-import _sockettimeout
-import _upgrade
-from _util import isstringlike
-
-
-class ContentTooShortError(urllib2.URLError):
- def __init__(self, reason, result):
- urllib2.URLError.__init__(self, reason)
- self.result = result
-
-
-def set_request_attr(req, name, value, default):
- try:
- getattr(req, name)
- except AttributeError:
- setattr(req, name, default)
- if value is not default:
- setattr(req, name, value)
-
-
-class OpenerDirector(urllib2.OpenerDirector):
- def __init__(self):
- urllib2.OpenerDirector.__init__(self)
- # really none of these are (sanely) public -- the lack of initial
- # underscore on some is just due to following urllib2
- self.process_response = {}
- self.process_request = {}
- self._any_request = {}
- self._any_response = {}
- self._handler_index_valid = True
- self._tempfiles = []
-
- def add_handler(self, handler):
- if handler in self.handlers:
- return
- # XXX why does self.handlers need to be sorted?
- bisect.insort(self.handlers, handler)
- handler.add_parent(self)
- self._handler_index_valid = False
-
- def _maybe_reindex_handlers(self):
- if self._handler_index_valid:
- return
-
- handle_error = {}
- handle_open = {}
- process_request = {}
- process_response = {}
- any_request = set()
- any_response = set()
- unwanted = []
-
- for handler in self.handlers:
- added = False
- for meth in dir(handler):
- if meth in ["redirect_request", "do_open", "proxy_open"]:
- # oops, coincidental match
- continue
-
- if meth == "any_request":
- any_request.add(handler)
- added = True
- continue
- elif meth == "any_response":
- any_response.add(handler)
- added = True
- continue
-
- ii = meth.find("_")
- scheme = meth[:ii]
- condition = meth[ii+1:]
-
- if condition.startswith("error"):
- jj = meth[ii+1:].find("_") + ii + 1
- kind = meth[jj+1:]
- try:
- kind = int(kind)
- except ValueError:
- pass
- lookup = handle_error.setdefault(scheme, {})
- elif condition == "open":
- kind = scheme
- lookup = handle_open
- elif condition == "request":
- kind = scheme
- lookup = process_request
- elif condition == "response":
- kind = scheme
- lookup = process_response
- else:
- continue
-
- lookup.setdefault(kind, set()).add(handler)
- added = True
-
- if not added:
- unwanted.append(handler)
-
- for handler in unwanted:
- self.handlers.remove(handler)
-
- # sort indexed methods
- # XXX could be cleaned up
- for lookup in [process_request, process_response]:
- for scheme, handlers in lookup.iteritems():
- lookup[scheme] = handlers
- for scheme, lookup in handle_error.iteritems():
- for code, handlers in lookup.iteritems():
- handlers = list(handlers)
- handlers.sort()
- lookup[code] = handlers
- for scheme, handlers in handle_open.iteritems():
- handlers = list(handlers)
- handlers.sort()
- handle_open[scheme] = handlers
-
- # cache the indexes
- self.handle_error = handle_error
- self.handle_open = handle_open
- self.process_request = process_request
- self.process_response = process_response
- self._any_request = any_request
- self._any_response = any_response
-
- def _request(self, url_or_req, data, visit,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- if isstringlike(url_or_req):
- req = Request(url_or_req, data, visit=visit, timeout=timeout)
- else:
- # already a urllib2.Request or mechanize.Request instance
- req = url_or_req
- if data is not None:
- req.add_data(data)
- # XXX yuck
- set_request_attr(req, "visit", visit, None)
- set_request_attr(req, "timeout", timeout,
- _sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
- return req
-
- def open(self, fullurl, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- req = self._request(fullurl, data, None, timeout)
- req_scheme = req.get_type()
-
- self._maybe_reindex_handlers()
-
- # pre-process request
- # XXX should we allow a Processor to change the URL scheme
- # of the request?
- request_processors = set(self.process_request.get(req_scheme, []))
- request_processors.update(self._any_request)
- request_processors = list(request_processors)
- request_processors.sort()
- for processor in request_processors:
- for meth_name in ["any_request", req_scheme+"_request"]:
- meth = getattr(processor, meth_name, None)
- if meth:
- req = meth(req)
-
- # In Python >= 2.4, .open() supports processors already, so we must
- # call ._open() instead.
- urlopen = getattr(urllib2.OpenerDirector, "_open",
- urllib2.OpenerDirector.open)
- response = urlopen(self, req, data)
-
- # post-process response
- response_processors = set(self.process_response.get(req_scheme, []))
- response_processors.update(self._any_response)
- response_processors = list(response_processors)
- response_processors.sort()
- for processor in response_processors:
- for meth_name in ["any_response", req_scheme+"_response"]:
- meth = getattr(processor, meth_name, None)
- if meth:
- response = meth(req, response)
-
- return response
-
- def error(self, proto, *args):
- if proto in ['http', 'https']:
- # XXX http[s] protocols are special-cased
- dict = self.handle_error['http'] # https is no different from http
- proto = args[2] # YUCK!
- meth_name = 'http_error_%s' % proto
- http_err = 1
- orig_args = args
- else:
- dict = self.handle_error
- meth_name = proto + '_error'
- http_err = 0
- args = (dict, proto, meth_name) + args
- result = apply(self._call_chain, args)
- if result:
- return result
-
- if http_err:
- args = (dict, 'default', 'http_error_default') + orig_args
- return apply(self._call_chain, args)
-
- BLOCK_SIZE = 1024*8
- def retrieve(self, fullurl, filename=None, reporthook=None, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- """Returns (filename, headers).
-
- For remote objects, the default filename will refer to a temporary
- file. Temporary files are removed when the OpenerDirector.close()
- method is called.
-
- For file: URLs, at present the returned filename is None. This may
- change in future.
-
- If the actual number of bytes read is less than indicated by the
- Content-Length header, raises ContentTooShortError (a URLError
- subclass). The exception's .result attribute contains the (filename,
- headers) that would have been returned.
-
- """
- req = self._request(fullurl, data, False, timeout)
- scheme = req.get_type()
- fp = self.open(req)
- headers = fp.info()
- if filename is None and scheme == 'file':
- # XXX req.get_selector() seems broken here, return None,
- # pending sanity :-/
- return None, headers
- #return urllib.url2pathname(req.get_selector()), headers
- if filename:
- tfp = open(filename, 'wb')
- else:
- path = _rfc3986.urlsplit(req.get_full_url())[2]
- suffix = os.path.splitext(path)[1]
- fd, filename = tempfile.mkstemp(suffix)
- self._tempfiles.append(filename)
- tfp = os.fdopen(fd, 'wb')
-
- result = filename, headers
- bs = self.BLOCK_SIZE
- size = -1
- read = 0
- blocknum = 0
- if reporthook:
- if "content-length" in headers:
- size = int(headers["Content-Length"])
- reporthook(blocknum, bs, size)
- while 1:
- block = fp.read(bs)
- if block == "":
- break
- read += len(block)
- tfp.write(block)
- blocknum += 1
- if reporthook:
- reporthook(blocknum, bs, size)
- fp.close()
- tfp.close()
- del fp
- del tfp
-
- # raise exception if actual size does not match content-length header
- if size >= 0 and read < size:
- raise ContentTooShortError(
- "retrieval incomplete: "
- "got only %i out of %i bytes" % (read, size),
- result
- )
-
- return result
-
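
    A sketch of retrieve() with a progress hook; the URL is a placeholder.
    The hook receives the block number, block size, and total size, where
    the total is -1 when no Content-Length header was sent:

        def progress(block_num, block_size, total_size):
            if total_size > 0:
                done = min(block_num * block_size, total_size)
                print "%d/%d bytes" % (done, total_size)

        opener = mechanize.build_opener()
        filename, headers = opener.retrieve("http://example.com/big.bin",
                                            reporthook=progress)
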
- def close(self):
- urllib2.OpenerDirector.close(self)
-
- # make it very obvious this object is no longer supposed to be used
- self.open = self.error = self.retrieve = self.add_handler = None
-
- if self._tempfiles:
- for filename in self._tempfiles:
- try:
- os.unlink(filename)
- except OSError:
- pass
- del self._tempfiles[:]
-
-
-def wrapped_open(urlopen, process_response_object, fullurl, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- success = True
- try:
- response = urlopen(fullurl, data, timeout)
- except urllib2.HTTPError, error:
- success = False
- if error.fp is None: # not a response
- raise
- response = error
-
- if response is not None:
- response = process_response_object(response)
-
- if not success:
- raise response
- return response
-
-class ResponseProcessingOpener(OpenerDirector):
-
- def open(self, fullurl, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- def bound_open(fullurl, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- return OpenerDirector.open(self, fullurl, data, timeout)
- return wrapped_open(
- bound_open, self.process_response_object, fullurl, data, timeout)
-
- def process_response_object(self, response):
- return response
-
-
-class SeekableResponseOpener(ResponseProcessingOpener):
- def process_response_object(self, response):
- return _response.seek_wrapped_response(response)
-
-
-class OpenerFactory:
- """This class's interface is quite likely to change."""
-
- default_classes = [
- # handlers
- urllib2.ProxyHandler,
- urllib2.UnknownHandler,
- _http.HTTPHandler, # derived from new AbstractHTTPHandler
- _http.HTTPDefaultErrorHandler,
- _http.HTTPRedirectHandler, # bugfixed
- urllib2.FTPHandler,
- _file.FileHandler,
- # processors
- _upgrade.HTTPRequestUpgradeProcessor,
- _http.HTTPCookieProcessor,
- _http.HTTPErrorProcessor,
- ]
- if hasattr(httplib, 'HTTPS'):
- default_classes.append(_http.HTTPSHandler)
- handlers = []
- replacement_handlers = []
-
- def __init__(self, klass=OpenerDirector):
- self.klass = klass
-
- def build_opener(self, *handlers):
- """Create an opener object from a list of handlers and processors.
-
- The opener will use several default handlers and processors, including
- support for HTTP and FTP.
-
- If any of the handlers passed as arguments are subclasses of the
- default handlers, the default handlers will not be used.
-
- """
- opener = self.klass()
- default_classes = list(self.default_classes)
- skip = []
- for klass in default_classes:
- for check in handlers:
- if type(check) == types.ClassType:
- if issubclass(check, klass):
- skip.append(klass)
- elif type(check) == types.InstanceType:
- if isinstance(check, klass):
- skip.append(klass)
- for klass in skip:
- default_classes.remove(klass)
-
- for klass in default_classes:
- opener.add_handler(klass())
- for h in handlers:
- if type(h) == types.ClassType:
- h = h()
- opener.add_handler(h)
-
- return opener
-
-
-build_opener = OpenerFactory().build_opener
-
-_opener = None
-urlopen_lock = _threading.Lock()
-def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- global _opener
- if _opener is None:
- urlopen_lock.acquire()
- try:
- if _opener is None:
- _opener = build_opener()
- finally:
- urlopen_lock.release()
- return _opener.open(url, data, timeout)
-
-def urlretrieve(url, filename=None, reporthook=None, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- global _opener
- if _opener is None:
- urlopen_lock.acquire()
- try:
- if _opener is None:
- _opener = build_opener()
- finally:
- urlopen_lock.release()
- return _opener.retrieve(url, filename, reporthook, data, timeout)
-
-def install_opener(opener):
- global _opener
- _opener = opener
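
    The module-level functions mirror urllib2's; a sketch with a placeholder
    URL:

        import mechanize

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor())
        mechanize.install_opener(opener)    # used by urlopen()/urlretrieve()
        response = mechanize.urlopen("http://example.com/")
        print response.info()["Content-Type"]
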
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py
deleted file mode 100644
index 4d8d9d3..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py
+++ /dev/null
@@ -1,390 +0,0 @@
-"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser.
-
-Examples
-
-This program extracts all links from a document. It will print one
-line for each link, containing the URL and the textual description
-between the <A>...</A> tags:
-
-import pullparser, sys
-f = file(sys.argv[1])
-p = pullparser.PullParser(f)
-for token in p.tags("a"):
- if token.type == "endtag": continue
- url = dict(token.attrs).get("href", "-")
- text = p.get_compressed_text(endat=("endtag", "a"))
- print "%s\t%s" % (url, text)
-
-This program extracts the <TITLE> from the document:
-
-import pullparser, sys
-f = file(sys.argv[1])
-p = pullparser.PullParser(f)
-if p.get_tag("title"):
- title = p.get_compressed_text()
- print "Title: %s" % title
-
-
-Copyright 2003-2006 John J. Lee <jjl@pobox.com>
-Copyright 1998-2001 Gisle Aas (original libwww-perl code)
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses.
-
-"""
-
-import re, htmlentitydefs
-import sgmllib, HTMLParser
-from xml.sax import saxutils
-
-from _html import unescape, unescape_charref
-
-
-class NoMoreTokensError(Exception): pass
-
-class Token:
- """Represents an HTML tag, declaration, processing instruction etc.
-
- Behaves as both a tuple-like object (ie. iterable) and has attributes
- .type, .data and .attrs.
-
- >>> t = Token("starttag", "a", [("href", "http://www.python.org/")])
- >>> t == ("starttag", "a", [("href", "http://www.python.org/")])
- True
- >>> (t.type, t.data) == ("starttag", "a")
- True
- >>> t.attrs == [("href", "http://www.python.org/")]
- True
-
- Public attributes
-
- type: one of "starttag", "endtag", "startendtag", "charref", "entityref",
- "data", "comment", "decl", "pi", after the corresponding methods of
- HTMLParser.HTMLParser
- data: For a tag, the tag name; otherwise, the relevant data carried by the
- tag, as a string
- attrs: list of (name, value) pairs representing HTML attributes
- (or None if token does not represent an opening tag)
-
- """
- def __init__(self, type, data, attrs=None):
- self.type = type
- self.data = data
- self.attrs = attrs
- def __iter__(self):
- return iter((self.type, self.data, self.attrs))
- def __eq__(self, other):
- type, data, attrs = other
- if (self.type == type and
- self.data == data and
- self.attrs == attrs):
- return True
- else:
- return False
- def __ne__(self, other): return not self.__eq__(other)
- def __repr__(self):
- args = ", ".join(map(repr, [self.type, self.data, self.attrs]))
- return self.__class__.__name__+"(%s)" % args
-
- def __str__(self):
- """
- >>> print Token("starttag", "br")
- <br>
- >>> print Token("starttag", "a",
- ... [("href", "http://www.python.org/"), ("alt", '"foo"')])
- <a href="http://www.python.org/" alt='"foo"'>
- >>> print Token("startendtag", "br")
- <br />
- >>> print Token("startendtag", "br", [("spam", "eggs")])
- <br spam="eggs" />
- >>> print Token("endtag", "p")
- </p>
- >>> print Token("charref", "38")
- &#38;
- >>> print Token("entityref", "amp")
- &amp;
- >>> print Token("data", "foo\\nbar")
- foo
- bar
- >>> print Token("comment", "Life is a bowl\\nof cherries.")
- <!--Life is a bowl
- of cherries.-->
- >>> print Token("decl", "decl")
- <!decl>
- >>> print Token("pi", "pi")
- <?pi>
- """
- if self.attrs is not None:
- attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for
- k, v in self.attrs])
- else:
- attrs = ""
- if self.type == "starttag":
- return "<%s%s>" % (self.data, attrs)
- elif self.type == "startendtag":
- return "<%s%s />" % (self.data, attrs)
- elif self.type == "endtag":
- return "</%s>" % self.data
- elif self.type == "charref":
- return "&#%s;" % self.data
- elif self.type == "entityref":
- return "&%s;" % self.data
- elif self.type == "data":
- return self.data
- elif self.type == "comment":
- return "<!--%s-->" % self.data
- elif self.type == "decl":
- return "<!%s>" % self.data
- elif self.type == "pi":
- return "<?%s>" % self.data
- assert False
-
-
-def iter_until_exception(fn, exception, *args, **kwds):
- while 1:
- try:
- yield fn(*args, **kwds)
- except exception:
- raise StopIteration
-
-
-class _AbstractParser:
- chunk = 1024
- compress_re = re.compile(r"\s+")
- def __init__(self, fh, textify={"img": "alt", "applet": "alt"},
- encoding="ascii", entitydefs=None):
- """
- fh: file-like object (only a .read() method is required) from which to
- read HTML to be parsed
- textify: mapping used by .get_text() and .get_compressed_text() methods
- to represent opening tags as text
- encoding: encoding used to encode numeric character references by
- .get_text() and .get_compressed_text() ("ascii" by default)
-
- entitydefs: mapping like {"amp": "&", ...} containing HTML entity
- definitions (a sensible default is used). This is used to unescape
- entities in .get_text() (and .get_compressed_text()) and attribute
- values. If the encoding can not represent the character, the entity
- reference is left unescaped. Note that entity references (both
- numeric - e.g. &#123; or &#xabc; - and non-numeric - e.g. &amp;) are
- unescaped in attribute values and the return value of .get_text(), but
- not in data outside of tags. Instead, entity references outside of
- tags are represented as tokens. This is a bit odd, it's true :-/
-
- If the element name of an opening tag matches a key in the textify
- mapping then that tag is converted to text. The corresponding value is
- used to specify which tag attribute to obtain the text from. textify
- maps from element names to either:
-
- - an HTML attribute name, in which case the HTML attribute value is
- used as its text value along with the element name in square
- brackets (eg."alt text goes here[IMG]", or, if the alt attribute
- were missing, just "[IMG]")
- - a callable object (eg. a function) which takes a Token and returns
- the string to be used as its text value
-
- If textify has no key for an element name, nothing is substituted for
- the opening tag.
-
- Public attributes:
-
- encoding and textify: see above
-
- """
- self._fh = fh
- self._tokenstack = [] # FIFO
- self.textify = textify
- self.encoding = encoding
- if entitydefs is None:
- entitydefs = htmlentitydefs.name2codepoint
- self._entitydefs = entitydefs
-
- def __iter__(self): return self
-
- def tags(self, *names):
- return iter_until_exception(self.get_tag, NoMoreTokensError, *names)
-
- def tokens(self, *tokentypes):
- return iter_until_exception(self.get_token, NoMoreTokensError,
- *tokentypes)
-
- def next(self):
- try:
- return self.get_token()
- except NoMoreTokensError:
- raise StopIteration()
-
- def get_token(self, *tokentypes):
- """Pop the next Token object from the stack of parsed tokens.
-
- If arguments are given, they are taken to be token types in which the
- caller is interested: tokens representing other elements will be
- skipped. Element names must be given in lower case.
-
- Raises NoMoreTokensError.
-
- """
- while 1:
- while self._tokenstack:
- token = self._tokenstack.pop(0)
- if tokentypes:
- if token.type in tokentypes:
- return token
- else:
- return token
- data = self._fh.read(self.chunk)
- if not data:
- raise NoMoreTokensError()
- self.feed(data)
-
- def unget_token(self, token):
- """Push a Token back onto the stack."""
- self._tokenstack.insert(0, token)
-
- def get_tag(self, *names):
- """Return the next Token that represents an opening or closing tag.
-
- If arguments are given, they are taken to be element names in which the
- caller is interested: tags representing other elements will be skipped.
- Element names must be given in lower case.
-
- Raises NoMoreTokensError.
-
- """
- while 1:
- tok = self.get_token()
- if tok.type not in ["starttag", "endtag", "startendtag"]:
- continue
- if names:
- if tok.data in names:
- return tok
- else:
- return tok
-
- def get_text(self, endat=None):
- """Get some text.
-
- endat: stop reading text at this tag (the tag is included in the
- returned text); endtag is a tuple (type, name) where type is
- "starttag", "endtag" or "startendtag", and name is the element name of
- the tag (element names must be given in lower case)
-
- If endat is not given, .get_text() will stop at the next opening or
- closing tag, or when there are no more tokens (no exception is raised).
- Note that .get_text() includes the text representation (if any) of the
- opening tag, but pushes the opening tag back onto the stack. As a
- result, if you want to call .get_text() again, you need to call
- .get_tag() first (unless you want an empty string returned when you
- next call .get_text()).
-
- Entity references are translated using the value of the entitydefs
- constructor argument (a mapping from names to characters like that
- provided by the standard module htmlentitydefs). Named entity
- references that are not in this mapping are left unchanged.
-
- The textify attribute is used to translate opening tags into text: see
- the class docstring.
-
- """
- text = []
- tok = None
- while 1:
- try:
- tok = self.get_token()
- except NoMoreTokensError:
- # unget last token (not the one we just failed to get)
- if tok: self.unget_token(tok)
- break
- if tok.type == "data":
- text.append(tok.data)
- elif tok.type == "entityref":
- t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
- text.append(t)
- elif tok.type == "charref":
- t = unescape_charref(tok.data, self.encoding)
- text.append(t)
- elif tok.type in ["starttag", "endtag", "startendtag"]:
- tag_name = tok.data
- if tok.type in ["starttag", "startendtag"]:
- alt = self.textify.get(tag_name)
- if alt is not None:
- if callable(alt):
- text.append(alt(tok))
- elif tok.attrs is not None:
- for k, v in tok.attrs:
- if k == alt:
- text.append(v)
- text.append("[%s]" % tag_name.upper())
- if endat is None or endat == (tok.type, tag_name):
- self.unget_token(tok)
- break
- return "".join(text)
-
- def get_compressed_text(self, *args, **kwds):
- """
- As .get_text(), but collapses each group of contiguous whitespace to a
- single space character, and removes all initial and trailing
- whitespace.
-
- """
- text = self.get_text(*args, **kwds)
- text = text.strip()
- return self.compress_re.sub(" ", text)
-
- def handle_startendtag(self, tag, attrs):
- self._tokenstack.append(Token("startendtag", tag, attrs))
- def handle_starttag(self, tag, attrs):
- self._tokenstack.append(Token("starttag", tag, attrs))
- def handle_endtag(self, tag):
- self._tokenstack.append(Token("endtag", tag))
- def handle_charref(self, name):
- self._tokenstack.append(Token("charref", name))
- def handle_entityref(self, name):
- self._tokenstack.append(Token("entityref", name))
- def handle_data(self, data):
- self._tokenstack.append(Token("data", data))
- def handle_comment(self, data):
- self._tokenstack.append(Token("comment", data))
- def handle_decl(self, decl):
- self._tokenstack.append(Token("decl", decl))
- def unknown_decl(self, data):
- # XXX should this call self.error instead?
- #self.error("unknown declaration: " + `data`)
- self._tokenstack.append(Token("decl", data))
- def handle_pi(self, data):
- self._tokenstack.append(Token("pi", data))
-
- def unescape_attr(self, name):
- return unescape(name, self._entitydefs, self.encoding)
- def unescape_attrs(self, attrs):
- escaped_attrs = []
- for key, val in attrs:
- escaped_attrs.append((key, self.unescape_attr(val)))
- return escaped_attrs
-
-class PullParser(_AbstractParser, HTMLParser.HTMLParser):
- def __init__(self, *args, **kwds):
- HTMLParser.HTMLParser.__init__(self)
- _AbstractParser.__init__(self, *args, **kwds)
- def unescape(self, name):
- # Use the entitydefs passed into constructor, not
- # HTMLParser.HTMLParser's entitydefs.
- return self.unescape_attr(name)
-
-class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
- def __init__(self, *args, **kwds):
- sgmllib.SGMLParser.__init__(self)
- _AbstractParser.__init__(self, *args, **kwds)
- def unknown_starttag(self, tag, attrs):
- attrs = self.unescape_attrs(attrs)
- self._tokenstack.append(Token("starttag", tag, attrs))
- def unknown_endtag(self, tag):
- self._tokenstack.append(Token("endtag", tag))
-
-
-def _test():
- import doctest, _pullparser
- return doctest.testmod(_pullparser)
-
-if __name__ == "__main__":
- _test()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py
deleted file mode 100644
index 7824441..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py
+++ /dev/null
@@ -1,87 +0,0 @@
-"""Integration with Python standard library module urllib2: Request class.
-
-Copyright 2004-2006 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import urllib2, urllib, logging
-
-from _clientcookie import request_host_lc
-import _rfc3986
-import _sockettimeout
-
-warn = logging.getLogger("mechanize").warning
-
-
-class Request(urllib2.Request):
- def __init__(self, url, data=None, headers={},
- origin_req_host=None, unverifiable=False, visit=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- # In mechanize 0.2, the interpretation of a unicode url argument will
- # change: A unicode url argument will be interpreted as an IRI, and a
- # bytestring as a URI. For now, we accept unicode or bytestring. We
- # don't insist that the value is always a URI (specifically, must only
- # contain characters which are legal), because that might break working
- # code (who knows what bytes some servers want to see, especially with
- # browser plugins for internationalised URIs).
- if not _rfc3986.is_clean_uri(url):
- warn("url argument is not a URI "
- "(contains illegal characters) %r" % url)
- urllib2.Request.__init__(self, url, data, headers)
- self.selector = None
- self.unredirected_hdrs = {}
- self.visit = visit
- self.timeout = timeout
-
- # All the terminology below comes from RFC 2965.
- self.unverifiable = unverifiable
- # Set request-host of origin transaction.
- # The origin request-host is needed in order to decide whether
- # unverifiable sub-requests (automatic redirects, images embedded
- # in HTML, etc.) are to third-party hosts. If they are, the
- # resulting transactions might need to be conducted with cookies
- # turned off.
- if origin_req_host is None:
- origin_req_host = request_host_lc(self)
- self.origin_req_host = origin_req_host
-
- def get_selector(self):
- return urllib.splittag(self.__r_host)[0]
-
- def get_origin_req_host(self):
- return self.origin_req_host
-
- def is_unverifiable(self):
- return self.unverifiable
-
- def add_unredirected_header(self, key, val):
- """Add a header that will not be added to a redirected request."""
- self.unredirected_hdrs[key.capitalize()] = val
-
- def has_header(self, header_name):
- """True iff request has named header (regular or unredirected)."""
- return (header_name in self.headers or
- header_name in self.unredirected_hdrs)
-
- def get_header(self, header_name, default=None):
- return self.headers.get(
- header_name,
- self.unredirected_hdrs.get(header_name, default))
-
- def header_items(self):
- hdrs = self.unredirected_hdrs.copy()
- hdrs.update(self.headers)
- return hdrs.items()
-
- def __str__(self):
- return "<Request for %s>" % self.get_full_url()
-
- def get_method(self):
- if self.has_data():
- return "POST"
- else:
- return "GET"
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py
deleted file mode 100644
index fad9b57..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py
+++ /dev/null
@@ -1,527 +0,0 @@
-"""Response classes.
-
-The seek_wrapper code is not used if you're using UserAgent with
-.set_seekable_responses(False), or if you're using the urllib2-level interface
-without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is
-instantiated by some handlers (AbstractHTTPHandler), but the closeable_response
-interface is only depended upon by Browser-level code. Function
-upgrade_response is only used if you're using Browser or
-ResponseUpgradeProcessor.
-
-
-Copyright 2006 John J. Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
-included with the distribution).
-
-"""
-
-import copy, mimetools
-from cStringIO import StringIO
-import urllib2
-
-
-def len_of_seekable(file_):
- # this function exists because evaluation of len(file_.getvalue()) on every
- # .read() from seek_wrapper would be O(N**2) in number of .read()s
- pos = file_.tell()
- file_.seek(0, 2) # to end
- try:
- return file_.tell()
- finally:
- file_.seek(pos)
-
-
-# XXX Andrew Dalke kindly sent me a similar class in response to my request on
-# comp.lang.python, which I then proceeded to lose. I wrote this class
-# instead, but I think he's released his code publicly since, could pinch the
-# tests from it, at least...
-
-# For testing seek_wrapper invariant (note that
-# test_urllib2.HandlerTest.test_seekable is expected to fail when this
-# invariant checking is turned on). The invariant checking is done by module
-# ipdc, which is available here:
-# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
-## from ipdbc import ContractBase
-## class seek_wrapper(ContractBase):
-class seek_wrapper:
- """Adds a seek method to a file object.
-
- This is only designed for seeking on readonly file-like objects.
-
- Wrapped file-like object must have a read method. The readline method is
- only supported if that method is present on the wrapped object. The
- readlines method is always supported. xreadlines and iteration are
- supported only for Python 2.2 and above.
-
- Public attributes:
-
- wrapped: the wrapped file object
- is_closed: true iff .close() has been called
-
- WARNING: All other attributes of the wrapped object (i.e. those that are not
- one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
- are passed through unaltered, which may or may not make sense for your
- particular file object.
-
- """
- # General strategy is to check that cache is full enough, then delegate to
- # the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
- # position (self.__pos) is maintained independently of the cache, in order
- # that a single cache may be shared between multiple seek_wrapper objects.
- # Copying using module copy shares the cache in this way.
-
- def __init__(self, wrapped):
- self.wrapped = wrapped
- self.__read_complete_state = [False]
- self.__is_closed_state = [False]
- self.__have_readline = hasattr(self.wrapped, "readline")
- self.__cache = StringIO()
- self.__pos = 0 # seek position
-
- def invariant(self):
- # The end of the cache is always at the same place as the end of the
- # wrapped file (though the .tell() method is not required to be present
- # on wrapped file).
- return self.wrapped.tell() == len(self.__cache.getvalue())
-
- def close(self):
- self.wrapped.close()
- self.is_closed = True
-
- def __getattr__(self, name):
- if name == "is_closed":
- return self.__is_closed_state[0]
- elif name == "read_complete":
- return self.__read_complete_state[0]
-
- wrapped = self.__dict__.get("wrapped")
- if wrapped:
- return getattr(wrapped, name)
-
- return getattr(self.__class__, name)
-
- def __setattr__(self, name, value):
- if name == "is_closed":
- self.__is_closed_state[0] = bool(value)
- elif name == "read_complete":
- if not self.is_closed:
- self.__read_complete_state[0] = bool(value)
- else:
- self.__dict__[name] = value
-
- def seek(self, offset, whence=0):
- assert whence in [0,1,2]
-
- # how much data, if any, do we need to read?
- if whence == 2: # 2: relative to end of *wrapped* file
- if offset < 0: raise ValueError("negative seek offset")
- # since we don't know yet where the end of that file is, we must
- # read everything
- to_read = None
- else:
- if whence == 0: # 0: absolute
- if offset < 0: raise ValueError("negative seek offset")
- dest = offset
- else: # 1: relative to current position
- pos = self.__pos
- if pos + offset < 0:
- raise ValueError("seek to before start of file")
- dest = pos + offset
- end = len_of_seekable(self.__cache)
- to_read = dest - end
- if to_read < 0:
- to_read = 0
-
- if to_read != 0:
- self.__cache.seek(0, 2)
- if to_read is None:
- assert whence == 2
- self.__cache.write(self.wrapped.read())
- self.read_complete = True
- self.__pos = self.__cache.tell() - offset
- else:
- data = self.wrapped.read(to_read)
- if not data:
- self.read_complete = True
- else:
- self.__cache.write(data)
- # Don't raise an exception even if we've seek()ed past the end
- # of .wrapped, since fseek() doesn't complain in that case.
- # Also like fseek(), pretend we have seek()ed past the end,
- # i.e. not:
- #self.__pos = self.__cache.tell()
- # but rather:
- self.__pos = dest
- else:
- self.__pos = dest
-
- def tell(self):
- return self.__pos
-
- def __copy__(self):
- cpy = self.__class__(self.wrapped)
- cpy.__cache = self.__cache
- cpy.__read_complete_state = self.__read_complete_state
- cpy.__is_closed_state = self.__is_closed_state
- return cpy
-
- def get_data(self):
- pos = self.__pos
- try:
- self.seek(0)
- return self.read(-1)
- finally:
- self.__pos = pos
-
- def read(self, size=-1):
- pos = self.__pos
- end = len_of_seekable(self.__cache)
- available = end - pos
-
- # enough data already cached?
- if size <= available and size != -1:
- self.__cache.seek(pos)
- self.__pos = pos+size
- return self.__cache.read(size)
-
- # no, so read sufficient data from wrapped file and cache it
- self.__cache.seek(0, 2)
- if size == -1:
- self.__cache.write(self.wrapped.read())
- self.read_complete = True
- else:
- to_read = size - available
- assert to_read > 0
- data = self.wrapped.read(to_read)
- if not data:
- self.read_complete = True
- else:
- self.__cache.write(data)
- self.__cache.seek(pos)
-
- data = self.__cache.read(size)
- self.__pos = self.__cache.tell()
- assert self.__pos == pos + len(data)
- return data
-
- def readline(self, size=-1):
- if not self.__have_readline:
- raise NotImplementedError("no readline method on wrapped object")
-
- # line we're about to read might not be complete in the cache, so
- # read another line first
- pos = self.__pos
- self.__cache.seek(0, 2)
- data = self.wrapped.readline()
- if not data:
- self.read_complete = True
- else:
- self.__cache.write(data)
- self.__cache.seek(pos)
-
- data = self.__cache.readline()
- if size != -1:
- r = data[:size]
- self.__pos = pos+size
- else:
- r = data
- self.__pos = pos+len(data)
- return r
-
- def readlines(self, sizehint=-1):
- pos = self.__pos
- self.__cache.seek(0, 2)
- self.__cache.write(self.wrapped.read())
- self.read_complete = True
- self.__cache.seek(pos)
- data = self.__cache.readlines(sizehint)
- self.__pos = self.__cache.tell()
- return data
-
- def __iter__(self): return self
- def next(self):
- line = self.readline()
- if line == "": raise StopIteration
- return line
-
- xreadlines = __iter__
-
- def __repr__(self):
- return ("<%s at %s whose wrapped object = %r>" %
- (self.__class__.__name__, hex(abs(id(self))), self.wrapped))
-
-
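
As a usage sketch for the class above (assuming seek_wrapper is in scope): because copy.copy shares the byte cache, two wrappers around one non-seekable stream can replay the same data independently. OneWay below is a hypothetical stand-in for such a stream:

    import copy

    class OneWay:                          # hypothetical non-seekable stream
        def __init__(self, data):
            self._data = data
        def read(self, size=-1):
            if size == -1:
                size = len(self._data)
            chunk, self._data = self._data[:size], self._data[size:]
            return chunk

    w = seek_wrapper(OneWay("abcdef"))
    w2 = copy.copy(w)                      # shares w's cache
    assert w.read(3) == "abc"              # read from OneWay, fills the cache
    w2.seek(0)
    assert w2.read(3) == "abc"             # replayed from the shared cache
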
-class response_seek_wrapper(seek_wrapper):
-
- """
- Supports copying response objects and setting response body data.
-
- """
-
- def __init__(self, wrapped):
- seek_wrapper.__init__(self, wrapped)
- self._headers = self.wrapped.info()
-
- def __copy__(self):
- cpy = seek_wrapper.__copy__(self)
- # copy headers from delegate
- cpy._headers = copy.copy(self.info())
- return cpy
-
- # Note that .info() and .geturl() (the only two urllib2 response methods
- # that are not implemented by seek_wrapper) must be here explicitly rather
- # than by seek_wrapper's __getattr__ delegation, so that the nasty
- # dynamically-created HTTPError classes in get_seek_wrapper_class() get the
- # wrapped object's implementation, and not HTTPError's.
-
- def info(self):
- return self._headers
-
- def geturl(self):
- return self.wrapped.geturl()
-
- def set_data(self, data):
- self.seek(0)
- self.read()
- self.close()
- cache = self._seek_wrapper__cache = StringIO()
- cache.write(data)
- self.seek(0)
-
-
-class eoffile:
- # file-like object that always claims to be at end-of-file...
- def read(self, size=-1): return ""
- def readline(self, size=-1): return ""
- def __iter__(self): return self
- def next(self): return ""
- def close(self): pass
-
-class eofresponse(eoffile):
- def __init__(self, url, headers, code, msg):
- self._url = url
- self._headers = headers
- self.code = code
- self.msg = msg
- def geturl(self): return self._url
- def info(self): return self._headers
-
-
-class closeable_response:
- """Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
-
- Only supports responses returned by mechanize.HTTPHandler.
-
- After .close(), the following methods are supported:
-
- .read()
- .readline()
- .info()
- .geturl()
- .__iter__()
- .next()
- .close()
-
- and the following attributes are supported:
-
- .code
- .msg
-
- Also supports pickling (but the stdlib currently does something to prevent
- it: http://python.org/sf/1144636).
-
- """
- # presence of this attr indicates the response is usable after .close()
- closeable_response = None
-
- def __init__(self, fp, headers, url, code, msg):
- self._set_fp(fp)
- self._headers = headers
- self._url = url
- self.code = code
- self.msg = msg
-
- def _set_fp(self, fp):
- self.fp = fp
- self.read = self.fp.read
- self.readline = self.fp.readline
- if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
- if hasattr(self.fp, "fileno"):
- self.fileno = self.fp.fileno
- else:
- self.fileno = lambda: None
- self.__iter__ = self.fp.__iter__
- self.next = self.fp.next
-
- def __repr__(self):
- return '<%s at %s whose fp = %r>' % (
- self.__class__.__name__, hex(abs(id(self))), self.fp)
-
- def info(self):
- return self._headers
-
- def geturl(self):
- return self._url
-
- def close(self):
- wrapped = self.fp
- wrapped.close()
- new_wrapped = eofresponse(
- self._url, self._headers, self.code, self.msg)
- self._set_fp(new_wrapped)
-
- def __getstate__(self):
- # There are three obvious options here:
- # 1. truncate
- # 2. read to end
- # 3. close socket, pickle state including read position, then open
- # again on unpickle and use Range header
- # XXXX um, 4. refuse to pickle unless .close()d. This is better,
- # actually ("errors should never pass silently"). Pickling doesn't
- # work anyway ATM, because of http://python.org/sf/1144636 so fix
- # this later
-
- # 2 breaks pickle protocol, because one expects the original object
- # to be left unscathed by pickling. 3 is too complicated and
- # surprising (and too much work ;-) to happen in a sane __getstate__.
- # So we do 1.
-
- state = self.__dict__.copy()
- new_wrapped = eofresponse(
- self._url, self._headers, self.code, self.msg)
- state["wrapped"] = new_wrapped
- return state
-
-def test_response(data='test data', headers=[],
- url="http://example.com/", code=200, msg="OK"):
- return make_response(data, headers, url, code, msg)
-
-def test_html_response(data='test data', headers=[],
- url="http://example.com/", code=200, msg="OK"):
- headers += [("Content-type", "text/html")]
- return make_response(data, headers, url, code, msg)
-
-def make_response(data, headers, url, code, msg):
- """Convenient factory for objects implementing response interface.
-
- data: string containing response body data
- headers: sequence of (name, value) pairs
- url: URL of response
- code: integer response code (e.g. 200)
- msg: string response code message (e.g. "OK")
-
- """
- mime_headers = make_headers(headers)
- r = closeable_response(StringIO(data), mime_headers, url, code, msg)
- return response_seek_wrapper(r)
-
-
-def make_headers(headers):
- """
- headers: sequence of (name, value) pairs
- """
- hdr_text = []
- for name_value in headers:
- hdr_text.append("%s: %s" % name_value)
- return mimetools.Message(StringIO("\n".join(hdr_text)))
-
-
-# The rest of this module is especially horrible, but needed, at least until we
-# fork urllib2. Even then, we may want to preserve urllib2 compatibility.
-
-def get_seek_wrapper_class(response):
- # in order to wrap response objects that are also exceptions, we must
- # dynamically subclass the exception :-(((
- if (isinstance(response, urllib2.HTTPError) and
- not hasattr(response, "seek")):
- if response.__class__.__module__ == "__builtin__":
- exc_class_name = response.__class__.__name__
- else:
- exc_class_name = "%s.%s" % (
- response.__class__.__module__, response.__class__.__name__)
-
- class httperror_seek_wrapper(response_seek_wrapper, response.__class__):
- # this only derives from HTTPError in order to be a subclass --
- # the HTTPError behaviour comes from delegation
-
- _exc_class_name = exc_class_name
-
- def __init__(self, wrapped):
- response_seek_wrapper.__init__(self, wrapped)
- # be compatible with undocumented HTTPError attributes :-(
- self.hdrs = wrapped.info()
- self.filename = wrapped.geturl()
-
- def __repr__(self):
- return (
- "<%s (%s instance) at %s "
- "whose wrapped object = %r>" % (
- self.__class__.__name__, self._exc_class_name,
- hex(abs(id(self))), self.wrapped)
- )
- wrapper_class = httperror_seek_wrapper
- else:
- wrapper_class = response_seek_wrapper
- return wrapper_class
-
-def seek_wrapped_response(response):
- """Return a copy of response that supports seekable response interface.
-
- Accepts responses from both mechanize and urllib2 handlers.
-
- Copes with both ordinary response instances and HTTPError instances (which
- can't be simply wrapped due to the requirement of preserving the exception
- base class).
- """
- if not hasattr(response, "seek"):
- wrapper_class = get_seek_wrapper_class(response)
- response = wrapper_class(response)
- assert hasattr(response, "get_data")
- return response
-
-def upgrade_response(response):
- """Return a copy of response that supports Browser response interface.
-
- Browser response interface is that of "seekable responses"
- (response_seek_wrapper), plus the requirement that responses must be
- useable after .close() (closeable_response).
-
- Accepts responses from both mechanize and urllib2 handlers.
-
- Copes with both ordinary response instances and HTTPError instances (which
- can't be simply wrapped due to the requirement of preserving the exception
- base class).
- """
- wrapper_class = get_seek_wrapper_class(response)
- if hasattr(response, "closeable_response"):
- if not hasattr(response, "seek"):
- response = wrapper_class(response)
- assert hasattr(response, "get_data")
- return copy.copy(response)
-
- # a urllib2 handler constructed the response, i.e. the response is an
- # urllib.addinfourl or a urllib2.HTTPError, instead of a
- # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
- try:
- code = response.code
- except AttributeError:
- code = None
- try:
- msg = response.msg
- except AttributeError:
- msg = None
-
- # may have already-.read() data from .seek() cache
- data = None
- get_data = getattr(response, "get_data", None)
- if get_data:
- data = get_data()
-
- response = closeable_response(
- response.fp, response.info(), response.geturl(), code, msg)
- response = wrapper_class(response)
- if data:
- response.set_data(data)
- return response
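
A usage sketch tying the pieces above together (assuming this file is importable as _response): the factory builds a closeable_response wrapped in a response_seek_wrapper, so the result is both seekable and still usable after .close():

    from _response import test_html_response

    r = test_html_response("<html></html>")
    assert r.geturl() == "http://example.com/"
    assert r.info().gettype() == "text/html"
    body = r.read()
    r.seek(0)                 # response_seek_wrapper provides .seek()
    assert r.read() == body
    r.close()
    assert r.read() == ""     # closeable_response: still readable, now at EOF
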
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py
deleted file mode 100644
index 1bb5021..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py
+++ /dev/null
@@ -1,241 +0,0 @@
-"""RFC 3986 URI parsing and relative reference resolution / absolutization.
-
-(aka splitting and joining)
-
-Copyright 2006 John J. Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
-included with the distribution).
-
-"""
-
-# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
-
-import re, urllib
-
-## def chr_range(a, b):
-## return "".join(map(chr, range(ord(a), ord(b)+1)))
-
-## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-## "abcdefghijklmnopqrstuvwxyz"
-## "0123456789"
-## "-_.~")
-## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
-## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
-# this re matches any character that's not in URI_CHARS
-BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
-
-
-def clean_url(url, encoding):
- # percent-encode illegal URI characters
- # Trying to come up with test cases for this gave me a headache, revisit
- # when we do switch to unicode.
- # Somebody else's comments (lost the attribution):
-## - IE will return you the url in the encoding you send it
-## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
-## characters in your link. It will send you utf-8 however if there are...
- if type(url) == type(""):
- url = url.decode(encoding, "replace")
- url = url.strip()
- # for second param to urllib.quote(), we want URI_CHARS, minus the
- # 'always_safe' characters that urllib.quote() never percent-encodes
- return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
-
-def is_clean_uri(uri):
- """
- >>> is_clean_uri("ABC!")
- True
- >>> is_clean_uri(u"ABC!")
- True
- >>> is_clean_uri("ABC|")
- False
- >>> is_clean_uri(u"ABC|")
- False
- >>> is_clean_uri("http://example.com/0")
- True
- >>> is_clean_uri(u"http://example.com/0")
- True
- """
- # note module re treats bytestrings as though they were decoded as latin-1
- # so this function accepts both unicode and bytestrings
- return not bool(BAD_URI_CHARS_RE.search(uri))
-
-
-SPLIT_MATCH = re.compile(
- r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match
-def urlsplit(absolute_uri):
- """Return scheme, authority, path, query, fragment."""
- match = SPLIT_MATCH(absolute_uri)
- if match:
- g = match.groups()
- return g[1], g[3], g[4], g[6], g[8]
-
-def urlunsplit(parts):
- scheme, authority, path, query, fragment = parts
- r = []
- append = r.append
- if scheme is not None:
- append(scheme)
- append(":")
- if authority is not None:
- append("//")
- append(authority)
- append(path)
- if query is not None:
- append("?")
- append(query)
- if fragment is not None:
- append("#")
- append(fragment)
- return "".join(r)
-
-def urljoin(base_uri, uri_reference):
- return urlunsplit(urljoin_parts(urlsplit(base_uri),
- urlsplit(uri_reference)))
-
-# oops, this doesn't do the same thing as the literal translation
-# from the RFC below
-## import posixpath
-## def urljoin_parts(base_parts, reference_parts):
-## scheme, authority, path, query, fragment = base_parts
-## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
-
-## # compute target URI path
-## if rpath == "":
-## tpath = path
-## else:
-## tpath = rpath
-## if not tpath.startswith("/"):
-## tpath = merge(authority, path, tpath)
-## tpath = posixpath.normpath(tpath)
-
-## if rscheme is not None:
-## return (rscheme, rauthority, tpath, rquery, rfragment)
-## elif rauthority is not None:
-## return (scheme, rauthority, tpath, rquery, rfragment)
-## elif rpath == "":
-## if rquery is not None:
-## tquery = rquery
-## else:
-## tquery = query
-## return (scheme, authority, tpath, tquery, rfragment)
-## else:
-## return (scheme, authority, tpath, rquery, rfragment)
-
-def urljoin_parts(base_parts, reference_parts):
- scheme, authority, path, query, fragment = base_parts
- rscheme, rauthority, rpath, rquery, rfragment = reference_parts
-
- if rscheme == scheme:
- rscheme = None
-
- if rscheme is not None:
- tscheme, tauthority, tpath, tquery = (
- rscheme, rauthority, remove_dot_segments(rpath), rquery)
- else:
- if rauthority is not None:
- tauthority, tpath, tquery = (
- rauthority, remove_dot_segments(rpath), rquery)
- else:
- if rpath == "":
- tpath = path
- if rquery is not None:
- tquery = rquery
- else:
- tquery = query
- else:
- if rpath.startswith("/"):
- tpath = remove_dot_segments(rpath)
- else:
- tpath = merge(authority, path, rpath)
- tpath = remove_dot_segments(tpath)
- tquery = rquery
- tauthority = authority
- tscheme = scheme
- tfragment = rfragment
- return (tscheme, tauthority, tpath, tquery, tfragment)
-
-# um, something *vaguely* like this is what I want, but I have to generate
-# lots of test cases first, if only to understand what it is that
-# remove_dot_segments really does...
-## def remove_dot_segments(path):
-## if path == '':
-## return ''
-## comps = path.split('/')
-## new_comps = []
-## for comp in comps:
-## if comp in ['.', '']:
-## if not new_comps or new_comps[-1]:
-## new_comps.append('')
-## continue
-## if comp != '..':
-## new_comps.append(comp)
-## elif new_comps:
-## new_comps.pop()
-## return '/'.join(new_comps)
-
-
-def remove_dot_segments(path):
- r = []
- while path:
- # A
- if path.startswith("../"):
- path = path[3:]
- continue
- if path.startswith("./"):
- path = path[2:]
- continue
- # B
- if path.startswith("/./"):
- path = path[2:]
- continue
- if path == "/.":
- path = "/"
- continue
- # C
- if path.startswith("/../"):
- path = path[3:]
- if r:
- r.pop()
- continue
- if path == "/..":
- path = "/"
- if r:
- r.pop()
- continue
- # D
- if path == ".":
- path = path[1:]
- continue
- if path == "..":
- path = path[2:]
- continue
- # E
- start = 0
- if path.startswith("/"):
- start = 1
- ii = path.find("/", start)
- if ii < 0:
- ii = None
- r.append(path[:ii])
- if ii is None:
- break
- path = path[ii:]
- return "".join(r)
-
-def merge(base_authority, base_path, ref_path):
- # XXXX Oddly, the sample Perl implementation of this by Roy Fielding
- # doesn't even take base_authority as a parameter, despite the wording in
- # the RFC suggesting otherwise. Perhaps I'm missing some obvious identity.
- #if base_authority is not None and base_path == "":
- if base_path == "":
- return "/" + ref_path
- ii = base_path.rfind("/")
- if ii >= 0:
- return base_path[:ii+1] + ref_path
- return ref_path
-
-if __name__ == "__main__":
- import doctest
- doctest.testmod()
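
The joining algorithm above can be checked against the worked examples in RFC 3986 section 5.4, which use the base "http://a/b/c/d;p?q". A short sketch, assuming this file is importable as _rfc3986:

    from _rfc3986 import urljoin, remove_dot_segments

    base = "http://a/b/c/d;p?q"
    assert urljoin(base, "g") == "http://a/b/c/g"
    assert urljoin(base, "./g") == "http://a/b/c/g"
    assert urljoin(base, "../g") == "http://a/b/g"
    assert remove_dot_segments("/a/b/c/./../../g") == "/a/g"
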
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py
deleted file mode 100644
index 4086d52..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from urllib2 import BaseHandler
-from _util import deprecation
-from _response import response_seek_wrapper
-
-
-class SeekableProcessor(BaseHandler):
- """Deprecated: Make responses seekable."""
-
- def __init__(self):
- deprecation(
- "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable")
-
- def any_response(self, request, response):
- if not hasattr(response, "seek"):
- return response_seek_wrapper(response)
- return response
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py
deleted file mode 100644
index c22b734..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import socket
-
-try:
- _GLOBAL_DEFAULT_TIMEOUT = socket._GLOBAL_DEFAULT_TIMEOUT
-except AttributeError:
- _GLOBAL_DEFAULT_TIMEOUT = object()
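
The try/except above exists because socket._GLOBAL_DEFAULT_TIMEOUT only appeared in Python 2.6; older interpreters get a freshly minted sentinel instead. Either way, the sentinel lets an API distinguish "caller said nothing about timeouts" from an explicit timeout=None. A minimal sketch of the pattern (open_thing is a hypothetical function, not part of this module):

    _GLOBAL_DEFAULT_TIMEOUT = object()          # unique sentinel value

    def open_thing(timeout=_GLOBAL_DEFAULT_TIMEOUT):
        if timeout is _GLOBAL_DEFAULT_TIMEOUT:
            return "no timeout argument given; use the global default"
        return "caller explicitly passed timeout=%r" % (timeout,)

    print open_thing()        # global default requested
    print open_thing(None)    # explicit: block forever
    print open_thing(30.0)    # explicit: 30 seconds
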
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py
deleted file mode 100644
index a13cca3..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import shutil
-import tempfile
-import unittest
-
-
-class SetupStack(object):
-
- def __init__(self):
- self._on_teardown = []
-
- def add_teardown(self, teardown):
- self._on_teardown.append(teardown)
-
- def tear_down(self):
- for func in reversed(self._on_teardown):
- func()
-
-
-class TearDownConvenience(object):
-
- def __init__(self, setup_stack=None):
- self._own_setup_stack = setup_stack is None
- if setup_stack is None:
- setup_stack = SetupStack()
- self._setup_stack = setup_stack
-
- # only call this convenience method if no setup_stack was supplied to c'tor
- def tear_down(self):
- assert self._own_setup_stack
- self._setup_stack.tear_down()
-
-
-class TempDirMaker(TearDownConvenience):
-
- def make_temp_dir(self):
- temp_dir = tempfile.mkdtemp(prefix="tmp-%s-" % self.__class__.__name__)
- def tear_down():
- shutil.rmtree(temp_dir)
- self._setup_stack.add_teardown(tear_down)
- return temp_dir
-
-
-class MonkeyPatcher(TearDownConvenience):
-
- def monkey_patch(self, obj, name, value):
- orig_value = getattr(obj, name)
- setattr(obj, name, value)
- def reverse_patch():
- setattr(obj, name, orig_value)
- self._setup_stack.add_teardown(reverse_patch)
-
-
-class TestCase(unittest.TestCase):
-
- def setUp(self):
- self._setup_stack = SetupStack()
-
- def tearDown(self):
- self._setup_stack.tear_down()
-
- def make_temp_dir(self, *args, **kwds):
- return TempDirMaker(self._setup_stack).make_temp_dir(*args, **kwds)
-
- def monkey_patch(self, *args, **kwds):
- return MonkeyPatcher(self._setup_stack).monkey_patch(*args, **kwds)
-
- def assert_contains(self, container, containee):
- self.assertTrue(containee in container, "%r not in %r" %
- (containee, container))
-
- def assert_less_than(self, got, expected):
- self.assertTrue(got < expected, "%r >= %r" %
- (got, expected))
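
A quick sketch of how the pieces above fit together (assuming this file is importable as _testcase): teardown callbacks run in reverse registration order, so later-acquired resources are released first, LIFO-style:

    from _testcase import SetupStack

    order = []
    stack = SetupStack()
    stack.add_teardown(lambda: order.append("registered first"))
    stack.add_teardown(lambda: order.append("registered second"))
    stack.tear_down()
    assert order == ["registered second", "registered first"]
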
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py
deleted file mode 100644
index df59c01..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from urllib2 import BaseHandler
-
-from _request import Request
-from _response import upgrade_response
-from _util import deprecation
-
-
-class HTTPRequestUpgradeProcessor(BaseHandler):
- # upgrade urllib2.Request to this module's Request
- # yuck!
- handler_order = 0 # before anything else
-
- def http_request(self, request):
- if not hasattr(request, "add_unredirected_header"):
- newrequest = Request(request.get_full_url(), request.data,
- request.headers)
- try: newrequest.origin_req_host = request.origin_req_host
- except AttributeError: pass
- try: newrequest.unverifiable = request.unverifiable
- except AttributeError: pass
- try: newrequest.visit = request.visit
- except AttributeError: pass
- request = newrequest
- return request
-
- https_request = http_request
-
-
-class ResponseUpgradeProcessor(BaseHandler):
- # upgrade responses to be .close()able without becoming unusable
- handler_order = 0 # before anything else
-
- def __init__(self):
- deprecation(
- "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable")
-
- def any_response(self, request, response):
- if not hasattr(response, 'closeable_response'):
- response = upgrade_response(response)
- return response
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py
deleted file mode 100644
index cbb761b..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# urllib2 work-alike interface
-# ...from urllib2...
-from urllib2 import \
- URLError, \
- HTTPError, \
- BaseHandler, \
- UnknownHandler, \
- FTPHandler, \
- CacheFTPHandler
-# ...and from mechanize
-from _auth import \
- HTTPPasswordMgr, \
- HTTPPasswordMgrWithDefaultRealm, \
- AbstractBasicAuthHandler, \
- AbstractDigestAuthHandler, \
- HTTPProxyPasswordMgr, \
- ProxyHandler, \
- ProxyBasicAuthHandler, \
- ProxyDigestAuthHandler, \
- HTTPBasicAuthHandler, \
- HTTPDigestAuthHandler, \
- HTTPSClientCertMgr
-from _debug import \
- HTTPResponseDebugProcessor, \
- HTTPRedirectDebugProcessor
-from _file import \
- FileHandler
-# crap ATM
-## from _gzip import \
-## HTTPGzipProcessor
-from _http import \
- HTTPHandler, \
- HTTPDefaultErrorHandler, \
- HTTPRedirectHandler, \
- HTTPEquivProcessor, \
- HTTPCookieProcessor, \
- HTTPRefererProcessor, \
- HTTPRefreshProcessor, \
- HTTPErrorProcessor, \
- HTTPRobotRulesProcessor, \
- RobotExclusionError
-import httplib
-if hasattr(httplib, 'HTTPS'):
- from _http import HTTPSHandler
-del httplib
-from _opener import OpenerDirector, \
- SeekableResponseOpener, \
- build_opener, install_opener, urlopen
-from _request import \
- Request
-from _seek import \
- SeekableProcessor
-from _upgrade import \
- HTTPRequestUpgradeProcessor, \
- ResponseUpgradeProcessor
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py
deleted file mode 100644
index 723f87c..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py
+++ /dev/null
@@ -1,352 +0,0 @@
-"""Convenient HTTP UserAgent class.
-
-This is a subclass of urllib2.OpenerDirector.
-
-
-Copyright 2003-2006 John J. Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
-included with the distribution).
-
-"""
-
-import warnings
-
-import _auth
-import _gzip
-import _opener
-import _response
-import _sockettimeout
-import _urllib2
-
-
-class UserAgentBase(_opener.OpenerDirector):
- """Convenient user-agent class.
-
- Do not use .add_handler() to add a handler for something already dealt with
- by this code.
-
- The only reason at present for the distinction between UserAgent and
- UserAgentBase is so that classes that depend on .seek()able responses
- (e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass
- UserAgent exposes a .set_seekable_responses() method that allows switching
- off the adding of a .seek() method to responses.
-
- Public attributes:
-
- addheaders: list of (name, value) pairs specifying headers to send with
- every request, unless they are overridden in the Request instance.
-
- >>> ua = UserAgentBase()
- >>> ua.addheaders = [
- ... ("User-agent", "Mozilla/5.0 (compatible)"),
- ... ("From", "responsible.person@example.com")]
-
- """
-
- handler_classes = {
- # scheme handlers
- "http": _urllib2.HTTPHandler,
- # CacheFTPHandler is buggy, at least in 2.3, so we don't use it
- "ftp": _urllib2.FTPHandler,
- "file": _urllib2.FileHandler,
-
- # other handlers
- "_unknown": _urllib2.UnknownHandler,
- # HTTP{S,}Handler depend on HTTPErrorProcessor too
- "_http_error": _urllib2.HTTPErrorProcessor,
- "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor,
- "_http_default_error": _urllib2.HTTPDefaultErrorHandler,
-
- # feature handlers
- "_basicauth": _urllib2.HTTPBasicAuthHandler,
- "_digestauth": _urllib2.HTTPDigestAuthHandler,
- "_redirect": _urllib2.HTTPRedirectHandler,
- "_cookies": _urllib2.HTTPCookieProcessor,
- "_refresh": _urllib2.HTTPRefreshProcessor,
- "_equiv": _urllib2.HTTPEquivProcessor,
- "_proxy": _urllib2.ProxyHandler,
- "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
- "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
- "_robots": _urllib2.HTTPRobotRulesProcessor,
- "_gzip": _gzip.HTTPGzipProcessor, # experimental!
-
- # debug handlers
- "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
- "_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
- }
-
- default_schemes = ["http", "ftp", "file"]
- default_others = ["_unknown", "_http_error", "_http_request_upgrade",
- "_http_default_error",
- ]
- default_features = ["_redirect", "_cookies",
- "_refresh", "_equiv",
- "_basicauth", "_digestauth",
- "_proxy", "_proxy_basicauth", "_proxy_digestauth",
- "_robots",
- ]
- if hasattr(_urllib2, 'HTTPSHandler'):
- handler_classes["https"] = _urllib2.HTTPSHandler
- default_schemes.append("https")
-
- def __init__(self):
- _opener.OpenerDirector.__init__(self)
-
- ua_handlers = self._ua_handlers = {}
- for scheme in (self.default_schemes+
- self.default_others+
- self.default_features):
- klass = self.handler_classes[scheme]
- ua_handlers[scheme] = klass()
- for handler in ua_handlers.itervalues():
- self.add_handler(handler)
-
- # Yuck.
- # Ensure correct default constructor args were passed to
- # HTTPRefreshProcessor and HTTPEquivProcessor.
- if "_refresh" in ua_handlers:
- self.set_handle_refresh(True)
- if "_equiv" in ua_handlers:
- self.set_handle_equiv(True)
- # Ensure default password managers are installed.
- pm = ppm = None
- if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
- pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
- if ("_proxy_basicauth" in ua_handlers or
- "_proxy_digestauth" in ua_handlers):
- ppm = _auth.HTTPProxyPasswordMgr()
- self.set_password_manager(pm)
- self.set_proxy_password_manager(ppm)
- # set default certificate manager
- if "https" in ua_handlers:
- cm = _urllib2.HTTPSClientCertMgr()
- self.set_client_cert_manager(cm)
-
- def close(self):
- _opener.OpenerDirector.close(self)
- self._ua_handlers = None
-
- # XXX
-## def set_timeout(self, timeout):
-## self._timeout = timeout
-## def set_http_connection_cache(self, conn_cache):
-## self._http_conn_cache = conn_cache
-## def set_ftp_connection_cache(self, conn_cache):
-## # XXX ATM, FTP has cache as part of handler; should it be separate?
-## self._ftp_conn_cache = conn_cache
-
- def set_handled_schemes(self, schemes):
- """Set sequence of URL scheme (protocol) strings.
-
- For example: ua.set_handled_schemes(["http", "ftp"])
-
- If this fails (with ValueError) because you've passed an unknown
- scheme, the set of handled schemes will not be changed.
-
- """
- want = {}
- for scheme in schemes:
- if scheme.startswith("_"):
- raise ValueError("not a scheme '%s'" % scheme)
- if scheme not in self.handler_classes:
- raise ValueError("unknown scheme '%s'")
- want[scheme] = None
-
- # get rid of scheme handlers we don't want
- for scheme, oldhandler in self._ua_handlers.items():
- if scheme.startswith("_"): continue # not a scheme handler
- if scheme not in want:
- self._replace_handler(scheme, None)
- else:
- del want[scheme] # already got it
- # add the scheme handlers that are missing
- for scheme in want.keys():
- self._set_handler(scheme, True)
-
- def set_cookiejar(self, cookiejar):
- """Set a mechanize.CookieJar, or None."""
- self._set_handler("_cookies", obj=cookiejar)
-
- # XXX could use Greg Stein's httpx for some of this instead?
- # or httplib2??
- def set_proxies(self, proxies):
- """Set a dictionary mapping URL scheme to proxy specification, or None.
-
- e.g. {"http": "joe:password@myproxy.example.com:3128",
- "ftp": "proxy.example.com"}
-
- """
- self._set_handler("_proxy", obj=proxies)
-
- def add_password(self, url, user, password, realm=None):
- self._password_manager.add_password(realm, url, user, password)
- def add_proxy_password(self, user, password, hostport=None, realm=None):
- self._proxy_password_manager.add_password(
- realm, hostport, user, password)
-
- def add_client_certificate(self, url, key_file, cert_file):
- """Add an SSL client certificate, for HTTPS client auth.
-
- key_file and cert_file must be filenames of the key and certificate
- files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS
- 12) file to PEM format:
-
- openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
- openssl pkcs12 -nocerts -in cert.p12 -out key.pem
-
-
- Note that client certificate password input is very inflexible ATM. At
- the moment this seems to be console only, which is presumably the
- default behaviour of libopenssl. In future mechanize may support
- third-party libraries that (I assume) allow more options here.
-
- """
- self._client_cert_manager.add_key_cert(url, key_file, cert_file)
-
- # the following are rarely useful -- use add_password / add_proxy_password
- # instead
- def set_password_manager(self, password_manager):
- """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
- self._password_manager = password_manager
- self._set_handler("_basicauth", obj=password_manager)
- self._set_handler("_digestauth", obj=password_manager)
- def set_proxy_password_manager(self, password_manager):
- """Set a mechanize.HTTPProxyPasswordMgr, or None."""
- self._proxy_password_manager = password_manager
- self._set_handler("_proxy_basicauth", obj=password_manager)
- self._set_handler("_proxy_digestauth", obj=password_manager)
- def set_client_cert_manager(self, cert_manager):
- """Set a mechanize.HTTPClientCertMgr, or None."""
- self._client_cert_manager = cert_manager
- handler = self._ua_handlers["https"]
- handler.client_cert_manager = cert_manager
-
- # these methods all take a boolean parameter
- def set_handle_robots(self, handle):
- """Set whether to observe rules from robots.txt."""
- self._set_handler("_robots", handle)
- def set_handle_redirect(self, handle):
- """Set whether to handle HTTP 30x redirections."""
- self._set_handler("_redirect", handle)
- def set_handle_refresh(self, handle, max_time=None, honor_time=True):
- """Set whether to handle HTTP Refresh headers."""
- self._set_handler("_refresh", handle, constructor_kwds=
- {"max_time": max_time, "honor_time": honor_time})
- def set_handle_equiv(self, handle, head_parser_class=None):
- """Set whether to treat HTML http-equiv headers like HTTP headers.
-
- Response objects may be .seek()able if this is set (currently returned
- responses are, raised HTTPError exception responses are not).
-
- """
- if head_parser_class is not None:
- constructor_kwds = {"head_parser_class": head_parser_class}
- else:
- constructor_kwds={}
- self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)
- def set_handle_gzip(self, handle):
- """Handle gzip transfer encoding.
-
- """
- if handle:
- warnings.warn(
- "gzip transfer encoding is experimental!", stacklevel=2)
- self._set_handler("_gzip", handle)
- def set_debug_redirects(self, handle):
- """Log information about HTTP redirects (including refreshes).
-
- Logging is performed using module logging. The logger name is
- "mechanize.http_redirects". To actually print some debug output,
- eg:
-
- import sys, logging
- logger = logging.getLogger("mechanize.http_redirects")
- logger.addHandler(logging.StreamHandler(sys.stdout))
- logger.setLevel(logging.INFO)
-
- Other logger names relevant to this module:
-
- "mechanize.http_responses"
- "mechanize.cookies" (or "cookielib" if running Python 2.4)
-
- To turn on everything:
-
- import sys, logging
- logger = logging.getLogger("mechanize")
- logger.addHandler(logging.StreamHandler(sys.stdout))
- logger.setLevel(logging.INFO)
-
- """
- self._set_handler("_debug_redirect", handle)
- def set_debug_responses(self, handle):
- """Log HTTP response bodies.
-
- See docstring for .set_debug_redirects() for details of logging.
-
- Response objects may be .seek()able if this is set (currently returned
- responses are, raised HTTPError exception responses are not).
-
- """
- self._set_handler("_debug_response_body", handle)
- def set_debug_http(self, handle):
- """Print HTTP headers to sys.stdout."""
- level = int(bool(handle))
- for scheme in "http", "https":
- h = self._ua_handlers.get(scheme)
- if h is not None:
- h.set_http_debuglevel(level)
-
- def _set_handler(self, name, handle=None, obj=None,
- constructor_args=(), constructor_kwds={}):
- if handle is None:
- handle = obj is not None
- if handle:
- handler_class = self.handler_classes[name]
- if obj is not None:
- newhandler = handler_class(obj)
- else:
- newhandler = handler_class(
- *constructor_args, **constructor_kwds)
- else:
- newhandler = None
- self._replace_handler(name, newhandler)
-
- def _replace_handler(self, name, newhandler=None):
- # first, if handler was previously added, remove it
- if name is not None:
- handler = self._ua_handlers.get(name)
- if handler:
- try:
- self.handlers.remove(handler)
- except ValueError:
- pass
- # then add the replacement, if any
- if newhandler is not None:
- self.add_handler(newhandler)
- self._ua_handlers[name] = newhandler
-
-
-class UserAgent(UserAgentBase):
-
- def __init__(self):
- UserAgentBase.__init__(self)
- self._seekable = False
-
- def set_seekable_responses(self, handle):
- """Make response objects .seek()able."""
- self._seekable = bool(handle)
-
- def open(self, fullurl, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- if self._seekable:
- def bound_open(fullurl, data=None,
- timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
- return UserAgentBase.open(self, fullurl, data, timeout)
- response = _opener.wrapped_open(
- bound_open, _response.seek_wrapped_response, fullurl, data,
- timeout)
- else:
- response = UserAgentBase.open(self, fullurl, data, timeout)
- return response
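
An end-to-end sketch of the class above (network access assumed; example.com is a placeholder host), showing the seekable-response switch:

    import mechanize

    ua = mechanize.UserAgent()
    ua.set_handled_schemes(["http"])
    ua.set_handle_robots(False)
    ua.set_seekable_responses(True)
    response = ua.open("http://example.com/")
    data = response.read()
    response.seek(0)              # available because responses are seekable
    assert response.read() == data
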
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py
deleted file mode 100644
index dcdefa9..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py
+++ /dev/null
@@ -1,291 +0,0 @@
-"""Utility functions and date/time routines.
-
- Copyright 2002-2006 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD or ZPL 2.1 licenses (see the file
-COPYING.txt included with the distribution).
-
-"""
-
-import re, time, warnings
-
-
-class ExperimentalWarning(UserWarning):
- pass
-
-def experimental(message):
- warnings.warn(message, ExperimentalWarning, stacklevel=3)
-def hide_experimental_warnings():
- warnings.filterwarnings("ignore", category=ExperimentalWarning)
-def reset_experimental_warnings():
- warnings.filterwarnings("default", category=ExperimentalWarning)
-
-def deprecation(message):
- warnings.warn(message, DeprecationWarning, stacklevel=3)
-def hide_deprecations():
- warnings.filterwarnings("ignore", category=DeprecationWarning)
-def reset_deprecations():
- warnings.filterwarnings("default", category=DeprecationWarning)
-
-
-def isstringlike(x):
- try: x+""
- except: return False
- else: return True
-
-## def caller():
-## try:
-## raise SyntaxError
-## except:
-## import sys
-## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
-
-
-from calendar import timegm
-
-# Date/time conversion routines for formats used by the HTTP protocol.
-
-EPOCH = 1970
-def my_timegm(tt):
- year, month, mday, hour, min, sec = tt[:6]
- if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
- (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
- return timegm(tt)
- else:
- return None
-
-days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
-months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
-months_lower = []
-for month in months: months_lower.append(month.lower())
-
-
-def time2isoz(t=None):
- """Return a string representing time in seconds since epoch, t.
-
- If the function is called without an argument, it will use the current
- time.
-
- The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
- representing Universal Time (UTC, aka GMT). An example of this format is:
-
- 1994-11-24 08:49:37Z
-
- """
- if t is None: t = time.time()
- year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
- return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
- year, mon, mday, hour, min, sec)
-
-def time2netscape(t=None):
- """Return a string representing time in seconds since epoch, t.
-
- If the function is called without an argument, it will use the current
- time.
-
- The format of the returned string is like this:
-
- Wed, DD-Mon-YYYY HH:MM:SS GMT
-
- """
- if t is None: t = time.time()
- year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
- return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
- days[wday], mday, months[mon-1], year, hour, min, sec)
-
-
-UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
-
-timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
-def offset_from_tz_string(tz):
- offset = None
- if UTC_ZONES.has_key(tz):
- offset = 0
- else:
- m = timezone_re.search(tz)
- if m:
- offset = 3600 * int(m.group(2))
- if m.group(3):
- offset = offset + 60 * int(m.group(3))
- if m.group(1) == '-':
- offset = -offset
- return offset
-
-def _str2time(day, mon, yr, hr, min, sec, tz):
- # translate month name to number
- # month numbers start with 1 (January)
- try:
- mon = months_lower.index(mon.lower())+1
- except ValueError:
- # maybe it's already a number
- try:
- imon = int(mon)
- except ValueError:
- return None
- if 1 <= imon <= 12:
- mon = imon
- else:
- return None
-
- # make sure clock elements are defined
- if hr is None: hr = 0
- if min is None: min = 0
- if sec is None: sec = 0
-
- yr = int(yr)
- day = int(day)
- hr = int(hr)
- min = int(min)
- sec = int(sec)
-
- if yr < 1000:
- # find "obvious" year
- cur_yr = time.localtime(time.time())[0]
- m = cur_yr % 100
- tmp = yr
- yr = yr + cur_yr - m
- m = m - tmp
- if abs(m) > 50:
- if m > 0: yr = yr + 100
- else: yr = yr - 100
-
- # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
- t = my_timegm((yr, mon, day, hr, min, sec, tz))
-
- if t is not None:
- # adjust time using timezone string, to get absolute time since epoch
- if tz is None:
- tz = "UTC"
- tz = tz.upper()
- offset = offset_from_tz_string(tz)
- if offset is None:
- return None
- t = t - offset
-
- return t
-
-
-strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
- r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
-wkday_re = re.compile(
- r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
-loose_http_re = re.compile(
- r"""^
- (\d\d?) # day
- (?:\s+|[-\/])
- (\w+) # month
- (?:\s+|[-\/])
- (\d+) # year
- (?:
- (?:\s+|:) # separator before clock
- (\d\d?):(\d\d) # hour:min
- (?::(\d\d))? # optional seconds
- )? # optional clock
- \s*
- ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
- \s*
- (?:\(\w+\))? # ASCII representation of timezone in parens.
- \s*$""", re.X)
-def http2time(text):
- """Returns time in seconds since epoch of time represented by a string.
-
- Return value is an integer.
-
- None is returned if the format of str is unrecognized, the time is outside
- the representable range, or the timezone string is not recognized. If the
- string contains no timezone, UTC is assumed.
-
- The timezone in the string may be numerical (like "-0800" or "+0100") or a
- string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
- timezone strings equivalent to UTC (zero offset) are known to the function.
-
- The function loosely parses the following formats:
-
- Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
- Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
- Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
- 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
- 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
- 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
-
- The parser ignores leading and trailing whitespace. The time may be
- absent.
-
- If the year is given with only 2 digits, the function will select the
- century that makes the year closest to the current date.
-
- """
- # fast exit for strictly conforming string
- m = strict_re.search(text)
- if m:
- g = m.groups()
- mon = months_lower.index(g[1].lower()) + 1
- tt = (int(g[2]), mon, int(g[0]),
- int(g[3]), int(g[4]), float(g[5]))
- return my_timegm(tt)
-
- # No, we need some messy parsing...
-
- # clean up
- text = text.lstrip()
- text = wkday_re.sub("", text, 1) # Useless weekday
-
- # tz is time zone specifier string
- day, mon, yr, hr, min, sec, tz = [None]*7
-
- # loose regexp parse
- m = loose_http_re.search(text)
- if m is not None:
- day, mon, yr, hr, min, sec, tz = m.groups()
- else:
- return None # bad format
-
- return _str2time(day, mon, yr, hr, min, sec, tz)
-
-
-iso_re = re.compile(
- """^
- (\d{4}) # year
- [-\/]?
- (\d\d?) # numerical month
- [-\/]?
- (\d\d?) # day
- (?:
- (?:\s+|[-:Tt]) # separator before clock
- (\d\d?):?(\d\d) # hour:min
- (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
- )? # optional clock
- \s*
- ([-+]?\d\d?:?(:?\d\d)?
- |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
- \s*$""", re.X)
-def iso2time(text):
- """
- As for http2time, but parses the ISO 8601 formats:
-
- 1994-02-03 14:15:29 -0100 -- ISO 8601 format
- 1994-02-03 14:15:29 -- zone is optional
- 1994-02-03 -- only date
- 1994-02-03T14:15:29 -- Use T as separator
- 19940203T141529Z -- ISO 8601 compact format
- 19940203 -- only date
-
- """
- # clean up
- text = text.lstrip()
-
- # tz is time zone specifier string
- day, mon, yr, hr, min, sec, tz = [None]*7
-
- # loose regexp parse
- m = iso_re.search(text)
- if m is not None:
- # XXX there's an extra bit of the timezone I'm ignoring here: is
- # this the right thing to do?
- yr, mon, day, hr, min, sec, tz, _ = m.groups()
- else:
- return None # bad format
-
- return _str2time(day, mon, yr, hr, min, sec, tz)
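
The parsers above round-trip with the formatters (assuming this file is importable as _util): http2time returns seconds since the epoch, and time2isoz renders that number back as UTC:

    from _util import http2time, iso2time, time2isoz

    t = http2time("Wed, 09 Feb 1994 22:23:32 GMT")
    assert time2isoz(t) == "1994-02-09 22:23:32Z"
    assert iso2time("1994-02-09 22:23:32Z") == t
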
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py
deleted file mode 100755
index c319370..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py
+++ /dev/null
@@ -1,1254 +0,0 @@
-#!/usr/bin/python
-# pep8.py - Check Python source code formatting, according to PEP 8
-# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation files
-# (the "Software"), to deal in the Software without restriction,
-# including without limitation the rights to use, copy, modify, merge,
-# publish, distribute, sublicense, and/or sell copies of the Software,
-# and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-"""
-Check Python source code formatting, according to PEP 8:
-http://www.python.org/dev/peps/pep-0008/
-
-For usage and a list of options, try this:
-$ python pep8.py -h
-
-This program and its regression test suite live here:
-http://github.com/jcrocholl/pep8
-
-Groups of errors and warnings:
-E errors
-W warnings
-100 indentation
-200 whitespace
-300 blank lines
-400 imports
-500 line length
-600 deprecation
-700 statements
-
-You can add checks to this program by writing plugins. Each plugin is
-a simple function that is called for each line of source code, either
-physical or logical.
-
-Physical line:
-- Raw line of text from the input file.
-
-Logical line:
-- Multi-line statements converted to a single line.
-- Stripped left and right.
-- Contents of strings replaced with 'xxx' of same length.
-- Comments removed.
-
-The check function requests physical or logical lines by the name of
-the first argument:
-
-def maximum_line_length(physical_line)
-def extraneous_whitespace(logical_line)
-def blank_lines(logical_line, blank_lines, indent_level, line_number)
-
-The last example above demonstrates how check plugins can request
-additional information with extra arguments. All attributes of the
-Checker object are available. Some examples:
-
-lines: a list of the raw lines from the input file
-tokens: the tokens that contribute to this logical line
-line_number: line number in the input file
-blank_lines: blank lines before this one
-indent_char: first indentation character in this file (' ' or '\t')
-indent_level: indentation (with tabs expanded to multiples of 8)
-previous_indent_level: indentation on previous line
-previous_logical: previous logical line
-
-The docstring of each check function shall be the relevant part of
-text from PEP 8. It is printed if the user enables --show-pep8.
-Several docstrings contain examples directly from the PEP 8 document.
-
-Okay: spam(ham[1], {eggs: 2})
-E201: spam( ham[1], {eggs: 2})
-
-These examples are verified automatically when pep8.py is run with the
---doctest option. You can add examples for your own check functions.
-The format is simple: "Okay" or error/warning code followed by colon
-and space, the rest of the line is example source code. If you put 'r'
-before the docstring, you can use \n for newline, \t for tab and \s
-for space.
-
-"""
-
-__version__ = '0.5.0'
-
-import os
-import sys
-import re
-import time
-import inspect
-import tokenize
-from optparse import OptionParser
-from keyword import iskeyword
-from fnmatch import fnmatch
-
-DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
-DEFAULT_IGNORE = ['E24']
-
-INDENT_REGEX = re.compile(r'([ \t]*)')
-RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
-SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
-ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
-E301NOT_REGEX = re.compile(r'class |def |u?r?["\']')
-
-WHITESPACE = ' \t'
-
-BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>',
- '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=',
- '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<']
-UNARY_OPERATORS = ['**', '*', '+', '-']
-OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS
-
-options = None
-args = None
-
-
-##############################################################################
-# Plugins (check functions) for physical lines
-##############################################################################
-
-
-def tabs_or_spaces(physical_line, indent_char):
- r"""
- Never mix tabs and spaces.
-
- The most popular way of indenting Python is with spaces only. The
- second-most popular way is with tabs only. Code indented with a mixture
- of tabs and spaces should be converted to using spaces exclusively. When
- invoking the Python command line interpreter with the -t option, it issues
- warnings about code that illegally mixes tabs and spaces. When using -tt
- these warnings become errors. These options are highly recommended!
-
- Okay: if a == 0:\n a = 1\n b = 1
- E101: if a == 0:\n a = 1\n\tb = 1
- """
- indent = INDENT_REGEX.match(physical_line).group(1)
- for offset, char in enumerate(indent):
- if char != indent_char:
- return offset, "E101 indentation contains mixed spaces and tabs"
-
-
-def tabs_obsolete(physical_line):
- r"""
- For new projects, spaces-only are strongly recommended over tabs. Most
- editors have features that make this easy to do.
-
- Okay: if True:\n return
- W191: if True:\n\treturn
- """
- indent = INDENT_REGEX.match(physical_line).group(1)
- if indent.count('\t'):
- return indent.index('\t'), "W191 indentation contains tabs"
-
-
-def trailing_whitespace(physical_line):
- """
- JCR: Trailing whitespace is superfluous.
-
- Okay: spam(1)
- W291: spam(1)\s
- """
- physical_line = physical_line.rstrip('\n') # chr(10), newline
- physical_line = physical_line.rstrip('\r') # chr(13), carriage return
- physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L
- stripped = physical_line.rstrip()
- if physical_line != stripped:
- return len(stripped), "W291 trailing whitespace"
-
-
-def trailing_blank_lines(physical_line, lines, line_number):
- r"""
- JCR: Trailing blank lines are superfluous.
-
- Okay: spam(1)
- W391: spam(1)\n
- """
- if physical_line.strip() == '' and line_number == len(lines):
- return 0, "W391 blank line at end of file"
-
-
-def missing_newline(physical_line):
- """
- JCR: The last line should have a newline.
- """
- if physical_line.rstrip() == physical_line:
- return len(physical_line), "W292 no newline at end of file"
-
-
-def maximum_line_length(physical_line):
- """
- Limit all lines to a maximum of 79 characters.
-
- There are still many devices around that are limited to 80 character
- lines; plus, limiting windows to 80 characters makes it possible to have
- several windows side-by-side. The default wrapping on such devices looks
- ugly. Therefore, please limit all lines to a maximum of 79 characters.
- For flowing long blocks of text (docstrings or comments), limiting the
- length to 72 characters is recommended.
- """
- length = len(physical_line.rstrip())
- if length > 79:
- return 79, "E501 line too long (%d characters)" % length
-
-
-##############################################################################
-# Plugins (check functions) for logical lines
-##############################################################################
-
-
-def blank_lines(logical_line, blank_lines, indent_level, line_number,
- previous_logical, blank_lines_before_comment):
- r"""
- Separate top-level function and class definitions with two blank lines.
-
- Method definitions inside a class are separated by a single blank line.
-
- Extra blank lines may be used (sparingly) to separate groups of related
- functions. Blank lines may be omitted between a bunch of related
- one-liners (e.g. a set of dummy implementations).
-
- Use blank lines in functions, sparingly, to indicate logical sections.
-
- Okay: def a():\n pass\n\n\ndef b():\n pass
- Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass
-
- E301: class Foo:\n b = 0\n def bar():\n pass
- E302: def a():\n pass\n\ndef b(n):\n pass
- E303: def a():\n pass\n\n\n\ndef b(n):\n pass
- E303: def a():\n\n\n\n pass
- E304: @decorator\n\ndef a():\n pass
- """
- if line_number == 1:
- return # Don't expect blank lines before the first line
- max_blank_lines = max(blank_lines, blank_lines_before_comment)
- if previous_logical.startswith('@'):
- if max_blank_lines:
- return 0, "E304 blank lines found after function decorator"
- elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
- return 0, "E303 too many blank lines (%d)" % max_blank_lines
- elif (logical_line.startswith('def ') or
- logical_line.startswith('class ') or
- logical_line.startswith('@')):
- if indent_level:
- if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)):
- return 0, "E301 expected 1 blank line, found 0"
- elif max_blank_lines != 2:
- return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
-
-
-def extraneous_whitespace(logical_line):
- """
- Avoid extraneous whitespace in the following situations:
-
- - Immediately inside parentheses, brackets or braces.
-
- - Immediately before a comma, semicolon, or colon.
-
- Okay: spam(ham[1], {eggs: 2})
- E201: spam( ham[1], {eggs: 2})
- E201: spam(ham[ 1], {eggs: 2})
- E201: spam(ham[1], { eggs: 2})
- E202: spam(ham[1], {eggs: 2} )
- E202: spam(ham[1 ], {eggs: 2})
- E202: spam(ham[1], {eggs: 2 })
-
- E203: if x == 4: print x, y; x, y = y , x
- E203: if x == 4: print x, y ; x, y = y, x
- E203: if x == 4 : print x, y; x, y = y, x
- """
- line = logical_line
- for char in '([{':
- found = line.find(char + ' ')
- if found > -1:
- return found + 1, "E201 whitespace after '%s'" % char
- for char in '}])':
- found = line.find(' ' + char)
- if found > -1 and line[found - 1] != ',':
- return found, "E202 whitespace before '%s'" % char
- for char in ',;:':
- found = line.find(' ' + char)
- if found > -1:
- return found, "E203 whitespace before '%s'" % char
-
-
-def missing_whitespace(logical_line):
- """
- JCR: Each comma, semicolon or colon should be followed by whitespace.
-
- Okay: [a, b]
- Okay: (3,)
- Okay: a[1:4]
- Okay: a[:4]
- Okay: a[1:]
- Okay: a[1:4:2]
- E231: ['a','b']
- E231: foo(bar,baz)
- """
- line = logical_line
- for index in range(len(line) - 1):
- char = line[index]
- if char in ',;:' and line[index + 1] not in WHITESPACE:
- before = line[:index]
- if char == ':' and before.count('[') > before.count(']'):
- continue # Slice syntax, no space required
- if char == ',' and line[index + 1] == ')':
- continue # Allow tuple with only one element: (3,)
- return index, "E231 missing whitespace after '%s'" % char
-
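The two continue branches above are what keep slice colons and one-element tuples from being flagged; a few probes (assuming missing_whitespace is importable from this module):

    print(missing_whitespace("['a','b']"))  # (4, "E231 missing whitespace after ','")
    print(missing_whitespace("a[1:4]"))     # None: the colon is slice syntax
    print(missing_whitespace("(3,)"))       # None: one-element tuple is allowed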
-
-def indentation(logical_line, previous_logical, indent_char,
- indent_level, previous_indent_level):
- r"""
- Use 4 spaces per indentation level.
-
- For really old code that you don't want to mess up, you can continue to
- use 8-space tabs.
-
- Okay: a = 1
- Okay: if a == 0:\n    a = 1
- E111:   a = 1
-
- Okay: for item in items:\n    pass
- E112: for item in items:\npass
-
- Okay: a = 1\nb = 2
- E113: a = 1\n    b = 2
- """
- if indent_char == ' ' and indent_level % 4:
- return 0, "E111 indentation is not a multiple of four"
- indent_expect = previous_logical.endswith(':')
- if indent_expect and indent_level <= previous_indent_level:
- return 0, "E112 expected an indented block"
- if indent_level > previous_indent_level and not indent_expect:
- return 0, "E113 unexpected indentation"
-
-
-def whitespace_before_parameters(logical_line, tokens):
- """
- Avoid extraneous whitespace in the following situations:
-
- - Immediately before the open parenthesis that starts the argument
- list of a function call.
-
- - Immediately before the open parenthesis that starts an indexing or
- slicing.
-
- Okay: spam(1)
- E211: spam (1)
-
- Okay: dict['key'] = list[index]
- E211: dict ['key'] = list[index]
- E211: dict['key'] = list [index]
- """
- prev_type = tokens[0][0]
- prev_text = tokens[0][1]
- prev_end = tokens[0][3]
- for index in range(1, len(tokens)):
- token_type, text, start, end, line = tokens[index]
- if (token_type == tokenize.OP and
- text in '([' and
- start != prev_end and
- prev_type == tokenize.NAME and
- (index < 2 or tokens[index - 2][1] != 'class') and
- (not iskeyword(prev_text))):
- return prev_end, "E211 whitespace before '%s'" % text
- prev_type = token_type
- prev_text = text
- prev_end = end
-
-
-def whitespace_around_operator(logical_line):
- """
- Avoid extraneous whitespace in the following situations:
-
- - More than one space around an assignment (or other) operator to
- align it with another.
-
- Okay: a = 12 + 3
- E221: a = 4  + 5
- E222: a = 4 +  5
- E223: a = 4\t+ 5
- E224: a = 4 +\t5
- """
- line = logical_line
- for operator in OPERATORS:
- found = line.find(' ' + operator)
- if found > -1:
- return found, "E221 multiple spaces before operator"
- found = line.find(operator + ' ')
- if found > -1:
- return found, "E222 multiple spaces after operator"
- found = line.find('\t' + operator)
- if found > -1:
- return found, "E223 tab before operator"
- found = line.find(operator + '\t')
- if found > -1:
- return found, "E224 tab after operator"
-
-
-def missing_whitespace_around_operator(logical_line, tokens):
- r"""
- - Always surround these binary operators with a single space on
- either side: assignment (=), augmented assignment (+=, -= etc.),
- comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
- Booleans (and, or, not).
-
- - Use spaces around arithmetic operators.
-
- Okay: i = i + 1
- Okay: submitted += 1
- Okay: x = x * 2 - 1
- Okay: hypot2 = x * x + y * y
- Okay: c = (a + b) * (a - b)
- Okay: foo(bar, key='word', *args, **kwargs)
- Okay: baz(**kwargs)
- Okay: negative = -1
- Okay: spam(-1)
- Okay: alpha[:-i]
- Okay: if not -5 < x < +5:\n    pass
- Okay: lambda *args, **kw: (args, kw)
-
- E225: i=i+1
- E225: submitted +=1
- E225: x = x*2 - 1
- E225: hypot2 = x*x + y*y
- E225: c = (a+b) * (a-b)
- E225: c = alpha -4
- E225: z = x **y
- """
- parens = 0
- need_space = False
- prev_type = tokenize.OP
- prev_text = prev_end = None
- for token_type, text, start, end, line in tokens:
- if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
- # ERRORTOKEN is triggered by backticks in Python 3000
- continue
- if text in ('(', 'lambda'):
- parens += 1
- elif text == ')':
- parens -= 1
- if need_space:
- if start == prev_end:
- return prev_end, "E225 missing whitespace around operator"
- need_space = False
- elif token_type == tokenize.OP:
- if text == '=' and parens:
- # Allow keyword args or defaults: foo(bar=None).
- pass
- elif text in BINARY_OPERATORS:
- need_space = True
- elif text in UNARY_OPERATORS:
- if ((prev_type != tokenize.OP or prev_text in '}])') and not
- (prev_type == tokenize.NAME and iskeyword(prev_text))):
- # Allow unary operators: -123, -x, +1.
- # Allow argument unpacking: foo(*args, **kwargs).
- need_space = True
- if need_space and start == prev_end:
- return prev_end, "E225 missing whitespace around operator"
- prev_type = token_type
- prev_text = text
- prev_end = end
-
-
-def whitespace_around_comma(logical_line):
- """
- Avoid extraneous whitespace in the following situations:
-
- - More than one space around an assignment (or other) operator to
- align it with another.
-
- JCR: This should also be applied around comma etc.
- Note: these checks are disabled by default
-
- Okay: a = (1, 2)
- E241: a = (1,  2)
- E242: a = (1,\t2)
- """
- line = logical_line
- for separator in ',;:':
- found = line.find(separator + ' ')
- if found > -1:
- return found + 1, "E241 multiple spaces after '%s'" % separator
- found = line.find(separator + '\t')
- if found > -1:
- return found + 1, "E242 tab after '%s'" % separator
-
-
-def whitespace_around_named_parameter_equals(logical_line):
- """
- Don't use spaces around the '=' sign when used to indicate a
- keyword argument or a default parameter value.
-
- Okay: def complex(real, imag=0.0):
- Okay: return magic(r=real, i=imag)
- Okay: boolean(a == b)
- Okay: boolean(a != b)
- Okay: boolean(a <= b)
- Okay: boolean(a >= b)
-
- E251: def complex(real, imag = 0.0):
- E251: return magic(r = real, i = imag)
- """
- parens = 0
- window = '   '
- equal_ok = ['==', '!=', '<=', '>=']
-
- for pos, c in enumerate(logical_line):
- window = window[1:] + c
- if parens:
- if window[0] in WHITESPACE and window[1] == '=':
- if window[1:] not in equal_ok:
- issue = "E251 no spaces around keyword / parameter equals"
- return pos, issue
- if window[2] in WHITESPACE and window[1] == '=':
- if window[:2] not in equal_ok:
- issue = "E251 no spaces around keyword / parameter equals"
- return pos, issue
- if c == '(':
- parens += 1
- elif c == ')':
- parens -= 1
-
-
-def whitespace_before_inline_comment(logical_line, tokens):
- """
- Separate inline comments by at least two spaces.
-
- An inline comment is a comment on the same line as a statement. Inline
- comments should be separated by at least two spaces from the statement.
- They should start with a # and a single space.
-
- Okay: x = x + 1  # Increment x
- Okay: x = x + 1    # Increment x
- E261: x = x + 1 # Increment x
- E262: x = x + 1  #Increment x
- E262: x = x + 1  #  Increment x
- """
- prev_end = (0, 0)
- for token_type, text, start, end, line in tokens:
- if token_type == tokenize.NL:
- continue
- if token_type == tokenize.COMMENT:
- if not line[:start[1]].strip():
- continue
- if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
- return (prev_end,
- "E261 at least two spaces before inline comment")
- if (len(text) > 1 and text.startswith('#  ')
- or not text.startswith('# ')):
- return start, "E262 inline comment should start with '# '"
- else:
- prev_end = end
-
-
-def imports_on_separate_lines(logical_line):
- r"""
- Imports should usually be on separate lines.
-
- Okay: import os\nimport sys
- E401: import sys, os
-
- Okay: from subprocess import Popen, PIPE
- Okay: from myclass import MyClass
- Okay: from foo.bar.yourclass import YourClass
- Okay: import myclass
- Okay: import foo.bar.yourclass
- """
- line = logical_line
- if line.startswith('import '):
- found = line.find(',')
- if found > -1:
- return found, "E401 multiple imports on one line"
-
-
-def compound_statements(logical_line):
- r"""
- Compound statements (multiple statements on the same line) are
- generally discouraged.
-
- While sometimes it's okay to put an if/for/while with a small body
- on the same line, never do this for multi-clause statements. Also
- avoid folding such long lines!
-
- Okay: if foo == 'blah':\n    do_blah_thing()
- Okay: do_one()
- Okay: do_two()
- Okay: do_three()
-
- E701: if foo == 'blah': do_blah_thing()
- E701: for x in lst: total += x
- E701: while t < 10: t = delay()
- E701: if foo == 'blah': do_blah_thing()
- E701: else: do_non_blah_thing()
- E701: try: something()
- E701: finally: cleanup()
- E701: if foo == 'blah': one(); two(); three()
-
- E702: do_one(); do_two(); do_three()
- """
- line = logical_line
- found = line.find(':')
- if -1 < found < len(line) - 1:
- before = line[:found]
- if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
- before.count('[') <= before.count(']') and # [1:2] (slice)
- not re.search(r'\blambda\b', before)): # lambda x: x
- return found, "E701 multiple statements on one line (colon)"
- found = line.find(';')
- if -1 < found:
- return found, "E702 multiple statements on one line (semicolon)"
-
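The brace/bracket counting and the lambda test are what exempt dict literals, slices, and lambdas from E701; a few probes against the function above:

    print(compound_statements("x = {'a': 1}"))     # None: colon inside a dict literal
    print(compound_statements("y = x[1:2]"))       # None: colon inside a slice
    print(compound_statements("f = lambda n: n"))  # None: lambda colon
    print(compound_statements("if True: pass"))
    # (7, 'E701 multiple statements on one line (colon)')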
-
-def python_3000_has_key(logical_line):
- """
- The {}.has_key() method will be removed in a future version of
- Python. Use the 'in' operator instead, like:
- d = {"a": 1, "b": 2}
- if "b" in d:
- print d["b"]
- """
- pos = logical_line.find('.has_key(')
- if pos > -1:
- return pos, "W601 .has_key() is deprecated, use 'in'"
-
-
-def python_3000_raise_comma(logical_line):
- """
- When raising an exception, use "raise ValueError('message')"
- instead of the older form "raise ValueError, 'message'".
-
- The paren-using form is preferred because when the exception arguments
- are long or include string formatting, you don't need to use line
- continuation characters thanks to the containing parentheses. The older
- form will be removed in Python 3000.
- """
- match = RAISE_COMMA_REGEX.match(logical_line)
- if match:
- return match.start(1), "W602 deprecated form of raising exception"
-
-
-def python_3000_not_equal(logical_line):
- """
- != can also be written <>, but this is an obsolete usage kept for
- backwards compatibility only. New code should always use !=.
- The older syntax is removed in Python 3000.
- """
- pos = logical_line.find('<>')
- if pos > -1:
- return pos, "W603 '<>' is deprecated, use '!='"
-
-
-def python_3000_backticks(logical_line):
- """
- Backticks are removed in Python 3000.
- Use repr() instead.
- """
- pos = logical_line.find('`')
- if pos > -1:
- return pos, "W604 backticks are deprecated, use 'repr()'"
-
-
-##############################################################################
-# Helper functions
-##############################################################################
-
-
-def expand_indent(line):
- """
- Return the amount of indentation.
- Tabs are expanded to the next multiple of 8.
-
- >>> expand_indent('    ')
- 4
- >>> expand_indent('\\t')
- 8
- >>> expand_indent('    \\t')
- 8
- >>> expand_indent('       \\t')
- 8
- >>> expand_indent('        \\t')
- 16
- """
- result = 0
- for char in line:
- if char == '\t':
- result = result // 8 * 8 + 8
- elif char == ' ':
- result += 1
- else:
- break
- return result
-
-
-def mute_string(text):
- """
- Replace contents with 'xxx' to prevent syntax matching.
-
- >>> mute_string('"abc"')
- '"xxx"'
- >>> mute_string("'''abc'''")
- "'''xxx'''"
- >>> mute_string("r'abc'")
- "r'xxx'"
- """
- start = 1
- end = len(text) - 1
- # String modifiers (e.g. u or r)
- if text.endswith('"'):
- start += text.index('"')
- elif text.endswith("'"):
- start += text.index("'")
- # Triple quotes
- if text.endswith('"""') or text.endswith("'''"):
- start += 2
- end -= 2
- return text[:start] + 'x' * (end - start) + text[end:]
-
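Muting matters because the logical-line checks are plain string scans; without it, punctuation inside a string literal would trip them. A short sketch combining the helpers above:

    muted = mute_string('"1,2"')
    print(muted)                                 # "xxx"
    print(missing_whitespace("a = %s" % muted))  # None: the comma was masked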
-
-def message(text):
- """Print a message."""
- # print >> sys.stderr, options.prog + ': ' + text
- # print >> sys.stderr, text
- print(text)
-
-
-##############################################################################
-# Framework to run all checks
-##############################################################################
-
-
-def find_checks(argument_name):
- """
- Find all globally visible functions where the first argument name
- starts with argument_name.
- """
- checks = []
- for name, function in globals().items():
- if not inspect.isfunction(function):
- continue
- args = inspect.getargspec(function)[0]
- if args and args[0].startswith(argument_name):
- codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
- for code in codes or ['']:
- if not code or not ignore_code(code):
- checks.append((name, function, args))
- break
- checks.sort()
- return checks
-
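Because find_checks discovers plugins purely by introspection, adding a check only means defining a module-level function whose first parameter is named logical_line (or physical_line) and whose docstring carries the error code. A hypothetical plugin, with an invented W999 code, just to show the shape:

    def no_print_statements(logical_line):
        """
        W999: print 'debug'
        """
        pos = logical_line.find('print ')
        if pos > -1:
            return pos, "W999 print statement found"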
-
-class Checker(object):
- """
- Load a Python source file, tokenize it, check coding style.
- """
-
- def __init__(self, filename):
- if filename:
- self.filename = filename
- try:
- self.lines = open(filename).readlines()
- except UnicodeDecodeError:
- # Errors may occur with non-UTF8 files in Python 3000
- self.lines = open(filename, errors='replace').readlines()
- else:
- self.filename = 'stdin'
- self.lines = []
- options.counters['physical lines'] = \
- options.counters.get('physical lines', 0) + len(self.lines)
-
- def readline(self):
- """
- Get the next line from the input buffer.
- """
- self.line_number += 1
- if self.line_number > len(self.lines):
- return ''
- return self.lines[self.line_number - 1]
-
- def readline_check_physical(self):
- """
- Check and return the next physical line. This method can be
- used to feed tokenize.generate_tokens.
- """
- line = self.readline()
- if line:
- self.check_physical(line)
- return line
-
- def run_check(self, check, argument_names):
- """
- Run a check plugin.
- """
- arguments = []
- for name in argument_names:
- arguments.append(getattr(self, name))
- return check(*arguments)
-
- def check_physical(self, line):
- """
- Run all physical checks on a raw input line.
- """
- self.physical_line = line
- if self.indent_char is None and len(line) and line[0] in ' \t':
- self.indent_char = line[0]
- for name, check, argument_names in options.physical_checks:
- result = self.run_check(check, argument_names)
- if result is not None:
- offset, text = result
- self.report_error(self.line_number, offset, text, check)
-
- def build_tokens_line(self):
- """
- Build a logical line from tokens.
- """
- self.mapping = []
- logical = []
- length = 0
- previous = None
- for token in self.tokens:
- token_type, text = token[0:2]
- if token_type in (tokenize.COMMENT, tokenize.NL,
- tokenize.INDENT, tokenize.DEDENT,
- tokenize.NEWLINE):
- continue
- if token_type == tokenize.STRING:
- text = mute_string(text)
- if previous:
- end_line, end = previous[3]
- start_line, start = token[2]
- if end_line != start_line: # different row
- if self.lines[end_line - 1][end - 1] not in '{[(':
- logical.append(' ')
- length += 1
- elif end != start: # different column
- fill = self.lines[end_line - 1][end:start]
- logical.append(fill)
- length += len(fill)
- self.mapping.append((length, token))
- logical.append(text)
- length += len(text)
- previous = token
- self.logical_line = ''.join(logical)
- assert self.logical_line.lstrip() == self.logical_line
- assert self.logical_line.rstrip() == self.logical_line
-
- def check_logical(self):
- """
- Build a line from tokens and run all logical checks on it.
- """
- options.counters['logical lines'] = \
- options.counters.get('logical lines', 0) + 1
- self.build_tokens_line()
- first_line = self.lines[self.mapping[0][1][2][0] - 1]
- indent = first_line[:self.mapping[0][1][2][1]]
- self.previous_indent_level = self.indent_level
- self.indent_level = expand_indent(indent)
- if options.verbose >= 2:
- print(self.logical_line[:80].rstrip())
- for name, check, argument_names in options.logical_checks:
- if options.verbose >= 3:
- print(' ', name)
- result = self.run_check(check, argument_names)
- if result is not None:
- offset, text = result
- if isinstance(offset, tuple):
- original_number, original_offset = offset
- else:
- for token_offset, token in self.mapping:
- if offset >= token_offset:
- original_number = token[2][0]
- original_offset = (token[2][1]
- + offset - token_offset)
- self.report_error(original_number, original_offset,
- text, check)
- self.previous_logical = self.logical_line
-
- def check_all(self):
- """
- Run all checks on the input file.
- """
- self.file_errors = 0
- self.line_number = 0
- self.indent_char = None
- self.indent_level = 0
- self.previous_logical = ''
- self.blank_lines = 0
- self.blank_lines_before_comment = 0
- self.tokens = []
- parens = 0
- for token in tokenize.generate_tokens(self.readline_check_physical):
- # print(tokenize.tok_name[token[0]], repr(token))
- self.tokens.append(token)
- token_type, text = token[0:2]
- if token_type == tokenize.OP and text in '([{':
- parens += 1
- if token_type == tokenize.OP and text in '}])':
- parens -= 1
- if token_type == tokenize.NEWLINE and not parens:
- self.check_logical()
- self.blank_lines = 0
- self.blank_lines_before_comment = 0
- self.tokens = []
- if token_type == tokenize.NL and not parens:
- if len(self.tokens) <= 1:
- # The physical line contains only this token.
- self.blank_lines += 1
- self.tokens = []
- if token_type == tokenize.COMMENT:
- source_line = token[4]
- token_start = token[2][1]
- if source_line[:token_start].strip() == '':
- self.blank_lines_before_comment = max(self.blank_lines,
- self.blank_lines_before_comment)
- self.blank_lines = 0
- if text.endswith('\n') and not parens:
- # The comment also ends a physical line. This works around
- # Python < 2.6 behaviour, which does not generate NL after
- # a comment which is on a line by itself.
- self.tokens = []
- return self.file_errors
-
- def report_error(self, line_number, offset, text, check):
- """
- Report an error, according to options.
- """
- if options.quiet == 1 and not self.file_errors:
- message(self.filename)
- self.file_errors += 1
- code = text[:4]
- options.counters[code] = options.counters.get(code, 0) + 1
- options.messages[code] = text[5:]
- if options.quiet:
- return
- if options.testsuite:
- basename = os.path.basename(self.filename)
- if basename[:4] != code:
- return # Don't care about other errors or warnings
- if 'not' not in basename:
- return # Don't print the expected error message
- if ignore_code(code):
- return
- if options.counters[code] == 1 or options.repeat:
- message("%s:%s:%d: %s" %
- (self.filename, line_number, offset + 1, text))
- if options.show_source:
- line = self.lines[line_number - 1]
- message(line.rstrip())
- message(' ' * offset + '^')
- if options.show_pep8:
- message(check.__doc__.lstrip('\n').rstrip())
-
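Checker leans on the module-level options object that process_options (further below) builds, so driving it outside _main means priming that global first. A sketch, under the assumption that this file is importable as pep8:

    import pep8  # assumption: this module saved as pep8.py on sys.path
    pep8.process_options(['example.py'])
    errors = pep8.Checker('example.py').check_all()
    print("%d problem(s) found" % errors)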
-
-def input_file(filename):
- """
- Run all checks on a Python source file.
- """
- if excluded(filename):
- return {}
- if options.verbose:
- message('checking ' + filename)
- files_counter_before = options.counters.get('files', 0)
- if options.testsuite: # Keep showing errors for multiple tests
- options.counters = {}
- options.counters['files'] = files_counter_before + 1
- errors = Checker(filename).check_all()
- if options.testsuite: # Check if the expected error was found
- basename = os.path.basename(filename)
- code = basename[:4]
- count = options.counters.get(code, 0)
- if count == 0 and 'not' not in basename:
- message("%s: error %s not found" % (filename, code))
-
-
-def input_dir(dirname):
- """
- Check all Python source files in this directory and all subdirectories.
- """
- dirname = dirname.rstrip('/')
- if excluded(dirname):
- return
- for root, dirs, files in os.walk(dirname):
- if options.verbose:
- message('directory ' + root)
- options.counters['directories'] = \
- options.counters.get('directories', 0) + 1
- dirs.sort()
- for subdir in dirs:
- if excluded(subdir):
- dirs.remove(subdir)
- files.sort()
- for filename in files:
- if filename_match(filename):
- input_file(os.path.join(root, filename))
-
-
-def excluded(filename):
- """
- Check if options.exclude contains a pattern that matches filename.
- """
- basename = os.path.basename(filename)
- for pattern in options.exclude:
- if fnmatch(basename, pattern):
- # print basename, 'excluded because it matches', pattern
- return True
-
-
-def filename_match(filename):
- """
- Check if options.filename contains a pattern that matches filename.
- If options.filename is unspecified, this always returns True.
- """
- if not options.filename:
- return True
- for pattern in options.filename:
- if fnmatch(filename, pattern):
- return True
-
-
-def ignore_code(code):
- """
- Check if options.ignore contains a prefix of the error code.
- If options.select contains a prefix of the error code, do not ignore it.
- """
- for select in options.select:
- if code.startswith(select):
- return False
- for ignore in options.ignore:
- if code.startswith(ignore):
- return True
-
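Prefix matching gives --select priority over --ignore: with select=['E4'] and ignore=['E'], E401 is still reported while E501 is dropped. A stand-in sketch (Opts merely mimics the parsed option object):

    class Opts(object):  # stand-in for the optparse result
        select = ['E4']
        ignore = ['E']

    options = Opts()
    print(ignore_code('E401'))  # False: matched by the 'E4' selection
    print(ignore_code('E501'))  # True: ignored via the 'E' prefix
    print(ignore_code('W601'))  # None: no match at all; callers treat it as falsy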
-
-def get_error_statistics():
- """Get error statistics."""
- return get_statistics("E")
-
-
-def get_warning_statistics():
- """Get warning statistics."""
- return get_statistics("W")
-
-
-def get_statistics(prefix=''):
- """
- Get statistics for message codes that start with the prefix.
-
- prefix='' matches all errors and warnings
- prefix='E' matches all errors
- prefix='W' matches all warnings
- prefix='E4' matches all errors that have to do with imports
- """
- stats = []
- keys = list(options.messages.keys())
- keys.sort()
- for key in keys:
- if key.startswith(prefix):
- stats.append('%-7s %s %s' %
- (options.counters[key], key, options.messages[key]))
- return stats
-
-
-def get_count(prefix=''):
- """Return the total count of errors and warnings."""
- keys = list(options.messages.keys())
- count = 0
- for key in keys:
- if key.startswith(prefix):
- count += options.counters[key]
- return count
-
-
-def print_statistics(prefix=''):
- """Print overall statistics (number of errors and warnings)."""
- for line in get_statistics(prefix):
- print(line)
-
-
-def print_benchmark(elapsed):
- """
- Print benchmark numbers.
- """
- print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
- keys = ['directories', 'files',
- 'logical lines', 'physical lines']
- for key in keys:
- if key in options.counters:
- print('%-7d %s per second (%d total)' % (
- options.counters[key] / elapsed, key,
- options.counters[key]))
-
-
-def selftest():
- """
- Test all check functions with test cases in docstrings.
- """
- count_passed = 0
- count_failed = 0
- checks = options.physical_checks + options.logical_checks
- for name, check, argument_names in checks:
- for line in check.__doc__.splitlines():
- line = line.lstrip()
- match = SELFTEST_REGEX.match(line)
- if match is None:
- continue
- code, source = match.groups()
- checker = Checker(None)
- for part in source.split(r'\n'):
- part = part.replace(r'\t', '\t')
- part = part.replace(r'\s', ' ')
- checker.lines.append(part + '\n')
- options.quiet = 2
- options.counters = {}
- checker.check_all()
- error = None
- if code == 'Okay':
- if len(options.counters) > 1:
- codes = [key for key in options.counters.keys()
- if key != 'logical lines']
- error = "incorrectly found %s" % ', '.join(codes)
- elif options.counters.get(code, 0) == 0:
- error = "failed to find %s" % code
- if not error:
- count_passed += 1
- else:
- count_failed += 1
- if len(checker.lines) == 1:
- print("pep8.py: %s: %s" %
- (error, checker.lines[0].rstrip()))
- else:
- print("pep8.py: %s:" % error)
- for line in checker.lines:
- print(line.rstrip())
- if options.verbose:
- print("%d passed and %d failed." % (count_passed, count_failed))
- if count_failed:
- print("Test failed.")
- else:
- print("Test passed.")
-
-
-def process_options(arglist=None):
- """
- Process options passed either via arglist or via command line args.
- """
- global options, args
- parser = OptionParser(version=__version__,
- usage="%prog [options] input ...")
- parser.add_option('-v', '--verbose', default=0, action='count',
- help="print status messages, or debug with -vv")
- parser.add_option('-q', '--quiet', default=0, action='count',
- help="report only file names, or nothing with -qq")
- parser.add_option('-r', '--repeat', action='store_true',
- help="show all occurrences of the same error")
- parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
- help="exclude files or directories which match these "
- "comma separated patterns (default: %s)" %
- DEFAULT_EXCLUDE)
- parser.add_option('--filename', metavar='patterns', default='*.py',
- help="when parsing directories, only check filenames "
- "matching these comma separated patterns (default: "
- "*.py)")
- parser.add_option('--select', metavar='errors', default='',
- help="select errors and warnings (e.g. E,W6)")
- parser.add_option('--ignore', metavar='errors', default='',
- help="skip errors and warnings (e.g. E4,W)")
- parser.add_option('--show-source', action='store_true',
- help="show source code for each error")
- parser.add_option('--show-pep8', action='store_true',
- help="show text of PEP 8 for each error")
- parser.add_option('--statistics', action='store_true',
- help="count errors and warnings")
- parser.add_option('--count', action='store_true',
- help="print total number of errors and warnings "
- "to standard error and set exit code to 1 if "
- "total is not null")
- parser.add_option('--benchmark', action='store_true',
- help="measure processing speed")
- parser.add_option('--testsuite', metavar='dir',
- help="run regression tests from dir")
- parser.add_option('--doctest', action='store_true',
- help="run doctest on myself")
- options, args = parser.parse_args(arglist)
- if options.testsuite:
- args.append(options.testsuite)
- if len(args) == 0 and not options.doctest:
- parser.error('input not specified')
- options.prog = os.path.basename(sys.argv[0])
- options.exclude = options.exclude.split(',')
- for index in range(len(options.exclude)):
- options.exclude[index] = options.exclude[index].rstrip('/')
- if options.filename:
- options.filename = options.filename.split(',')
- if options.select:
- options.select = options.select.split(',')
- else:
- options.select = []
- if options.ignore:
- options.ignore = options.ignore.split(',')
- elif options.select:
- # Ignore all checks which are not explicitly selected
- options.ignore = ['']
- elif options.testsuite or options.doctest:
- # For doctest and testsuite, all checks are required
- options.ignore = []
- else:
- # The default choice: ignore controversial checks
- options.ignore = DEFAULT_IGNORE
- options.physical_checks = find_checks('physical_line')
- options.logical_checks = find_checks('logical_line')
- options.counters = {}
- options.messages = {}
- return options, args
-
-
-def _main():
- """
- Parse options and run checks on Python source.
- """
- options, args = process_options()
- if options.doctest:
- import doctest
- doctest.testmod(verbose=options.verbose)
- selftest()
- start_time = time.time()
- for path in args:
- if os.path.isdir(path):
- input_dir(path)
- else:
- input_file(path)
- elapsed = time.time() - start_time
- if options.statistics:
- print_statistics()
- if options.benchmark:
- print_benchmark(elapsed)
- if options.count:
- count = get_count()
- if count:
- sys.stderr.write(str(count) + '\n')
- sys.exit(1)
-
-
-if __name__ == '__main__':
- _main()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url
deleted file mode 100644
index 8098dbc..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url
+++ /dev/null
@@ -1 +0,0 @@
-http://webkit-rietveld.googlecode.com/svn/trunk/static/upload.py \ No newline at end of file
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py
deleted file mode 100644
index c1e4c6d..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# This file is required for Python to search this directory for modules.
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py
deleted file mode 100755
index e91060f..0000000
--- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py
+++ /dev/null
@@ -1,1702 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tool for uploading diffs from a version control system to the codereview app.
-
-Usage summary: upload.py [options] [-- diff_options]
-
-Diff options are passed to the diff command of the underlying system.
-
-Supported version control systems:
- Git
- Mercurial
- Subversion
-
-It is important for Git/Mercurial users to specify a tree/node/branch to diff
-against by using the '--rev' option.
-"""
-# This code is derived from appcfg.py in the App Engine SDK (open source),
-# and from ASPN recipe #146306.
-
-import ConfigParser
-import cookielib
-import fnmatch
-import getpass
-import logging
-import mimetypes
-import optparse
-import os
-import re
-import socket
-import subprocess
-import sys
-import urllib
-import urllib2
-import urlparse
-
-# The md5 module was deprecated in Python 2.5.
-try:
- from hashlib import md5
-except ImportError:
- from md5 import md5
-
-try:
- import readline
-except ImportError:
- pass
-
-# The logging verbosity:
-# 0: Errors only.
-# 1: Status messages.
-# 2: Info logs.
-# 3: Debug logs.
-verbosity = 1
-
-# Max size of patch or base file.
-MAX_UPLOAD_SIZE = 900 * 1024
-
-# Constants for version control names. Used by GuessVCSName.
-VCS_GIT = "Git"
-VCS_MERCURIAL = "Mercurial"
-VCS_SUBVERSION = "Subversion"
-VCS_UNKNOWN = "Unknown"
-
-# whitelist for non-binary filetypes which do not start with "text/"
-# .mm (Objective-C) shows up as application/x-freemind on my Linux box.
-TEXT_MIMETYPES = ['application/javascript', 'application/x-javascript',
- 'application/xml', 'application/x-freemind']
-
-VCS_ABBREVIATIONS = {
- VCS_MERCURIAL.lower(): VCS_MERCURIAL,
- "hg": VCS_MERCURIAL,
- VCS_SUBVERSION.lower(): VCS_SUBVERSION,
- "svn": VCS_SUBVERSION,
- VCS_GIT.lower(): VCS_GIT,
-}
-
-# The result of parsing Subversion's [auto-props] setting.
-svn_auto_props_map = None
-
-def GetEmail(prompt):
- """Prompts the user for their email address and returns it.
-
- The last used email address is saved to a file and offered up as a suggestion
- to the user. If the user presses enter without typing anything, the last
- used email address is used. If the user enters a new address, it is saved
- for next time we prompt.
-
- """
- last_email_file_name = os.path.expanduser("~/.last_codereview_email_address")
- last_email = ""
- if os.path.exists(last_email_file_name):
- try:
- last_email_file = open(last_email_file_name, "r")
- last_email = last_email_file.readline().strip("\n")
- last_email_file.close()
- prompt += " [%s]" % last_email
- except IOError, e:
- pass
- email = raw_input(prompt + ": ").strip()
- if email:
- try:
- last_email_file = open(last_email_file_name, "w")
- last_email_file.write(email)
- last_email_file.close()
- except IOError, e:
- pass
- else:
- email = last_email
- return email
-
-
-def StatusUpdate(msg):
- """Print a status message to stdout.
-
- If 'verbosity' is greater than 0, print the message.
-
- Args:
- msg: The string to print.
- """
- if verbosity > 0:
- print msg
-
-
-def ErrorExit(msg):
- """Print an error message to stderr and exit."""
- print >>sys.stderr, msg
- sys.exit(1)
-
-
-class ClientLoginError(urllib2.HTTPError):
- """Raised to indicate there was an error authenticating with ClientLogin."""
-
- def __init__(self, url, code, msg, headers, args):
- urllib2.HTTPError.__init__(self, url, code, msg, headers, None)
- self.args = args
- self.reason = args["Error"]
-
-
-class AbstractRpcServer(object):
- """Provides a common interface for a simple RPC server."""
-
- def __init__(self, host, auth_function, host_override=None, extra_headers={},
- save_cookies=False):
- """Creates a new HttpRpcServer.
-
- Args:
- host: The host to send requests to.
- auth_function: A function that takes no arguments and returns an
- (email, password) tuple when called. Will be called if authentication
- is required.
- host_override: The host header to send to the server (defaults to host).
- extra_headers: A dict of extra headers to append to every request.
- save_cookies: If True, save the authentication cookies to local disk.
- If False, use an in-memory cookiejar instead. Subclasses must
- implement this functionality. Defaults to False.
- """
- self.host = host
- self.host_override = host_override
- self.auth_function = auth_function
- self.authenticated = False
- self.extra_headers = extra_headers
- self.save_cookies = save_cookies
- self.opener = self._GetOpener()
- if self.host_override:
- logging.info("Server: %s; Host: %s", self.host, self.host_override)
- else:
- logging.info("Server: %s", self.host)
-
- def _GetOpener(self):
- """Returns an OpenerDirector for making HTTP requests.
-
- Returns:
- A urllib2.OpenerDirector object.
- """
- raise NotImplementedError()
-
- def _CreateRequest(self, url, data=None):
- """Creates a new urllib request."""
- logging.debug("Creating request for: '%s' with payload:\n%s", url, data)
- req = urllib2.Request(url, data=data)
- if self.host_override:
- req.add_header("Host", self.host_override)
- for key, value in self.extra_headers.iteritems():
- req.add_header(key, value)
- return req
-
- def _GetAuthToken(self, email, password):
- """Uses ClientLogin to authenticate the user, returning an auth token.
-
- Args:
- email: The user's email address
- password: The user's password
-
- Raises:
- ClientLoginError: If there was an error authenticating with ClientLogin.
- HTTPError: If there was some other form of HTTP error.
-
- Returns:
- The authentication token returned by ClientLogin.
- """
- account_type = "GOOGLE"
- if self.host.endswith(".google.com"):
- # Needed for use inside Google.
- account_type = "HOSTED"
- req = self._CreateRequest(
- url="https://www.google.com/accounts/ClientLogin",
- data=urllib.urlencode({
- "Email": email,
- "Passwd": password,
- "service": "ah",
- "source": "rietveld-codereview-upload",
- "accountType": account_type,
- }),
- )
- try:
- response = self.opener.open(req)
- response_body = response.read()
- response_dict = dict(x.split("=")
- for x in response_body.split("\n") if x)
- return response_dict["Auth"]
- except urllib2.HTTPError, e:
- if e.code == 403:
- body = e.read()
- response_dict = dict(x.split("=", 1) for x in body.split("\n") if x)
- raise ClientLoginError(req.get_full_url(), e.code, e.msg,
- e.headers, response_dict)
- else:
- raise
-
- def _GetAuthCookie(self, auth_token):
- """Fetches authentication cookies for an authentication token.
-
- Args:
- auth_token: The authentication token returned by ClientLogin.
-
- Raises:
- HTTPError: If there was an error fetching the authentication cookies.
- """
- # This is a dummy value to allow us to identify when we're successful.
- continue_location = "http://localhost/"
- args = {"continue": continue_location, "auth": auth_token}
- req = self._CreateRequest("http://%s/_ah/login?%s" %
- (self.host, urllib.urlencode(args)))
- try:
- response = self.opener.open(req)
- except urllib2.HTTPError, e:
- response = e
- if (response.code != 302 or
- response.info()["location"] != continue_location):
- raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg,
- response.headers, response.fp)
- self.authenticated = True
-
- def _Authenticate(self):
- """Authenticates the user.
-
- The authentication process works as follows:
- 1) We get a username and password from the user
- 2) We use ClientLogin to obtain an AUTH token for the user
- (see http://code.google.com/apis/accounts/AuthForInstalledApps.html).
- 3) We pass the auth token to /_ah/login on the server to obtain an
- authentication cookie. If login was successful, it tries to redirect
- us to the URL we provided.
-
- If we attempt to access the upload API without first obtaining an
- authentication cookie, it returns a 401 response (or a 302) and
- directs us to authenticate ourselves with ClientLogin.
- """
- for i in range(3):
- credentials = self.auth_function()
- try:
- auth_token = self._GetAuthToken(credentials[0], credentials[1])
- except ClientLoginError, e:
- if e.reason == "BadAuthentication":
- print >>sys.stderr, "Invalid username or password."
- continue
- if e.reason == "CaptchaRequired":
- print >>sys.stderr, (
- "Please go to\n"
- "https://www.google.com/accounts/DisplayUnlockCaptcha\n"
- "and verify you are a human. Then try again.")
- break
- if e.reason == "NotVerified":
- print >>sys.stderr, "Account not verified."
- break
- if e.reason == "TermsNotAgreed":
- print >>sys.stderr, "User has not agreed to TOS."
- break
- if e.reason == "AccountDeleted":
- print >>sys.stderr, "The user account has been deleted."
- break
- if e.reason == "AccountDisabled":
- print >>sys.stderr, "The user account has been disabled."
- break
- if e.reason == "ServiceDisabled":
- print >>sys.stderr, ("The user's access to the service has been "
- "disabled.")
- break
- if e.reason == "ServiceUnavailable":
- print >>sys.stderr, "The service is not available; try again later."
- break
- raise
- self._GetAuthCookie(auth_token)
- return
-
- def Send(self, request_path, payload=None,
- content_type="application/octet-stream",
- timeout=None,
- **kwargs):
- """Sends an RPC and returns the response.
-
- Args:
- request_path: The path to send the request to, e.g. /api/appversion/create.
- payload: The body of the request, or None to send an empty request.
- content_type: The Content-Type header to use.
- timeout: timeout in seconds; default None i.e. no timeout.
- (Note: for large requests on OS X, the timeout doesn't work right.)
- kwargs: Any keyword arguments are converted into query string parameters.
-
- Returns:
- The response body, as a string.
- """
- # TODO: Don't require authentication. Let the server say
- # whether it is necessary.
- if not self.authenticated:
- self._Authenticate()
-
- old_timeout = socket.getdefaulttimeout()
- socket.setdefaulttimeout(timeout)
- try:
- tries = 0
- while True:
- tries += 1
- args = dict(kwargs)
- url = "http://%s%s" % (self.host, request_path)
- if args:
- url += "?" + urllib.urlencode(args)
- req = self._CreateRequest(url=url, data=payload)
- req.add_header("Content-Type", content_type)
- try:
- f = self.opener.open(req)
- response = f.read()
- f.close()
- return response
- except urllib2.HTTPError, e:
- if tries > 3:
- raise
- elif e.code == 401 or e.code == 302:
- self._Authenticate()
-## elif e.code >= 500 and e.code < 600:
-## # Server Error - try again.
-## continue
- else:
- raise
- finally:
- socket.setdefaulttimeout(old_timeout)
-
-
-class HttpRpcServer(AbstractRpcServer):
- """Provides a simplified RPC-style interface for HTTP requests."""
-
- def _Authenticate(self):
- """Save the cookie jar after authentication."""
- super(HttpRpcServer, self)._Authenticate()
- if self.save_cookies:
- StatusUpdate("Saving authentication cookies to %s" % self.cookie_file)
- self.cookie_jar.save()
-
- def _GetOpener(self):
- """Returns an OpenerDirector that supports cookies and ignores redirects.
-
- Returns:
- A urllib2.OpenerDirector object.
- """
- opener = urllib2.OpenerDirector()
- opener.add_handler(urllib2.ProxyHandler())
- opener.add_handler(urllib2.UnknownHandler())
- opener.add_handler(urllib2.HTTPHandler())
- opener.add_handler(urllib2.HTTPDefaultErrorHandler())
- opener.add_handler(urllib2.HTTPSHandler())
- opener.add_handler(urllib2.HTTPErrorProcessor())
- if self.save_cookies:
- self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies")
- self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file)
- if os.path.exists(self.cookie_file):
- try:
- self.cookie_jar.load()
- self.authenticated = True
- StatusUpdate("Loaded authentication cookies from %s" %
- self.cookie_file)
- except (cookielib.LoadError, IOError):
- # Failed to load cookies - just ignore them.
- pass
- else:
- # Create an empty cookie file with mode 600
- fd = os.open(self.cookie_file, os.O_CREAT, 0600)
- os.close(fd)
- # Always chmod the cookie file
- os.chmod(self.cookie_file, 0600)
- else:
- # Don't save cookies across runs of update.py.
- self.cookie_jar = cookielib.CookieJar()
- opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
- return opener
-
-
-parser = optparse.OptionParser(usage="%prog [options] [-- diff_options]")
-parser.add_option("-y", "--assume_yes", action="store_true",
- dest="assume_yes", default=False,
- help="Assume that the answer to yes/no questions is 'yes'.")
-# Logging
-group = parser.add_option_group("Logging options")
-group.add_option("-q", "--quiet", action="store_const", const=0,
- dest="verbose", help="Print errors only.")
-group.add_option("-v", "--verbose", action="store_const", const=2,
- dest="verbose", default=1,
- help="Print info level logs (default).")
-group.add_option("--noisy", action="store_const", const=3,
- dest="verbose", help="Print all logs.")
-# Review server
-group = parser.add_option_group("Review server options")
-group.add_option("-s", "--server", action="store", dest="server",
- default="codereview.appspot.com",
- metavar="SERVER",
- help=("The server to upload to. The format is host[:port]. "
- "Defaults to '%default'."))
-group.add_option("-e", "--email", action="store", dest="email",
- metavar="EMAIL", default=None,
- help="The username to use. Will prompt if omitted.")
-group.add_option("-H", "--host", action="store", dest="host",
- metavar="HOST", default=None,
- help="Overrides the Host header sent with all RPCs.")
-group.add_option("--no_cookies", action="store_false",
- dest="save_cookies", default=True,
- help="Do not save authentication cookies to local disk.")
-# Issue
-group = parser.add_option_group("Issue options")
-group.add_option("-d", "--description", action="store", dest="description",
- metavar="DESCRIPTION", default=None,
- help="Optional description when creating an issue.")
-group.add_option("-f", "--description_file", action="store",
- dest="description_file", metavar="DESCRIPTION_FILE",
- default=None,
- help="Optional path of a file that contains "
- "the description when creating an issue.")
-group.add_option("-r", "--reviewers", action="store", dest="reviewers",
- metavar="REVIEWERS", default=None,
- help="Add reviewers (comma separated email addresses).")
-group.add_option("--cc", action="store", dest="cc",
- metavar="CC", default=None,
- help="Add CC (comma separated email addresses).")
-group.add_option("--private", action="store_true", dest="private",
- default=False,
- help="Make the issue restricted to reviewers and those CCed")
-# Upload options
-group = parser.add_option_group("Patch options")
-group.add_option("-m", "--message", action="store", dest="message",
- metavar="MESSAGE", default=None,
- help="A message to identify the patch. "
- "Will prompt if omitted.")
-group.add_option("-i", "--issue", type="int", action="store",
- metavar="ISSUE", default=None,
- help="Issue number to which to add. Defaults to new issue.")
-group.add_option("--base_url", action="store", dest="base_url", default=None,
- help="Base repository URL (listed as \"Base URL\" when "
- "viewing issue). If omitted, will be guessed automatically "
- "for SVN repos and left blank for others.")
-group.add_option("--download_base", action="store_true",
- dest="download_base", default=False,
- help="Base files will be downloaded by the server "
- "(side-by-side diffs may not work on files with CRs).")
-group.add_option("--rev", action="store", dest="revision",
- metavar="REV", default=None,
- help="Base revision/branch/tree to diff against. Use "
- "rev1:rev2 range to review already committed changeset.")
-group.add_option("--send_mail", action="store_true",
- dest="send_mail", default=False,
- help="Send notification email to reviewers.")
-group.add_option("--vcs", action="store", dest="vcs",
- metavar="VCS", default=None,
- help=("Version control system (optional, usually upload.py "
- "already guesses the right VCS)."))
-group.add_option("--emulate_svn_auto_props", action="store_true",
- dest="emulate_svn_auto_props", default=False,
- help=("Emulate Subversion's auto properties feature."))
-
-
-def GetRpcServer(server, email=None, host_override=None, save_cookies=True):
- """Returns an instance of an AbstractRpcServer.
-
- Args:
- server: String containing the review server URL.
- email: String containing user's email address.
- host_override: If not None, string containing an alternate hostname to use
- in the host header.
- save_cookies: Whether authentication cookies should be saved to disk.
-
- Returns:
- A new AbstractRpcServer, on which RPC calls can be made.
- """
-
- rpc_server_class = HttpRpcServer
-
- def GetUserCredentials():
- """Prompts the user for a username and password."""
- if email is None:
- email = GetEmail("Email (login for uploading to %s)" % server)
- password = getpass.getpass("Password for %s: " % email)
- return (email, password)
-
- # If this is the dev_appserver, use fake authentication.
- host = (host_override or server).lower()
- if host == "localhost" or host.startswith("localhost:"):
- if email is None:
- email = "test@example.com"
- logging.info("Using debug user %s. Override with --email" % email)
- server = rpc_server_class(
- server,
- lambda: (email, "password"),
- host_override=host_override,
- extra_headers={"Cookie":
- 'dev_appserver_login="%s:False"' % email},
- save_cookies=save_cookies)
- # Don't try to talk to ClientLogin.
- server.authenticated = True
- return server
-
- return rpc_server_class(server,
- GetUserCredentials,
- host_override=host_override,
- save_cookies=save_cookies)
-
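A sketch of issuing a single RPC with the pieces above; the path is illustrative, not a fixed Rietveld route:

    rpc = GetRpcServer("codereview.appspot.com", email="me@example.com")
    # Send() authenticates lazily and re-authenticates on a 401/302 response;
    # keyword arguments become query-string parameters.
    data = rpc.Send("/api/12345", payload=None)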
-
-def EncodeMultipartFormData(fields, files):
- """Encode form fields for multipart/form-data.
-
- Args:
- fields: A sequence of (name, value) elements for regular form fields.
- files: A sequence of (name, filename, value) elements for data to be
- uploaded as files.
- Returns:
- (content_type, body) ready for httplib.HTTP instance.
-
- Source:
- http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
- """
- BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-'
- CRLF = '\r\n'
- lines = []
- for (key, value) in fields:
- lines.append('--' + BOUNDARY)
- lines.append('Content-Disposition: form-data; name="%s"' % key)
- lines.append('')
- lines.append(value)
- for (key, filename, value) in files:
- lines.append('--' + BOUNDARY)
- lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' %
- (key, filename))
- lines.append('Content-Type: %s' % GetContentType(filename))
- lines.append('')
- lines.append(value)
- lines.append('--' + BOUNDARY + '--')
- lines.append('')
- body = CRLF.join(lines)
- content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
- return content_type, body
-
-
-def GetContentType(filename):
- """Helper to guess the content-type from the filename."""
- return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
-
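For reference, how the encoder pair is used when posting a patch (the field values here are invented):

    ctype, body = EncodeMultipartFormData(
        [("subject", "Fix crash on resize")],
        [("data", "fix.patch", "--- a/foo.c\n+++ b/foo.c\n")])
    print(ctype)  # multipart/form-data; boundary=-M-A-G-I-C---B-O-U-N-D-A-R-Y-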
-
-# Use a shell for subcommands on Windows to get a PATH search.
-use_shell = sys.platform.startswith("win")
-
-def RunShellWithReturnCode(command, print_output=False,
- universal_newlines=True,
- env=os.environ):
- """Executes a command and returns the output from stdout and the return code.
-
- Args:
- command: Command to execute.
- print_output: If True, the output is printed to stdout.
- If False, both stdout and stderr are ignored.
- universal_newlines: Use universal_newlines flag (default: True).
-
- Returns:
- Tuple (output, return code)
- """
- logging.info("Running %s", command)
- p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- shell=use_shell, universal_newlines=universal_newlines,
- env=env)
- if print_output:
- output_array = []
- while True:
- line = p.stdout.readline()
- if not line:
- break
- print line.strip("\n")
- output_array.append(line)
- output = "".join(output_array)
- else:
- output = p.stdout.read()
- p.wait()
- errout = p.stderr.read()
- if print_output and errout:
- print >>sys.stderr, errout
- p.stdout.close()
- p.stderr.close()
- return output, p.returncode
-
-
-def RunShell(command, silent_ok=False, universal_newlines=True,
- print_output=False, env=os.environ):
- data, retcode = RunShellWithReturnCode(command, print_output,
- universal_newlines, env)
- if retcode:
- ErrorExit("Got error status from %s:\n%s" % (command, data))
- if not silent_ok and not data:
- ErrorExit("No output from %s" % command)
- return data
-
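The two wrappers split the error-handling policy: RunShellWithReturnCode hands back (output, returncode) and lets the caller decide, while RunShell turns any failure into ErrorExit. For instance:

    out, code = RunShellWithReturnCode(["svn", "--version"])
    info = RunShell(["svn", "info"])  # exits with a message if svn fails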
-
-class VersionControlSystem(object):
- """Abstract base class providing an interface to the VCS."""
-
- def __init__(self, options):
- """Constructor.
-
- Args:
- options: Command line options.
- """
- self.options = options
-
- def GenerateDiff(self, args):
- """Return the current diff as a string.
-
- Args:
- args: Extra arguments to pass to the diff command.
- """
- raise NotImplementedError(
- "abstract method -- subclass %s must override" % self.__class__)
-
- def GetUnknownFiles(self):
- """Return a list of files unknown to the VCS."""
- raise NotImplementedError(
- "abstract method -- subclass %s must override" % self.__class__)
-
- def CheckForUnknownFiles(self):
- """Show an "are you sure?" prompt if there are unknown files."""
- unknown_files = self.GetUnknownFiles()
- if unknown_files:
- print "The following files are not added to version control:"
- for line in unknown_files:
- print line
- prompt = "Are you sure to continue?(y/N) "
- answer = raw_input(prompt).strip()
- if answer != "y":
- ErrorExit("User aborted")
-
- def GetBaseFile(self, filename):
- """Get the content of the upstream version of a file.
-
- Returns:
- A tuple (base_content, new_content, is_binary, status)
- base_content: The contents of the base file.
- new_content: For text files, this is empty. For binary files, this is
- the contents of the new file, since the diff output won't contain
- information to reconstruct the current file.
- is_binary: True iff the file is binary.
- status: The status of the file.
- """
-
- raise NotImplementedError(
- "abstract method -- subclass %s must override" % self.__class__)
-
-
- def GetBaseFiles(self, diff):
- """Helper that calls GetBase file for each file in the patch.
-
- Returns:
- A dictionary that maps from filename to GetBaseFile's tuple. Filenames
- are retrieved based on lines that start with "Index:" or
- "Property changes on:".
- """
- files = {}
- for line in diff.splitlines(True):
- if line.startswith('Index:') or line.startswith('Property changes on:'):
- unused, filename = line.split(':', 1)
- # On Windows if a file has property changes its filename uses '\'
- # instead of '/'.
- filename = filename.strip().replace('\\', '/')
- files[filename] = self.GetBaseFile(filename)
- return files
-
-
- def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options,
- files):
- """Uploads the base files (and if necessary, the current ones as well)."""
-
- def UploadFile(filename, file_id, content, is_binary, status, is_base):
- """Uploads a file to the server."""
- file_too_large = False
- if is_base:
- type = "base"
- else:
- type = "current"
- if len(content) > MAX_UPLOAD_SIZE:
- print ("Not uploading the %s file for %s because it's too large." %
- (type, filename))
- file_too_large = True
- content = ""
- checksum = md5(content).hexdigest()
- if options.verbose > 0 and not file_too_large:
- print "Uploading %s file for %s" % (type, filename)
- url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id)
- form_fields = [("filename", filename),
- ("status", status),
- ("checksum", checksum),
- ("is_binary", str(is_binary)),
- ("is_current", str(not is_base)),
- ]
- if file_too_large:
- form_fields.append(("file_too_large", "1"))
- if options.email:
- form_fields.append(("user", options.email))
- ctype, body = EncodeMultipartFormData(form_fields,
- [("data", filename, content)])
- response_body = rpc_server.Send(url, body,
- content_type=ctype)
- if not response_body.startswith("OK"):
- StatusUpdate(" --> %s" % response_body)
- sys.exit(1)
-
- patches = dict()
- [patches.setdefault(v, k) for k, v in patch_list]
- for filename in patches.keys():
- base_content, new_content, is_binary, status = files[filename]
- file_id_str = patches.get(filename)
- if file_id_str.find("nobase") != -1:
- base_content = None
- file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
- file_id = int(file_id_str)
- if base_content != None:
- UploadFile(filename, file_id, base_content, is_binary, status, True)
- if new_content != None:
- UploadFile(filename, file_id, new_content, is_binary, status, False)
-
- def IsImage(self, filename):
- """Returns true if the filename has an image extension."""
- mimetype = mimetypes.guess_type(filename)[0]
- if not mimetype:
- return False
- return mimetype.startswith("image/")
-
- def IsBinary(self, filename):
- """Returns true if the guessed mimetyped isnt't in text group."""
- mimetype = mimetypes.guess_type(filename)[0]
- if not mimetype:
- return False # e.g. README, "real" binaries usually have an extension
- # special case for text files which don't start with text/
- if mimetype in TEXT_MIMETYPES:
- return False
- return not mimetype.startswith("text/")
-
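A new backend only has to fill in the three abstract methods; a deliberately skeletal, hypothetical subclass to show the contract (the p4 command is a placeholder, not a tested integration):

    class PerforceVCS(VersionControlSystem):  # hypothetical example
        def GenerateDiff(self, args):
            return RunShell(["p4", "diff"] + args)

        def GetUnknownFiles(self):
            return []

        def GetBaseFile(self, filename):
            # (base_content, new_content, is_binary, status)
            return (None, None, False, "M")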
-
-class SubversionVCS(VersionControlSystem):
- """Implementation of the VersionControlSystem interface for Subversion."""
-
- def __init__(self, options):
- super(SubversionVCS, self).__init__(options)
- if self.options.revision:
- match = re.match(r"(\d+)(:(\d+))?", self.options.revision)
- if not match:
- ErrorExit("Invalid Subversion revision %s." % self.options.revision)
- self.rev_start = match.group(1)
- self.rev_end = match.group(3)
- else:
- self.rev_start = self.rev_end = None
- # Cache output from "svn list -r REVNO dirname".
- # Keys: dirname, Values: 2-tuple (output for start rev and end rev).
- self.svnls_cache = {}
- # Base URL is required to fetch files deleted in an older revision.
- # Result is cached to not guess it over and over again in GetBaseFile().
- required = self.options.download_base or self.options.revision is not None
- self.svn_base = self._GuessBase(required)
-
- def GuessBase(self, required):
- """Wrapper for _GuessBase."""
- return self.svn_base
-
- def _GuessBase(self, required):
- """Returns the SVN base URL.
-
- Args:
- required: If true, exits if the url can't be guessed, otherwise None is
- returned.
- """
- info = RunShell(["svn", "info"])
- for line in info.splitlines():
- words = line.split()
- if len(words) == 2 and words[0] == "URL:":
- url = words[1]
- scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
- username, netloc = urllib.splituser(netloc)
- if username:
- logging.info("Removed username from base URL")
- if netloc.endswith("svn.python.org"):
- if netloc == "svn.python.org":
- if path.startswith("/projects/"):
- path = path[9:]
- elif netloc != "pythondev@svn.python.org":
- ErrorExit("Unrecognized Python URL: %s" % url)
- base = "http://svn.python.org/view/*checkout*%s/" % path
- logging.info("Guessed Python base = %s", base)
- elif netloc.endswith("svn.collab.net"):
- if path.startswith("/repos/"):
- path = path[6:]
- base = "http://svn.collab.net/viewvc/*checkout*%s/" % path
- logging.info("Guessed CollabNet base = %s", base)
- elif netloc.endswith(".googlecode.com"):
- path = path + "/"
- base = urlparse.urlunparse(("http", netloc, path, params,
- query, fragment))
- logging.info("Guessed Google Code base = %s", base)
- else:
- path = path + "/"
- base = urlparse.urlunparse((scheme, netloc, path, params,
- query, fragment))
- logging.info("Guessed base = %s", base)
- return base
- if required:
- ErrorExit("Can't find URL in output from svn info")
- return None
-
- def GenerateDiff(self, args):
- cmd = ["svn", "diff"]
- if self.options.revision:
- cmd += ["-r", self.options.revision]
- cmd.extend(args)
- data = RunShell(cmd)
- count = 0
- for line in data.splitlines():
- if line.startswith("Index:") or line.startswith("Property changes on:"):
- count += 1
- logging.info(line)
- if not count:
- ErrorExit("No valid patches found in output from svn diff")
- return data
-
- def _CollapseKeywords(self, content, keyword_str):
- """Collapses SVN keywords."""
- # svn cat translates keywords but svn diff doesn't. As a result of this
- # behavior patching.PatchChunks() fails with a chunk mismatch error.
- # This part was originally written by the Review Board development team
- # who had the same problem (http://reviews.review-board.org/r/276/).
- # Mapping of keywords to known aliases
- svn_keywords = {
- # Standard keywords
- 'Date': ['Date', 'LastChangedDate'],
- 'Revision': ['Revision', 'LastChangedRevision', 'Rev'],
- 'Author': ['Author', 'LastChangedBy'],
- 'HeadURL': ['HeadURL', 'URL'],
- 'Id': ['Id'],
-
- # Aliases
- 'LastChangedDate': ['LastChangedDate', 'Date'],
- 'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'],
- 'LastChangedBy': ['LastChangedBy', 'Author'],
- 'URL': ['URL', 'HeadURL'],
- }
-
- def repl(m):
- if m.group(2):
- return "$%s::%s$" % (m.group(1), " " * len(m.group(3)))
- return "$%s$" % m.group(1)
- keywords = [keyword
- for name in keyword_str.split(" ")
- for keyword in svn_keywords.get(name, [])]
- return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content)
-
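A worked example of the collapse, reusing the same replacement callback with
the keyword alternation hard-coded for brevity (a sketch, not the method
itself). Note that the fixed-length "$Keyword:: ... $" form is padded back to
its original width, so patch offsets are preserved:

    import re

    def repl(m):
        if m.group(2):
            return "$%s::%s$" % (m.group(1), " " * len(m.group(3)))
        return "$%s$" % m.group(1)

    content = "// $Revision: 61121 $\n// $Revision:: 61121 $\n"
    print re.sub(r"\$(Revision|Rev):(:?)([^\$]+)\$", repl, content)
    # // $Revision$
    # // $Revision::       $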
- def GetUnknownFiles(self):
- status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True)
- unknown_files = []
- for line in status.split("\n"):
- if line and line[0] == "?":
- unknown_files.append(line)
- return unknown_files
-
- def ReadFile(self, filename):
- """Returns the contents of a file."""
- file = open(filename, 'rb')
- result = ""
- try:
- result = file.read()
- finally:
- file.close()
- return result
-
- def GetStatus(self, filename):
- """Returns the status of a file."""
- if not self.options.revision:
- status = RunShell(["svn", "status", "--ignore-externals", filename])
- if not status:
- ErrorExit("svn status returned no output for %s" % filename)
- status_lines = status.splitlines()
- # If file is in a cl, the output will begin with
- # "\n--- Changelist 'cl_name':\n". See
- # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt
- if (len(status_lines) == 3 and
- not status_lines[0] and
- status_lines[1].startswith("--- Changelist")):
- status = status_lines[2]
- else:
- status = status_lines[0]
- # If we have a revision to diff against we need to run "svn list"
- # for the old and the new revision and compare the results to get
- # the correct status for a file.
- else:
- dirname, relfilename = os.path.split(filename)
- if dirname not in self.svnls_cache:
- cmd = ["svn", "list", "-r", self.rev_start, dirname or "."]
- out, returncode = RunShellWithReturnCode(cmd)
- if returncode:
- ErrorExit("Failed to get status for %s." % filename)
- old_files = out.splitlines()
- args = ["svn", "list"]
- if self.rev_end:
- args += ["-r", self.rev_end]
- cmd = args + [dirname or "."]
- out, returncode = RunShellWithReturnCode(cmd)
- if returncode:
- ErrorExit("Failed to run command %s" % cmd)
- self.svnls_cache[dirname] = (old_files, out.splitlines())
- old_files, new_files = self.svnls_cache[dirname]
- if relfilename in old_files and relfilename not in new_files:
- status = "D "
- elif relfilename in old_files and relfilename in new_files:
- status = "M "
- else:
- status = "A "
- return status
-
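The revision branch reduces to a set comparison between the two cached
"svn list" outputs; schematically (trailing column padding elided):

    def status_from_listings(name, old_files, new_files):
        # Present only in the old listing: deleted; present in both:
        # modified; present only in the new listing: added.
        if name in old_files and name not in new_files:
            return "D"
        elif name in old_files and name in new_files:
            return "M"
        return "A"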
- def GetBaseFile(self, filename):
- status = self.GetStatus(filename)
- base_content = None
- new_content = None
-
- # If a file is copied its status will be "A +", which signifies
- # "addition-with-history". See "svn st" for more information. We need to
- # upload the original file or else diff parsing will fail if the file was
- # edited.
- if status[0] == "A" and status[3] != "+":
- # We'll need to upload the new content if we're adding a binary file
- # since diff's output won't contain it.
- mimetype = RunShell(["svn", "propget", "svn:mime-type", filename],
- silent_ok=True)
- base_content = ""
- is_binary = bool(mimetype) and not mimetype.startswith("text/")
- if is_binary and self.IsImage(filename):
- new_content = self.ReadFile(filename)
- elif (status[0] in ("M", "D", "R") or
- (status[0] == "A" and status[3] == "+") or # Copied file.
- (status[0] == " " and status[1] == "M")): # Property change.
- args = []
- if self.options.revision:
- url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
- else:
- # Don't change filename, it's needed later.
- url = filename
- args += ["-r", "BASE"]
- cmd = ["svn"] + args + ["propget", "svn:mime-type", url]
- mimetype, returncode = RunShellWithReturnCode(cmd)
- if returncode:
- # File does not exist in the requested revision.
- # Reset mimetype, it contains an error message.
- mimetype = ""
- get_base = False
- is_binary = bool(mimetype) and not mimetype.startswith("text/")
- if status[0] == " ":
- # Empty base content just to force an upload.
- base_content = ""
- elif is_binary:
- if self.IsImage(filename):
- get_base = True
- if status[0] == "M":
- if not self.rev_end:
- new_content = self.ReadFile(filename)
- else:
- url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end)
- new_content = RunShell(["svn", "cat", url],
- universal_newlines=True, silent_ok=True)
- else:
- base_content = ""
- else:
- get_base = True
-
- if get_base:
- if is_binary:
- universal_newlines = False
- else:
- universal_newlines = True
- if self.rev_start:
- # "svn cat -r REV delete_file.txt" doesn't work. cat requires
- # the full URL with "@REV" appended instead of using "-r" option.
- url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
- base_content = RunShell(["svn", "cat", url],
- universal_newlines=universal_newlines,
- silent_ok=True)
- else:
- base_content = RunShell(["svn", "cat", filename],
- universal_newlines=universal_newlines,
- silent_ok=True)
- if not is_binary:
- args = []
- if self.rev_start:
- url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
- else:
- url = filename
- args += ["-r", "BASE"]
- cmd = ["svn"] + args + ["propget", "svn:keywords", url]
- keywords, returncode = RunShellWithReturnCode(cmd)
- if keywords and not returncode:
- base_content = self._CollapseKeywords(base_content, keywords)
- else:
- StatusUpdate("svn status returned unexpected output: %s" % status)
- sys.exit(1)
- return base_content, new_content, is_binary, status[0:5]
-
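For a locally modified text file in a plain (no revision) run, the method's
contract looks roughly like this (a hypothetical call; values illustrative):

    vcs = SubversionVCS(options)  # options as parsed by this script
    base, new, is_binary, status = vcs.GetBaseFile("foo/bar.cc")
    # base      -> BASE-revision text, with SVN keywords collapsed
    # new       -> None; text content is reconstructed from the diff
    # is_binary -> False
    # status    -> first five columns of "svn status" output, e.g. "M    "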
-
-class GitVCS(VersionControlSystem):
- """Implementation of the VersionControlSystem interface for Git."""
-
- def __init__(self, options):
- super(GitVCS, self).__init__(options)
- # Map of filename -> (hash before, hash after) of base file.
- # Hashes for "no such file" are represented as None.
- self.hashes = {}
- # Map of new filename -> old filename for renames.
- self.renames = {}
-
- def GenerateDiff(self, extra_args):
- # This is more complicated than svn's GenerateDiff because we must convert
- # the diff output to include an svn-style "Index:" line as well as record
- # the hashes of the files, so we can upload them along with our diff.
-
- # Special hash used by git to indicate "no such content".
- NULL_HASH = "0"*40
-
- extra_args = extra_args[:]
- if self.options.revision:
- extra_args = [self.options.revision] + extra_args
-
- # --no-ext-diff is broken in some versions of Git, so try to work around
- # this by overriding the environment (but there is still a problem if the
- # git config key "diff.external" is used).
- env = os.environ.copy()
- if 'GIT_EXTERNAL_DIFF' in env: del env['GIT_EXTERNAL_DIFF']
- gitdiff = RunShell(["git", "diff", "--no-ext-diff", "--full-index", "-M"]
- + extra_args, env=env)
-
- def IsFileNew(filename):
- return filename in self.hashes and self.hashes[filename][0] is None
-
- def AddSubversionPropertyChange(filename):
- """Add svn's property change information into the patch if given file is
- new file.
-
- We use Subversion's auto-props setting to retrieve its property.
- See http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2 for
- Subversion's [auto-props] setting.
- """
- if self.options.emulate_svn_auto_props and IsFileNew(filename):
- svnprops = GetSubversionPropertyChanges(filename)
- if svnprops:
- svndiff.append("\n" + svnprops + "\n")
-
- svndiff = []
- filecount = 0
- filename = None
- for line in gitdiff.splitlines():
- match = re.match(r"diff --git a/(.*) b/(.*)$", line)
- if match:
- # Add auto property here for previously seen file.
- if filename is not None:
- AddSubversionPropertyChange(filename)
- filecount += 1
- # Intentionally use the "after" filename so we can show renames.
- filename = match.group(2)
- svndiff.append("Index: %s\n" % filename)
- if match.group(1) != match.group(2):
- self.renames[match.group(2)] = match.group(1)
- else:
- # The "index" line in a git diff looks like this (long hashes elided):
- # index 82c0d44..b2cee3f 100755
- # We want to save the left hash, as that identifies the base file.
- match = re.match(r"index (\w+)\.\.(\w+)", line)
- if match:
- before, after = (match.group(1), match.group(2))
- if before == NULL_HASH:
- before = None
- if after == NULL_HASH:
- after = None
- self.hashes[filename] = (before, after)
- svndiff.append(line + "\n")
- if not filecount:
- ErrorExit("No valid patches found in output from git diff")
- # Add auto property for the last seen file.
- assert filename is not None
- AddSubversionPropertyChange(filename)
- return "".join(svndiff)
-
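Stripped of rename tracking, hash capture, and auto-props emulation, the
conversion reduces to rewriting the git headers into svn-style "Index:"
lines, as in this sketch:

    import re

    def git_to_svn_headers(gitdiff):
        out = []
        for line in gitdiff.splitlines():
            match = re.match(r"diff --git a/(.*) b/(.*)$", line)
            if match:
                # Use the "after" name so renames show their new path.
                out.append("Index: %s\n" % match.group(2))
            else:
                out.append(line + "\n")
        return "".join(out)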
- def GetUnknownFiles(self):
- status = RunShell(["git", "ls-files", "--exclude-standard", "--others"],
- silent_ok=True)
- return status.splitlines()
-
- def GetFileContent(self, file_hash, is_binary):
- """Returns the content of a file identified by its git hash."""
- data, retcode = RunShellWithReturnCode(["git", "show", file_hash],
- universal_newlines=not is_binary)
- if retcode:
- ErrorExit("Got error status from 'git show %s'" % file_hash)
- return data
-
- def GetBaseFile(self, filename):
- hash_before, hash_after = self.hashes.get(filename, (None,None))
- base_content = None
- new_content = None
- is_binary = self.IsBinary(filename)
- status = None
-
- if filename in self.renames:
- status = "A +" # Match svn attribute name for renames.
- if filename not in self.hashes:
- # If a rename doesn't change the content, we never get a hash.
- base_content = RunShell(["git", "show", "HEAD:" + filename])
- elif not hash_before:
- status = "A"
- base_content = ""
- elif not hash_after:
- status = "D"
- else:
- status = "M"
-
- is_image = self.IsImage(filename)
-
- # Grab the before/after content if we need it.
- # We should include file contents if it's text or it's an image.
- if not is_binary or is_image:
- # Grab the base content if we don't have it already.
- if base_content is None and hash_before:
- base_content = self.GetFileContent(hash_before, is_binary)
- # Only include the "after" file if it's an image; otherwise it
- # it is reconstructed from the diff.
- if is_image and hash_after:
- new_content = self.GetFileContent(hash_after, is_binary)
-
- return (base_content, new_content, is_binary, status)
-
-
-class MercurialVCS(VersionControlSystem):
- """Implementation of the VersionControlSystem interface for Mercurial."""
-
- def __init__(self, options, repo_dir):
- super(MercurialVCS, self).__init__(options)
- # Absolute path to repository (we can be in a subdir)
- self.repo_dir = os.path.normpath(repo_dir)
- # Compute the subdir
- cwd = os.path.normpath(os.getcwd())
- assert cwd.startswith(self.repo_dir)
- self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
- if self.options.revision:
- self.base_rev = self.options.revision
- else:
- self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()
-
- def _GetRelPath(self, filename):
- """Get relative path of a file according to the current directory,
- given its logical path in the repo."""
- assert filename.startswith(self.subdir), (filename, self.subdir)
- return filename[len(self.subdir):].lstrip(r"\/")
-
- def GenerateDiff(self, extra_args):
- # If no files are specified, restrict to the current subdir.
- extra_args = extra_args or ["."]
- cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
- data = RunShell(cmd, silent_ok=True)
- svndiff = []
- filecount = 0
- for line in data.splitlines():
- m = re.match("diff --git a/(\S+) b/(\S+)", line)
- if m:
- # Modify the line so it looks as if it came from svn diff.
- # With this modification no changes on the server side are required
- # to make upload.py work with Mercurial repos.
- # NOTE: for proper handling of moved/copied files, we have to use
- # the second filename.
- filename = m.group(2)
- svndiff.append("Index: %s" % filename)
- svndiff.append("=" * 67)
- filecount += 1
- logging.info(line)
- else:
- svndiff.append(line)
- if not filecount:
- ErrorExit("No valid patches found in output from hg diff")
- return "\n".join(svndiff) + "\n"
-
- def GetUnknownFiles(self):
- """Return a list of files unknown to the VCS."""
- args = []
- status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
- silent_ok=True)
- unknown_files = []
- for line in status.splitlines():
- st, fn = line.split(" ", 1)
- if st == "?":
- unknown_files.append(fn)
- return unknown_files
-
- def GetBaseFile(self, filename):
- # "hg status" and "hg cat" both take a path relative to the current subdir
- # rather than to the repo root, but "hg diff" has given us the full path
- # to the repo root.
- base_content = ""
- new_content = None
- is_binary = False
- oldrelpath = relpath = self._GetRelPath(filename)
- # "hg status -C" returns two lines for moved/copied files, one otherwise
- out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
- out = out.splitlines()
- # HACK: strip error message about missing file/directory if it isn't in
- # the working copy
- if out[0].startswith('%s: ' % relpath):
- out = out[1:]
- if len(out) > 1:
- # Moved/copied => considered as modified, use old filename to
- # retrieve base contents
- oldrelpath = out[1].strip()
- status = "M"
- else:
- status, _ = out[0].split(' ', 1)
- if ":" in self.base_rev:
- base_rev = self.base_rev.split(":", 1)[0]
- else:
- base_rev = self.base_rev
- if status != "A":
- base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
- silent_ok=True)
- is_binary = "\0" in base_content # Mercurial's heuristic
- if status != "R":
- new_content = open(relpath, "rb").read()
- is_binary = is_binary or "\0" in new_content
- if is_binary and base_content:
- # Fetch again without converting newlines
- base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
- silent_ok=True, universal_newlines=False)
- if not is_binary or not self.IsImage(relpath):
- new_content = None
- return base_content, new_content, is_binary, status
-
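The binary detection here is Mercurial's own heuristic: any NUL byte in the
content marks it as binary. A tiny illustration:

    def looks_binary(data):
        # Mercurial's heuristic, as used above.
        return "\0" in data

    print looks_binary("plain text\n")         # False
    print looks_binary("\x89PNG\x00\x01\x02")  # True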
-
-# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync.
-def SplitPatch(data):
- """Splits a patch into separate pieces for each file.
-
- Args:
- data: A string containing the output of svn diff.
-
- Returns:
- A list of 2-tuple (filename, text) where text is the svn diff output
- pertaining to filename.
- """
- patches = []
- filename = None
- diff = []
- for line in data.splitlines(True):
- new_filename = None
- if line.startswith('Index:'):
- unused, new_filename = line.split(':', 1)
- new_filename = new_filename.strip()
- elif line.startswith('Property changes on:'):
- unused, temp_filename = line.split(':', 1)
- # When a file is modified, paths use '/' between directories; however,
- # when a property is modified, '\' is used on Windows. Make them the same,
- # otherwise the file shows up twice.
- temp_filename = temp_filename.strip().replace('\\', '/')
- if temp_filename != filename:
- # File has property changes but no modifications, create a new diff.
- new_filename = temp_filename
- if new_filename:
- if filename and diff:
- patches.append((filename, ''.join(diff)))
- filename = new_filename
- diff = [line]
- continue
- if diff is not None:
- diff.append(line)
- if filename and diff:
- patches.append((filename, ''.join(diff)))
- return patches
-
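A doctest-style illustration of the splitting, assuming this script's
SplitPatch and svn-diff-shaped input:

    sample = ("Index: foo.cc\n"
              "--- foo.cc\n"
              "+++ foo.cc\n"
              "@@ -1 +1 @@\n"
              "-old\n"
              "+new\n"
              "Index: bar.h\n"
              "--- bar.h\n"
              "+++ bar.h\n")
    for filename, text in SplitPatch(sample):
        print filename, len(text.splitlines())
    # foo.cc 6
    # bar.h 3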
-
-def UploadSeparatePatches(issue, rpc_server, patchset, data, options):
- """Uploads a separate patch for each file in the diff output.
-
- Returns a list of [patch_key, filename] for each file.
- """
- patches = SplitPatch(data)
- rv = []
- for patch in patches:
- if len(patch[1]) > MAX_UPLOAD_SIZE:
- print ("Not uploading the patch for " + patch[0] +
- " because the file is too large.")
- continue
- form_fields = [("filename", patch[0])]
- if not options.download_base:
- form_fields.append(("content_upload", "1"))
- files = [("data", "data.diff", patch[1])]
- ctype, body = EncodeMultipartFormData(form_fields, files)
- url = "/%d/upload_patch/%d" % (int(issue), int(patchset))
- print "Uploading patch for " + patch[0]
- response_body = rpc_server.Send(url, body, content_type=ctype)
- lines = response_body.splitlines()
- if not lines or lines[0] != "OK":
- StatusUpdate(" --> %s" % response_body)
- sys.exit(1)
- rv.append([lines[1], patch[0]])
- return rv
-
-
-def GuessVCSName():
- """Helper to guess the version control system.
-
- This examines the current directory, guesses which VersionControlSystem
- we're using, and returns a string indicating which VCS is detected.
-
- Returns:
- A pair (vcs, output). vcs is a string indicating which VCS was detected
- and is one of VCS_GIT, VCS_MERCURIAL, VCS_SUBVERSION, or VCS_UNKNOWN.
- output is a string containing any interesting output from the vcs
- detection routine, or None if there is nothing interesting.
- """
- # Mercurial has a command to get the base directory of a repository.
- # Try running it, but don't die if we don't have hg installed.
- # NOTE: we try Mercurial first as it can sit on top of an SVN working copy.
- try:
- out, returncode = RunShellWithReturnCode(["hg", "root"])
- if returncode == 0:
- return (VCS_MERCURIAL, out.strip())
- except OSError, (errno, message):
- if errno != 2: # ENOENT -- they don't have hg installed.
- raise
-
- # Subversion has a .svn in all working directories.
- if os.path.isdir('.svn'):
- logging.info("Guessed VCS = Subversion")
- return (VCS_SUBVERSION, None)
-
- # Git has a command to test if you're in a git tree.
- # Try running it, but don't die if we don't have git installed.
- try:
- out, returncode = RunShellWithReturnCode(["git", "rev-parse",
- "--is-inside-work-tree"])
- if returncode == 0:
- return (VCS_GIT, None)
- except OSError, (errno, message):
- if errno != 2: # ENOENT -- they don't have git installed.
- raise
-
- return (VCS_UNKNOWN, None)
-
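Typical use of the detector (a usage sketch; the VCS_* constants are defined
earlier in this script):

    vcs, extra = GuessVCSName()
    if vcs == VCS_MERCURIAL:
        print "Mercurial repo rooted at", extra  # output of "hg root"
    elif vcs == VCS_SUBVERSION:
        print "Subversion working copy (.svn/ present)"
    elif vcs == VCS_GIT:
        print "inside a git work tree"
    else:
        print "no VCS detected"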
-
-def GuessVCS(options):
- """Helper to guess the version control system.
-
- This verifies any user-specified VersionControlSystem (by command line
- or environment variable). If the user didn't specify one, this examines
- the current directory, guesses which VersionControlSystem we're using,
- and returns an instance of the appropriate class. Exit with an error
- if we can't figure it out.
-
- Returns:
- A VersionControlSystem instance. Exits if the VCS can't be guessed.
- """
- vcs = options.vcs
- if not vcs:
- vcs = os.environ.get("CODEREVIEW_VCS")
- if vcs:
- v = VCS_ABBREVIATIONS.get(vcs.lower())
- if v is None:
- ErrorExit("Unknown version control system %r specified." % vcs)
- (vcs, extra_output) = (v, None)
- else:
- (vcs, extra_output) = GuessVCSName()
-
- if vcs == VCS_MERCURIAL:
- if extra_output is None:
- extra_output = RunShell(["hg", "root"]).strip()
- return MercurialVCS(options, extra_output)
- elif vcs == VCS_SUBVERSION:
- return SubversionVCS(options)
- elif vcs == VCS_GIT:
- return GitVCS(options)
-
- ErrorExit(("Could not guess version control system. "
- "Are you in a working copy directory?"))
-
-
-def CheckReviewer(reviewer):
- """Validate a reviewer -- either a nickname or an email addres.
-
- Args:
- reviewer: A nickname or an email address.
-
- Calls ErrorExit() if it is an invalid email address.
- """
- if "@" not in reviewer:
- return # Assume nickname
- parts = reviewer.split("@")
- if len(parts) > 2:
- ErrorExit("Invalid email address: %r" % reviewer)
- assert len(parts) == 2
- if "." not in parts[1]:
- ErrorExit("Invalid email address: %r" % reviewer)
-
-
-def LoadSubversionAutoProperties():
- """Returns the content of [auto-props] section of Subversion's config file as
- a dictionary.
-
- Returns:
- A dictionary whose key-value pairs correspond to the [auto-props]
- section's key-value pairs.
- An empty dictionary is returned when:
- - the config file doesn't exist, or
- - 'enable-auto-props' is not set to a true value in [miscellany].
- """
- # TODO(hayato): Windows users might use a different path for the configuration file.
- subversion_config = os.path.expanduser("~/.subversion/config")
- if not os.path.exists(subversion_config):
- return {}
- config = ConfigParser.ConfigParser()
- config.read(subversion_config)
- if (config.has_section("miscellany") and
- config.has_option("miscellany", "enable-auto-props") and
- config.getboolean("miscellany", "enable-auto-props") and
- config.has_section("auto-props")):
- props = {}
- for file_pattern in config.options("auto-props"):
- props[file_pattern] = ParseSubversionPropertyValues(
- config.get("auto-props", file_pattern))
- return props
- else:
- return {}
-
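For reference, a hypothetical ~/.subversion/config that would produce a
non-empty map looks like this:

    [miscellany]
    enable-auto-props = yes

    [auto-props]
    *.cc = svn:eol-style=LF
    *.png = svn:mime-type=image/png;svn:executable

With that file in place, LoadSubversionAutoProperties() would return
{'*.cc': [('svn:eol-style', 'LF')], '*.png': [('svn:mime-type', 'image/png'),
('svn:executable', '*')]}.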
-def ParseSubversionPropertyValues(props):
- """Parse the given property value which comes from [auto-props] section and
- returns a list whose element is a (svn_prop_key, svn_prop_value) pair.
-
- See the following doctest for example.
-
- >>> ParseSubversionPropertyValues('svn:eol-style=LF')
- [('svn:eol-style', 'LF')]
- >>> ParseSubversionPropertyValues('svn:mime-type=image/jpeg')
- [('svn:mime-type', 'image/jpeg')]
- >>> ParseSubversionPropertyValues('svn:eol-style=LF;svn:executable')
- [('svn:eol-style', 'LF'), ('svn:executable', '*')]
- """
- key_value_pairs = []
- for prop in props.split(";"):
- key_value = prop.split("=")
- assert len(key_value) <= 2
- if len(key_value) == 1:
- # If no value is given, use '*', per Subversion's convention.
- key_value_pairs.append((key_value[0], "*"))
- else:
- key_value_pairs.append((key_value[0], key_value[1]))
- return key_value_pairs
-
-
-def GetSubversionPropertyChanges(filename):
- """Return a Subversion's 'Property changes on ...' string, which is used in
- the patch file.
-
- Args:
- filename: filename whose property might be set by [auto-props] config.
-
- Returns:
- A string like 'Property changes on |filename| ...' if the given |filename|
- matches any entries in the [auto-props] section; None otherwise.
- """
- global svn_auto_props_map
- if svn_auto_props_map is None:
- svn_auto_props_map = LoadSubversionAutoProperties()
-
- all_props = []
- for file_pattern, props in svn_auto_props_map.items():
- if fnmatch.fnmatch(filename, file_pattern):
- all_props.extend(props)
- if all_props:
- return FormatSubversionPropertyChanges(filename, all_props)
- return None
-
-
-def FormatSubversionPropertyChanges(filename, props):
- """Returns Subversion's 'Property changes on ...' strings using given filename
- and properties.
-
- Args:
- filename: filename
- props: A list whose element is a (svn_prop_key, svn_prop_value) pair.
-
- Returns:
- A string which can be used in the patch file for Subversion.
-
- See the following doctest for example.
-
- >>> print FormatSubversionPropertyChanges('foo.cc', [('svn:eol-style', 'LF')])
- Property changes on: foo.cc
- ___________________________________________________________________
- Added: svn:eol-style
- + LF
- <BLANKLINE>
- """
- prop_changes_lines = [
- "Property changes on: %s" % filename,
- "___________________________________________________________________"]
- for key, value in props:
- prop_changes_lines.append("Added: " + key)
- prop_changes_lines.append(" + " + value)
- return "\n".join(prop_changes_lines) + "\n"
-
-
-def RealMain(argv, data=None):
- """The real main function.
-
- Args:
- argv: Command line arguments.
- data: Diff contents. If None (default) the diff is generated by
- the VersionControlSystem implementation returned by GuessVCS().
-
- Returns:
- A 2-tuple (issue id, patchset id).
- The patchset id is None if the base files are not uploaded by this
- script (applies only to SVN checkouts).
- """
- logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:"
- "%(lineno)s %(message)s "))
- os.environ['LC_ALL'] = 'C'
- options, args = parser.parse_args(argv[1:])
- global verbosity
- verbosity = options.verbose
- if verbosity >= 3:
- logging.getLogger().setLevel(logging.DEBUG)
- elif verbosity >= 2:
- logging.getLogger().setLevel(logging.INFO)
-
- vcs = GuessVCS(options)
-
- base = options.base_url
- if isinstance(vcs, SubversionVCS):
- # Guessing the base field is only supported for Subversion.
- # Note: Fetching base files may become deprecated in future releases.
- guessed_base = vcs.GuessBase(options.download_base)
- if base:
- if guessed_base and base != guessed_base:
- print "Using base URL \"%s\" from --base_url instead of \"%s\"" % \
- (base, guessed_base)
- else:
- base = guessed_base
-
- if not base and options.download_base:
- options.download_base = True
- logging.info("Enabled upload of base file")
- if not options.assume_yes:
- vcs.CheckForUnknownFiles()
- if data is None:
- data = vcs.GenerateDiff(args)
- files = vcs.GetBaseFiles(data)
- if verbosity >= 1:
- print "Upload server:", options.server, "(change with -s/--server)"
- if options.issue:
- prompt = "Message describing this patch set: "
- else:
- prompt = "New issue subject: "
- message = options.message or raw_input(prompt).strip()
- if not message:
- ErrorExit("A non-empty message is required")
- rpc_server = GetRpcServer(options.server,
- options.email,
- options.host,
- options.save_cookies)
- form_fields = [("subject", message)]
- if base:
- form_fields.append(("base", base))
- if options.issue:
- form_fields.append(("issue", str(options.issue)))
- if options.email:
- form_fields.append(("user", options.email))
- if options.reviewers:
- for reviewer in options.reviewers.split(','):
- CheckReviewer(reviewer)
- form_fields.append(("reviewers", options.reviewers))
- if options.cc:
- for cc in options.cc.split(','):
- CheckReviewer(cc)
- form_fields.append(("cc", options.cc))
- description = options.description
- if options.description_file:
- if options.description:
- ErrorExit("Can't specify description and description_file")
- file = open(options.description_file, 'r')
- description = file.read()
- file.close()
- if description:
- form_fields.append(("description", description))
- # Send a hash of all the base files so the server can determine if a copy
- # already exists in an earlier patchset.
- base_hashes = ""
- for file, info in files.iteritems():
- if not info[0] is None:
- checksum = md5(info[0]).hexdigest()
- if base_hashes:
- base_hashes += "|"
- base_hashes += checksum + ":" + file
- form_fields.append(("base_hashes", base_hashes))
- if options.private:
- if options.issue:
- print "Warning: Private flag ignored when updating an existing issue."
- else:
- form_fields.append(("private", "1"))
- # If we're uploading base files, don't send the email before the uploads, so
- # that it contains the file status.
- if options.send_mail and options.download_base:
- form_fields.append(("send_mail", "1"))
- if not options.download_base:
- form_fields.append(("content_upload", "1"))
- if len(data) > MAX_UPLOAD_SIZE:
- print "Patch is large, so uploading file patches separately."
- uploaded_diff_file = []
- form_fields.append(("separate_patches", "1"))
- else:
- uploaded_diff_file = [("data", "data.diff", data)]
- ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file)
- response_body = rpc_server.Send("/upload", body, content_type=ctype)
- patchset = None
- if not options.download_base or not uploaded_diff_file:
- lines = response_body.splitlines()
- if len(lines) >= 2:
- msg = lines[0]
- patchset = lines[1].strip()
- patches = [x.split(" ", 1) for x in lines[2:]]
- else:
- msg = response_body
- else:
- msg = response_body
- StatusUpdate(msg)
- if not response_body.startswith("Issue created.") and \
- not response_body.startswith("Issue updated."):
- sys.exit(0)
- issue = msg[msg.rfind("/")+1:]
-
- if not uploaded_diff_file:
- result = UploadSeparatePatches(issue, rpc_server, patchset, data, options)
- if not options.download_base:
- patches = result
-
- if not options.download_base:
- vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files)
- if options.send_mail:
- rpc_server.Send("/" + issue + "/mail", payload="")
- return issue, patchset
-
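A hypothetical invocation (the flag spellings are illustrative; the real
option parser is defined earlier in this script):

    issue, patchset = RealMain(["upload.py",
                                "--server=codereview.appspot.com",
                                "--message=Collapse SVN keywords"])
    print "issue %s, patchset %s" % (issue, patchset)
    # patchset is None when this run did not upload the base files itself.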
-
-def main():
- try:
- RealMain(sys.argv)
- except KeyboardInterrupt:
- print
- StatusUpdate("Interrupted.")
- sys.exit(1)
-
-
-if __name__ == "__main__":
- main()
diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py
index c21fdc6..ee8c669 100644
--- a/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py
+++ b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py
@@ -75,8 +75,35 @@ class Rollout(IRCCommand):
tool.bugs.bug_url_for_bug_id(bug_id))
+class Help(IRCCommand):
+ def execute(self, nick, args, tool, sheriff):
+ return "%s: Available commands: %s" % (nick, ", ".join(commands.keys()))
+
+
class Hi(IRCCommand):
def execute(self, nick, args, tool, sheriff):
quips = tool.bugs.quips()
quips.append('"Only you can prevent forest fires." -- Smokey the Bear')
return random.choice(quips)
+
+
+class Eliza(IRCCommand):
+ therapist = None
+
+ def __init__(self):
+ if not self.therapist:
+ import webkitpy.thirdparty.autoinstalled.eliza as eliza
+ Eliza.therapist = eliza.eliza()
+
+ def execute(self, nick, args, tool, sheriff):
+ return "%s: %s" % (nick, self.therapist.respond(" ".join(args)))
+
+
+# FIXME: Lame. We should have an auto-registering CommandCenter.
+commands = {
+ "last-green-revision": LastGreenRevision,
+ "restart": Restart,
+ "rollout": Rollout,
+ "help": Help,
+ "hi": Hi,
+}
diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py
new file mode 100644
index 0000000..7aeb6a0
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+
+from webkitpy.tool.bot.irc_command import *
+
+
+class IRCCommandTest(unittest.TestCase):
+ def test_eliza(self):
+ eliza = Eliza()
+ eliza.execute("tom", "hi", None, None)
+ eliza.execute("tom", "bye", None, None)
diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py b/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py
index ac7a760..a1a66a1 100644
--- a/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py
+++ b/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py
@@ -113,7 +113,7 @@ class QueueEngine:
# handled in the child process and we should just keep looping.
if e.exit_code == self.handled_error_code:
continue
- message = "Unexpected failure when landing patch! Please file a bug against webkit-patch.\n%s" % e.message_with_output()
+ message = "Unexpected failure when processing patch! Please file a bug against webkit-patch.\n%s" % e.message_with_output()
self._delegate.handle_unexpected_error(work_item, message)
except TerminateQueue, e:
log("\nTerminateQueue exception received.")
diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py
index 43aa9c3..de77222 100644
--- a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py
+++ b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py
@@ -52,14 +52,6 @@ class _IRCThreadTearoff(IRCBotDelegate):
class SheriffIRCBot(object):
- # FIXME: Lame. We should have an auto-registering CommandCenter.
- commands = {
- "last-green-revision": irc_command.LastGreenRevision,
- "restart": irc_command.Restart,
- "rollout": irc_command.Rollout,
- "hi": irc_command.Hi,
- }
-
def __init__(self, tool, sheriff):
self._tool = tool
self._sheriff = sheriff
@@ -75,15 +67,13 @@ class SheriffIRCBot(object):
tokenized_request = request.strip().split(" ")
if not tokenized_request:
return
- command = self.commands.get(tokenized_request[0])
+ command = irc_command.commands.get(tokenized_request[0])
+ args = tokenized_request[1:]
if not command:
- self._tool.irc().post("%s: Available commands: %s" % (
- nick, ", ".join(self.commands.keys())))
- return
- response = command().execute(nick,
- tokenized_request[1:],
- self._tool,
- self._sheriff)
+ # Give the peoples someone to talk with.
+ command = irc_command.Eliza
+ args = tokenized_request
+ response = command().execute(nick, args, self._tool, self._sheriff)
if response:
self._tool.irc().post(response)
diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py
index d5116e4..21bff12 100644
--- a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py
@@ -50,9 +50,9 @@ class SheriffIRCBotTest(unittest.TestCase):
expected_stderr = 'MOCK: irc.post: "Only you can prevent forest fires." -- Smokey the Bear\n'
OutputCapture().assert_outputs(self, run, args=["hi"], expected_stderr=expected_stderr)
- def test_bogus(self):
- expected_stderr = "MOCK: irc.post: mock_nick: Available commands: rollout, hi, restart, last-green-revision\n"
- OutputCapture().assert_outputs(self, run, args=["bogus"], expected_stderr=expected_stderr)
+ def test_help(self):
+ expected_stderr = "MOCK: irc.post: mock_nick: Available commands: rollout, hi, help, restart, last-green-revision\n"
+ OutputCapture().assert_outputs(self, run, args=["help"], expected_stderr=expected_stderr)
def test_lgr(self):
expected_stderr = "MOCK: irc.post: mock_nick: http://trac.webkit.org/changeset/9479\n"
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/download.py b/WebKitTools/Scripts/webkitpy/tool/commands/download.py
index a283da9..a85b09a 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/download.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/download.py
@@ -182,6 +182,18 @@ class BuildAttachment(AbstractPatchSequencingCommand, ProcessAttachmentsMixin):
]
+class PostAttachmentToRietveld(AbstractPatchSequencingCommand, ProcessAttachmentsMixin):
+ name = "post-attachment-to-rietveld"
+ help_text = "Uploads a bugzilla attachment to rietveld"
+ arguments_names = "ATTACHMENTID"
+ main_steps = [
+ steps.CleanWorkingDirectory,
+ steps.Update,
+ steps.ApplyPatch,
+ steps.PostCodeReview,
+ ]
+
+
class AbstractPatchApplyingCommand(AbstractPatchSequencingCommand):
prepare_steps = [
steps.EnsureLocalCommitIfNeeded,
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py
index 08a4377..958620a 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py
@@ -108,6 +108,10 @@ class DownloadCommandsTest(CommandsTest):
expected_stderr = "Processing 1 patch from 1 bug.\nUpdating working directory\nProcessing patch 197 from bug 42.\nBuilding WebKit\n"
self.assert_execute_outputs(BuildAttachment(), [197], options=self._default_options(), expected_stderr=expected_stderr)
+ def test_post_attachment_to_rietveld(self):
+ expected_stderr = "Processing 1 patch from 1 bug.\nUpdating working directory\nProcessing patch 197 from bug 42.\nMOCK: Uploading patch to rietveld\nMOCK setting flag 'in-rietveld' to '+' on attachment '197' with comment 'None' and additional comment 'None'\n"
+ self.assert_execute_outputs(PostAttachmentToRietveld(), [197], options=self._default_options(), expected_stderr=expected_stderr)
+
def test_land_attachment(self):
# FIXME: This expected result is imperfect, notice how it's seeing the same patch as still there after it thought it would have cleared the flags.
expected_stderr = """Processing 1 patch from 1 bug.
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py
index 27e09ba..67393d8 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py
@@ -43,17 +43,23 @@ class EarlyWarningSytemTest(QueuesTest):
string_replacemnts = {
"name": ews.name,
"checkout_dir": os.getcwd(), # FIXME: Use of os.getcwd() is wrong, should be scm.checkout_root
+ "port": ews.port_name,
+ "watchers": ews.watchers,
}
expected_stderr = {
"begin_work_queue": "CAUTION: %(name)s will discard all local changes in \"%(checkout_dir)s\"\nRunning WebKit %(name)s.\n" % string_replacemnts,
"handle_unexpected_error": "Mock error message\n",
"next_work_item": "MOCK: update_work_items: %(name)s [103]\n" % string_replacemnts,
"process_work_item": "MOCK: update_status: %(name)s Pass\n" % string_replacemnts,
+ "handle_script_error": "MOCK: update_status: %(name)s ScriptError error message\nMOCK bug comment: bug_id=345, cc=%(watchers)s\n--- Begin comment ---\\Attachment 1234 did not build on %(port)s:\nBuild output: http://dummy_url\n--- End comment ---\n\n" % string_replacemnts,
}
return expected_stderr
def _test_ews(self, ews):
- self.assert_queue_outputs(ews, expected_stderr=self._default_expected_stderr(ews))
+ expected_exceptions = {
+ "handle_script_error": SystemExit,
+ }
+ self.assert_queue_outputs(ews, expected_stderr=self._default_expected_stderr(ews), expected_exceptions=expected_exceptions)
# FIXME: If all EWSes are going to output the same text, we
# could test them all in one method with a for loop over an array.
@@ -73,4 +79,7 @@ class EarlyWarningSytemTest(QueuesTest):
ews = MacEWS()
expected_stderr = self._default_expected_stderr(ews)
expected_stderr["process_work_item"] = "MOCK: update_status: mac-ews Error: mac-ews cannot process patches from non-committers :(\n"
- self.assert_queue_outputs(ews, expected_stderr=expected_stderr)
+ expected_exceptions = {
+ "handle_script_error": SystemExit,
+ }
+ self.assert_queue_outputs(ews, expected_stderr=expected_stderr, expected_exceptions=expected_exceptions)
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queues.py b/WebKitTools/Scripts/webkitpy/tool/commands/queues.py
index 08bd3aa..d14ac9e 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/queues.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/queues.py
@@ -121,7 +121,7 @@ class AbstractQueue(Command, QueueEngineDelegate):
@classmethod
def _update_status_for_script_error(cls, tool, state, script_error, is_error=False):
- message = script_error.message
+ message = str(script_error)
if is_error:
message = "Error: %s" % message
output = script_error.message_with_output(output_limit=1024*1024) # 1MB
@@ -289,7 +289,6 @@ class CommitQueue(AbstractPatchQueue, StepSequenceErrorHandler):
self.committer_validator.reject_patch_from_commit_queue(patch.id(), message)
# StepSequenceErrorHandler methods
-
@staticmethod
def _error_message_for_bug(tool, status_id, script_error):
if not script_error.output:
@@ -304,6 +303,51 @@ class CommitQueue(AbstractPatchQueue, StepSequenceErrorHandler):
validator.reject_patch_from_commit_queue(state["patch"].id(), cls._error_message_for_bug(tool, status_id, script_error))
+class RietveldUploadQueue(AbstractPatchQueue, StepSequenceErrorHandler):
+ name = "rietveld-upload-queue"
+
+ def __init__(self):
+ AbstractPatchQueue.__init__(self)
+
+ # AbstractPatchQueue methods
+
+ def next_work_item(self):
+ patch_id = self.tool.bugs.queries.fetch_first_patch_from_rietveld_queue()
+ if patch_id:
+ return patch_id
+ self._update_status("Empty queue")
+
+ def should_proceed_with_work_item(self, patch):
+ self._update_status("Uploading patch", patch)
+ return True
+
+ def process_work_item(self, patch):
+ try:
+ self.run_webkit_patch(["post-attachment-to-rietveld", "--force-clean", "--non-interactive", "--parent-command=rietveld-upload-queue", patch.id()])
+ self._did_pass(patch)
+ return True
+ except ScriptError, e:
+ if e.exit_code != QueueEngine.handled_error_code:
+ self._did_fail(patch)
+ raise e
+
+ @classmethod
+ def _reject_patch(cls, tool, patch_id):
+ tool.bugs.set_flag_on_attachment(patch_id, "in-rietveld", "-")
+
+ def handle_unexpected_error(self, patch, message):
+ log(message)
+ self._reject_patch(self.tool, patch.id())
+
+ # StepSequenceErrorHandler methods
+
+ @classmethod
+ def handle_script_error(cls, tool, state, script_error):
+ log(script_error.message_with_output())
+ cls._update_status_for_script_error(tool, state, script_error)
+ cls._reject_patch(tool, state["patch"].id())
+
+
class AbstractReviewQueue(AbstractPatchQueue, PersistentPatchCollectionDelegate, StepSequenceErrorHandler):
def __init__(self, options=None):
AbstractPatchQueue.__init__(self, options)
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py
index a5d56da..b32dfa8 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py
@@ -122,10 +122,13 @@ class CommitQueueTest(QueuesTest):
# FIXME: The commit-queue warns about bad committers twice. This is due to the fact that we access Attachment.reviewer() twice and it logs each time.
"next_work_item" : """Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com)
Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com)
+MOCK setting flag 'commit-queue' to '-' on attachment '128' with comment 'Rejecting patch 128 from commit-queue.' and additional comment 'non-committer@example.com does not have committer permissions according to http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/common/config/committers.py.\n\n- If you do not have committer rights please read http://webkit.org/coding/contributing.html for instructions on how to use bugzilla flags.\n\n- If you have committer rights please correct the error in WebKitTools/Scripts/webkitpy/common/config/committers.py by adding yourself to the file (no review needed). Due to bug 30084 the commit-queue will require a restart after your change. Please contact eseidel@chromium.org to request a commit-queue restart. After restart the commit-queue will correctly respect your committer rights.'
MOCK: update_work_items: commit-queue [106, 197]
2 patches in commit-queue [106, 197]
""",
"process_work_item" : "MOCK: update_status: commit-queue Pass\n",
+ "handle_unexpected_error" : "MOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'Mock error message'\n",
+ "handle_script_error": "MOCK: update_status: commit-queue ScriptError error message\nMOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'ScriptError error message'\n",
}
self.assert_queue_outputs(CommitQueue(), expected_stderr=expected_stderr)
@@ -138,11 +141,14 @@ MOCK: update_work_items: commit-queue [106, 197]
# FIXME: The commit-queue warns about bad committers twice. This is due to the fact that we access Attachment.reviewer() twice and it logs each time.
"next_work_item" : """Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com)
Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com)
+MOCK setting flag \'commit-queue\' to \'-\' on attachment \'128\' with comment \'Rejecting patch 128 from commit-queue.\' and additional comment \'non-committer@example.com does not have committer permissions according to http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/common/config/committers.py.\n\n- If you do not have committer rights please read http://webkit.org/coding/contributing.html for instructions on how to use bugzilla flags.\n\n- If you have committer rights please correct the error in WebKitTools/Scripts/webkitpy/common/config/committers.py by adding yourself to the file (no review needed). Due to bug 30084 the commit-queue will require a restart after your change. Please contact eseidel@chromium.org to request a commit-queue restart. After restart the commit-queue will correctly respect your committer rights.\'
MOCK: update_work_items: commit-queue [106, 197]
MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://build.webkit.org
1 patch in commit-queue [106]
""",
"process_work_item" : "MOCK: update_status: commit-queue Builders [\"Builder2\"] are red. See http://build.webkit.org\n",
+ "handle_unexpected_error" : "MOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'Mock error message'\n",
+ "handle_script_error": "MOCK: update_status: commit-queue ScriptError error message\nMOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'ScriptError error message'\n",
}
self.assert_queue_outputs(CommitQueue(), tool=tool, expected_stderr=expected_stderr)
@@ -156,11 +162,14 @@ MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://buil
# FIXME: The commit-queue warns about bad committers twice. This is due to the fact that we access Attachment.reviewer() twice and it logs each time.
"next_work_item": """Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com)
Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com)
+MOCK setting flag \'commit-queue\' to \'-\' on attachment \'128\' with comment \'Rejecting patch 128 from commit-queue.\' and additional comment \'non-committer@example.com does not have committer permissions according to http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/common/config/committers.py.\n\n- If you do not have committer rights please read http://webkit.org/coding/contributing.html for instructions on how to use bugzilla flags.\n\n- If you have committer rights please correct the error in WebKitTools/Scripts/webkitpy/common/config/committers.py by adding yourself to the file (no review needed). Due to bug 30084 the commit-queue will require a restart after your change. Please contact eseidel@chromium.org to request a commit-queue restart. After restart the commit-queue will correctly respect your committer rights.\'
MOCK: update_work_items: commit-queue [106, 197]
MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://build.webkit.org
1 patch in commit-queue [106]
""",
"process_work_item": "MOCK run_and_throw_if_fail: ['echo', '--status-host=example.com', 'land-attachment', '--force-clean', '--build', '--non-interactive', '--ignore-builders', '--build-style=both', '--quiet', 76543]\nMOCK: update_status: commit-queue Pass\n",
+ "handle_unexpected_error": "MOCK setting flag 'commit-queue' to '-' on attachment '76543' with comment 'Rejecting patch 76543 from commit-queue.' and additional comment 'Mock error message'\n",
+ "handle_script_error": "MOCK: update_status: commit-queue ScriptError error message\nMOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'ScriptError error message'\n",
}
self.assert_queue_outputs(CommitQueue(), tool=tool, work_item=rollout_patch, expected_stderr=expected_stderr)
@@ -193,6 +202,18 @@ MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://buil
self.assertEqual(attachments, expected_sort)
+class RietveldUploadQueueTest(QueuesTest):
+ def test_rietveld_upload_queue(self):
+ expected_stderr = {
+ "begin_work_queue": "CAUTION: rietveld-upload-queue will discard all local changes in \"%s\"\nRunning WebKit rietveld-upload-queue.\n" % MockSCM.fake_checkout_root,
+ "should_proceed_with_work_item": "MOCK: update_status: rietveld-upload-queue Uploading patch\n",
+ "process_work_item": "MOCK: update_status: rietveld-upload-queue Pass\n",
+ "handle_unexpected_error": "Mock error message\nMOCK setting flag 'in-rietveld' to '-' on attachment '1234' with comment 'None' and additional comment 'None'\n",
+ "handle_script_error": "ScriptError error message\nMOCK: update_status: rietveld-upload-queue ScriptError error message\nMOCK setting flag 'in-rietveld' to '-' on attachment '1234' with comment 'None' and additional comment 'None'\n",
+ }
+ self.assert_queue_outputs(RietveldUploadQueue(), expected_stderr=expected_stderr)
+
+
class StyleQueueTest(QueuesTest):
def test_style_queue(self):
expected_stderr = {
@@ -201,5 +222,9 @@ class StyleQueueTest(QueuesTest):
"should_proceed_with_work_item": "MOCK: update_status: style-queue Checking style\n",
"process_work_item" : "MOCK: update_status: style-queue Pass\n",
"handle_unexpected_error" : "Mock error message\n",
+ "handle_script_error": "MOCK: update_status: style-queue ScriptError error message\nMOCK bug comment: bug_id=345, cc=[]\n--- Begin comment ---\\Attachment 1234 did not pass style-queue:\n\nScriptError error message\n\nIf any of these errors are false positives, please file a bug against check-webkit-style.\n--- End comment ---\n\n",
+ }
+ expected_exceptions = {
+ "handle_script_error": SystemExit,
}
- self.assert_queue_outputs(StyleQueue(), expected_stderr=expected_stderr)
+ self.assert_queue_outputs(StyleQueue(), expected_stderr=expected_stderr, expected_exceptions=expected_exceptions)
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py b/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py
index bf7e32a..9e17c5c 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py
@@ -30,6 +30,7 @@ import unittest
from webkitpy.common.net.bugzilla import Attachment
from webkitpy.common.system.outputcapture import OutputCapture
+from webkitpy.common.system.executive import ScriptError
from webkitpy.thirdparty.mock import Mock
from webkitpy.tool.mocktool import MockTool
@@ -42,6 +43,14 @@ class MockQueueEngine(object):
pass
+class MockPatch():
+ def id(self):
+ return 1234
+
+ def bug_id(self):
+ return 345
+
+
class QueuesTest(unittest.TestCase):
mock_work_item = Attachment({
"id": 1234,
@@ -50,7 +59,19 @@ class QueuesTest(unittest.TestCase):
"attacher_email": "adam@example.com",
}, None)
- def assert_queue_outputs(self, queue, args=None, work_item=None, expected_stdout=None, expected_stderr=None, options=Mock(), tool=MockTool()):
+ def assert_outputs(self, func, func_name, args, expected_stdout, expected_stderr, expected_exceptions):
+ exception = None
+ if expected_exceptions and func_name in expected_exceptions:
+ exception = expected_exceptions[func_name]
+
+ OutputCapture().assert_outputs(self,
+ func,
+ args=args,
+ expected_stdout=expected_stdout.get(func_name, ""),
+ expected_stderr=expected_stderr.get(func_name, ""),
+ expected_exception=exception)
+
+ def assert_queue_outputs(self, queue, args=None, work_item=None, expected_stdout=None, expected_stderr=None, expected_exceptions=None, options=Mock(), tool=MockTool()):
if not expected_stdout:
expected_stdout = {}
if not expected_stderr:
@@ -63,38 +84,12 @@ class QueuesTest(unittest.TestCase):
queue.execute(options, args, tool, engine=MockQueueEngine)
- OutputCapture().assert_outputs(self,
- queue.queue_log_path,
- expected_stdout=expected_stdout.get("queue_log_path", ""),
- expected_stderr=expected_stderr.get("queue_log_path", ""))
- OutputCapture().assert_outputs(self,
- queue.work_item_log_path,
- args=[work_item],
- expected_stdout=expected_stdout.get("work_item_log_path", ""),
- expected_stderr=expected_stderr.get("work_item_log_path", ""))
- OutputCapture().assert_outputs(self,
- queue.begin_work_queue,
- expected_stdout=expected_stdout.get("begin_work_queue", ""),
- expected_stderr=expected_stderr.get("begin_work_queue", ""))
- OutputCapture().assert_outputs(self,
- queue.should_continue_work_queue,
- expected_stdout=expected_stdout.get("should_continue_work_queue", ""), expected_stderr=expected_stderr.get("should_continue_work_queue", ""))
- OutputCapture().assert_outputs(self,
- queue.next_work_item,
- expected_stdout=expected_stdout.get("next_work_item", ""),
- expected_stderr=expected_stderr.get("next_work_item", ""))
- OutputCapture().assert_outputs(self,
- queue.should_proceed_with_work_item,
- args=[work_item],
- expected_stdout=expected_stdout.get("should_proceed_with_work_item", ""),
- expected_stderr=expected_stderr.get("should_proceed_with_work_item", ""))
- OutputCapture().assert_outputs(self,
- queue.process_work_item,
- args=[work_item],
- expected_stdout=expected_stdout.get("process_work_item", ""),
- expected_stderr=expected_stderr.get("process_work_item", ""))
- OutputCapture().assert_outputs(self,
- queue.handle_unexpected_error,
- args=[work_item, "Mock error message"],
- expected_stdout=expected_stdout.get("handle_unexpected_error", ""),
- expected_stderr=expected_stderr.get("handle_unexpected_error", ""))
+ self.assert_outputs(queue.queue_log_path, "queue_log_path", [], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.work_item_log_path, "work_item_log_path", [work_item], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.begin_work_queue, "begin_work_queue", [], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.should_continue_work_queue, "should_continue_work_queue", [], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.next_work_item, "next_work_item", [], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.should_proceed_with_work_item, "should_proceed_with_work_item", [work_item], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.process_work_item, "process_work_item", [work_item], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.handle_unexpected_error, "handle_unexpected_error", [work_item, "Mock error message"], expected_stdout, expected_stderr, expected_exceptions)
+ self.assert_outputs(queue.handle_script_error, "handle_script_error", [tool, {"patch": MockPatch()}, ScriptError(message="ScriptError error message", script_args="MockErrorCommand")], expected_stdout, expected_stderr, expected_exceptions)
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/upload.py b/WebKitTools/Scripts/webkitpy/tool/commands/upload.py
index cf715b9..e682ca7 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/upload.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/upload.py
@@ -171,7 +171,6 @@ class Post(AbstractPatchUploadingCommand):
steps = [
steps.CheckStyle,
steps.ConfirmDiff,
- steps.PostCodeReview,
steps.ObsoletePatches,
steps.PostDiff,
]
@@ -215,7 +214,6 @@ class Upload(AbstractPatchUploadingCommand):
steps.PrepareChangeLog,
steps.EditChangeLog,
steps.ConfirmDiff,
- steps.PostCodeReview,
steps.ObsoletePatches,
steps.PostDiff,
]
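Post and Upload declare their behavior as ordered lists of step classes, so dropping steps.PostCodeReview means Rietveld posting no longer runs inline during upload; the in-rietveld flag introduced below lets a separate queue pick that work up instead. A minimal sketch of how such a sequenced command presumably drives its steps (the real driver class is not part of this diff):

    # Illustrative only: step classes are instantiated with the shared tool
    # and options, then run in order against a mutable state dict.
    class SequencedCommandSketch(object):
        def __init__(self, step_classes, tool, options):
            self._steps = [cls(tool, options) for cls in step_classes]

        def run(self, state):
            for step in self._steps:
                step.run(state)  # steps communicate by mutating state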
diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py
index d52775b..8fef54a 100644
--- a/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py
@@ -56,8 +56,6 @@ class UploadCommandsTest(CommandsTest):
options.request_commit = False
options.review = True
options.comment = None
- # Rietveld upload code requires a real SCM checkout.
- options.fancy_review = False
options.cc = None
expected_stderr = """Running check-webkit-style
MOCK: user.open_url: file://...
@@ -87,8 +85,6 @@ MOCK: user.open_url: http://example.com/42
options.request_commit = False
options.review = True
options.comment = None
- # Rietveld upload code requires a real SCM checkout.
- options.fancy_review = False
options.cc = None
expected_stderr = """Running check-webkit-style
MOCK: user.open_url: file://...
diff --git a/WebKitTools/Scripts/webkitpy/tool/mocktool.py b/WebKitTools/Scripts/webkitpy/tool/mocktool.py
index 3934ea3..d88190f 100644
--- a/WebKitTools/Scripts/webkitpy/tool/mocktool.py
+++ b/WebKitTools/Scripts/webkitpy/tool/mocktool.py
@@ -86,6 +86,7 @@ _patch3 = {
"name": "Patch3",
"is_obsolete": False,
"is_patch": True,
+ "in-rietveld": "?",
"review": "?",
"attacher_email": "eric@webkit.org",
}
@@ -112,6 +113,7 @@ _patch5 = {
"name": "Patch5",
"is_obsolete": False,
"is_patch": True,
+ "in-rietveld": "?",
"review": "+",
"reviewer_email": "foo@bar.com",
"attacher_email": "eric@webkit.org",
@@ -125,6 +127,7 @@ _patch6 = { # Valid committer, but no reviewer.
"name": "ROLLOUT of r3489",
"is_obsolete": False,
"is_patch": True,
+ "in-rietveld": "-",
"commit-queue": "+",
"committer_email": "foo@bar.com",
"attacher_email": "eric@webkit.org",
@@ -138,6 +141,7 @@ _patch7 = { # Valid review, patch is marked obsolete.
"name": "Patch7",
"is_obsolete": True,
"is_patch": True,
+ "in-rietveld": "+",
"review": "+",
"reviewer_email": "foo@bar.com",
"attacher_email": "eric@webkit.org",
@@ -221,6 +225,12 @@ class MockBugzillaQueries(Mock):
def fetch_patches_from_pending_commit_list(self):
return sum([bug.reviewed_patches() for bug in self._all_bugs()], [])
+ def fetch_first_patch_from_rietveld_queue(self):
+ for bug in self._all_bugs():
+ patches = bug.in_rietveld_queue_patches()
+ if len(patches):
+ return patches[0]
+ raise Exception('No patches in the rietveld queue')
# FIXME: Bugzilla is the wrong Mock-point. Once we have a BugzillaNetwork
# class we should mock that instead.
@@ -287,6 +297,15 @@ class MockBugzilla(Mock):
action_param = "&action=%s" % action
return "%s/%s%s" % (self.bug_server_url, attachment_id, action_param)
+ def set_flag_on_attachment(self,
+ attachment_id,
+ flag_name,
+ flag_value,
+ comment_text=None,
+ additional_comment_text=None):
+ log("MOCK setting flag '%s' to '%s' on attachment '%s' with comment '%s' and additional comment '%s'" % (
+ flag_name, flag_value, attachment_id, comment_text, additional_comment_text))
+
def post_comment_to_bug(self, bug_id, comment_text, cc=None):
log("MOCK bug comment: bug_id=%s, cc=%s\n--- Begin comment ---\%s\n--- End comment ---\n" % (
bug_id, cc, comment_text))
@@ -453,6 +472,9 @@ class MockUser(object):
def confirm(self, message=None):
return True
+ def can_open_url(self):
+ return True
+
def open_url(self, url):
if url.startswith("file://"):
log("MOCK: user.open_url: file://...")
@@ -490,6 +512,8 @@ class MockStatusServer(object):
def update_svn_revision(self, svn_revision, broken_bot):
return 191
+ def results_url_for_status(self, status_id):
+ return "http://dummy_url"
class MockExecute(Mock):
def __init__(self, should_log):
@@ -513,6 +537,15 @@ class MockExecute(Mock):
return "MOCK output of child process"
+class MockRietveld():
+
+ def __init__(self, executive, dryrun=False):
+ pass
+
+ def post(self, diff, message=None, codereview_issue=None, cc=None):
+ log("MOCK: Uploading patch to rietveld")
+
+
class MockTool():
def __init__(self, log_executive=False):
@@ -526,7 +559,7 @@ class MockTool():
self._checkout = MockCheckout()
self.status_server = MockStatusServer()
self.irc_password = "MOCK irc password"
- self.codereview = Rietveld(self.executive)
+ self.codereview = MockRietveld(self.executive)
def scm(self):
return self._scm
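With these additions the mock layer mirrors the Rietveld workflow end to end: patches carry an in-rietveld flag, MockBugzillaQueries can hand out the first queued patch, set_flag_on_attachment logs flag changes, and MockRietveld stands in for the real uploader. A hedged sketch of how a polling queue might exercise these mocks; the loop itself is illustrative, not part of this diff:

    from webkitpy.tool.mocktool import MockTool

    tool = MockTool()
    try:
        # Presumably returns the first patch whose in-rietveld flag is "?",
        # and raises when nothing is queued (see the mock query above).
        patch = tool.bugs.queries.fetch_first_patch_from_rietveld_queue()
        tool.codereview.post(diff="mock diff", message=patch.name())
        tool.bugs.set_flag_on_attachment(patch.id(), "in-rietveld", "+")
    except Exception:
        pass  # empty queue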
diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py b/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py
index abafe63..20f8bbf 100644
--- a/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py
+++ b/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py
@@ -53,8 +53,9 @@ class AbstractStep(object):
return self._port
_well_known_keys = {
- "diff": lambda self: self._tool.scm().create_patch(self._options.git_commit, self._options.squash),
- "changelogs": lambda self: self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash),
+ "diff": lambda self, state: self._tool.scm().create_patch(self._options.git_commit, self._options.squash),
+ "changelogs": lambda self, state: self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash),
+ "bug_title": lambda self, state: self._tool.bugs.fetch_bug(state["bug_id"]).title(),
}
def cached_lookup(self, state, key, promise=None):
@@ -62,7 +63,7 @@ class AbstractStep(object):
return state[key]
if not promise:
promise = self._well_known_keys.get(key)
- state[key] = promise(self)
+ state[key] = promise(self, state)
return state[key]
@classmethod
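Every lookup promise now receives the state dict, which is what lets the new "bug_title" key be computed from a previously stored "bug_id". A self-contained sketch of this memoized-promise pattern; the names below are illustrative, not webkitpy API:

    # Each well-known key maps to a promise that may read earlier keys.
    WELL_KNOWN_KEYS = {
        "bug_title": lambda state: "Title for bug %s" % state["bug_id"],
    }

    def cached_lookup(state, key, promise=None):
        if key in state:
            return state[key]            # computed at most once
        if not promise:
            promise = WELL_KNOWN_KEYS[key]
        state[key] = promise(state)      # promise can depend on prior keys
        return state[key]

    state = {"bug_id": 1234}
    assert cached_lookup(state, "bug_title") == "Title for bug 1234"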
diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py b/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py
index 626fcf3..7e8e348 100644
--- a/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py
+++ b/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py
@@ -46,6 +46,9 @@ class ConfirmDiff(AbstractStep):
]
def _show_pretty_diff(self, diff):
+ if not self._tool.user.can_open_url():
+ return None
+
try:
pretty_patch = PrettyPatch(self._tool.executive,
self._tool.scm().checkout_root)
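The can_open_url() guard keeps ConfirmDiff from launching PrettyPatch in a browser when none is available, which is why MockUser above answers True unconditionally. An illustrative counterpart for a headless environment; the real User class presumably probes the platform rather than hard-coding the answer:

    class HeadlessUser(object):
        def can_open_url(self):
            return False  # no display/browser; _show_pretty_diff bails early

        def open_url(self, url):
            raise AssertionError("open_url should not be reached headlessly")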
diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/options.py b/WebKitTools/Scripts/webkitpy/tool/steps/options.py
index 186d292..fa36f73 100644
--- a/WebKitTools/Scripts/webkitpy/tool/steps/options.py
+++ b/WebKitTools/Scripts/webkitpy/tool/steps/options.py
@@ -41,7 +41,6 @@ class Options(object):
confirm = make_option("--no-confirm", action="store_false", dest="confirm", default=True, help="Skip confirmation steps.")
description = make_option("-m", "--description", action="store", type="string", dest="description", help="Description string for the attachment (default: \"patch\")")
email = make_option("--email", action="store", type="string", dest="email", help="Email address to use in ChangeLogs.")
- fancy_review = make_option("--fancy-review", action="store_true", dest="fancy_review", default=False, help="(Experimental) Upload the patch to Rietveld code review tool.")
force_clean = make_option("--force-clean", action="store_true", dest="force_clean", default=False, help="Clean working directory before applying patches (removes local changes and commits)")
# FIXME: Make commit ranges treat each commit separately instead of squashing them into one.
git_commit = make_option("--git-commit", action="store", dest="git_commit", help="Local git commit to upload/land. If a range, the commits are squashed into one.")
diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py b/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py
index 8397519..f9bc685 100644
--- a/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py
+++ b/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py
@@ -36,33 +36,27 @@ class PostCodeReview(AbstractStep):
return AbstractStep.options() + [
Options.cc,
Options.description,
- Options.fancy_review,
]
def run(self, state):
- if not self._options.fancy_review:
- return
+ patch = state.get("patch")
+ bug_id = patch.bug_id()
+ title = patch.name()
- bug_id = state.get("bug_id")
- if not bug_id:
- raise ScriptError(message="Cannot upload a fancy review without a bug ID.")
-
- message = self._options.description
- if not message:
- # If we have an issue number, then the message becomes the label
- # of the new patch. Otherwise, it becomes the title of the whole
- # issue.
- if state.get("bug_title"):
- # This is the common case for the the first "upload" command.
- message = state.get("bug_title")
- elif bug_id:
- # This is the common case for the "post" command and
- # subsequent runs of the "upload" command.
- message = "Code review for %s" % self._tool.bugs.bug_url_for_bug_id(bug_id)
- else:
- # Unreachable with our current commands, but we might hit
- # this case if we support bug-less code reviews.
- message = "Code review"
+ # If the issue already exists, then the message becomes the label
+ # of the new patch. Otherwise, it becomes the title of the whole
+ # issue.
+ if title:
+            # This is the common case for the first "upload" command.
+ message = title
+ elif bug_id:
+ # This is the common case for the "post" command and
+ # subsequent runs of the "upload" command.
+ message = "Code review for %s" % self._tool.bugs.bug_url_for_bug_id(bug_id)
+ else:
+ # Unreachable with our current commands, but we might hit
+ # this case if we support bug-less code reviews.
+ message = "Code review"
# Use the bug ID as the rietveld issue number. This means rietveld code reviews
# when there are multiple different patches on a bug will be a bit wonky, but
@@ -71,3 +65,5 @@ class PostCodeReview(AbstractStep):
message=message,
codereview_issue=bug_id,
cc=self._options.cc)
+
+ self._tool.bugs.set_flag_on_attachment(patch.id(), 'in-rietveld', '+')
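run() now derives the Rietveld message through a three-way fallback: the patch name labels an upload to an existing issue, a bug URL titles a fresh issue, and a generic string covers the (currently unreachable) bug-less case; afterwards the in-rietveld flag flips to "+" so the patch leaves the queue. The fallback chain, restated as a standalone function where bug_url_for_bug_id stands in for the real Bugzilla helper:

    def review_message(title, bug_id, bug_url_for_bug_id):
        if title:
            return title                 # label for a patch on an issue
        if bug_id:
            return "Code review for %s" % bug_url_for_bug_id(bug_id)
        return "Code review"             # bug-less fallback

    assert review_message("Patch3", 42, None) == "Patch3"
    assert (review_message(None, 42, lambda b: "http://example.com/%s" % b)
            == "Code review for http://example.com/42")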
diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py
index 3a5c013..59048a3 100644
--- a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py
+++ b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py
@@ -28,6 +28,7 @@
import os
+from webkitpy.common.checkout.changelog import ChangeLog
from webkitpy.common.system.executive import ScriptError
from webkitpy.tool.steps.abstractstep import AbstractStep
from webkitpy.tool.steps.options import Options
@@ -46,8 +47,21 @@ class PrepareChangeLog(AbstractStep):
Options.squash,
]
+ def _ensure_bug_url(self, state):
+ if not state.get("bug_id"):
+ return
+ bug_id = state.get("bug_id")
+ changelogs = self.cached_lookup(state, "changelogs")
+ for changelog_path in changelogs:
+ changelog = ChangeLog(changelog_path)
+ if not changelog.latest_entry().bug_id():
+ changelog.set_short_description_and_bug_url(
+ self.cached_lookup(state, "bug_title"),
+ self._tool.bugs.bug_url_for_bug_id(bug_id))
+
def run(self, state):
if self.cached_lookup(state, "changelogs"):
+ self._ensure_bug_url(state)
return
os.chdir(self._tool.scm().checkout_root)
args = [self.port().script_path("prepare-ChangeLog")]
@@ -56,7 +70,7 @@ class PrepareChangeLog(AbstractStep):
if self._options.email:
args.append("--email=%s" % self._options.email)
if self._tool.scm().should_squash(self._options.squash):
- args.append("--merge-base=%s" % self._tool.scm().svn_merge_base())
+ args.append("--merge-base=%s" % self._tool.scm().remote_merge_base())
if self._options.git_commit:
args.append("--git-commit=%s" % self._options.git_commit)
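_ensure_bug_url only touches ChangeLogs whose latest entry has no bug ID yet, relying on set_short_description_and_bug_url to rewrite the boilerplate placeholder. A minimal sketch of that substitution, assuming the placeholder text emitted by prepare-ChangeLog; the unit test added below exercises exactly this replacement:

    PLACEHOLDER = "Need a short description and bug URL (OOPS!)"

    def fill_placeholder(entry_text, title, bug_url):
        # Indentation is illustrative; the real method matches ChangeLog style.
        return entry_text.replace(PLACEHOLDER,
                                  "%s\n        %s" % (title, bug_url))

    print(fill_placeholder("        %s\n" % PLACEHOLDER,
                           "Example title", "http://example.com/1234"))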
diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py
new file mode 100644
index 0000000..1d0db75
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import unittest
+
+from webkitpy.common.checkout.changelog_unittest import ChangeLogTest
+from webkitpy.common.system.outputcapture import OutputCapture
+from webkitpy.thirdparty.mock import Mock
+from webkitpy.tool.mocktool import MockTool
+from webkitpy.tool.steps.preparechangelog import PrepareChangeLog
+
+
+class PrepareChangeLogTest(ChangeLogTest):
+ def test_ensure_bug_url(self):
+ capture = OutputCapture()
+ step = PrepareChangeLog(MockTool(), Mock())
+ changelog_contents = u"%s\n%s" % (self._new_entry_boilerplate, self._example_changelog)
+ changelog_path = self._write_tmp_file_with_contents(changelog_contents.encode("utf-8"))
+ state = {
+ "bug_title": "Example title",
+ "bug_id": 1234,
+ "changelogs": [changelog_path],
+ }
+ capture.assert_outputs(self, step.run, [state])
+ actual_contents = self._read_file_contents(changelog_path, "utf-8")
+ expected_message = "Example title\n http://example.com/1234"
+ expected_contents = changelog_contents.replace("Need a short description and bug URL (OOPS!)", expected_message)
+ os.remove(changelog_path)
+ self.assertEquals(actual_contents, expected_contents)
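The new test leans on helpers inherited from ChangeLogTest (_new_entry_boilerplate, _example_changelog, and the tmp-file read/write methods) and defines no runner of its own, leaving discovery to the webkitpy test harness. Were standalone execution wanted, the stock unittest entry point would be the obvious sketch:

    # Not part of the committed file; standard unittest boilerplate only.
    if __name__ == "__main__":
        unittest.main()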