diff options
author | Ben Murdoch <benm@google.com> | 2010-06-15 19:36:43 +0100 |
---|---|---|
committer | Ben Murdoch <benm@google.com> | 2010-06-16 14:52:28 +0100 |
commit | 545e470e52f0ac6a3a072bf559c796b42c6066b6 (patch) | |
tree | c0c14763654d84d37577dde512c3d3b4699a9e86 /WebKitTools/Scripts/webkitpy | |
parent | 719298a66237d38ea5c05f1547123ad8aacbc237 (diff) | |
download | external_webkit-545e470e52f0ac6a3a072bf559c796b42c6066b6.zip external_webkit-545e470e52f0ac6a3a072bf559c796b42c6066b6.tar.gz external_webkit-545e470e52f0ac6a3a072bf559c796b42c6066b6.tar.bz2 |
Merge webkit.org at r61121: Initial merge by git.
Change-Id: Icd6db395c62285be384d137164d95d7466c98760
Diffstat (limited to 'WebKitTools/Scripts/webkitpy')
84 files changed, 580 insertions, 17987 deletions
diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py b/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py index 6220fbd..40657eb 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py @@ -36,6 +36,8 @@ import textwrap from webkitpy.common.system.deprecated_logging import log from webkitpy.common.config.committers import CommitterList +from webkitpy.common.net.bugzilla import parse_bug_id + def view_source_url(revision_number): # FIMXE: This doesn't really belong in this file, but we don't have a @@ -88,6 +90,9 @@ class ChangeLogEntry(object): def contents(self): return self._contents + def bug_id(self): + return parse_bug_id(self._contents) + # FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead. class ChangeLog(object): @@ -183,3 +188,8 @@ class ChangeLog(object): for line in fileinput.FileInput(self.path, inplace=1): # Trailing comma suppresses printing newline print line.replace("NOBODY (OOPS!)", reviewer.encode("utf-8")), + + def set_short_description_and_bug_url(self, short_description, bug_url): + message = "%s\n %s" % (short_description, bug_url) + for line in fileinput.FileInput(self.path, inplace=1): + print line.replace("Need a short description and bug URL (OOPS!)", message.encode("utf-8")), diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py index 864428a..6aeb1f8 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py @@ -38,7 +38,7 @@ from StringIO import StringIO from webkitpy.common.checkout.changelog import * -class ChangeLogsTest(unittest.TestCase): +class ChangeLogTest(unittest.TestCase): _example_entry = u'''2009-08-17 Peter Kasting <pkasting@google.com> @@ -131,6 +131,18 @@ class ChangeLogsTest(unittest.TestCase): os.remove(changelog_path) self.assertEquals(actual_contents, expected_contents) + def test_set_short_description_and_bug_url(self): + changelog_contents = u"%s\n%s" % (self._new_entry_boilerplate, self._example_changelog) + changelog_path = self._write_tmp_file_with_contents(changelog_contents.encode("utf-8")) + short_description = "A short description" + bug_url = "http://example.com/b/2344" + ChangeLog(changelog_path).set_short_description_and_bug_url(short_description, bug_url) + actual_contents = self._read_file_contents(changelog_path, "utf-8") + expected_message = "%s\n %s" % (short_description, bug_url) + expected_contents = changelog_contents.replace("Need a short description and bug URL (OOPS!)", expected_message) + os.remove(changelog_path) + self.assertEquals(actual_contents, expected_contents) + _revert_message = """ Unreviewed, rolling out r12345. http://trac.webkit.org/changeset/12345 http://example.com/123 diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/scm.py b/WebKitTools/Scripts/webkitpy/common/checkout/scm.py index eea76be..fc4c6fd 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/scm.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/scm.py @@ -240,7 +240,7 @@ class SCM: def supports_local_commits(): raise NotImplementedError, "subclasses must implement" - def svn_merge_base(): + def remote_merge_base(): raise NotImplementedError, "subclasses must implement" def commit_locally_with_message(self, message): @@ -465,11 +465,11 @@ class Git(SCM): def discard_local_commits(self): # FIXME: This should probably use cwd=self.checkout_root - self.run(['git', 'reset', '--hard', self.svn_branch_name()]) + self.run(['git', 'reset', '--hard', self.remote_branch_ref()]) def local_commits(self): # FIXME: This should probably use cwd=self.checkout_root - return self.run(['git', 'log', '--pretty=oneline', 'HEAD...' + self.svn_branch_name()]).splitlines() + return self.run(['git', 'log', '--pretty=oneline', 'HEAD...' + self.remote_branch_ref()]).splitlines() def rebase_in_progress(self): return os.path.exists(os.path.join(self.checkout_root, '.git/rebase-apply')) @@ -507,7 +507,7 @@ class Git(SCM): return git_commit if self.should_squash(squash): - return self.svn_merge_base() + return self.remote_merge_base() # FIXME: Non-squash behavior should match commit_with_message. It raises an error # if there are working copy changes and --squash or --no-squash wasn't passed in. @@ -602,14 +602,14 @@ class Git(SCM): if num_local_commits > 1 or (num_local_commits > 0 and not self.working_directory_is_clean()): raise ScriptError(message=self._get_squash_error_message(num_local_commits)) - if squash and self._svn_branch_has_extra_commits(): + if squash and self._remote_branch_has_extra_commits(): raise ScriptError(message="Cannot use --squash when HEAD is not fully merged/rebased to %s. " - "This branch needs to be synced first." % self.svn_branch_name()) + "This branch needs to be synced first." % self.remote_branch_ref()) return squash - def _svn_branch_has_extra_commits(self): - return len(run_command(['git', 'rev-list', '--max-count=1', self.svn_branch_name(), '^HEAD'])) + def _remote_branch_has_extra_commits(self): + return len(run_command(['git', 'rev-list', '--max-count=1', self.remote_branch_ref(), '^HEAD'])) def commit_with_message(self, message, username=None, git_commit=None, squash=None): # Username is ignored during Git commits. @@ -624,7 +624,7 @@ class Git(SCM): squash = self.should_squash(squash) if squash: - self.run(['git', 'reset', '--soft', self.svn_branch_name()]) + self.run(['git', 'reset', '--soft', self.remote_branch_ref()]) self.commit_locally_with_message(message) elif not self.working_directory_is_clean(): if not len(self.local_commits()): @@ -650,8 +650,8 @@ class Git(SCM): # We want to squash all this branch's commits into one commit with the proper description. # We do this by doing a "merge --squash" into a new commit branch, then dcommitting that. - MERGE_BRANCH = 'webkit-patch-land' - self.delete_branch(MERGE_BRANCH) + MERGE_BRANCH_NAME = 'webkit-patch-land' + self.delete_branch(MERGE_BRANCH_NAME) # We might be in a directory that's present in this branch but not in the # trunk. Move up to the top of the tree so that git commands that expect a @@ -662,7 +662,7 @@ class Git(SCM): # We wrap in a try...finally block so if anything goes wrong, we clean up the branches. commit_succeeded = True try: - self.run(['git', 'checkout', '-q', '-b', MERGE_BRANCH, self.svn_branch_name()]) + self.run(['git', 'checkout', '-q', '-b', MERGE_BRANCH_NAME, self.remote_branch_ref()]) for commit in commit_ids: # We're on a different branch now, so convert "head" to the branch name. @@ -681,7 +681,7 @@ class Git(SCM): # And then swap back to the original branch and clean up. self.clean_working_directory() self.run(['git', 'checkout', '-q', branch_name]) - self.delete_branch(MERGE_BRANCH) + self.delete_branch(MERGE_BRANCH_NAME) return output @@ -693,18 +693,31 @@ class Git(SCM): return self.run(['git', 'svn', 'log', '--limit=1']) # Git-specific methods: + def _branch_ref_exists(self, branch_ref): + return self.run(['git', 'show-ref', '--quiet', '--verify', branch_ref], return_exit_code=True) == 0 - def delete_branch(self, branch): - if self.run(['git', 'show-ref', '--quiet', '--verify', 'refs/heads/' + branch], return_exit_code=True) == 0: - self.run(['git', 'branch', '-D', branch]) + def delete_branch(self, branch_name): + if self._branch_ref_exists('refs/heads/' + branch_name): + self.run(['git', 'branch', '-D', branch_name]) - def svn_merge_base(self): - return self.run(['git', 'merge-base', self.svn_branch_name(), 'HEAD']).strip() + def remote_merge_base(self): + return self.run(['git', 'merge-base', self.remote_branch_ref(), 'HEAD']).strip() + + def remote_branch_ref(self): + # Use references so that we can avoid collisions, e.g. we don't want to operate on refs/heads/trunk if it exists. - def svn_branch_name(self): # FIXME: This should so something like: Git.read_git_config('svn-remote.svn.fetch').split(':')[1] # but that doesn't work if the git repo is tracking multiple svn branches. - return 'trunk' + remote_branch_refs = [ + 'refs/remotes/trunk', # A git-svn checkout as per http://trac.webkit.org/wiki/UsingGitWithWebKit. + 'refs/remotes/origin/master', # A git clone of git://git.webkit.org/WebKit.git that is not tracking svn. + ] + + for ref in remote_branch_refs: + if self._branch_ref_exists(ref): + return ref + + raise ScriptError(message="Can't find a branch to diff against. %s branches do not exist." % " and ".join(remote_branch_refs)) def commit_locally_with_message(self, message): self.run(['git', 'commit', '--all', '-F', '-'], input=message) @@ -726,7 +739,7 @@ class Git(SCM): # A B : [A, B] (different from git diff, which would use "rev-list A..B") def commit_ids_from_commitish_arguments(self, args): if not len(args): - args.append('%s..HEAD' % self.svn_branch_name()) + args.append('%s..HEAD' % self.remote_branch_ref()) commit_ids = [] for commitish in args: diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py index 8eea4d8..36a1d1c 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py @@ -635,25 +635,63 @@ Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA== class GitTest(SCMTest): - def _setup_git_clone_of_svn_repository(self): + def setUp(self): + """Sets up fresh git repository with one commit. Then setups a second git + repo that tracks the first one.""" + self.original_dir = os.getcwd() + + self.untracking_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout2") + run_command(['git', 'init', self.untracking_checkout_path]) + + os.chdir(self.untracking_checkout_path) + write_into_file_at_path('foo_file', 'foo') + run_command(['git', 'add', 'foo_file']) + run_command(['git', 'commit', '-am', 'dummy commit']) + self.untracking_scm = detect_scm_system(self.untracking_checkout_path) + + self.tracking_git_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout") + run_command(['git', 'clone', '--quiet', self.untracking_checkout_path, self.tracking_git_checkout_path]) + os.chdir(self.tracking_git_checkout_path) + self.tracking_scm = detect_scm_system(self.tracking_git_checkout_path) + + def tearDown(self): + # Change back to a valid directory so that later calls to os.getcwd() do not fail. + os.chdir(self.original_dir) + run_command(['rm', '-rf', self.tracking_git_checkout_path]) + run_command(['rm', '-rf', self.untracking_checkout_path]) + + def test_remote_branch_ref(self): + self.assertEqual(self.tracking_scm.remote_branch_ref(), 'refs/remotes/origin/master') + + os.chdir(self.untracking_checkout_path) + self.assertRaises(ScriptError, self.untracking_scm.remote_branch_ref) + + +class GitSVNTest(SCMTest): + + def _setup_git_checkout(self): self.git_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout") # --quiet doesn't make git svn silent, so we use run_silent to redirect output run_silent(['git', 'svn', 'clone', '-T', 'trunk', self.svn_repo_url, self.git_checkout_path]) + os.chdir(self.git_checkout_path) - def _tear_down_git_clone_of_svn_repository(self): + def _tear_down_git_checkout(self): + # Change back to a valid directory so that later calls to os.getcwd() do not fail. + os.chdir(self.original_dir) run_command(['rm', '-rf', self.git_checkout_path]) def setUp(self): + self.original_dir = os.getcwd() + SVNTestRepository.setup(self) - self._setup_git_clone_of_svn_repository() - os.chdir(self.git_checkout_path) + self._setup_git_checkout() self.scm = detect_scm_system(self.git_checkout_path) # For historical reasons, we test some checkout code here too. self.checkout = Checkout(self.scm) def tearDown(self): SVNTestRepository.tear_down(self) - self._tear_down_git_clone_of_svn_repository() + self._tear_down_git_checkout() def test_detection(self): scm = detect_scm_system(self.git_checkout_path) @@ -683,25 +721,24 @@ class GitTest(SCMTest): self.assertEqual(len(self.scm.local_commits()), 0) def test_delete_branch(self): - old_branch = run_command(['git', 'symbolic-ref', 'HEAD']).strip() new_branch = 'foo' run_command(['git', 'checkout', '-b', new_branch]) self.assertEqual(run_command(['git', 'symbolic-ref', 'HEAD']).strip(), 'refs/heads/' + new_branch) - run_command(['git', 'checkout', old_branch]) + run_command(['git', 'checkout', '-b', 'bar']) self.scm.delete_branch(new_branch) self.assertFalse(re.search(r'foo', run_command(['git', 'branch']))) - def test_svn_merge_base(self): + def test_remote_merge_base(self): # Diff to merge-base should include working-copy changes, # which the diff to svn_branch.. doesn't. test_file = os.path.join(self.git_checkout_path, 'test_file') write_into_file_at_path(test_file, 'foo') - diff_to_common_base = _git_diff(self.scm.svn_branch_name() + '..') - diff_to_merge_base = _git_diff(self.scm.svn_merge_base()) + diff_to_common_base = _git_diff(self.scm.remote_branch_ref() + '..') + diff_to_merge_base = _git_diff(self.scm.remote_merge_base()) self.assertFalse(re.search(r'foo', diff_to_common_base)) self.assertTrue(re.search(r'foo', diff_to_merge_base)) @@ -888,6 +925,9 @@ class GitTest(SCMTest): scm = detect_scm_system(self.git_checkout_path) self.assertRaises(ScriptError, scm.commit_with_message, "another test commit", squash=True) + def test_remote_branch_ref(self): + self.assertEqual(self.scm.remote_branch_ref(), 'refs/remotes/trunk') + def test_reverse_diff(self): self._shared_test_reverse_diff() diff --git a/WebKitTools/Scripts/webkitpy/common/config/committers.py b/WebKitTools/Scripts/webkitpy/common/config/committers.py index d9c541f..37bd4eb 100644 --- a/WebKitTools/Scripts/webkitpy/common/config/committers.py +++ b/WebKitTools/Scripts/webkitpy/common/config/committers.py @@ -70,6 +70,7 @@ committers_unable_to_review = [ Committer("Alexander Kellett", ["lypanov@mac.com", "a-lists001@lypanov.net", "lypanov@kde.org"], "lypanov"), Committer("Alexander Pavlov", "apavlov@chromium.org"), Committer("Andre Boule", "aboule@apple.com"), + Committer("Andrei Popescu", "andreip@google.com", "andreip"), Committer("Andrew Wellington", ["andrew@webkit.org", "proton@wiretapped.net"], "proton"), Committer("Andras Becsi", "abecsi@webkit.org", "bbandix"), Committer("Andy Estes", "aestes@apple.com", "estes"), @@ -133,14 +134,15 @@ committers_unable_to_review = [ Committer("Krzysztof Kowalczyk", "kkowalczyk@gmail.com"), Committer("Levi Weintraub", "lweintraub@apple.com"), Committer("Mads Ager", "ager@chromium.org"), + Committer("Marcus Voltis Bulach", "bulach@chromium.org"), Committer("Matt Lilek", ["webkit@mattlilek.com", "pewtermoose@webkit.org"]), Committer("Matt Perry", "mpcomplete@chromium.org"), Committer("Maxime Britto", ["maxime.britto@gmail.com", "britto@apple.com"]), Committer("Maxime Simon", ["simon.maxime@gmail.com", "maxime.simon@webkit.org"], "maxime.simon"), - Committer("Martin Robinson", ["mrobinson@webkit.org", "martin.james.robinson@gmail.com"]), + Committer("Martin Robinson", ["mrobinson@igalia.com", "mrobinson@webkit.org", "martin.james.robinson@gmail.com"], "mrobinson"), Committer("Michelangelo De Simone", "michelangelo@webkit.org", "michelangelo"), Committer("Mike Belshe", ["mbelshe@chromium.org", "mike@belshe.com"]), - Committer("Mike Fenton", ["mike.fenton@torchmobile.com", "mifenton@rim.com"], "mfenton"), + Committer("Mike Fenton", ["mifenton@rim.com", "mike.fenton@torchmobile.com"], "mfenton"), Committer("Mike Thole", ["mthole@mikethole.com", "mthole@apple.com"]), Committer("Mikhail Naganov", "mnaganov@chromium.org"), Committer("MORITA Hajime", "morrita@google.com", "morrita"), @@ -166,6 +168,7 @@ committers_unable_to_review = [ Committer("Yong Li", ["yong.li.webkit@gmail.com", "yong.li@torchmobile.com"], "yong"), Committer("Yongjun Zhang", "yongjun.zhang@nokia.com"), Committer("Yuzo Fujishima", "yuzo@google.com", "yuzo"), + Committer("Zhenyao Mo", "zmo@google.com"), Committer("Zoltan Herczeg", "zherczeg@webkit.org", "zherczeg"), Committer("Zoltan Horvath", "zoltan@webkit.org", "zoltan"), ] diff --git a/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py b/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py index 26d3652..40db32c 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py +++ b/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py @@ -113,6 +113,9 @@ class Attachment(object): def commit_queue(self): return self._attachment_dictionary.get("commit-queue") + def in_rietveld(self): + return self._attachment_dictionary.get("in-rietveld") + def url(self): # FIXME: This should just return # self._bugzilla().attachment_url_for_id(self.id()). scm_unittest.py @@ -158,6 +161,9 @@ class Bug(object): def id(self): return self.bug_dictionary["id"] + def title(self): + return self.bug_dictionary["title"] + def assigned_to_email(self): return self.bug_dictionary["assigned_to_email"] @@ -201,6 +207,9 @@ class Bug(object): # a valid committer. return filter(lambda patch: patch.committer(), patches) + def in_rietveld_queue_patches(self): + return [patch for patch in self.patches() if patch.in_rietveld() == None] + # A container for all of the logic for making and parsing buzilla queries. class BugzillaQueries(object): @@ -264,6 +273,16 @@ class BugzillaQueries(object): return sum([self._fetch_bug(bug_id).commit_queued_patches() for bug_id in self.fetch_bug_ids_from_commit_queue()], []) + def fetch_first_patch_from_rietveld_queue(self): + # rietveld-queue processes all patches that don't have in-rietveld set. + query_url = "buglist.cgi?query_format=advanced&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&field0-0-0=flagtypes.name&type0-0-0=notsubstring&value0-0-0=in-rietveld&field0-1-0=attachments.ispatch&type0-1-0=equals&value0-1-0=1&order=Last+Changed&field0-2-0=attachments.isobsolete&type0-2-0=equals&value0-2-0=0" + bugs = self._fetch_bug_ids_advanced_query(query_url) + if not len(bugs): + return None + + patches = self._fetch_bug(bugs[0]).in_rietveld_queue_patches() + return patches[0] if len(patches) else None + def _fetch_bug_ids_from_review_queue(self): review_queue_url = "buglist.cgi?query_format=advanced&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&field0-0-0=flagtypes.name&type0-0-0=equals&value0-0-0=review?" return self._fetch_bug_ids_advanced_query(review_queue_url) @@ -474,6 +493,8 @@ class Bugzilla(object): self._parse_attachment_flag( element, 'review', attachment, 'reviewer_email') self._parse_attachment_flag( + element, 'in-rietveld', attachment, 'rietveld_uploader_email') + self._parse_attachment_flag( element, 'commit-queue', attachment, 'committer_email') return attachment @@ -592,7 +613,8 @@ class Bugzilla(object): comment_text=None, mark_for_review=False, mark_for_commit_queue=False, - mark_for_landing=False, bug_id=None): + mark_for_landing=False, + bug_id=None): self.browser['description'] = description self.browser['ispatch'] = ("1",) self.browser['flag_type-1'] = ('?',) if mark_for_review else ('X',) @@ -703,7 +725,7 @@ class Bugzilla(object): self.browser["blocked"] = unicode(blocked) if assignee == None: assignee = self.username - if assignee: + if assignee and not self.browser.find_control("assigned_to").disabled: self.browser["assigned_to"] = assignee self.browser["short_desc"] = bug_title self.browser["comment"] = bug_description @@ -730,8 +752,10 @@ class Bugzilla(object): # FIXME: This will break if we ever re-order attachment flags if flag_name == "review": return self.browser.find_control(type='select', nr=0) - if flag_name == "commit-queue": + elif flag_name == "commit-queue": return self.browser.find_control(type='select', nr=1) + elif flag_name == "in-rietveld": + return self.browser.find_control(type='select', nr=2) raise Exception("Don't know how to find flag named \"%s\"" % flag_name) def clear_attachment_flags(self, @@ -758,8 +782,8 @@ class Bugzilla(object): attachment_id, flag_name, flag_value, - comment_text, - additional_comment_text): + comment_text=None, + additional_comment_text=None): # FIXME: We need a way to test this function on a live bugzilla # instance. @@ -774,7 +798,10 @@ class Bugzilla(object): self.browser.open(self.attachment_url_for_id(attachment_id, 'edit')) self.browser.select_form(nr=1) - self.browser.set_value(comment_text, name='comment', nr=0) + + if comment_text: + self.browser.set_value(comment_text, name='comment', nr=0) + self._find_select_element_for_flag(flag_name).value = (flag_value,) self.browser.submit() diff --git a/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py b/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py index ce992e7..3556121 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/net/bugzilla_unittest.py @@ -96,6 +96,11 @@ class BugzillaTest(unittest.TestCase): status="+" setter="two@test.com" /> + <flag name="in-rietveld" + id="17933" + status="+" + setter="three@test.com" + /> </attachment> ''' _expected_example_attachment_parsing = { @@ -111,6 +116,8 @@ class BugzillaTest(unittest.TestCase): 'reviewer_email' : 'one@test.com', 'commit-queue' : '+', 'committer_email' : 'two@test.com', + 'in-rietveld': '+', + 'rietveld_uploader_email': 'three@test.com', 'attacher_email' : 'christian.plesner.hansen@gmail.com', } @@ -191,12 +198,12 @@ removed-because-it-was-really-long ZEZpbmlzaExvYWRXaXRoUmVhc29uOnJlYXNvbl07Cit9CisKIEBlbmQKIAogI2VuZGlmCg== </data> - <flag name="review" - id="27602" - status="?" - setter="mjs@apple.com" - /> - </attachment> + <flag name="review" + id="27602" + status="?" + setter="mjs@apple.com" + /> + </attachment> </bug> </bugzilla> """ diff --git a/WebKitTools/Scripts/webkitpy/common/net/buildbot.py b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py index 6c6ed43..c849ef1 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/buildbot.py +++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py @@ -333,7 +333,12 @@ class BuildBot(object): builder['built_revision'] = int(revision_string) \ if not re.match('\D', revision_string) \ else None - builder['is_green'] = not re.search('fail', cell.renderContents()) + + # FIXME: We treat slave lost as green even though it is not to + # work around the Qts bot being on a broken internet connection. + # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099 + builder['is_green'] = not re.search('fail', cell.renderContents()) or \ + not not re.search('lost', cell.renderContents()) status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)" link_match = re.match(status_link_regexp, status_link['href']) diff --git a/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py b/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py index 5e04745..5384321 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py @@ -169,6 +169,10 @@ class BuildBotTest(unittest.TestCase): <td class="box"><a href="builders/Qt%20Linux%20Release">Qt Linux Release</a></td> <td align="center" class="LastBuild box failure"><a href="builders/Qt%20Linux%20Release/builds/654">47383</a><br />failed<br />compile-webkit</td> <td align="center" class="Activity idle">idle<br />3 pending</td> + <tr> + <td class="box"><a href="builders/Qt%20Windows%2032-bit%20Debug">Qt Windows 32-bit Debug</a></td> + <td align="center" class="LastBuild box failure"><a href="builders/Qt%20Windows%2032-bit%20Debug/builds/2090">60563</a><br />failed<br />failed<br />slave<br />lost</td> + <td align="center" class="Activity building">building<br />ETA in<br />~ 5 mins<br />at 08:25</td> </table> ''' _expected_example_one_box_parsings = [ @@ -196,6 +200,14 @@ class BuildBotTest(unittest.TestCase): 'activity': 'idle', 'pending_builds': 3, }, + { + 'is_green': True, + 'build_number' : 2090, + 'name': u'Qt Windows 32-bit Debug', + 'built_revision': 60563, + 'activity': 'building', + 'pending_builds': 0, + }, ] def test_status_parsing(self): diff --git a/WebKitTools/Scripts/webkitpy/common/net/rietveld.py b/WebKitTools/Scripts/webkitpy/common/net/rietveld.py index 572d1fd..eccda3a 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/rietveld.py +++ b/WebKitTools/Scripts/webkitpy/common/net/rietveld.py @@ -51,6 +51,10 @@ class Rietveld(object): if not message: raise ScriptError("Rietveld requires a message.") + # Rietveld has a 100 character limit on message length. + if len(message) > 100: + message = message[:100] + args = [ # First argument is empty string to mimic sys.argv. "", @@ -70,5 +74,5 @@ class Rietveld(object): # Use RealMain instead of calling upload from the commandline so that # we can pass in the diff ourselves. Otherwise, upload will just use # git diff for git checkouts, which doesn't respect --squash and --git-commit. - issue, patchset = upload.RealMain(args[1:], data=diff) + issue, patchset = upload.RealMain(args, data=diff) return issue diff --git a/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py b/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py index 592a669..68a3919 100644 --- a/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py +++ b/WebKitTools/Scripts/webkitpy/common/system/outputcapture.py @@ -52,9 +52,12 @@ class OutputCapture(object): def restore_output(self): return (self._restore_output_with_name("stdout"), self._restore_output_with_name("stderr")) - def assert_outputs(self, testcase, function, args=[], kwargs={}, expected_stdout="", expected_stderr=""): + def assert_outputs(self, testcase, function, args=[], kwargs={}, expected_stdout="", expected_stderr="", expected_exception=None): self.capture_output() - return_value = function(*args, **kwargs) + if expected_exception: + return_value = testcase.assertRaises(expected_exception, function, *args, **kwargs) + else: + return_value = function(*args, **kwargs) (stdout_string, stderr_string) = self.restore_output() testcase.assertEqual(stdout_string, expected_stdout) testcase.assertEqual(stderr_string, expected_stderr) diff --git a/WebKitTools/Scripts/webkitpy/common/system/user.py b/WebKitTools/Scripts/webkitpy/common/system/user.py index 82fa0d3..b4df3cb 100644 --- a/WebKitTools/Scripts/webkitpy/common/system/user.py +++ b/WebKitTools/Scripts/webkitpy/common/system/user.py @@ -104,5 +104,14 @@ class User(object): response = raw_input("%s [Y/n]: " % message) return not response or response.lower() == "y" + def can_open_url(self): + try: + webbrowser.get() + return True + except webbrowser.Error, e: + return False + def open_url(self, url): + if not self.can_open_url(): + _log.warn("Failed to open %s" % url) webbrowser.open(url) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py index cee44ad..bb214f7 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_layout_results_generator.py @@ -77,6 +77,7 @@ class JSONLayoutResultsGenerator(json_results_generator.JSONResultsGenerator): self._test_timings = dict( (path_to_name(test_tuple.filename), test_tuple.test_run_time) for test_tuple in test_timings) + self._svn_repositories = port.test_repository_paths() self._generate_json_output() diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py index 0993cbd..1cf1b95 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py @@ -38,6 +38,8 @@ import time import urllib2 import xml.dom.minidom +from webkitpy.common.checkout import scm +from webkitpy.common.system.executive import ScriptError from webkitpy.layout_tests.layout_package import test_expectations import webkitpy.thirdparty.simplejson as simplejson @@ -46,6 +48,7 @@ _log = logging.getLogger("webkitpy.layout_tests.layout_package." class JSONResultsGenerator(object): + """A JSON results generator for generic tests.""" MAX_NUMBER_OF_BUILD_RESULTS_TO_LOG = 750 # Min time (seconds) that will be added to the JSON. @@ -60,8 +63,6 @@ class JSONResultsGenerator(object): RESULTS = "results" TIMES = "times" BUILD_NUMBERS = "buildNumbers" - WEBKIT_SVN = "webkitRevision" - CHROME_SVN = "chromeRevision" TIME = "secondsSinceEpoch" TESTS = "tests" @@ -102,7 +103,6 @@ class JSONResultsGenerator(object): all_tests: List of all the tests that were run. This should not include skipped tests. """ - self._port = port self._builder_name = builder_name self._build_name = build_name self._build_number = build_number @@ -114,6 +114,7 @@ class JSONResultsGenerator(object): self._passed_tests = passed_tests self._skipped_tests = skipped_tests self._all_tests = all_tests + self._svn_repositories = port.test_repository_paths() self._generate_json_output() @@ -132,6 +133,7 @@ class JSONResultsGenerator(object): Args: in_directory: The directory where svn is to be run. """ + if os.path.exists(os.path.join(in_directory, '.svn')): # Note: Not thread safe: http://bugs.python.org/issue2320 output = subprocess.Popen(["svn", "info", "--xml"], @@ -312,23 +314,11 @@ class JSONResultsGenerator(object): self._insert_item_into_raw_list(results_for_builder, self._build_number, self.BUILD_NUMBERS) - # These next two branches test to see which source repos we can - # pull revisions from. - if hasattr(self._port, 'path_from_webkit_base'): - path_to_webkit = self._port.path_from_webkit_base('WebCore') + # Include SVN revisions for the given repositories. + for (name, path) in self._svn_repositories: self._insert_item_into_raw_list(results_for_builder, - self._get_svn_revision(path_to_webkit), - self.WEBKIT_SVN) - - if hasattr(self._port, 'path_from_chromium_base'): - try: - path_to_chrome = self._port.path_from_chromium_base() - self._insert_item_into_raw_list(results_for_builder, - self._get_svn_revision(path_to_chrome), - self.CHROME_SVN) - except AssertionError: - # We're not in a Chromium checkout, that's ok. - pass + self._get_svn_revision(path), + name + 'Revision') self._insert_item_into_raw_list(results_for_builder, int(time.time()), diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py index 782c87c..e73579f 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py @@ -537,6 +537,14 @@ class Port(object): expectations, determining search paths, and logging information.""" raise NotImplementedError('Port.version') + def test_repository_paths(self): + """Returns a list of (repository_name, repository_path) tuples + of its depending code base. By default it returns a list that only + contains a ('webkit', <webkitRepossitoryPath>) tuple. + """ + return [('webkit', self.layout_tests_dir())] + + _WDIFF_DEL = '##WDIFF_DEL##' _WDIFF_ADD = '##WDIFF_ADD##' _WDIFF_END = '##WDIFF_END##' diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py index db23eb8..e7f9ac8 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py @@ -232,6 +232,13 @@ class ChromiumPort(base.Port): raise ValueError('Unsupported test_platform_name: %s' % test_platform_name) + def test_repository_paths(self): + # Note: for JSON file's backward-compatibility we use 'chrome' rather + # than 'chromium' here. + repos = super(ChromiumPort, self).test_repository_paths() + repos.append(('chrome', self.path_from_chromium_base())) + return repos + # # PROTECTED METHODS # diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py index 0818d51..4df43e0 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py @@ -81,15 +81,15 @@ class ChromiumLinuxPort(chromium.ChromiumPort): # def _build_path(self, *comps): - if self._options.use_drt: - base = os.path.join(self.path_from_webkit_base(), 'WebKit', - 'chromium') - else: - base = self.path_from_chromium_base() + base = self.path_from_chromium_base() if os.path.exists(os.path.join(base, 'sconsbuild')): return os.path.join(base, 'sconsbuild', *comps) - else: + if os.path.exists(os.path.join(base, 'out', *comps)) or not self._options.use_drt: return os.path.join(base, 'out', *comps) + base = self.path_from_webkit_base() + if os.path.exists(os.path.join(base, 'sconsbuild')): + return os.path.join(base, 'sconsbuild', *comps) + return os.path.join(base, 'out', *comps) def _check_apache_install(self): result = chromium.check_file_exists(self._path_to_apache(), diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py index aa3ac8d..abd84ae 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py @@ -102,10 +102,11 @@ class ChromiumMacPort(chromium.ChromiumPort): # def _build_path(self, *comps): - if self._options.use_drt: - return self.path_from_webkit_base('WebKit', 'chromium', - 'xcodebuild', *comps) - return self.path_from_chromium_base('xcodebuild', *comps) + path = self.path_from_chromium_base('xcodebuild', *comps) + if os.path.exists(path) or not self._options.use_drt: + return path + return self.path_from_webkit_base('WebKit', 'chromium', 'xcodebuild', + *comps) def _check_wdiff_install(self): try: diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py index ec1c33c..8072bc0 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py @@ -117,13 +117,14 @@ class ChromiumWinPort(chromium.ChromiumPort): # def _build_path(self, *comps): - if self._options.use_drt: - return os.path.join(self.path_from_webkit_base(), 'WebKit', - 'chromium', *comps) p = self.path_from_chromium_base('webkit', *comps) if os.path.exists(p): return p - return self.path_from_chromium_base('chrome', *comps) + p = self.path_from_chromium_base('chrome', *comps) + if os.path.exists(p) or not self._options.use_drt: + return p + return os.path.join(self.path_from_webkit_base(), 'WebKit', 'chromium', + *comps) def _lighttpd_path(self, *comps): return self.path_from_chromium_base('third_party', 'lighttpd', 'win', diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py index 22ae780..81bf39e 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py @@ -207,12 +207,13 @@ class PyWebSocket(http_server.Lighttpd): url = 'http' url = url + '://127.0.0.1:%d/' % self._port if not url_is_alive(url): - fp = codecs.open(output_log, "utf-8") - try: + if self._process.returncode == None: + # FIXME: We should use a non-static Executive for easier + # testing. + Executive().kill_process(self._process.pid) + with codecs.open(output_log, "r", "utf-8") as fp: for line in fp: _log.error(line) - finally: - fp.close() raise PyWebSocketNotStarted( 'Failed to start %s server on port %s.' % (self._server_name, self._port)) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py index 3642286..e1fa673 100644 --- a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py +++ b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py @@ -71,7 +71,8 @@ installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechan url_subpath="mechanize") installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b", url_subpath="pep8-0.5.0/pep8.py") - +installer.install(url="http://www.adambarth.com/webkit/eliza", + target_name="eliza.py") rietveld_dir = os.path.join(autoinstalled_dir, "rietveld") installer = AutoInstaller(target_dir=rietveld_dir) @@ -84,9 +85,9 @@ installer.install(url="http://webkit-rietveld.googlecode.com/svn/trunk/static/up # organization purposes. irc_dir = os.path.join(autoinstalled_dir, "irc") installer = AutoInstaller(target_dir=irc_dir) -installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip", +installer.install(url="http://hivelocity.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip", url_subpath="irclib.py") -installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip", +installer.install(url="http://hivelocity.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip", url_subpath="ircbot.py") pywebsocket_dir = os.path.join(autoinstalled_dir, "pywebsocket") diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url deleted file mode 100644 index 4186aee..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url +++ /dev/null @@ -1 +0,0 @@ -http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url deleted file mode 100644 index 0fb1ef6..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url +++ /dev/null @@ -1 +0,0 @@ -http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README deleted file mode 100644 index 1d68cf3..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README +++ /dev/null @@ -1,2 +0,0 @@ -This directory is auto-generated by WebKit and is safe to delete. -It contains needed third-party Python packages automatically downloaded from the web.
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py deleted file mode 100644 index c1e4c6d..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url deleted file mode 100644 index c723abf..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url +++ /dev/null @@ -1 +0,0 @@ -http://pypi.python.org/packages/source/C/ClientForm/ClientForm-0.2.10.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py deleted file mode 100644 index a622de7..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py +++ /dev/null @@ -1,3401 +0,0 @@ -"""HTML form handling for web clients. - -ClientForm is a Python module for handling HTML forms on the client -side, useful for parsing HTML forms, filling them in and returning the -completed forms to the server. It has developed from a port of Gisle -Aas' Perl module HTML::Form, from the libwww-perl library, but the -interface is not the same. - -The most useful docstring is the one for HTMLForm. - -RFC 1866: HTML 2.0 -RFC 1867: Form-based File Upload in HTML -RFC 2388: Returning Values from Forms: multipart/form-data -HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX) -HTML 4.01 Specification, W3C Recommendation 24 December 1999 - - -Copyright 2002-2007 John J. Lee <jjl@pobox.com> -Copyright 2005 Gary Poster -Copyright 2005 Zope Corporation -Copyright 1998-2000 Gisle Aas. - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -# XXX -# Remove parser testing hack -# safeUrl()-ize action -# Switch to unicode throughout (would be 0.3.x) -# See Wichert Akkerman's 2004-01-22 message to c.l.py. -# Add charset parameter to Content-type headers? How to find value?? -# Add some more functional tests -# Especially single and multiple file upload on the internet. -# Does file upload work when name is missing? Sourceforge tracker form -# doesn't like it. Check standards, and test with Apache. Test -# binary upload with Apache. -# mailto submission & enctype text/plain -# I'm not going to fix this unless somebody tells me what real servers -# that want this encoding actually expect: If enctype is -# application/x-www-form-urlencoded and there's a FILE control present. -# Strictly, it should be 'name=data' (see HTML 4.01 spec., section -# 17.13.2), but I send "name=" ATM. What about multiple file upload?? - -# Would be nice, but I'm not going to do it myself: -# ------------------------------------------------- -# Maybe a 0.4.x? -# Replace by_label etc. with moniker / selector concept. Allows, eg., -# a choice between selection by value / id / label / element -# contents. Or choice between matching labels exactly or by -# substring. Etc. -# Remove deprecated methods. -# ...what else? -# Work on DOMForm. -# XForms? Don't know if there's a need here. - -__all__ = ['AmbiguityError', 'CheckboxControl', 'Control', - 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm', - 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl', - 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label', - 'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile', - 'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl', - 'RadioControl', 'ScalarControl', 'SelectControl', - 'SubmitButtonControl', 'SubmitControl', 'TextControl', - 'TextareaControl', 'XHTMLCompatibleFormParser'] - -try: True -except NameError: - True = 1 - False = 0 - -try: bool -except NameError: - def bool(expr): - if expr: return True - else: return False - -try: - import logging - import inspect -except ImportError: - def debug(msg, *args, **kwds): - pass -else: - _logger = logging.getLogger("ClientForm") - OPTIMIZATION_HACK = True - - def debug(msg, *args, **kwds): - if OPTIMIZATION_HACK: - return - - caller_name = inspect.stack()[1][3] - extended_msg = '%%s %s' % msg - extended_args = (caller_name,)+args - debug = _logger.debug(extended_msg, *extended_args, **kwds) - - def _show_debug_messages(): - global OPTIMIZATION_HACK - OPTIMIZATION_HACK = False - _logger.setLevel(logging.DEBUG) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.DEBUG) - _logger.addHandler(handler) - -import sys, urllib, urllib2, types, mimetools, copy, urlparse, \ - htmlentitydefs, re, random -from cStringIO import StringIO - -import sgmllib -# monkeypatch to fix http://www.python.org/sf/803422 :-( -sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") - -# HTMLParser.HTMLParser is recent, so live without it if it's not available -# (also, sgmllib.SGMLParser is much more tolerant of bad HTML) -try: - import HTMLParser -except ImportError: - HAVE_MODULE_HTMLPARSER = False -else: - HAVE_MODULE_HTMLPARSER = True - -try: - import warnings -except ImportError: - def deprecation(message, stack_offset=0): - pass -else: - def deprecation(message, stack_offset=0): - warnings.warn(message, DeprecationWarning, stacklevel=3+stack_offset) - -VERSION = "0.2.10" - -CHUNK = 1024 # size of chunks fed to parser, in bytes - -DEFAULT_ENCODING = "latin-1" - -class Missing: pass - -_compress_re = re.compile(r"\s+") -def compress_text(text): return _compress_re.sub(" ", text.strip()) - -def normalize_line_endings(text): - return re.sub(r"(?:(?<!\r)\n)|(?:\r(?!\n))", "\r\n", text) - - -# This version of urlencode is from my Python 1.5.2 back-port of the -# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence -# of pairs instead of a mapping -- the 2.0 version only accepts a mapping. -def urlencode(query,doseq=False,): - """Encode a sequence of two-element tuples or dictionary into a URL query \ -string. - - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. - - If the query arg is a sequence of two-element tuples, the order of the - parameters in the output will match the order of parameters in the - input. - """ - - if hasattr(query,"items"): - # mapping objects - query = query.items() - else: - # it's a bother at times that strings and string-like objects are - # sequences... - try: - # non-sequence items should not work with len() - x = len(query) - # non-empty strings will fail this - if len(query) and type(query[0]) != types.TupleType: - raise TypeError() - # zero-length sequences of all types will get here and succeed, - # but that's a minor nit - since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty,va,tb = sys.exc_info() - raise TypeError("not a valid non-string sequence or mapping " - "object", tb) - - l = [] - if not doseq: - # preserve old behavior - for k, v in query: - k = urllib.quote_plus(str(k)) - v = urllib.quote_plus(str(v)) - l.append(k + '=' + v) - else: - for k, v in query: - k = urllib.quote_plus(str(k)) - if type(v) == types.StringType: - v = urllib.quote_plus(v) - l.append(k + '=' + v) - elif type(v) == types.UnicodeType: - # is there a reasonable way to convert to ASCII? - # encode generates a string, but "replace" or "ignore" - # lose information and "strict" can raise UnicodeError - v = urllib.quote_plus(v.encode("ASCII","replace")) - l.append(k + '=' + v) - else: - try: - # is this a sufficient test for sequence-ness? - x = len(v) - except TypeError: - # not a sequence - v = urllib.quote_plus(str(v)) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - l.append(k + '=' + urllib.quote_plus(str(elt))) - return '&'.join(l) - -def unescape(data, entities, encoding=DEFAULT_ENCODING): - if data is None or "&" not in data: - return data - - def replace_entities(match, entities=entities, encoding=encoding): - ent = match.group() - if ent[1] == "#": - return unescape_charref(ent[2:-1], encoding) - - repl = entities.get(ent) - if repl is not None: - if type(repl) != type(""): - try: - repl = repl.encode(encoding) - except UnicodeError: - repl = ent - else: - repl = ent - - return repl - - return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data) - -def unescape_charref(data, encoding): - name, base = data, 10 - if name.startswith("x"): - name, base= name[1:], 16 - uc = unichr(int(name, base)) - if encoding is None: - return uc - else: - try: - repl = uc.encode(encoding) - except UnicodeError: - repl = "&#%s;" % data - return repl - -def get_entitydefs(): - import htmlentitydefs - from codecs import latin_1_decode - entitydefs = {} - try: - htmlentitydefs.name2codepoint - except AttributeError: - entitydefs = {} - for name, char in htmlentitydefs.entitydefs.items(): - uc = latin_1_decode(char)[0] - if uc.startswith("&#") and uc.endswith(";"): - uc = unescape_charref(uc[2:-1], None) - entitydefs["&%s;" % name] = uc - else: - for name, codepoint in htmlentitydefs.name2codepoint.items(): - entitydefs["&%s;" % name] = unichr(codepoint) - return entitydefs - - -def issequence(x): - try: - x[0] - except (TypeError, KeyError): - return False - except IndexError: - pass - return True - -def isstringlike(x): - try: x+"" - except: return False - else: return True - - -def choose_boundary(): - """Return a string usable as a multipart boundary.""" - # follow IE and firefox - nonce = "".join([str(random.randint(0, sys.maxint-1)) for i in 0,1,2]) - return "-"*27 + nonce - -# This cut-n-pasted MimeWriter from standard library is here so can add -# to HTTP headers rather than message body when appropriate. It also uses -# \r\n in place of \n. This is a bit nasty. -class MimeWriter: - - """Generic MIME writer. - - Methods: - - __init__() - addheader() - flushheaders() - startbody() - startmultipartbody() - nextpart() - lastpart() - - A MIME writer is much more primitive than a MIME parser. It - doesn't seek around on the output file, and it doesn't use large - amounts of buffer space, so you have to write the parts in the - order they should occur on the output file. It does buffer the - headers you add, allowing you to rearrange their order. - - General usage is: - - f = <open the output file> - w = MimeWriter(f) - ...call w.addheader(key, value) 0 or more times... - - followed by either: - - f = w.startbody(content_type) - ...call f.write(data) for body data... - - or: - - w.startmultipartbody(subtype) - for each part: - subwriter = w.nextpart() - ...use the subwriter's methods to create the subpart... - w.lastpart() - - The subwriter is another MimeWriter instance, and should be - treated in the same way as the toplevel MimeWriter. This way, - writing recursive body parts is easy. - - Warning: don't forget to call lastpart()! - - XXX There should be more state so calls made in the wrong order - are detected. - - Some special cases: - - - startbody() just returns the file passed to the constructor; - but don't use this knowledge, as it may be changed. - - - startmultipartbody() actually returns a file as well; - this can be used to write the initial 'if you can read this your - mailer is not MIME-aware' message. - - - If you call flushheaders(), the headers accumulated so far are - written out (and forgotten); this is useful if you don't need a - body part at all, e.g. for a subpart of type message/rfc822 - that's (mis)used to store some header-like information. - - - Passing a keyword argument 'prefix=<flag>' to addheader(), - start*body() affects where the header is inserted; 0 means - append at the end, 1 means insert at the start; default is - append for addheader(), but insert for start*body(), which use - it to determine where the Content-type header goes. - - """ - - def __init__(self, fp, http_hdrs=None): - self._http_hdrs = http_hdrs - self._fp = fp - self._headers = [] - self._boundary = [] - self._first_part = True - - def addheader(self, key, value, prefix=0, - add_to_http_hdrs=0): - """ - prefix is ignored if add_to_http_hdrs is true. - """ - lines = value.split("\r\n") - while lines and not lines[-1]: del lines[-1] - while lines and not lines[0]: del lines[0] - if add_to_http_hdrs: - value = "".join(lines) - # 2.2 urllib2 doesn't normalize header case - self._http_hdrs.append((key.capitalize(), value)) - else: - for i in range(1, len(lines)): - lines[i] = " " + lines[i].strip() - value = "\r\n".join(lines) + "\r\n" - line = key.title() + ": " + value - if prefix: - self._headers.insert(0, line) - else: - self._headers.append(line) - - def flushheaders(self): - self._fp.writelines(self._headers) - self._headers = [] - - def startbody(self, ctype=None, plist=[], prefix=1, - add_to_http_hdrs=0, content_type=1): - """ - prefix is ignored if add_to_http_hdrs is true. - """ - if content_type and ctype: - for name, value in plist: - ctype = ctype + ';\r\n %s=%s' % (name, value) - self.addheader("Content-Type", ctype, prefix=prefix, - add_to_http_hdrs=add_to_http_hdrs) - self.flushheaders() - if not add_to_http_hdrs: self._fp.write("\r\n") - self._first_part = True - return self._fp - - def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1, - add_to_http_hdrs=0, content_type=1): - boundary = boundary or choose_boundary() - self._boundary.append(boundary) - return self.startbody("multipart/" + subtype, - [("boundary", boundary)] + plist, - prefix=prefix, - add_to_http_hdrs=add_to_http_hdrs, - content_type=content_type) - - def nextpart(self): - boundary = self._boundary[-1] - if self._first_part: - self._first_part = False - else: - self._fp.write("\r\n") - self._fp.write("--" + boundary + "\r\n") - return self.__class__(self._fp) - - def lastpart(self): - if self._first_part: - self.nextpart() - boundary = self._boundary.pop() - self._fp.write("\r\n--" + boundary + "--\r\n") - - -class LocateError(ValueError): pass -class AmbiguityError(LocateError): pass -class ControlNotFoundError(LocateError): pass -class ItemNotFoundError(LocateError): pass - -class ItemCountError(ValueError): pass - -# for backwards compatibility, ParseError derives from exceptions that were -# raised by versions of ClientForm <= 0.2.5 -if HAVE_MODULE_HTMLPARSER: - SGMLLIB_PARSEERROR = sgmllib.SGMLParseError - class ParseError(sgmllib.SGMLParseError, - HTMLParser.HTMLParseError, - ): - pass -else: - if hasattr(sgmllib, "SGMLParseError"): - SGMLLIB_PARSEERROR = sgmllib.SGMLParseError - class ParseError(sgmllib.SGMLParseError): - pass - else: - SGMLLIB_PARSEERROR = RuntimeError - class ParseError(RuntimeError): - pass - - -class _AbstractFormParser: - """forms attribute contains HTMLForm instances on completion.""" - # thanks to Moshe Zadka for an example of sgmllib/htmllib usage - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - if entitydefs is None: - entitydefs = get_entitydefs() - self._entitydefs = entitydefs - self._encoding = encoding - - self.base = None - self.forms = [] - self.labels = [] - self._current_label = None - self._current_form = None - self._select = None - self._optgroup = None - self._option = None - self._textarea = None - - # forms[0] will contain all controls that are outside of any form - # self._global_form is an alias for self.forms[0] - self._global_form = None - self.start_form([]) - self.end_form() - self._current_form = self._global_form = self.forms[0] - - def do_base(self, attrs): - debug("%s", attrs) - for key, value in attrs: - if key == "href": - self.base = self.unescape_attr_if_required(value) - - def end_body(self): - debug("") - if self._current_label is not None: - self.end_label() - if self._current_form is not self._global_form: - self.end_form() - - def start_form(self, attrs): - debug("%s", attrs) - if self._current_form is not self._global_form: - raise ParseError("nested FORMs") - name = None - action = None - enctype = "application/x-www-form-urlencoded" - method = "GET" - d = {} - for key, value in attrs: - if key == "name": - name = self.unescape_attr_if_required(value) - elif key == "action": - action = self.unescape_attr_if_required(value) - elif key == "method": - method = self.unescape_attr_if_required(value.upper()) - elif key == "enctype": - enctype = self.unescape_attr_if_required(value.lower()) - d[key] = self.unescape_attr_if_required(value) - controls = [] - self._current_form = (name, action, method, enctype), d, controls - - def end_form(self): - debug("") - if self._current_label is not None: - self.end_label() - if self._current_form is self._global_form: - raise ParseError("end of FORM before start") - self.forms.append(self._current_form) - self._current_form = self._global_form - - def start_select(self, attrs): - debug("%s", attrs) - if self._select is not None: - raise ParseError("nested SELECTs") - if self._textarea is not None: - raise ParseError("SELECT inside TEXTAREA") - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - - self._select = d - self._add_label(d) - - self._append_select_control({"__select": d}) - - def end_select(self): - debug("") - if self._select is None: - raise ParseError("end of SELECT before start") - - if self._option is not None: - self._end_option() - - self._select = None - - def start_optgroup(self, attrs): - debug("%s", attrs) - if self._select is None: - raise ParseError("OPTGROUP outside of SELECT") - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - - self._optgroup = d - - def end_optgroup(self): - debug("") - if self._optgroup is None: - raise ParseError("end of OPTGROUP before start") - self._optgroup = None - - def _start_option(self, attrs): - debug("%s", attrs) - if self._select is None: - raise ParseError("OPTION outside of SELECT") - if self._option is not None: - self._end_option() - - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - - self._option = {} - self._option.update(d) - if (self._optgroup and self._optgroup.has_key("disabled") and - not self._option.has_key("disabled")): - self._option["disabled"] = None - - def _end_option(self): - debug("") - if self._option is None: - raise ParseError("end of OPTION before start") - - contents = self._option.get("contents", "").strip() - self._option["contents"] = contents - if not self._option.has_key("value"): - self._option["value"] = contents - if not self._option.has_key("label"): - self._option["label"] = contents - # stuff dict of SELECT HTML attrs into a special private key - # (gets deleted again later) - self._option["__select"] = self._select - self._append_select_control(self._option) - self._option = None - - def _append_select_control(self, attrs): - debug("%s", attrs) - controls = self._current_form[2] - name = self._select.get("name") - controls.append(("select", name, attrs)) - - def start_textarea(self, attrs): - debug("%s", attrs) - if self._textarea is not None: - raise ParseError("nested TEXTAREAs") - if self._select is not None: - raise ParseError("TEXTAREA inside SELECT") - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - self._add_label(d) - - self._textarea = d - - def end_textarea(self): - debug("") - if self._textarea is None: - raise ParseError("end of TEXTAREA before start") - controls = self._current_form[2] - name = self._textarea.get("name") - controls.append(("textarea", name, self._textarea)) - self._textarea = None - - def start_label(self, attrs): - debug("%s", attrs) - if self._current_label: - self.end_label() - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - taken = bool(d.get("for")) # empty id is invalid - d["__text"] = "" - d["__taken"] = taken - if taken: - self.labels.append(d) - self._current_label = d - - def end_label(self): - debug("") - label = self._current_label - if label is None: - # something is ugly in the HTML, but we're ignoring it - return - self._current_label = None - # if it is staying around, it is True in all cases - del label["__taken"] - - def _add_label(self, d): - #debug("%s", d) - if self._current_label is not None: - if not self._current_label["__taken"]: - self._current_label["__taken"] = True - d["__label"] = self._current_label - - def handle_data(self, data): - debug("%s", data) - - if self._option is not None: - # self._option is a dictionary of the OPTION element's HTML - # attributes, but it has two special keys, one of which is the - # special "contents" key contains text between OPTION tags (the - # other is the "__select" key: see the end_option method) - map = self._option - key = "contents" - elif self._textarea is not None: - map = self._textarea - key = "value" - data = normalize_line_endings(data) - # not if within option or textarea - elif self._current_label is not None: - map = self._current_label - key = "__text" - else: - return - - if data and not map.has_key(key): - # according to - # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break - # immediately after start tags or immediately before end tags must - # be ignored, but real browsers only ignore a line break after a - # start tag, so we'll do that. - if data[0:2] == "\r\n": - data = data[2:] - elif data[0:1] in ["\n", "\r"]: - data = data[1:] - map[key] = data - else: - map[key] = map[key] + data - - def do_button(self, attrs): - debug("%s", attrs) - d = {} - d["type"] = "submit" # default - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - controls = self._current_form[2] - - type = d["type"] - name = d.get("name") - # we don't want to lose information, so use a type string that - # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON} - # e.g. type for BUTTON/RESET is "resetbutton" - # (type for INPUT/RESET is "reset") - type = type+"button" - self._add_label(d) - controls.append((type, name, d)) - - def do_input(self, attrs): - debug("%s", attrs) - d = {} - d["type"] = "text" # default - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - controls = self._current_form[2] - - type = d["type"] - name = d.get("name") - self._add_label(d) - controls.append((type, name, d)) - - def do_isindex(self, attrs): - debug("%s", attrs) - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - controls = self._current_form[2] - - self._add_label(d) - # isindex doesn't have type or name HTML attributes - controls.append(("isindex", None, d)) - - def handle_entityref(self, name): - #debug("%s", name) - self.handle_data(unescape( - '&%s;' % name, self._entitydefs, self._encoding)) - - def handle_charref(self, name): - #debug("%s", name) - self.handle_data(unescape_charref(name, self._encoding)) - - def unescape_attr(self, name): - #debug("%s", name) - return unescape(name, self._entitydefs, self._encoding) - - def unescape_attrs(self, attrs): - #debug("%s", attrs) - escaped_attrs = {} - for key, val in attrs.items(): - try: - val.items - except AttributeError: - escaped_attrs[key] = self.unescape_attr(val) - else: - # e.g. "__select" -- yuck! - escaped_attrs[key] = self.unescape_attrs(val) - return escaped_attrs - - def unknown_entityref(self, ref): self.handle_data("&%s;" % ref) - def unknown_charref(self, ref): self.handle_data("&#%s;" % ref) - - -if not HAVE_MODULE_HTMLPARSER: - class XHTMLCompatibleFormParser: - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - raise ValueError("HTMLParser could not be imported") -else: - class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser): - """Good for XHTML, bad for tolerance of incorrect HTML.""" - # thanks to Michael Howitz for this! - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - HTMLParser.HTMLParser.__init__(self) - _AbstractFormParser.__init__(self, entitydefs, encoding) - - def feed(self, data): - try: - HTMLParser.HTMLParser.feed(self, data) - except HTMLParser.HTMLParseError, exc: - raise ParseError(exc) - - def start_option(self, attrs): - _AbstractFormParser._start_option(self, attrs) - - def end_option(self): - _AbstractFormParser._end_option(self) - - def handle_starttag(self, tag, attrs): - try: - method = getattr(self, "start_" + tag) - except AttributeError: - try: - method = getattr(self, "do_" + tag) - except AttributeError: - pass # unknown tag - else: - method(attrs) - else: - method(attrs) - - def handle_endtag(self, tag): - try: - method = getattr(self, "end_" + tag) - except AttributeError: - pass # unknown tag - else: - method() - - def unescape(self, name): - # Use the entitydefs passed into constructor, not - # HTMLParser.HTMLParser's entitydefs. - return self.unescape_attr(name) - - def unescape_attr_if_required(self, name): - return name # HTMLParser.HTMLParser already did it - def unescape_attrs_if_required(self, attrs): - return attrs # ditto - - def close(self): - HTMLParser.HTMLParser.close(self) - self.end_body() - - -class _AbstractSgmllibParser(_AbstractFormParser): - - def do_option(self, attrs): - _AbstractFormParser._start_option(self, attrs) - - if sys.version_info[:2] >= (2,5): - # we override this attr to decode hex charrefs - entity_or_charref = re.compile( - '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)') - def convert_entityref(self, name): - return unescape("&%s;" % name, self._entitydefs, self._encoding) - def convert_charref(self, name): - return unescape_charref("%s" % name, self._encoding) - def unescape_attr_if_required(self, name): - return name # sgmllib already did it - def unescape_attrs_if_required(self, attrs): - return attrs # ditto - else: - def unescape_attr_if_required(self, name): - return self.unescape_attr(name) - def unescape_attrs_if_required(self, attrs): - return self.unescape_attrs(attrs) - - -class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser): - """Good for tolerance of incorrect HTML, bad for XHTML.""" - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - sgmllib.SGMLParser.__init__(self) - _AbstractFormParser.__init__(self, entitydefs, encoding) - - def feed(self, data): - try: - sgmllib.SGMLParser.feed(self, data) - except SGMLLIB_PARSEERROR, exc: - raise ParseError(exc) - - def close(self): - sgmllib.SGMLParser.close(self) - self.end_body() - - -# sigh, must support mechanize by allowing dynamic creation of classes based on -# its bundled copy of BeautifulSoup (which was necessary because of dependency -# problems) - -def _create_bs_classes(bs, - icbinbs, - ): - class _AbstractBSFormParser(_AbstractSgmllibParser): - bs_base_class = None - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - _AbstractFormParser.__init__(self, entitydefs, encoding) - self.bs_base_class.__init__(self) - def handle_data(self, data): - _AbstractFormParser.handle_data(self, data) - self.bs_base_class.handle_data(self, data) - def feed(self, data): - try: - self.bs_base_class.feed(self, data) - except SGMLLIB_PARSEERROR, exc: - raise ParseError(exc) - def close(self): - self.bs_base_class.close(self) - self.end_body() - - class RobustFormParser(_AbstractBSFormParser, bs): - """Tries to be highly tolerant of incorrect HTML.""" - pass - RobustFormParser.bs_base_class = bs - class NestingRobustFormParser(_AbstractBSFormParser, icbinbs): - """Tries to be highly tolerant of incorrect HTML. - - Different from RobustFormParser in that it more often guesses nesting - above missing end tags (see BeautifulSoup docs). - - """ - pass - NestingRobustFormParser.bs_base_class = icbinbs - - return RobustFormParser, NestingRobustFormParser - -try: - if sys.version_info[:2] < (2, 2): - raise ImportError # BeautifulSoup uses generators - import BeautifulSoup -except ImportError: - pass -else: - RobustFormParser, NestingRobustFormParser = _create_bs_classes( - BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup - ) - __all__ += ['RobustFormParser', 'NestingRobustFormParser'] - - -#FormParser = XHTMLCompatibleFormParser # testing hack -#FormParser = RobustFormParser # testing hack - - -def ParseResponseEx(response, - select_default=False, - form_parser_class=FormParser, - request_class=urllib2.Request, - entitydefs=None, - encoding=DEFAULT_ENCODING, - - # private - _urljoin=urlparse.urljoin, - _urlparse=urlparse.urlparse, - _urlunparse=urlparse.urlunparse, - ): - """Identical to ParseResponse, except that: - - 1. The returned list contains an extra item. The first form in the list - contains all controls not contained in any FORM element. - - 2. The arguments ignore_errors and backwards_compat have been removed. - - 3. Backwards-compatibility mode (backwards_compat=True) is not available. - """ - return _ParseFileEx(response, response.geturl(), - select_default, - False, - form_parser_class, - request_class, - entitydefs, - False, - encoding, - _urljoin=_urljoin, - _urlparse=_urlparse, - _urlunparse=_urlunparse, - ) - -def ParseFileEx(file, base_uri, - select_default=False, - form_parser_class=FormParser, - request_class=urllib2.Request, - entitydefs=None, - encoding=DEFAULT_ENCODING, - - # private - _urljoin=urlparse.urljoin, - _urlparse=urlparse.urlparse, - _urlunparse=urlparse.urlunparse, - ): - """Identical to ParseFile, except that: - - 1. The returned list contains an extra item. The first form in the list - contains all controls not contained in any FORM element. - - 2. The arguments ignore_errors and backwards_compat have been removed. - - 3. Backwards-compatibility mode (backwards_compat=True) is not available. - """ - return _ParseFileEx(file, base_uri, - select_default, - False, - form_parser_class, - request_class, - entitydefs, - False, - encoding, - _urljoin=_urljoin, - _urlparse=_urlparse, - _urlunparse=_urlunparse, - ) - -def ParseResponse(response, *args, **kwds): - """Parse HTTP response and return a list of HTMLForm instances. - - The return value of urllib2.urlopen can be conveniently passed to this - function as the response parameter. - - ClientForm.ParseError is raised on parse errors. - - response: file-like object (supporting read() method) with a method - geturl(), returning the URI of the HTTP response - select_default: for multiple-selection SELECT controls and RADIO controls, - pick the first item as the default if none are selected in the HTML - form_parser_class: class to instantiate and use to pass - request_class: class to return from .click() method (default is - urllib2.Request) - entitydefs: mapping like {"&": "&", ...} containing HTML entity - definitions (a sensible default is used) - encoding: character encoding used for encoding numeric character references - when matching link text. ClientForm does not attempt to find the encoding - in a META HTTP-EQUIV attribute in the document itself (mechanize, for - example, does do that and will pass the correct value to ClientForm using - this parameter). - - backwards_compat: boolean that determines whether the returned HTMLForm - objects are backwards-compatible with old code. If backwards_compat is - true: - - - ClientForm 0.1 code will continue to work as before. - - - Label searches that do not specify a nr (number or count) will always - get the first match, even if other controls match. If - backwards_compat is False, label searches that have ambiguous results - will raise an AmbiguityError. - - - Item label matching is done by strict string comparison rather than - substring matching. - - - De-selecting individual list items is allowed even if the Item is - disabled. - - The backwards_compat argument will be deprecated in a future release. - - Pass a true value for select_default if you want the behaviour specified by - RFC 1866 (the HTML 2.0 standard), which is to select the first item in a - RADIO or multiple-selection SELECT control if none were selected in the - HTML. Most browsers (including Microsoft Internet Explorer (IE) and - Netscape Navigator) instead leave all items unselected in these cases. The - W3C HTML 4.0 standard leaves this behaviour undefined in the case of - multiple-selection SELECT controls, but insists that at least one RADIO - button should be checked at all times, in contradiction to browser - behaviour. - - There is a choice of parsers. ClientForm.XHTMLCompatibleFormParser (uses - HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses - sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML. - Note that HTMLParser is only available in Python 2.2 and later. You can - pass your own class in here as a hack to work around bad HTML, but at your - own risk: there is no well-defined interface. - - """ - return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:] - -def ParseFile(file, base_uri, *args, **kwds): - """Parse HTML and return a list of HTMLForm instances. - - ClientForm.ParseError is raised on parse errors. - - file: file-like object (supporting read() method) containing HTML with zero - or more forms to be parsed - base_uri: the URI of the document (note that the base URI used to submit - the form will be that given in the BASE element if present, not that of - the document) - - For the other arguments and further details, see ParseResponse.__doc__. - - """ - return _ParseFileEx(file, base_uri, *args, **kwds)[1:] - -def _ParseFileEx(file, base_uri, - select_default=False, - ignore_errors=False, - form_parser_class=FormParser, - request_class=urllib2.Request, - entitydefs=None, - backwards_compat=True, - encoding=DEFAULT_ENCODING, - _urljoin=urlparse.urljoin, - _urlparse=urlparse.urlparse, - _urlunparse=urlparse.urlunparse, - ): - if backwards_compat: - deprecation("operating in backwards-compatibility mode", 1) - fp = form_parser_class(entitydefs, encoding) - while 1: - data = file.read(CHUNK) - try: - fp.feed(data) - except ParseError, e: - e.base_uri = base_uri - raise - if len(data) != CHUNK: break - fp.close() - if fp.base is not None: - # HTML BASE element takes precedence over document URI - base_uri = fp.base - labels = [] # Label(label) for label in fp.labels] - id_to_labels = {} - for l in fp.labels: - label = Label(l) - labels.append(label) - for_id = l["for"] - coll = id_to_labels.get(for_id) - if coll is None: - id_to_labels[for_id] = [label] - else: - coll.append(label) - forms = [] - for (name, action, method, enctype), attrs, controls in fp.forms: - if action is None: - action = base_uri - else: - action = _urljoin(base_uri, action) - # would be nice to make HTMLForm class (form builder) pluggable - form = HTMLForm( - action, method, enctype, name, attrs, request_class, - forms, labels, id_to_labels, backwards_compat) - form._urlparse = _urlparse - form._urlunparse = _urlunparse - for ii in range(len(controls)): - type, name, attrs = controls[ii] - # index=ii*10 allows ImageControl to return multiple ordered pairs - form.new_control( - type, name, attrs, select_default=select_default, index=ii*10) - forms.append(form) - for form in forms: - form.fixup() - return forms - - -class Label: - def __init__(self, attrs): - self.id = attrs.get("for") - self._text = attrs.get("__text").strip() - self._ctext = compress_text(self._text) - self.attrs = attrs - self._backwards_compat = False # maintained by HTMLForm - - def __getattr__(self, name): - if name == "text": - if self._backwards_compat: - return self._text - else: - return self._ctext - return getattr(Label, name) - - def __setattr__(self, name, value): - if name == "text": - # don't see any need for this, so make it read-only - raise AttributeError("text attribute is read-only") - self.__dict__[name] = value - - def __str__(self): - return "<Label(id=%r, text=%r)>" % (self.id, self.text) - - -def _get_label(attrs): - text = attrs.get("__label") - if text is not None: - return Label(text) - else: - return None - -class Control: - """An HTML form control. - - An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm - are accessed using the HTMLForm.find_control method or the - HTMLForm.controls attribute. - - Control instances are usually constructed using the ParseFile / - ParseResponse functions. If you use those functions, you can ignore the - rest of this paragraph. A Control is only properly initialised after the - fixup method has been called. In fact, this is only strictly necessary for - ListControl instances. This is necessary because ListControls are built up - from ListControls each containing only a single item, and their initial - value(s) can only be known after the sequence is complete. - - The types and values that are acceptable for assignment to the value - attribute are defined by subclasses. - - If the disabled attribute is true, this represents the state typically - represented by browsers by 'greying out' a control. If the disabled - attribute is true, the Control will raise AttributeError if an attempt is - made to change its value. In addition, the control will not be considered - 'successful' as defined by the W3C HTML 4 standard -- ie. it will - contribute no data to the return value of the HTMLForm.click* methods. To - enable a control, set the disabled attribute to a false value. - - If the readonly attribute is true, the Control will raise AttributeError if - an attempt is made to change its value. To make a control writable, set - the readonly attribute to a false value. - - All controls have the disabled and readonly attributes, not only those that - may have the HTML attributes of the same names. - - On assignment to the value attribute, the following exceptions are raised: - TypeError, AttributeError (if the value attribute should not be assigned - to, because the control is disabled, for example) and ValueError. - - If the name or value attributes are None, or the value is an empty list, or - if the control is disabled, the control is not successful. - - Public attributes: - - type: string describing type of control (see the keys of the - HTMLForm.type2class dictionary for the allowable values) (readonly) - name: name of control (readonly) - value: current value of control (subclasses may allow a single value, a - sequence of values, or either) - disabled: disabled state - readonly: readonly state - id: value of id HTML attribute - - """ - def __init__(self, type, name, attrs, index=None): - """ - type: string describing type of control (see the keys of the - HTMLForm.type2class dictionary for the allowable values) - name: control name - attrs: HTML attributes of control's HTML element - - """ - raise NotImplementedError() - - def add_to_form(self, form): - self._form = form - form.controls.append(self) - - def fixup(self): - pass - - def is_of_kind(self, kind): - raise NotImplementedError() - - def clear(self): - raise NotImplementedError() - - def __getattr__(self, name): raise NotImplementedError() - def __setattr__(self, name, value): raise NotImplementedError() - - def pairs(self): - """Return list of (key, value) pairs suitable for passing to urlencode. - """ - return [(k, v) for (i, k, v) in self._totally_ordered_pairs()] - - def _totally_ordered_pairs(self): - """Return list of (key, value, index) tuples. - - Like pairs, but allows preserving correct ordering even where several - controls are involved. - - """ - raise NotImplementedError() - - def _write_mime_data(self, mw, name, value): - """Write data for a subitem of this control to a MimeWriter.""" - # called by HTMLForm - mw2 = mw.nextpart() - mw2.addheader("Content-Disposition", - 'form-data; name="%s"' % name, 1) - f = mw2.startbody(prefix=0) - f.write(value) - - def __str__(self): - raise NotImplementedError() - - def get_labels(self): - """Return all labels (Label instances) for this control. - - If the control was surrounded by a <label> tag, that will be the first - label; all other labels, connected by 'for' and 'id', are in the order - that appear in the HTML. - - """ - res = [] - if self._label: - res.append(self._label) - if self.id: - res.extend(self._form._id_to_labels.get(self.id, ())) - return res - - -#--------------------------------------------------- -class ScalarControl(Control): - """Control whose value is not restricted to one of a prescribed set. - - Some ScalarControls don't accept any value attribute. Otherwise, takes a - single value, which must be string-like. - - Additional read-only public attribute: - - attrs: dictionary mapping the names of original HTML attributes of the - control to their values - - """ - def __init__(self, type, name, attrs, index=None): - self._index = index - self._label = _get_label(attrs) - self.__dict__["type"] = type.lower() - self.__dict__["name"] = name - self._value = attrs.get("value") - self.disabled = attrs.has_key("disabled") - self.readonly = attrs.has_key("readonly") - self.id = attrs.get("id") - - self.attrs = attrs.copy() - - self._clicked = False - - self._urlparse = urlparse.urlparse - self._urlunparse = urlparse.urlunparse - - def __getattr__(self, name): - if name == "value": - return self.__dict__["_value"] - else: - raise AttributeError("%s instance has no attribute '%s'" % - (self.__class__.__name__, name)) - - def __setattr__(self, name, value): - if name == "value": - if not isstringlike(value): - raise TypeError("must assign a string") - elif self.readonly: - raise AttributeError("control '%s' is readonly" % self.name) - elif self.disabled: - raise AttributeError("control '%s' is disabled" % self.name) - self.__dict__["_value"] = value - elif name in ("name", "type"): - raise AttributeError("%s attribute is readonly" % name) - else: - self.__dict__[name] = value - - def _totally_ordered_pairs(self): - name = self.name - value = self.value - if name is None or value is None or self.disabled: - return [] - return [(self._index, name, value)] - - def clear(self): - if self.readonly: - raise AttributeError("control '%s' is readonly" % self.name) - self.__dict__["_value"] = None - - def __str__(self): - name = self.name - value = self.value - if name is None: name = "<None>" - if value is None: value = "<None>" - - infos = [] - if self.disabled: infos.append("disabled") - if self.readonly: infos.append("readonly") - info = ", ".join(infos) - if info: info = " (%s)" % info - - return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info) - - -#--------------------------------------------------- -class TextControl(ScalarControl): - """Textual input control. - - Covers: - - INPUT/TEXT - INPUT/PASSWORD - INPUT/HIDDEN - TEXTAREA - - """ - def __init__(self, type, name, attrs, index=None): - ScalarControl.__init__(self, type, name, attrs, index) - if self.type == "hidden": self.readonly = True - if self._value is None: - self._value = "" - - def is_of_kind(self, kind): return kind == "text" - -#--------------------------------------------------- -class FileControl(ScalarControl): - """File upload with INPUT TYPE=FILE. - - The value attribute of a FileControl is always None. Use add_file instead. - - Additional public method: add_file - - """ - - def __init__(self, type, name, attrs, index=None): - ScalarControl.__init__(self, type, name, attrs, index) - self._value = None - self._upload_data = [] - - def is_of_kind(self, kind): return kind == "file" - - def clear(self): - if self.readonly: - raise AttributeError("control '%s' is readonly" % self.name) - self._upload_data = [] - - def __setattr__(self, name, value): - if name in ("value", "name", "type"): - raise AttributeError("%s attribute is readonly" % name) - else: - self.__dict__[name] = value - - def add_file(self, file_object, content_type=None, filename=None): - if not hasattr(file_object, "read"): - raise TypeError("file-like object must have read method") - if content_type is not None and not isstringlike(content_type): - raise TypeError("content type must be None or string-like") - if filename is not None and not isstringlike(filename): - raise TypeError("filename must be None or string-like") - if content_type is None: - content_type = "application/octet-stream" - self._upload_data.append((file_object, content_type, filename)) - - def _totally_ordered_pairs(self): - # XXX should it be successful even if unnamed? - if self.name is None or self.disabled: - return [] - return [(self._index, self.name, "")] - - def _write_mime_data(self, mw, _name, _value): - # called by HTMLForm - # assert _name == self.name and _value == '' - if len(self._upload_data) < 2: - if len(self._upload_data) == 0: - file_object = StringIO() - content_type = "application/octet-stream" - filename = "" - else: - file_object, content_type, filename = self._upload_data[0] - if filename is None: - filename = "" - mw2 = mw.nextpart() - fn_part = '; filename="%s"' % filename - disp = 'form-data; name="%s"%s' % (self.name, fn_part) - mw2.addheader("Content-Disposition", disp, prefix=1) - fh = mw2.startbody(content_type, prefix=0) - fh.write(file_object.read()) - else: - # multiple files - mw2 = mw.nextpart() - disp = 'form-data; name="%s"' % self.name - mw2.addheader("Content-Disposition", disp, prefix=1) - fh = mw2.startmultipartbody("mixed", prefix=0) - for file_object, content_type, filename in self._upload_data: - mw3 = mw2.nextpart() - if filename is None: - filename = "" - fn_part = '; filename="%s"' % filename - disp = "file%s" % fn_part - mw3.addheader("Content-Disposition", disp, prefix=1) - fh2 = mw3.startbody(content_type, prefix=0) - fh2.write(file_object.read()) - mw2.lastpart() - - def __str__(self): - name = self.name - if name is None: name = "<None>" - - if not self._upload_data: - value = "<No files added>" - else: - value = [] - for file, ctype, filename in self._upload_data: - if filename is None: - value.append("<Unnamed file>") - else: - value.append(filename) - value = ", ".join(value) - - info = [] - if self.disabled: info.append("disabled") - if self.readonly: info.append("readonly") - info = ", ".join(info) - if info: info = " (%s)" % info - - return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info) - - -#--------------------------------------------------- -class IsindexControl(ScalarControl): - """ISINDEX control. - - ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really - part of regular HTML forms at all, and predates it. You're only allowed - one ISINDEX per HTML document. ISINDEX and regular form submission are - mutually exclusive -- either submit a form, or the ISINDEX. - - Having said this, since ISINDEX controls may appear in forms (which is - probably bad HTML), ParseFile / ParseResponse will include them in the - HTMLForm instances it returns. You can set the ISINDEX's value, as with - any other control (but note that ISINDEX controls have no name, so you'll - need to use the type argument of set_value!). When you submit the form, - the ISINDEX will not be successful (ie., no data will get returned to the - server as a result of its presence), unless you click on the ISINDEX - control, in which case the ISINDEX gets submitted instead of the form: - - form.set_value("my isindex value", type="isindex") - urllib2.urlopen(form.click(type="isindex")) - - ISINDEX elements outside of FORMs are ignored. If you want to submit one - by hand, do it like so: - - url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value")) - result = urllib2.urlopen(url) - - """ - def __init__(self, type, name, attrs, index=None): - ScalarControl.__init__(self, type, name, attrs, index) - if self._value is None: - self._value = "" - - def is_of_kind(self, kind): return kind in ["text", "clickable"] - - def _totally_ordered_pairs(self): - return [] - - def _click(self, form, coord, return_type, request_class=urllib2.Request): - # Relative URL for ISINDEX submission: instead of "foo=bar+baz", - # want "bar+baz". - # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is - # deprecated in 4.01, but it should still say how to submit it). - # Submission of ISINDEX is explained in the HTML 3.2 spec, though. - parts = self._urlparse(form.action) - rest, (query, frag) = parts[:-2], parts[-2:] - parts = rest + (urllib.quote_plus(self.value), None) - url = self._urlunparse(parts) - req_data = url, None, [] - - if return_type == "pairs": - return [] - elif return_type == "request_data": - return req_data - else: - return request_class(url) - - def __str__(self): - value = self.value - if value is None: value = "<None>" - - infos = [] - if self.disabled: infos.append("disabled") - if self.readonly: infos.append("readonly") - info = ", ".join(infos) - if info: info = " (%s)" % info - - return "<%s(%s)%s>" % (self.__class__.__name__, value, info) - - -#--------------------------------------------------- -class IgnoreControl(ScalarControl): - """Control that we're not interested in. - - Covers: - - INPUT/RESET - BUTTON/RESET - INPUT/BUTTON - BUTTON/BUTTON - - These controls are always unsuccessful, in the terminology of HTML 4 (ie. - they never require any information to be returned to the server). - - BUTTON/BUTTON is used to generate events for script embedded in HTML. - - The value attribute of IgnoreControl is always None. - - """ - def __init__(self, type, name, attrs, index=None): - ScalarControl.__init__(self, type, name, attrs, index) - self._value = None - - def is_of_kind(self, kind): return False - - def __setattr__(self, name, value): - if name == "value": - raise AttributeError( - "control '%s' is ignored, hence read-only" % self.name) - elif name in ("name", "type"): - raise AttributeError("%s attribute is readonly" % name) - else: - self.__dict__[name] = value - - -#--------------------------------------------------- -# ListControls - -# helpers and subsidiary classes - -class Item: - def __init__(self, control, attrs, index=None): - label = _get_label(attrs) - self.__dict__.update({ - "name": attrs["value"], - "_labels": label and [label] or [], - "attrs": attrs, - "_control": control, - "disabled": attrs.has_key("disabled"), - "_selected": False, - "id": attrs.get("id"), - "_index": index, - }) - control.items.append(self) - - def get_labels(self): - """Return all labels (Label instances) for this item. - - For items that represent radio buttons or checkboxes, if the item was - surrounded by a <label> tag, that will be the first label; all other - labels, connected by 'for' and 'id', are in the order that appear in - the HTML. - - For items that represent select options, if the option had a label - attribute, that will be the first label. If the option has contents - (text within the option tags) and it is not the same as the label - attribute (if any), that will be a label. There is nothing in the - spec to my knowledge that makes an option with an id unable to be the - target of a label's for attribute, so those are included, if any, for - the sake of consistency and completeness. - - """ - res = [] - res.extend(self._labels) - if self.id: - res.extend(self._control._form._id_to_labels.get(self.id, ())) - return res - - def __getattr__(self, name): - if name=="selected": - return self._selected - raise AttributeError(name) - - def __setattr__(self, name, value): - if name == "selected": - self._control._set_selected_state(self, value) - elif name == "disabled": - self.__dict__["disabled"] = bool(value) - else: - raise AttributeError(name) - - def __str__(self): - res = self.name - if self.selected: - res = "*" + res - if self.disabled: - res = "(%s)" % res - return res - - def __repr__(self): - # XXX appending the attrs without distinguishing them from name and id - # is silly - attrs = [("name", self.name), ("id", self.id)]+self.attrs.items() - return "<%s %s>" % ( - self.__class__.__name__, - " ".join(["%s=%r" % (k, v) for k, v in attrs]) - ) - -def disambiguate(items, nr, **kwds): - msgs = [] - for key, value in kwds.items(): - msgs.append("%s=%r" % (key, value)) - msg = " ".join(msgs) - if not items: - raise ItemNotFoundError(msg) - if nr is None: - if len(items) > 1: - raise AmbiguityError(msg) - nr = 0 - if len(items) <= nr: - raise ItemNotFoundError(msg) - return items[nr] - -class ListControl(Control): - """Control representing a sequence of items. - - The value attribute of a ListControl represents the successful list items - in the control. The successful list items are those that are selected and - not disabled. - - ListControl implements both list controls that take a length-1 value - (single-selection) and those that take length >1 values - (multiple-selection). - - ListControls accept sequence values only. Some controls only accept - sequences of length 0 or 1 (RADIO, and single-selection SELECT). - In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes - and multiple-selection SELECTs (those having the "multiple" HTML attribute) - accept sequences of any length. - - Note the following mistake: - - control.value = some_value - assert control.value == some_value # not necessarily true - - The reason for this is that the value attribute always gives the list items - in the order they were listed in the HTML. - - ListControl items can also be referred to by their labels instead of names. - Use the label argument to .get(), and the .set_value_by_label(), - .get_value_by_label() methods. - - Note that, rather confusingly, though SELECT controls are represented in - HTML by SELECT elements (which contain OPTION elements, representing - individual list items), CHECKBOXes and RADIOs are not represented by *any* - element. Instead, those controls are represented by a collection of INPUT - elements. For example, this is a SELECT control, named "control1": - - <select name="control1"> - <option>foo</option> - <option value="1">bar</option> - </select> - - and this is a CHECKBOX control, named "control2": - - <input type="checkbox" name="control2" value="foo" id="cbe1"> - <input type="checkbox" name="control2" value="bar" id="cbe2"> - - The id attribute of a CHECKBOX or RADIO ListControl is always that of its - first element (for example, "cbe1" above). - - - Additional read-only public attribute: multiple. - - """ - - # ListControls are built up by the parser from their component items by - # creating one ListControl per item, consolidating them into a single - # master ListControl held by the HTMLForm: - - # -User calls form.new_control(...) - # -Form creates Control, and calls control.add_to_form(self). - # -Control looks for a Control with the same name and type in the form, - # and if it finds one, merges itself with that control by calling - # control.merge_control(self). The first Control added to the form, of - # a particular name and type, is the only one that survives in the - # form. - # -Form calls control.fixup for all its controls. ListControls in the - # form know they can now safely pick their default values. - - # To create a ListControl without an HTMLForm, use: - - # control.merge_control(new_control) - - # (actually, it's much easier just to use ParseFile) - - _label = None - - def __init__(self, type, name, attrs={}, select_default=False, - called_as_base_class=False, index=None): - """ - select_default: for RADIO and multiple-selection SELECT controls, pick - the first item as the default if no 'selected' HTML attribute is - present - - """ - if not called_as_base_class: - raise NotImplementedError() - - self.__dict__["type"] = type.lower() - self.__dict__["name"] = name - self._value = attrs.get("value") - self.disabled = False - self.readonly = False - self.id = attrs.get("id") - self._closed = False - - # As Controls are merged in with .merge_control(), self.attrs will - # refer to each Control in turn -- always the most recently merged - # control. Each merged-in Control instance corresponds to a single - # list item: see ListControl.__doc__. - self.items = [] - self._form = None - - self._select_default = select_default - self._clicked = False - - def clear(self): - self.value = [] - - def is_of_kind(self, kind): - if kind == "list": - return True - elif kind == "multilist": - return bool(self.multiple) - elif kind == "singlelist": - return not self.multiple - else: - return False - - def get_items(self, name=None, label=None, id=None, - exclude_disabled=False): - """Return matching items by name or label. - - For argument docs, see the docstring for .get() - - """ - if name is not None and not isstringlike(name): - raise TypeError("item name must be string-like") - if label is not None and not isstringlike(label): - raise TypeError("item label must be string-like") - if id is not None and not isstringlike(id): - raise TypeError("item id must be string-like") - items = [] # order is important - compat = self._form.backwards_compat - for o in self.items: - if exclude_disabled and o.disabled: - continue - if name is not None and o.name != name: - continue - if label is not None: - for l in o.get_labels(): - if ((compat and l.text == label) or - (not compat and l.text.find(label) > -1)): - break - else: - continue - if id is not None and o.id != id: - continue - items.append(o) - return items - - def get(self, name=None, label=None, id=None, nr=None, - exclude_disabled=False): - """Return item by name or label, disambiguating if necessary with nr. - - All arguments must be passed by name, with the exception of 'name', - which may be used as a positional argument. - - If name is specified, then the item must have the indicated name. - - If label is specified, then the item must have a label whose - whitespace-compressed, stripped, text substring-matches the indicated - label string (eg. label="please choose" will match - " Do please choose an item "). - - If id is specified, then the item must have the indicated id. - - nr is an optional 0-based index of the items matching the query. - - If nr is the default None value and more than item is found, raises - AmbiguityError (unless the HTMLForm instance's backwards_compat - attribute is true). - - If no item is found, or if items are found but nr is specified and not - found, raises ItemNotFoundError. - - Optionally excludes disabled items. - - """ - if nr is None and self._form.backwards_compat: - nr = 0 # :-/ - items = self.get_items(name, label, id, exclude_disabled) - return disambiguate(items, nr, name=name, label=label, id=id) - - def _get(self, name, by_label=False, nr=None, exclude_disabled=False): - # strictly for use by deprecated methods - if by_label: - name, label = None, name - else: - name, label = name, None - return self.get(name, label, nr, exclude_disabled) - - def toggle(self, name, by_label=False, nr=None): - """Deprecated: given a name or label and optional disambiguating index - nr, toggle the matching item's selection. - - Selecting items follows the behavior described in the docstring of the - 'get' method. - - if the item is disabled, or this control is disabled or readonly, - raise AttributeError. - - """ - deprecation( - "item = control.get(...); item.selected = not item.selected") - o = self._get(name, by_label, nr) - self._set_selected_state(o, not o.selected) - - def set(self, selected, name, by_label=False, nr=None): - """Deprecated: given a name or label and optional disambiguating index - nr, set the matching item's selection to the bool value of selected. - - Selecting items follows the behavior described in the docstring of the - 'get' method. - - if the item is disabled, or this control is disabled or readonly, - raise AttributeError. - - """ - deprecation( - "control.get(...).selected = <boolean>") - self._set_selected_state(self._get(name, by_label, nr), selected) - - def _set_selected_state(self, item, action): - # action: - # bool False: off - # bool True: on - if self.disabled: - raise AttributeError("control '%s' is disabled" % self.name) - if self.readonly: - raise AttributeError("control '%s' is readonly" % self.name) - action == bool(action) - compat = self._form.backwards_compat - if not compat and item.disabled: - raise AttributeError("item is disabled") - else: - if compat and item.disabled and action: - raise AttributeError("item is disabled") - if self.multiple: - item.__dict__["_selected"] = action - else: - if not action: - item.__dict__["_selected"] = False - else: - for o in self.items: - o.__dict__["_selected"] = False - item.__dict__["_selected"] = True - - def toggle_single(self, by_label=None): - """Deprecated: toggle the selection of the single item in this control. - - Raises ItemCountError if the control does not contain only one item. - - by_label argument is ignored, and included only for backwards - compatibility. - - """ - deprecation( - "control.items[0].selected = not control.items[0].selected") - if len(self.items) != 1: - raise ItemCountError( - "'%s' is not a single-item control" % self.name) - item = self.items[0] - self._set_selected_state(item, not item.selected) - - def set_single(self, selected, by_label=None): - """Deprecated: set the selection of the single item in this control. - - Raises ItemCountError if the control does not contain only one item. - - by_label argument is ignored, and included only for backwards - compatibility. - - """ - deprecation( - "control.items[0].selected = <boolean>") - if len(self.items) != 1: - raise ItemCountError( - "'%s' is not a single-item control" % self.name) - self._set_selected_state(self.items[0], selected) - - def get_item_disabled(self, name, by_label=False, nr=None): - """Get disabled state of named list item in a ListControl.""" - deprecation( - "control.get(...).disabled") - return self._get(name, by_label, nr).disabled - - def set_item_disabled(self, disabled, name, by_label=False, nr=None): - """Set disabled state of named list item in a ListControl. - - disabled: boolean disabled state - - """ - deprecation( - "control.get(...).disabled = <boolean>") - self._get(name, by_label, nr).disabled = disabled - - def set_all_items_disabled(self, disabled): - """Set disabled state of all list items in a ListControl. - - disabled: boolean disabled state - - """ - for o in self.items: - o.disabled = disabled - - def get_item_attrs(self, name, by_label=False, nr=None): - """Return dictionary of HTML attributes for a single ListControl item. - - The HTML element types that describe list items are: OPTION for SELECT - controls, INPUT for the rest. These elements have HTML attributes that - you may occasionally want to know about -- for example, the "alt" HTML - attribute gives a text string describing the item (graphical browsers - usually display this as a tooltip). - - The returned dictionary maps HTML attribute names to values. The names - and values are taken from the original HTML. - - """ - deprecation( - "control.get(...).attrs") - return self._get(name, by_label, nr).attrs - - def close_control(self): - self._closed = True - - def add_to_form(self, form): - assert self._form is None or form == self._form, ( - "can't add control to more than one form") - self._form = form - if self.name is None: - # always count nameless elements as separate controls - Control.add_to_form(self, form) - else: - for ii in range(len(form.controls)-1, -1, -1): - control = form.controls[ii] - if control.name == self.name and control.type == self.type: - if control._closed: - Control.add_to_form(self, form) - else: - control.merge_control(self) - break - else: - Control.add_to_form(self, form) - - def merge_control(self, control): - assert bool(control.multiple) == bool(self.multiple) - # usually, isinstance(control, self.__class__) - self.items.extend(control.items) - - def fixup(self): - """ - ListControls are built up from component list items (which are also - ListControls) during parsing. This method should be called after all - items have been added. See ListControl.__doc__ for the reason this is - required. - - """ - # Need to set default selection where no item was indicated as being - # selected by the HTML: - - # CHECKBOX: - # Nothing should be selected. - # SELECT/single, SELECT/multiple and RADIO: - # RFC 1866 (HTML 2.0): says first item should be selected. - # W3C HTML 4.01 Specification: says that client behaviour is - # undefined in this case. For RADIO, exactly one must be selected, - # though which one is undefined. - # Both Netscape and Microsoft Internet Explorer (IE) choose first - # item for SELECT/single. However, both IE5 and Mozilla (both 1.0 - # and Firebird 0.6) leave all items unselected for RADIO and - # SELECT/multiple. - - # Since both Netscape and IE all choose the first item for - # SELECT/single, we do the same. OTOH, both Netscape and IE - # leave SELECT/multiple with nothing selected, in violation of RFC 1866 - # (but not in violation of the W3C HTML 4 standard); the same is true - # of RADIO (which *is* in violation of the HTML 4 standard). We follow - # RFC 1866 if the _select_default attribute is set, and Netscape and IE - # otherwise. RFC 1866 and HTML 4 are always violated insofar as you - # can deselect all items in a RadioControl. - - for o in self.items: - # set items' controls to self, now that we've merged - o.__dict__["_control"] = self - - def __getattr__(self, name): - if name == "value": - compat = self._form.backwards_compat - if self.name is None: - return [] - return [o.name for o in self.items if o.selected and - (not o.disabled or compat)] - else: - raise AttributeError("%s instance has no attribute '%s'" % - (self.__class__.__name__, name)) - - def __setattr__(self, name, value): - if name == "value": - if self.disabled: - raise AttributeError("control '%s' is disabled" % self.name) - if self.readonly: - raise AttributeError("control '%s' is readonly" % self.name) - self._set_value(value) - elif name in ("name", "type", "multiple"): - raise AttributeError("%s attribute is readonly" % name) - else: - self.__dict__[name] = value - - def _set_value(self, value): - if value is None or isstringlike(value): - raise TypeError("ListControl, must set a sequence") - if not value: - compat = self._form.backwards_compat - for o in self.items: - if not o.disabled or compat: - o.selected = False - elif self.multiple: - self._multiple_set_value(value) - elif len(value) > 1: - raise ItemCountError( - "single selection list, must set sequence of " - "length 0 or 1") - else: - self._single_set_value(value) - - def _get_items(self, name, target=1): - all_items = self.get_items(name) - items = [o for o in all_items if not o.disabled] - if len(items) < target: - if len(all_items) < target: - raise ItemNotFoundError( - "insufficient items with name %r" % name) - else: - raise AttributeError( - "insufficient non-disabled items with name %s" % name) - on = [] - off = [] - for o in items: - if o.selected: - on.append(o) - else: - off.append(o) - return on, off - - def _single_set_value(self, value): - assert len(value) == 1 - on, off = self._get_items(value[0]) - assert len(on) <= 1 - if not on: - off[0].selected = True - - def _multiple_set_value(self, value): - compat = self._form.backwards_compat - turn_on = [] # transactional-ish - turn_off = [item for item in self.items if - item.selected and (not item.disabled or compat)] - names = {} - for nn in value: - if nn in names.keys(): - names[nn] += 1 - else: - names[nn] = 1 - for name, count in names.items(): - on, off = self._get_items(name, count) - for i in range(count): - if on: - item = on[0] - del on[0] - del turn_off[turn_off.index(item)] - else: - item = off[0] - del off[0] - turn_on.append(item) - for item in turn_off: - item.selected = False - for item in turn_on: - item.selected = True - - def set_value_by_label(self, value): - """Set the value of control by item labels. - - value is expected to be an iterable of strings that are substrings of - the item labels that should be selected. Before substring matching is - performed, the original label text is whitespace-compressed - (consecutive whitespace characters are converted to a single space - character) and leading and trailing whitespace is stripped. Ambiguous - labels are accepted without complaint if the form's backwards_compat is - True; otherwise, it will not complain as long as all ambiguous labels - share the same item name (e.g. OPTION value). - - """ - if isstringlike(value): - raise TypeError(value) - if not self.multiple and len(value) > 1: - raise ItemCountError( - "single selection list, must set sequence of " - "length 0 or 1") - items = [] - for nn in value: - found = self.get_items(label=nn) - if len(found) > 1: - if not self._form.backwards_compat: - # ambiguous labels are fine as long as item names (e.g. - # OPTION values) are same - opt_name = found[0].name - if [o for o in found[1:] if o.name != opt_name]: - raise AmbiguityError(nn) - else: - # OK, we'll guess :-( Assume first available item. - found = found[:1] - for o in found: - # For the multiple-item case, we could try to be smarter, - # saving them up and trying to resolve, but that's too much. - if self._form.backwards_compat or o not in items: - items.append(o) - break - else: # all of them are used - raise ItemNotFoundError(nn) - # now we have all the items that should be on - # let's just turn everything off and then back on. - self.value = [] - for o in items: - o.selected = True - - def get_value_by_label(self): - """Return the value of the control as given by normalized labels.""" - res = [] - compat = self._form.backwards_compat - for o in self.items: - if (not o.disabled or compat) and o.selected: - for l in o.get_labels(): - if l.text: - res.append(l.text) - break - else: - res.append(None) - return res - - def possible_items(self, by_label=False): - """Deprecated: return the names or labels of all possible items. - - Includes disabled items, which may be misleading for some use cases. - - """ - deprecation( - "[item.name for item in self.items]") - if by_label: - res = [] - for o in self.items: - for l in o.get_labels(): - if l.text: - res.append(l.text) - break - else: - res.append(None) - return res - return [o.name for o in self.items] - - def _totally_ordered_pairs(self): - if self.disabled or self.name is None: - return [] - else: - return [(o._index, self.name, o.name) for o in self.items - if o.selected and not o.disabled] - - def __str__(self): - name = self.name - if name is None: name = "<None>" - - display = [str(o) for o in self.items] - - infos = [] - if self.disabled: infos.append("disabled") - if self.readonly: infos.append("readonly") - info = ", ".join(infos) - if info: info = " (%s)" % info - - return "<%s(%s=[%s])%s>" % (self.__class__.__name__, - name, ", ".join(display), info) - - -class RadioControl(ListControl): - """ - Covers: - - INPUT/RADIO - - """ - def __init__(self, type, name, attrs, select_default=False, index=None): - attrs.setdefault("value", "on") - ListControl.__init__(self, type, name, attrs, select_default, - called_as_base_class=True, index=index) - self.__dict__["multiple"] = False - o = Item(self, attrs, index) - o.__dict__["_selected"] = attrs.has_key("checked") - - def fixup(self): - ListControl.fixup(self) - found = [o for o in self.items if o.selected and not o.disabled] - if not found: - if self._select_default: - for o in self.items: - if not o.disabled: - o.selected = True - break - else: - # Ensure only one item selected. Choose the last one, - # following IE and Firefox. - for o in found[:-1]: - o.selected = False - - def get_labels(self): - return [] - -class CheckboxControl(ListControl): - """ - Covers: - - INPUT/CHECKBOX - - """ - def __init__(self, type, name, attrs, select_default=False, index=None): - attrs.setdefault("value", "on") - ListControl.__init__(self, type, name, attrs, select_default, - called_as_base_class=True, index=index) - self.__dict__["multiple"] = True - o = Item(self, attrs, index) - o.__dict__["_selected"] = attrs.has_key("checked") - - def get_labels(self): - return [] - - -class SelectControl(ListControl): - """ - Covers: - - SELECT (and OPTION) - - - OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance. - - SELECT control values and labels are subject to some messy defaulting - rules. For example, if the HTML representation of the control is: - - <SELECT name=year> - <OPTION value=0 label="2002">current year</OPTION> - <OPTION value=1>2001</OPTION> - <OPTION>2000</OPTION> - </SELECT> - - The items, in order, have labels "2002", "2001" and "2000", whereas their - names (the OPTION values) are "0", "1" and "2000" respectively. Note that - the value of the last OPTION in this example defaults to its contents, as - specified by RFC 1866, as do the labels of the second and third OPTIONs. - - The OPTION labels are sometimes more meaningful than the OPTION values, - which can make for more maintainable code. - - Additional read-only public attribute: attrs - - The attrs attribute is a dictionary of the original HTML attributes of the - SELECT element. Other ListControls do not have this attribute, because in - other cases the control as a whole does not correspond to any single HTML - element. control.get(...).attrs may be used as usual to get at the HTML - attributes of the HTML elements corresponding to individual list items (for - SELECT controls, these are OPTION elements). - - Another special case is that the Item.attrs dictionaries have a special key - "contents" which does not correspond to any real HTML attribute, but rather - contains the contents of the OPTION element: - - <OPTION>this bit</OPTION> - - """ - # HTML attributes here are treated slightly differently from other list - # controls: - # -The SELECT HTML attributes dictionary is stuffed into the OPTION - # HTML attributes dictionary under the "__select" key. - # -The content of each OPTION element is stored under the special - # "contents" key of the dictionary. - # After all this, the dictionary is passed to the SelectControl constructor - # as the attrs argument, as usual. However: - # -The first SelectControl constructed when building up a SELECT control - # has a constructor attrs argument containing only the __select key -- so - # this SelectControl represents an empty SELECT control. - # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and - # the __select dictionary containing the SELECT HTML-attributes. - - def __init__(self, type, name, attrs, select_default=False, index=None): - # fish out the SELECT HTML attributes from the OPTION HTML attributes - # dictionary - self.attrs = attrs["__select"].copy() - self.__dict__["_label"] = _get_label(self.attrs) - self.__dict__["id"] = self.attrs.get("id") - self.__dict__["multiple"] = self.attrs.has_key("multiple") - # the majority of the contents, label, and value dance already happened - contents = attrs.get("contents") - attrs = attrs.copy() - del attrs["__select"] - - ListControl.__init__(self, type, name, self.attrs, select_default, - called_as_base_class=True, index=index) - self.disabled = self.attrs.has_key("disabled") - self.readonly = self.attrs.has_key("readonly") - if attrs.has_key("value"): - # otherwise it is a marker 'select started' token - o = Item(self, attrs, index) - o.__dict__["_selected"] = attrs.has_key("selected") - # add 'label' label and contents label, if different. If both are - # provided, the 'label' label is used for display in HTML - # 4.0-compliant browsers (and any lower spec? not sure) while the - # contents are used for display in older or less-compliant - # browsers. We make label objects for both, if the values are - # different. - label = attrs.get("label") - if label: - o._labels.append(Label({"__text": label})) - if contents and contents != label: - o._labels.append(Label({"__text": contents})) - elif contents: - o._labels.append(Label({"__text": contents})) - - def fixup(self): - ListControl.fixup(self) - # Firefox doesn't exclude disabled items from those considered here - # (i.e. from 'found', for both branches of the if below). Note that - # IE6 doesn't support the disabled attribute on OPTIONs at all. - found = [o for o in self.items if o.selected] - if not found: - if not self.multiple or self._select_default: - for o in self.items: - if not o.disabled: - was_disabled = self.disabled - self.disabled = False - try: - o.selected = True - finally: - o.disabled = was_disabled - break - elif not self.multiple: - # Ensure only one item selected. Choose the last one, - # following IE and Firefox. - for o in found[:-1]: - o.selected = False - - -#--------------------------------------------------- -class SubmitControl(ScalarControl): - """ - Covers: - - INPUT/SUBMIT - BUTTON/SUBMIT - - """ - def __init__(self, type, name, attrs, index=None): - ScalarControl.__init__(self, type, name, attrs, index) - # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it - # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem - # to define this. - if self.value is None: self.value = "" - self.readonly = True - - def get_labels(self): - res = [] - if self.value: - res.append(Label({"__text": self.value})) - res.extend(ScalarControl.get_labels(self)) - return res - - def is_of_kind(self, kind): return kind == "clickable" - - def _click(self, form, coord, return_type, request_class=urllib2.Request): - self._clicked = coord - r = form._switch_click(return_type, request_class) - self._clicked = False - return r - - def _totally_ordered_pairs(self): - if not self._clicked: - return [] - return ScalarControl._totally_ordered_pairs(self) - - -#--------------------------------------------------- -class ImageControl(SubmitControl): - """ - Covers: - - INPUT/IMAGE - - Coordinates are specified using one of the HTMLForm.click* methods. - - """ - def __init__(self, type, name, attrs, index=None): - SubmitControl.__init__(self, type, name, attrs, index) - self.readonly = False - - def _totally_ordered_pairs(self): - clicked = self._clicked - if self.disabled or not clicked: - return [] - name = self.name - if name is None: return [] - pairs = [ - (self._index, "%s.x" % name, str(clicked[0])), - (self._index+1, "%s.y" % name, str(clicked[1])), - ] - value = self._value - if value: - pairs.append((self._index+2, name, value)) - return pairs - - get_labels = ScalarControl.get_labels - -# aliases, just to make str(control) and str(form) clearer -class PasswordControl(TextControl): pass -class HiddenControl(TextControl): pass -class TextareaControl(TextControl): pass -class SubmitButtonControl(SubmitControl): pass - - -def is_listcontrol(control): return control.is_of_kind("list") - - -class HTMLForm: - """Represents a single HTML <form> ... </form> element. - - A form consists of a sequence of controls that usually have names, and - which can take on various values. The values of the various types of - controls represent variously: text, zero-or-one-of-many or many-of-many - choices, and files to be uploaded. Some controls can be clicked on to - submit the form, and clickable controls' values sometimes include the - coordinates of the click. - - Forms can be filled in with data to be returned to the server, and then - submitted, using the click method to generate a request object suitable for - passing to urllib2.urlopen (or the click_request_data or click_pairs - methods if you're not using urllib2). - - import ClientForm - forms = ClientForm.ParseFile(html, base_uri) - form = forms[0] - - form["query"] = "Python" - form.find_control("nr_results").get("lots").selected = True - - response = urllib2.urlopen(form.click()) - - Usually, HTMLForm instances are not created directly. Instead, the - ParseFile or ParseResponse factory functions are used. If you do construct - HTMLForm objects yourself, however, note that an HTMLForm instance is only - properly initialised after the fixup method has been called (ParseFile and - ParseResponse do this for you). See ListControl.__doc__ for the reason - this is required. - - Indexing a form (form["control_name"]) returns the named Control's value - attribute. Assignment to a form index (form["control_name"] = something) - is equivalent to assignment to the named Control's value attribute. If you - need to be more specific than just supplying the control's name, use the - set_value and get_value methods. - - ListControl values are lists of item names (specifically, the names of the - items that are selected and not disabled, and hence are "successful" -- ie. - cause data to be returned to the server). The list item's name is the - value of the corresponding HTML element's"value" attribute. - - Example: - - <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT> - <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT> - - defines a CHECKBOX control with name "cheeses" which has two items, named - "leicester" and "cheddar". - - Another example: - - <SELECT name="more_cheeses"> - <OPTION>1</OPTION> - <OPTION value="2" label="CHEDDAR">cheddar</OPTION> - </SELECT> - - defines a SELECT control with name "more_cheeses" which has two items, - named "1" and "2" (because the OPTION element's value HTML attribute - defaults to the element contents -- see SelectControl.__doc__ for more on - these defaulting rules). - - To select, deselect or otherwise manipulate individual list items, use the - HTMLForm.find_control() and ListControl.get() methods. To set the whole - value, do as for any other control: use indexing or the set_/get_value - methods. - - Example: - - # select *only* the item named "cheddar" - form["cheeses"] = ["cheddar"] - # select "cheddar", leave other items unaffected - form.find_control("cheeses").get("cheddar").selected = True - - Some controls (RADIO and SELECT without the multiple attribute) can only - have zero or one items selected at a time. Some controls (CHECKBOX and - SELECT with the multiple attribute) can have multiple items selected at a - time. To set the whole value of a ListControl, assign a sequence to a form - index: - - form["cheeses"] = ["cheddar", "leicester"] - - If the ListControl is not multiple-selection, the assigned list must be of - length one. - - To check if a control has an item, if an item is selected, or if an item is - successful (selected and not disabled), respectively: - - "cheddar" in [item.name for item in form.find_control("cheeses").items] - "cheddar" in [item.name for item in form.find_control("cheeses").items and - item.selected] - "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses")) - - Note that some list items may be disabled (see below). - - Note the following mistake: - - form[control_name] = control_value - assert form[control_name] == control_value # not necessarily true - - The reason for this is that form[control_name] always gives the list items - in the order they were listed in the HTML. - - List items (hence list values, too) can be referred to in terms of list - item labels rather than list item names using the appropriate label - arguments. Note that each item may have several labels. - - The question of default values of OPTION contents, labels and values is - somewhat complicated: see SelectControl.__doc__ and - ListControl.get_item_attrs.__doc__ if you think you need to know. - - Controls can be disabled or readonly. In either case, the control's value - cannot be changed until you clear those flags (see example below). - Disabled is the state typically represented by browsers by 'greying out' a - control. Disabled controls are not 'successful' -- they don't cause data - to get returned to the server. Readonly controls usually appear in - browsers as read-only text boxes. Readonly controls are successful. List - items can also be disabled. Attempts to select or deselect disabled items - fail with AttributeError. - - If a lot of controls are readonly, it can be useful to do this: - - form.set_all_readonly(False) - - To clear a control's value attribute, so that it is not successful (until a - value is subsequently set): - - form.clear("cheeses") - - More examples: - - control = form.find_control("cheeses") - control.disabled = False - control.readonly = False - control.get("gruyere").disabled = True - control.items[0].selected = True - - See the various Control classes for further documentation. Many methods - take name, type, kind, id, label and nr arguments to specify the control to - be operated on: see HTMLForm.find_control.__doc__. - - ControlNotFoundError (subclass of ValueError) is raised if the specified - control can't be found. This includes occasions where a non-ListControl - is found, but the method (set, for example) requires a ListControl. - ItemNotFoundError (subclass of ValueError) is raised if a list item can't - be found. ItemCountError (subclass of ValueError) is raised if an attempt - is made to select more than one item and the control doesn't allow that, or - set/get_single are called and the control contains more than one item. - AttributeError is raised if a control or item is readonly or disabled and - an attempt is made to alter its value. - - Security note: Remember that any passwords you store in HTMLForm instances - will be saved to disk in the clear if you pickle them (directly or - indirectly). The simplest solution to this is to avoid pickling HTMLForm - objects. You could also pickle before filling in any password, or just set - the password to "" before pickling. - - - Public attributes: - - action: full (absolute URI) form action - method: "GET" or "POST" - enctype: form transfer encoding MIME type - name: name of form (None if no name was specified) - attrs: dictionary mapping original HTML form attributes to their values - - controls: list of Control instances; do not alter this list - (instead, call form.new_control to make a Control and add it to the - form, or control.add_to_form if you already have a Control instance) - - - - Methods for form filling: - ------------------------- - - Most of the these methods have very similar arguments. See - HTMLForm.find_control.__doc__ for details of the name, type, kind, label - and nr arguments. - - def find_control(self, - name=None, type=None, kind=None, id=None, predicate=None, - nr=None, label=None) - - get_value(name=None, type=None, kind=None, id=None, nr=None, - by_label=False, # by_label is deprecated - label=None) - set_value(value, - name=None, type=None, kind=None, id=None, nr=None, - by_label=False, # by_label is deprecated - label=None) - - clear_all() - clear(name=None, type=None, kind=None, id=None, nr=None, label=None) - - set_all_readonly(readonly) - - - Method applying only to FileControls: - - add_file(file_object, - content_type="application/octet-stream", filename=None, - name=None, id=None, nr=None, label=None) - - - Methods applying only to clickable controls: - - click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None) - click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1), - label=None) - click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None) - - """ - - type2class = { - "text": TextControl, - "password": PasswordControl, - "hidden": HiddenControl, - "textarea": TextareaControl, - - "isindex": IsindexControl, - - "file": FileControl, - - "button": IgnoreControl, - "buttonbutton": IgnoreControl, - "reset": IgnoreControl, - "resetbutton": IgnoreControl, - - "submit": SubmitControl, - "submitbutton": SubmitButtonControl, - "image": ImageControl, - - "radio": RadioControl, - "checkbox": CheckboxControl, - "select": SelectControl, - } - -#--------------------------------------------------- -# Initialisation. Use ParseResponse / ParseFile instead. - - def __init__(self, action, method="GET", - enctype="application/x-www-form-urlencoded", - name=None, attrs=None, - request_class=urllib2.Request, - forms=None, labels=None, id_to_labels=None, - backwards_compat=True): - """ - In the usual case, use ParseResponse (or ParseFile) to create new - HTMLForm objects. - - action: full (absolute URI) form action - method: "GET" or "POST" - enctype: form transfer encoding MIME type - name: name of form - attrs: dictionary mapping original HTML form attributes to their values - - """ - self.action = action - self.method = method - self.enctype = enctype - self.name = name - if attrs is not None: - self.attrs = attrs.copy() - else: - self.attrs = {} - self.controls = [] - self._request_class = request_class - - # these attributes are used by zope.testbrowser - self._forms = forms # this is a semi-public API! - self._labels = labels # this is a semi-public API! - self._id_to_labels = id_to_labels # this is a semi-public API! - - self.backwards_compat = backwards_compat # note __setattr__ - - self._urlunparse = urlparse.urlunparse - self._urlparse = urlparse.urlparse - - def __getattr__(self, name): - if name == "backwards_compat": - return self._backwards_compat - return getattr(HTMLForm, name) - - def __setattr__(self, name, value): - # yuck - if name == "backwards_compat": - name = "_backwards_compat" - value = bool(value) - for cc in self.controls: - try: - items = cc.items - except AttributeError: - continue - else: - for ii in items: - for ll in ii.get_labels(): - ll._backwards_compat = value - self.__dict__[name] = value - - def new_control(self, type, name, attrs, - ignore_unknown=False, select_default=False, index=None): - """Adds a new control to the form. - - This is usually called by ParseFile and ParseResponse. Don't call it - youself unless you're building your own Control instances. - - Note that controls representing lists of items are built up from - controls holding only a single list item. See ListControl.__doc__ for - further information. - - type: type of control (see Control.__doc__ for a list) - attrs: HTML attributes of control - ignore_unknown: if true, use a dummy Control instance for controls of - unknown type; otherwise, use a TextControl - select_default: for RADIO and multiple-selection SELECT controls, pick - the first item as the default if no 'selected' HTML attribute is - present (this defaulting happens when the HTMLForm.fixup method is - called) - index: index of corresponding element in HTML (see - MoreFormTests.test_interspersed_controls for motivation) - - """ - type = type.lower() - klass = self.type2class.get(type) - if klass is None: - if ignore_unknown: - klass = IgnoreControl - else: - klass = TextControl - - a = attrs.copy() - if issubclass(klass, ListControl): - control = klass(type, name, a, select_default, index) - else: - control = klass(type, name, a, index) - - if type == "select" and len(attrs) == 1: - for ii in range(len(self.controls)-1, -1, -1): - ctl = self.controls[ii] - if ctl.type == "select": - ctl.close_control() - break - - control.add_to_form(self) - control._urlparse = self._urlparse - control._urlunparse = self._urlunparse - - def fixup(self): - """Normalise form after all controls have been added. - - This is usually called by ParseFile and ParseResponse. Don't call it - youself unless you're building your own Control instances. - - This method should only be called once, after all controls have been - added to the form. - - """ - for control in self.controls: - control.fixup() - self.backwards_compat = self._backwards_compat - -#--------------------------------------------------- - def __str__(self): - header = "%s%s %s %s" % ( - (self.name and self.name+" " or ""), - self.method, self.action, self.enctype) - rep = [header] - for control in self.controls: - rep.append(" %s" % str(control)) - return "<%s>" % "\n".join(rep) - -#--------------------------------------------------- -# Form-filling methods. - - def __getitem__(self, name): - return self.find_control(name).value - def __contains__(self, name): - return bool(self.find_control(name)) - def __setitem__(self, name, value): - control = self.find_control(name) - try: - control.value = value - except AttributeError, e: - raise ValueError(str(e)) - - def get_value(self, - name=None, type=None, kind=None, id=None, nr=None, - by_label=False, # by_label is deprecated - label=None): - """Return value of control. - - If only name and value arguments are supplied, equivalent to - - form[name] - - """ - if by_label: - deprecation("form.get_value_by_label(...)") - c = self.find_control(name, type, kind, id, label=label, nr=nr) - if by_label: - try: - meth = c.get_value_by_label - except AttributeError: - raise NotImplementedError( - "control '%s' does not yet support by_label" % c.name) - else: - return meth() - else: - return c.value - def set_value(self, value, - name=None, type=None, kind=None, id=None, nr=None, - by_label=False, # by_label is deprecated - label=None): - """Set value of control. - - If only name and value arguments are supplied, equivalent to - - form[name] = value - - """ - if by_label: - deprecation("form.get_value_by_label(...)") - c = self.find_control(name, type, kind, id, label=label, nr=nr) - if by_label: - try: - meth = c.set_value_by_label - except AttributeError: - raise NotImplementedError( - "control '%s' does not yet support by_label" % c.name) - else: - meth(value) - else: - c.value = value - def get_value_by_label( - self, name=None, type=None, kind=None, id=None, label=None, nr=None): - """ - - All arguments should be passed by name. - - """ - c = self.find_control(name, type, kind, id, label=label, nr=nr) - return c.get_value_by_label() - - def set_value_by_label( - self, value, - name=None, type=None, kind=None, id=None, label=None, nr=None): - """ - - All arguments should be passed by name. - - """ - c = self.find_control(name, type, kind, id, label=label, nr=nr) - c.set_value_by_label(value) - - def set_all_readonly(self, readonly): - for control in self.controls: - control.readonly = bool(readonly) - - def clear_all(self): - """Clear the value attributes of all controls in the form. - - See HTMLForm.clear.__doc__. - - """ - for control in self.controls: - control.clear() - - def clear(self, - name=None, type=None, kind=None, id=None, nr=None, label=None): - """Clear the value attribute of a control. - - As a result, the affected control will not be successful until a value - is subsequently set. AttributeError is raised on readonly controls. - - """ - c = self.find_control(name, type, kind, id, label=label, nr=nr) - c.clear() - - -#--------------------------------------------------- -# Form-filling methods applying only to ListControls. - - def possible_items(self, # deprecated - name=None, type=None, kind=None, id=None, - nr=None, by_label=False, label=None): - """Return a list of all values that the specified control can take.""" - c = self._find_list_control(name, type, kind, id, label, nr) - return c.possible_items(by_label) - - def set(self, selected, item_name, # deprecated - name=None, type=None, kind=None, id=None, nr=None, - by_label=False, label=None): - """Select / deselect named list item. - - selected: boolean selected state - - """ - self._find_list_control(name, type, kind, id, label, nr).set( - selected, item_name, by_label) - def toggle(self, item_name, # deprecated - name=None, type=None, kind=None, id=None, nr=None, - by_label=False, label=None): - """Toggle selected state of named list item.""" - self._find_list_control(name, type, kind, id, label, nr).toggle( - item_name, by_label) - - def set_single(self, selected, # deprecated - name=None, type=None, kind=None, id=None, - nr=None, by_label=None, label=None): - """Select / deselect list item in a control having only one item. - - If the control has multiple list items, ItemCountError is raised. - - This is just a convenience method, so you don't need to know the item's - name -- the item name in these single-item controls is usually - something meaningless like "1" or "on". - - For example, if a checkbox has a single item named "on", the following - two calls are equivalent: - - control.toggle("on") - control.toggle_single() - - """ # by_label ignored and deprecated - self._find_list_control( - name, type, kind, id, label, nr).set_single(selected) - def toggle_single(self, name=None, type=None, kind=None, id=None, - nr=None, by_label=None, label=None): # deprecated - """Toggle selected state of list item in control having only one item. - - The rest is as for HTMLForm.set_single.__doc__. - - """ # by_label ignored and deprecated - self._find_list_control(name, type, kind, id, label, nr).toggle_single() - -#--------------------------------------------------- -# Form-filling method applying only to FileControls. - - def add_file(self, file_object, content_type=None, filename=None, - name=None, id=None, nr=None, label=None): - """Add a file to be uploaded. - - file_object: file-like object (with read method) from which to read - data to upload - content_type: MIME content type of data to upload - filename: filename to pass to server - - If filename is None, no filename is sent to the server. - - If content_type is None, the content type is guessed based on the - filename and the data from read from the file object. - - XXX - At the moment, guessed content type is always application/octet-stream. - Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and - plain text. - - Note the following useful HTML attributes of file upload controls (see - HTML 4.01 spec, section 17): - - accept: comma-separated list of content types that the server will - handle correctly; you can use this to filter out non-conforming files - size: XXX IIRC, this is indicative of whether form wants multiple or - single files - maxlength: XXX hint of max content length in bytes? - - """ - self.find_control(name, "file", id=id, label=label, nr=nr).add_file( - file_object, content_type, filename) - -#--------------------------------------------------- -# Form submission methods, applying only to clickable controls. - - def click(self, name=None, type=None, id=None, nr=0, coord=(1,1), - request_class=urllib2.Request, - label=None): - """Return request that would result from clicking on a control. - - The request object is a urllib2.Request instance, which you can pass to - urllib2.urlopen (or ClientCookie.urlopen). - - Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and - IMAGEs) can be clicked. - - Will click on the first clickable control, subject to the name, type - and nr arguments (as for find_control). If no name, type, id or number - is specified and there are no clickable controls, a request will be - returned for the form in its current, un-clicked, state. - - IndexError is raised if any of name, type, id or nr is specified but no - matching control is found. ValueError is raised if the HTMLForm has an - enctype attribute that is not recognised. - - You can optionally specify a coordinate to click at, which only makes a - difference if you clicked on an image. - - """ - return self._click(name, type, id, label, nr, coord, "request", - self._request_class) - - def click_request_data(self, - name=None, type=None, id=None, - nr=0, coord=(1,1), - request_class=urllib2.Request, - label=None): - """As for click method, but return a tuple (url, data, headers). - - You can use this data to send a request to the server. This is useful - if you're using httplib or urllib rather than urllib2. Otherwise, use - the click method. - - # Untested. Have to subclass to add headers, I think -- so use urllib2 - # instead! - import urllib - url, data, hdrs = form.click_request_data() - r = urllib.urlopen(url, data) - - # Untested. I don't know of any reason to use httplib -- you can get - # just as much control with urllib2. - import httplib, urlparse - url, data, hdrs = form.click_request_data() - tup = urlparse(url) - host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:]) - conn = httplib.HTTPConnection(host) - if data: - httplib.request("POST", path, data, hdrs) - else: - httplib.request("GET", path, headers=hdrs) - r = conn.getresponse() - - """ - return self._click(name, type, id, label, nr, coord, "request_data", - self._request_class) - - def click_pairs(self, name=None, type=None, id=None, - nr=0, coord=(1,1), - label=None): - """As for click_request_data, but returns a list of (key, value) pairs. - - You can use this list as an argument to ClientForm.urlencode. This is - usually only useful if you're using httplib or urllib rather than - urllib2 or ClientCookie. It may also be useful if you want to manually - tweak the keys and/or values, but this should not be necessary. - Otherwise, use the click method. - - Note that this method is only useful for forms of MIME type - x-www-form-urlencoded. In particular, it does not return the - information required for file upload. If you need file upload and are - not using urllib2, use click_request_data. - - Also note that Python 2.0's urllib.urlencode is slightly broken: it - only accepts a mapping, not a sequence of pairs, as an argument. This - messes up any ordering in the argument. Use ClientForm.urlencode - instead. - - """ - return self._click(name, type, id, label, nr, coord, "pairs", - self._request_class) - -#--------------------------------------------------- - - def find_control(self, - name=None, type=None, kind=None, id=None, - predicate=None, nr=None, - label=None): - """Locate and return some specific control within the form. - - At least one of the name, type, kind, predicate and nr arguments must - be supplied. If no matching control is found, ControlNotFoundError is - raised. - - If name is specified, then the control must have the indicated name. - - If type is specified then the control must have the specified type (in - addition to the types possible for <input> HTML tags: "text", - "password", "hidden", "submit", "image", "button", "radio", "checkbox", - "file" we also have "reset", "buttonbutton", "submitbutton", - "resetbutton", "textarea", "select" and "isindex"). - - If kind is specified, then the control must fall into the specified - group, each of which satisfies a particular interface. The types are - "text", "list", "multilist", "singlelist", "clickable" and "file". - - If id is specified, then the control must have the indicated id. - - If predicate is specified, then the control must match that function. - The predicate function is passed the control as its single argument, - and should return a boolean value indicating whether the control - matched. - - nr, if supplied, is the sequence number of the control (where 0 is the - first). Note that control 0 is the first control matching all the - other arguments (if supplied); it is not necessarily the first control - in the form. If no nr is supplied, AmbiguityError is raised if - multiple controls match the other arguments (unless the - .backwards-compat attribute is true). - - If label is specified, then the control must have this label. Note - that radio controls and checkboxes never have labels: their items do. - - """ - if ((name is None) and (type is None) and (kind is None) and - (id is None) and (label is None) and (predicate is None) and - (nr is None)): - raise ValueError( - "at least one argument must be supplied to specify control") - return self._find_control(name, type, kind, id, label, predicate, nr) - -#--------------------------------------------------- -# Private methods. - - def _find_list_control(self, - name=None, type=None, kind=None, id=None, - label=None, nr=None): - if ((name is None) and (type is None) and (kind is None) and - (id is None) and (label is None) and (nr is None)): - raise ValueError( - "at least one argument must be supplied to specify control") - - return self._find_control(name, type, kind, id, label, - is_listcontrol, nr) - - def _find_control(self, name, type, kind, id, label, predicate, nr): - if ((name is not None) and (name is not Missing) and - not isstringlike(name)): - raise TypeError("control name must be string-like") - if (type is not None) and not isstringlike(type): - raise TypeError("control type must be string-like") - if (kind is not None) and not isstringlike(kind): - raise TypeError("control kind must be string-like") - if (id is not None) and not isstringlike(id): - raise TypeError("control id must be string-like") - if (label is not None) and not isstringlike(label): - raise TypeError("control label must be string-like") - if (predicate is not None) and not callable(predicate): - raise TypeError("control predicate must be callable") - if (nr is not None) and nr < 0: - raise ValueError("control number must be a positive integer") - - orig_nr = nr - found = None - ambiguous = False - if nr is None and self.backwards_compat: - nr = 0 - - for control in self.controls: - if ((name is not None and name != control.name) and - (name is not Missing or control.name is not None)): - continue - if type is not None and type != control.type: - continue - if kind is not None and not control.is_of_kind(kind): - continue - if id is not None and id != control.id: - continue - if predicate and not predicate(control): - continue - if label: - for l in control.get_labels(): - if l.text.find(label) > -1: - break - else: - continue - if nr is not None: - if nr == 0: - return control # early exit: unambiguous due to nr - nr -= 1 - continue - if found: - ambiguous = True - break - found = control - - if found and not ambiguous: - return found - - description = [] - if name is not None: description.append("name %s" % repr(name)) - if type is not None: description.append("type '%s'" % type) - if kind is not None: description.append("kind '%s'" % kind) - if id is not None: description.append("id '%s'" % id) - if label is not None: description.append("label '%s'" % label) - if predicate is not None: - description.append("predicate %s" % predicate) - if orig_nr: description.append("nr %d" % orig_nr) - description = ", ".join(description) - - if ambiguous: - raise AmbiguityError("more than one control matching "+description) - elif not found: - raise ControlNotFoundError("no control matching "+description) - assert False - - def _click(self, name, type, id, label, nr, coord, return_type, - request_class=urllib2.Request): - try: - control = self._find_control( - name, type, "clickable", id, label, None, nr) - except ControlNotFoundError: - if ((name is not None) or (type is not None) or (id is not None) or - (nr != 0)): - raise - # no clickable controls, but no control was explicitly requested, - # so return state without clicking any control - return self._switch_click(return_type, request_class) - else: - return control._click(self, coord, return_type, request_class) - - def _pairs(self): - """Return sequence of (key, value) pairs suitable for urlencoding.""" - return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()] - - - def _pairs_and_controls(self): - """Return sequence of (index, key, value, control_index) - of totally ordered pairs suitable for urlencoding. - - control_index is the index of the control in self.controls - """ - pairs = [] - for control_index in range(len(self.controls)): - control = self.controls[control_index] - for ii, key, val in control._totally_ordered_pairs(): - pairs.append((ii, key, val, control_index)) - - # stable sort by ONLY first item in tuple - pairs.sort() - - return pairs - - def _request_data(self): - """Return a tuple (url, data, headers).""" - method = self.method.upper() - #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action) - parts = self._urlparse(self.action) - rest, (query, frag) = parts[:-2], parts[-2:] - - if method == "GET": - if self.enctype != "application/x-www-form-urlencoded": - raise ValueError( - "unknown GET form encoding type '%s'" % self.enctype) - parts = rest + (urlencode(self._pairs()), None) - uri = self._urlunparse(parts) - return uri, None, [] - elif method == "POST": - parts = rest + (query, None) - uri = self._urlunparse(parts) - if self.enctype == "application/x-www-form-urlencoded": - return (uri, urlencode(self._pairs()), - [("Content-Type", self.enctype)]) - elif self.enctype == "multipart/form-data": - data = StringIO() - http_hdrs = [] - mw = MimeWriter(data, http_hdrs) - f = mw.startmultipartbody("form-data", add_to_http_hdrs=True, - prefix=0) - for ii, k, v, control_index in self._pairs_and_controls(): - self.controls[control_index]._write_mime_data(mw, k, v) - mw.lastpart() - return uri, data.getvalue(), http_hdrs - else: - raise ValueError( - "unknown POST form encoding type '%s'" % self.enctype) - else: - raise ValueError("Unknown method '%s'" % method) - - def _switch_click(self, return_type, request_class=urllib2.Request): - # This is called by HTMLForm and clickable Controls to hide switching - # on return_type. - if return_type == "pairs": - return self._pairs() - elif return_type == "request_data": - return self._request_data() - else: - req_data = self._request_data() - req = request_class(req_data[0], req_data[1]) - for key, val in req_data[2]: - add_hdr = req.add_header - if key.lower() == "content-type": - try: - add_hdr = req.add_unredirected_header - except AttributeError: - # pre-2.4 and not using ClientCookie - pass - add_hdr(key, val) - return req diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py deleted file mode 100644 index c1e4c6d..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url deleted file mode 100644 index f34e243..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url +++ /dev/null @@ -1 +0,0 @@ -http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url deleted file mode 100644 index f34e243..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url +++ /dev/null @@ -1 +0,0 @@ -http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py deleted file mode 100644 index c1e4c6d..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py deleted file mode 100644 index 6f29a65..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py +++ /dev/null @@ -1,438 +0,0 @@ -# Copyright (C) 1999--2002 Joel Rosdahl -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# Joel Rosdahl <joel@rosdahl.net> -# -# $Id: ircbot.py,v 1.23 2008/09/11 07:38:30 keltus Exp $ - -"""ircbot -- Simple IRC bot library. - -This module contains a single-server IRC bot class that can be used to -write simpler bots. -""" - -import sys -from UserDict import UserDict - -from irclib import SimpleIRCClient -from irclib import nm_to_n, irc_lower, all_events -from irclib import parse_channel_modes, is_channel -from irclib import ServerConnectionError - -class SingleServerIRCBot(SimpleIRCClient): - """A single-server IRC bot class. - - The bot tries to reconnect if it is disconnected. - - The bot keeps track of the channels it has joined, the other - clients that are present in the channels and which of those that - have operator or voice modes. The "database" is kept in the - self.channels attribute, which is an IRCDict of Channels. - """ - def __init__(self, server_list, nickname, realname, reconnection_interval=60): - """Constructor for SingleServerIRCBot objects. - - Arguments: - - server_list -- A list of tuples (server, port) that - defines which servers the bot should try to - connect to. - - nickname -- The bot's nickname. - - realname -- The bot's realname. - - reconnection_interval -- How long the bot should wait - before trying to reconnect. - - dcc_connections -- A list of initiated/accepted DCC - connections. - """ - - SimpleIRCClient.__init__(self) - self.channels = IRCDict() - self.server_list = server_list - if not reconnection_interval or reconnection_interval < 0: - reconnection_interval = 2**31 - self.reconnection_interval = reconnection_interval - - self._nickname = nickname - self._realname = realname - for i in ["disconnect", "join", "kick", "mode", - "namreply", "nick", "part", "quit"]: - self.connection.add_global_handler(i, - getattr(self, "_on_" + i), - -10) - def _connected_checker(self): - """[Internal]""" - if not self.connection.is_connected(): - self.connection.execute_delayed(self.reconnection_interval, - self._connected_checker) - self.jump_server() - - def _connect(self): - """[Internal]""" - password = None - if len(self.server_list[0]) > 2: - password = self.server_list[0][2] - try: - self.connect(self.server_list[0][0], - self.server_list[0][1], - self._nickname, - password, - ircname=self._realname) - except ServerConnectionError: - pass - - def _on_disconnect(self, c, e): - """[Internal]""" - self.channels = IRCDict() - self.connection.execute_delayed(self.reconnection_interval, - self._connected_checker) - - def _on_join(self, c, e): - """[Internal]""" - ch = e.target() - nick = nm_to_n(e.source()) - if nick == c.get_nickname(): - self.channels[ch] = Channel() - self.channels[ch].add_user(nick) - - def _on_kick(self, c, e): - """[Internal]""" - nick = e.arguments()[0] - channel = e.target() - - if nick == c.get_nickname(): - del self.channels[channel] - else: - self.channels[channel].remove_user(nick) - - def _on_mode(self, c, e): - """[Internal]""" - modes = parse_channel_modes(" ".join(e.arguments())) - t = e.target() - if is_channel(t): - ch = self.channels[t] - for mode in modes: - if mode[0] == "+": - f = ch.set_mode - else: - f = ch.clear_mode - f(mode[1], mode[2]) - else: - # Mode on self... XXX - pass - - def _on_namreply(self, c, e): - """[Internal]""" - - # e.arguments()[0] == "@" for secret channels, - # "*" for private channels, - # "=" for others (public channels) - # e.arguments()[1] == channel - # e.arguments()[2] == nick list - - ch = e.arguments()[1] - for nick in e.arguments()[2].split(): - if nick[0] == "@": - nick = nick[1:] - self.channels[ch].set_mode("o", nick) - elif nick[0] == "+": - nick = nick[1:] - self.channels[ch].set_mode("v", nick) - self.channels[ch].add_user(nick) - - def _on_nick(self, c, e): - """[Internal]""" - before = nm_to_n(e.source()) - after = e.target() - for ch in self.channels.values(): - if ch.has_user(before): - ch.change_nick(before, after) - - def _on_part(self, c, e): - """[Internal]""" - nick = nm_to_n(e.source()) - channel = e.target() - - if nick == c.get_nickname(): - del self.channels[channel] - else: - self.channels[channel].remove_user(nick) - - def _on_quit(self, c, e): - """[Internal]""" - nick = nm_to_n(e.source()) - for ch in self.channels.values(): - if ch.has_user(nick): - ch.remove_user(nick) - - def die(self, msg="Bye, cruel world!"): - """Let the bot die. - - Arguments: - - msg -- Quit message. - """ - - self.connection.disconnect(msg) - sys.exit(0) - - def disconnect(self, msg="I'll be back!"): - """Disconnect the bot. - - The bot will try to reconnect after a while. - - Arguments: - - msg -- Quit message. - """ - self.connection.disconnect(msg) - - def get_version(self): - """Returns the bot version. - - Used when answering a CTCP VERSION request. - """ - return "ircbot.py by Joel Rosdahl <joel@rosdahl.net>" - - def jump_server(self, msg="Changing servers"): - """Connect to a new server, possibly disconnecting from the current. - - The bot will skip to next server in the server_list each time - jump_server is called. - """ - if self.connection.is_connected(): - self.connection.disconnect(msg) - - self.server_list.append(self.server_list.pop(0)) - self._connect() - - def on_ctcp(self, c, e): - """Default handler for ctcp events. - - Replies to VERSION and PING requests and relays DCC requests - to the on_dccchat method. - """ - if e.arguments()[0] == "VERSION": - c.ctcp_reply(nm_to_n(e.source()), - "VERSION " + self.get_version()) - elif e.arguments()[0] == "PING": - if len(e.arguments()) > 1: - c.ctcp_reply(nm_to_n(e.source()), - "PING " + e.arguments()[1]) - elif e.arguments()[0] == "DCC" and e.arguments()[1].split(" ", 1)[0] == "CHAT": - self.on_dccchat(c, e) - - def on_dccchat(self, c, e): - pass - - def start(self): - """Start the bot.""" - self._connect() - SimpleIRCClient.start(self) - - -class IRCDict: - """A dictionary suitable for storing IRC-related things. - - Dictionary keys a and b are considered equal if and only if - irc_lower(a) == irc_lower(b) - - Otherwise, it should behave exactly as a normal dictionary. - """ - - def __init__(self, dict=None): - self.data = {} - self.canon_keys = {} # Canonical keys - if dict is not None: - self.update(dict) - def __repr__(self): - return repr(self.data) - def __cmp__(self, dict): - if isinstance(dict, IRCDict): - return cmp(self.data, dict.data) - else: - return cmp(self.data, dict) - def __len__(self): - return len(self.data) - def __getitem__(self, key): - return self.data[self.canon_keys[irc_lower(key)]] - def __setitem__(self, key, item): - if key in self: - del self[key] - self.data[key] = item - self.canon_keys[irc_lower(key)] = key - def __delitem__(self, key): - ck = irc_lower(key) - del self.data[self.canon_keys[ck]] - del self.canon_keys[ck] - def __iter__(self): - return iter(self.data) - def __contains__(self, key): - return self.has_key(key) - def clear(self): - self.data.clear() - self.canon_keys.clear() - def copy(self): - if self.__class__ is UserDict: - return UserDict(self.data) - import copy - return copy.copy(self) - def keys(self): - return self.data.keys() - def items(self): - return self.data.items() - def values(self): - return self.data.values() - def has_key(self, key): - return irc_lower(key) in self.canon_keys - def update(self, dict): - for k, v in dict.items(): - self.data[k] = v - def get(self, key, failobj=None): - return self.data.get(key, failobj) - - -class Channel: - """A class for keeping information about an IRC channel. - - This class can be improved a lot. - """ - - def __init__(self): - self.userdict = IRCDict() - self.operdict = IRCDict() - self.voiceddict = IRCDict() - self.modes = {} - - def users(self): - """Returns an unsorted list of the channel's users.""" - return self.userdict.keys() - - def opers(self): - """Returns an unsorted list of the channel's operators.""" - return self.operdict.keys() - - def voiced(self): - """Returns an unsorted list of the persons that have voice - mode set in the channel.""" - return self.voiceddict.keys() - - def has_user(self, nick): - """Check whether the channel has a user.""" - return nick in self.userdict - - def is_oper(self, nick): - """Check whether a user has operator status in the channel.""" - return nick in self.operdict - - def is_voiced(self, nick): - """Check whether a user has voice mode set in the channel.""" - return nick in self.voiceddict - - def add_user(self, nick): - self.userdict[nick] = 1 - - def remove_user(self, nick): - for d in self.userdict, self.operdict, self.voiceddict: - if nick in d: - del d[nick] - - def change_nick(self, before, after): - self.userdict[after] = 1 - del self.userdict[before] - if before in self.operdict: - self.operdict[after] = 1 - del self.operdict[before] - if before in self.voiceddict: - self.voiceddict[after] = 1 - del self.voiceddict[before] - - def set_mode(self, mode, value=None): - """Set mode on the channel. - - Arguments: - - mode -- The mode (a single-character string). - - value -- Value - """ - if mode == "o": - self.operdict[value] = 1 - elif mode == "v": - self.voiceddict[value] = 1 - else: - self.modes[mode] = value - - def clear_mode(self, mode, value=None): - """Clear mode on the channel. - - Arguments: - - mode -- The mode (a single-character string). - - value -- Value - """ - try: - if mode == "o": - del self.operdict[value] - elif mode == "v": - del self.voiceddict[value] - else: - del self.modes[mode] - except KeyError: - pass - - def has_mode(self, mode): - return mode in self.modes - - def is_moderated(self): - return self.has_mode("m") - - def is_secret(self): - return self.has_mode("s") - - def is_protected(self): - return self.has_mode("p") - - def has_topic_lock(self): - return self.has_mode("t") - - def is_invite_only(self): - return self.has_mode("i") - - def has_allow_external_messages(self): - return self.has_mode("n") - - def has_limit(self): - return self.has_mode("l") - - def limit(self): - if self.has_limit(): - return self.modes[l] - else: - return None - - def has_key(self): - return self.has_mode("k") - - def key(self): - if self.has_key(): - return self.modes["k"] - else: - return None diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py deleted file mode 100644 index 5f7141c..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py +++ /dev/null @@ -1,1560 +0,0 @@ -# Copyright (C) 1999--2002 Joel Rosdahl -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# keltus <keltus@users.sourceforge.net> -# -# $Id: irclib.py,v 1.47 2008/09/25 22:00:59 keltus Exp $ - -"""irclib -- Internet Relay Chat (IRC) protocol client library. - -This library is intended to encapsulate the IRC protocol at a quite -low level. It provides an event-driven IRC client framework. It has -a fairly thorough support for the basic IRC protocol, CTCP, DCC chat, -but DCC file transfers is not yet supported. - -In order to understand how to make an IRC client, I'm afraid you more -or less must understand the IRC specifications. They are available -here: [IRC specifications]. - -The main features of the IRC client framework are: - - * Abstraction of the IRC protocol. - * Handles multiple simultaneous IRC server connections. - * Handles server PONGing transparently. - * Messages to the IRC server are done by calling methods on an IRC - connection object. - * Messages from an IRC server triggers events, which can be caught - by event handlers. - * Reading from and writing to IRC server sockets are normally done - by an internal select() loop, but the select()ing may be done by - an external main loop. - * Functions can be registered to execute at specified times by the - event-loop. - * Decodes CTCP tagging correctly (hopefully); I haven't seen any - other IRC client implementation that handles the CTCP - specification subtilties. - * A kind of simple, single-server, object-oriented IRC client class - that dispatches events to instance methods is included. - -Current limitations: - - * The IRC protocol shines through the abstraction a bit too much. - * Data is not written asynchronously to the server, i.e. the write() - may block if the TCP buffers are stuffed. - * There are no support for DCC file transfers. - * The author haven't even read RFC 2810, 2811, 2812 and 2813. - * Like most projects, documentation is lacking... - -.. [IRC specifications] http://www.irchelp.org/irchelp/rfc/ -""" - -import bisect -import re -import select -import socket -import string -import sys -import time -import types - -VERSION = 0, 4, 8 -DEBUG = 0 - -# TODO -# ---- -# (maybe) thread safety -# (maybe) color parser convenience functions -# documentation (including all event types) -# (maybe) add awareness of different types of ircds -# send data asynchronously to the server (and DCC connections) -# (maybe) automatically close unused, passive DCC connections after a while - -# NOTES -# ----- -# connection.quit() only sends QUIT to the server. -# ERROR from the server triggers the error event and the disconnect event. -# dropping of the connection triggers the disconnect event. - -class IRCError(Exception): - """Represents an IRC exception.""" - pass - - -class IRC: - """Class that handles one or several IRC server connections. - - When an IRC object has been instantiated, it can be used to create - Connection objects that represent the IRC connections. The - responsibility of the IRC object is to provide an event-driven - framework for the connections and to keep the connections alive. - It runs a select loop to poll each connection's TCP socket and - hands over the sockets with incoming data for processing by the - corresponding connection. - - The methods of most interest for an IRC client writer are server, - add_global_handler, remove_global_handler, execute_at, - execute_delayed, process_once and process_forever. - - Here is an example: - - irc = irclib.IRC() - server = irc.server() - server.connect(\"irc.some.where\", 6667, \"my_nickname\") - server.privmsg(\"a_nickname\", \"Hi there!\") - irc.process_forever() - - This will connect to the IRC server irc.some.where on port 6667 - using the nickname my_nickname and send the message \"Hi there!\" - to the nickname a_nickname. - """ - - def __init__(self, fn_to_add_socket=None, - fn_to_remove_socket=None, - fn_to_add_timeout=None): - """Constructor for IRC objects. - - Optional arguments are fn_to_add_socket, fn_to_remove_socket - and fn_to_add_timeout. The first two specify functions that - will be called with a socket object as argument when the IRC - object wants to be notified (or stop being notified) of data - coming on a new socket. When new data arrives, the method - process_data should be called. Similarly, fn_to_add_timeout - is called with a number of seconds (a floating point number) - as first argument when the IRC object wants to receive a - notification (by calling the process_timeout method). So, if - e.g. the argument is 42.17, the object wants the - process_timeout method to be called after 42 seconds and 170 - milliseconds. - - The three arguments mainly exist to be able to use an external - main loop (for example Tkinter's or PyGTK's main app loop) - instead of calling the process_forever method. - - An alternative is to just call ServerConnection.process_once() - once in a while. - """ - - if fn_to_add_socket and fn_to_remove_socket: - self.fn_to_add_socket = fn_to_add_socket - self.fn_to_remove_socket = fn_to_remove_socket - else: - self.fn_to_add_socket = None - self.fn_to_remove_socket = None - - self.fn_to_add_timeout = fn_to_add_timeout - self.connections = [] - self.handlers = {} - self.delayed_commands = [] # list of tuples in the format (time, function, arguments) - - self.add_global_handler("ping", _ping_ponger, -42) - - def server(self): - """Creates and returns a ServerConnection object.""" - - c = ServerConnection(self) - self.connections.append(c) - return c - - def process_data(self, sockets): - """Called when there is more data to read on connection sockets. - - Arguments: - - sockets -- A list of socket objects. - - See documentation for IRC.__init__. - """ - for s in sockets: - for c in self.connections: - if s == c._get_socket(): - c.process_data() - - def process_timeout(self): - """Called when a timeout notification is due. - - See documentation for IRC.__init__. - """ - t = time.time() - while self.delayed_commands: - if t >= self.delayed_commands[0][0]: - self.delayed_commands[0][1](*self.delayed_commands[0][2]) - del self.delayed_commands[0] - else: - break - - def process_once(self, timeout=0): - """Process data from connections once. - - Arguments: - - timeout -- How long the select() call should wait if no - data is available. - - This method should be called periodically to check and process - incoming data, if there are any. If that seems boring, look - at the process_forever method. - """ - sockets = map(lambda x: x._get_socket(), self.connections) - sockets = filter(lambda x: x != None, sockets) - if sockets: - (i, o, e) = select.select(sockets, [], [], timeout) - self.process_data(i) - else: - time.sleep(timeout) - self.process_timeout() - - def process_forever(self, timeout=0.2): - """Run an infinite loop, processing data from connections. - - This method repeatedly calls process_once. - - Arguments: - - timeout -- Parameter to pass to process_once. - """ - while 1: - self.process_once(timeout) - - def disconnect_all(self, message=""): - """Disconnects all connections.""" - for c in self.connections: - c.disconnect(message) - - def add_global_handler(self, event, handler, priority=0): - """Adds a global handler function for a specific event type. - - Arguments: - - event -- Event type (a string). Check the values of the - numeric_events dictionary in irclib.py for possible event - types. - - handler -- Callback function. - - priority -- A number (the lower number, the higher priority). - - The handler function is called whenever the specified event is - triggered in any of the connections. See documentation for - the Event class. - - The handler functions are called in priority order (lowest - number is highest priority). If a handler function returns - \"NO MORE\", no more handlers will be called. - """ - if not event in self.handlers: - self.handlers[event] = [] - bisect.insort(self.handlers[event], ((priority, handler))) - - def remove_global_handler(self, event, handler): - """Removes a global handler function. - - Arguments: - - event -- Event type (a string). - - handler -- Callback function. - - Returns 1 on success, otherwise 0. - """ - if not event in self.handlers: - return 0 - for h in self.handlers[event]: - if handler == h[1]: - self.handlers[event].remove(h) - return 1 - - def execute_at(self, at, function, arguments=()): - """Execute a function at a specified time. - - Arguments: - - at -- Execute at this time (standard \"time_t\" time). - - function -- Function to call. - - arguments -- Arguments to give the function. - """ - self.execute_delayed(at-time.time(), function, arguments) - - def execute_delayed(self, delay, function, arguments=()): - """Execute a function after a specified time. - - Arguments: - - delay -- How many seconds to wait. - - function -- Function to call. - - arguments -- Arguments to give the function. - """ - bisect.insort(self.delayed_commands, (delay+time.time(), function, arguments)) - if self.fn_to_add_timeout: - self.fn_to_add_timeout(delay) - - def dcc(self, dcctype="chat"): - """Creates and returns a DCCConnection object. - - Arguments: - - dcctype -- "chat" for DCC CHAT connections or "raw" for - DCC SEND (or other DCC types). If "chat", - incoming data will be split in newline-separated - chunks. If "raw", incoming data is not touched. - """ - c = DCCConnection(self, dcctype) - self.connections.append(c) - return c - - def _handle_event(self, connection, event): - """[Internal]""" - h = self.handlers - for handler in h.get("all_events", []) + h.get(event.eventtype(), []): - if handler[1](connection, event) == "NO MORE": - return - - def _remove_connection(self, connection): - """[Internal]""" - self.connections.remove(connection) - if self.fn_to_remove_socket: - self.fn_to_remove_socket(connection._get_socket()) - -_rfc_1459_command_regexp = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?") - -class Connection: - """Base class for IRC connections. - - Must be overridden. - """ - def __init__(self, irclibobj): - self.irclibobj = irclibobj - - def _get_socket(): - raise IRCError, "Not overridden" - - ############################## - ### Convenience wrappers. - - def execute_at(self, at, function, arguments=()): - self.irclibobj.execute_at(at, function, arguments) - - def execute_delayed(self, delay, function, arguments=()): - self.irclibobj.execute_delayed(delay, function, arguments) - - -class ServerConnectionError(IRCError): - pass - -class ServerNotConnectedError(ServerConnectionError): - pass - - -# Huh!? Crrrrazy EFNet doesn't follow the RFC: their ircd seems to -# use \n as message separator! :P -_linesep_regexp = re.compile("\r?\n") - -class ServerConnection(Connection): - """This class represents an IRC server connection. - - ServerConnection objects are instantiated by calling the server - method on an IRC object. - """ - - def __init__(self, irclibobj): - Connection.__init__(self, irclibobj) - self.connected = 0 # Not connected yet. - self.socket = None - self.ssl = None - - def connect(self, server, port, nickname, password=None, username=None, - ircname=None, localaddress="", localport=0, ssl=False, ipv6=False): - """Connect/reconnect to a server. - - Arguments: - - server -- Server name. - - port -- Port number. - - nickname -- The nickname. - - password -- Password (if any). - - username -- The username. - - ircname -- The IRC name ("realname"). - - localaddress -- Bind the connection to a specific local IP address. - - localport -- Bind the connection to a specific local port. - - ssl -- Enable support for ssl. - - ipv6 -- Enable support for ipv6. - - This function can be called to reconnect a closed connection. - - Returns the ServerConnection object. - """ - if self.connected: - self.disconnect("Changing servers") - - self.previous_buffer = "" - self.handlers = {} - self.real_server_name = "" - self.real_nickname = nickname - self.server = server - self.port = port - self.nickname = nickname - self.username = username or nickname - self.ircname = ircname or nickname - self.password = password - self.localaddress = localaddress - self.localport = localport - self.localhost = socket.gethostname() - if ipv6: - self.socket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) - else: - self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - self.socket.bind((self.localaddress, self.localport)) - self.socket.connect((self.server, self.port)) - if ssl: - self.ssl = socket.ssl(self.socket) - except socket.error, x: - self.socket.close() - self.socket = None - raise ServerConnectionError, "Couldn't connect to socket: %s" % x - self.connected = 1 - if self.irclibobj.fn_to_add_socket: - self.irclibobj.fn_to_add_socket(self.socket) - - # Log on... - if self.password: - self.pass_(self.password) - self.nick(self.nickname) - self.user(self.username, self.ircname) - return self - - def close(self): - """Close the connection. - - This method closes the connection permanently; after it has - been called, the object is unusable. - """ - - self.disconnect("Closing object") - self.irclibobj._remove_connection(self) - - def _get_socket(self): - """[Internal]""" - return self.socket - - def get_server_name(self): - """Get the (real) server name. - - This method returns the (real) server name, or, more - specifically, what the server calls itself. - """ - - if self.real_server_name: - return self.real_server_name - else: - return "" - - def get_nickname(self): - """Get the (real) nick name. - - This method returns the (real) nickname. The library keeps - track of nick changes, so it might not be the nick name that - was passed to the connect() method. """ - - return self.real_nickname - - def process_data(self): - """[Internal]""" - - try: - if self.ssl: - new_data = self.ssl.read(2**14) - else: - new_data = self.socket.recv(2**14) - except socket.error, x: - # The server hung up. - self.disconnect("Connection reset by peer") - return - if not new_data: - # Read nothing: connection must be down. - self.disconnect("Connection reset by peer") - return - - lines = _linesep_regexp.split(self.previous_buffer + new_data) - - # Save the last, unfinished line. - self.previous_buffer = lines.pop() - - for line in lines: - if DEBUG: - print "FROM SERVER:", line - - if not line: - continue - - prefix = None - command = None - arguments = None - self._handle_event(Event("all_raw_messages", - self.get_server_name(), - None, - [line])) - - m = _rfc_1459_command_regexp.match(line) - if m.group("prefix"): - prefix = m.group("prefix") - if not self.real_server_name: - self.real_server_name = prefix - - if m.group("command"): - command = m.group("command").lower() - - if m.group("argument"): - a = m.group("argument").split(" :", 1) - arguments = a[0].split() - if len(a) == 2: - arguments.append(a[1]) - - # Translate numerics into more readable strings. - if command in numeric_events: - command = numeric_events[command] - - if command == "nick": - if nm_to_n(prefix) == self.real_nickname: - self.real_nickname = arguments[0] - elif command == "welcome": - # Record the nickname in case the client changed nick - # in a nicknameinuse callback. - self.real_nickname = arguments[0] - - if command in ["privmsg", "notice"]: - target, message = arguments[0], arguments[1] - messages = _ctcp_dequote(message) - - if command == "privmsg": - if is_channel(target): - command = "pubmsg" - else: - if is_channel(target): - command = "pubnotice" - else: - command = "privnotice" - - for m in messages: - if type(m) is types.TupleType: - if command in ["privmsg", "pubmsg"]: - command = "ctcp" - else: - command = "ctcpreply" - - m = list(m) - if DEBUG: - print "command: %s, source: %s, target: %s, arguments: %s" % ( - command, prefix, target, m) - self._handle_event(Event(command, prefix, target, m)) - if command == "ctcp" and m[0] == "ACTION": - self._handle_event(Event("action", prefix, target, m[1:])) - else: - if DEBUG: - print "command: %s, source: %s, target: %s, arguments: %s" % ( - command, prefix, target, [m]) - self._handle_event(Event(command, prefix, target, [m])) - else: - target = None - - if command == "quit": - arguments = [arguments[0]] - elif command == "ping": - target = arguments[0] - else: - target = arguments[0] - arguments = arguments[1:] - - if command == "mode": - if not is_channel(target): - command = "umode" - - if DEBUG: - print "command: %s, source: %s, target: %s, arguments: %s" % ( - command, prefix, target, arguments) - self._handle_event(Event(command, prefix, target, arguments)) - - def _handle_event(self, event): - """[Internal]""" - self.irclibobj._handle_event(self, event) - if event.eventtype() in self.handlers: - for fn in self.handlers[event.eventtype()]: - fn(self, event) - - def is_connected(self): - """Return connection status. - - Returns true if connected, otherwise false. - """ - return self.connected - - def add_global_handler(self, *args): - """Add global handler. - - See documentation for IRC.add_global_handler. - """ - self.irclibobj.add_global_handler(*args) - - def remove_global_handler(self, *args): - """Remove global handler. - - See documentation for IRC.remove_global_handler. - """ - self.irclibobj.remove_global_handler(*args) - - def action(self, target, action): - """Send a CTCP ACTION command.""" - self.ctcp("ACTION", target, action) - - def admin(self, server=""): - """Send an ADMIN command.""" - self.send_raw(" ".join(["ADMIN", server]).strip()) - - def ctcp(self, ctcptype, target, parameter=""): - """Send a CTCP command.""" - ctcptype = ctcptype.upper() - self.privmsg(target, "\001%s%s\001" % (ctcptype, parameter and (" " + parameter) or "")) - - def ctcp_reply(self, target, parameter): - """Send a CTCP REPLY command.""" - self.notice(target, "\001%s\001" % parameter) - - def disconnect(self, message=""): - """Hang up the connection. - - Arguments: - - message -- Quit message. - """ - if not self.connected: - return - - self.connected = 0 - - self.quit(message) - - try: - self.socket.close() - except socket.error, x: - pass - self.socket = None - self._handle_event(Event("disconnect", self.server, "", [message])) - - def globops(self, text): - """Send a GLOBOPS command.""" - self.send_raw("GLOBOPS :" + text) - - def info(self, server=""): - """Send an INFO command.""" - self.send_raw(" ".join(["INFO", server]).strip()) - - def invite(self, nick, channel): - """Send an INVITE command.""" - self.send_raw(" ".join(["INVITE", nick, channel]).strip()) - - def ison(self, nicks): - """Send an ISON command. - - Arguments: - - nicks -- List of nicks. - """ - self.send_raw("ISON " + " ".join(nicks)) - - def join(self, channel, key=""): - """Send a JOIN command.""" - self.send_raw("JOIN %s%s" % (channel, (key and (" " + key)))) - - def kick(self, channel, nick, comment=""): - """Send a KICK command.""" - self.send_raw("KICK %s %s%s" % (channel, nick, (comment and (" :" + comment)))) - - def links(self, remote_server="", server_mask=""): - """Send a LINKS command.""" - command = "LINKS" - if remote_server: - command = command + " " + remote_server - if server_mask: - command = command + " " + server_mask - self.send_raw(command) - - def list(self, channels=None, server=""): - """Send a LIST command.""" - command = "LIST" - if channels: - command = command + " " + ",".join(channels) - if server: - command = command + " " + server - self.send_raw(command) - - def lusers(self, server=""): - """Send a LUSERS command.""" - self.send_raw("LUSERS" + (server and (" " + server))) - - def mode(self, target, command): - """Send a MODE command.""" - self.send_raw("MODE %s %s" % (target, command)) - - def motd(self, server=""): - """Send an MOTD command.""" - self.send_raw("MOTD" + (server and (" " + server))) - - def names(self, channels=None): - """Send a NAMES command.""" - self.send_raw("NAMES" + (channels and (" " + ",".join(channels)) or "")) - - def nick(self, newnick): - """Send a NICK command.""" - self.send_raw("NICK " + newnick) - - def notice(self, target, text): - """Send a NOTICE command.""" - # Should limit len(text) here! - self.send_raw("NOTICE %s :%s" % (target, text)) - - def oper(self, nick, password): - """Send an OPER command.""" - self.send_raw("OPER %s %s" % (nick, password)) - - def part(self, channels, message=""): - """Send a PART command.""" - if type(channels) == types.StringType: - self.send_raw("PART " + channels + (message and (" " + message))) - else: - self.send_raw("PART " + ",".join(channels) + (message and (" " + message))) - - def pass_(self, password): - """Send a PASS command.""" - self.send_raw("PASS " + password) - - def ping(self, target, target2=""): - """Send a PING command.""" - self.send_raw("PING %s%s" % (target, target2 and (" " + target2))) - - def pong(self, target, target2=""): - """Send a PONG command.""" - self.send_raw("PONG %s%s" % (target, target2 and (" " + target2))) - - def privmsg(self, target, text): - """Send a PRIVMSG command.""" - # Should limit len(text) here! - self.send_raw("PRIVMSG %s :%s" % (target, text)) - - def privmsg_many(self, targets, text): - """Send a PRIVMSG command to multiple targets.""" - # Should limit len(text) here! - self.send_raw("PRIVMSG %s :%s" % (",".join(targets), text)) - - def quit(self, message=""): - """Send a QUIT command.""" - # Note that many IRC servers don't use your QUIT message - # unless you've been connected for at least 5 minutes! - self.send_raw("QUIT" + (message and (" :" + message))) - - def send_raw(self, string): - """Send raw string to the server. - - The string will be padded with appropriate CR LF. - """ - if self.socket is None: - raise ServerNotConnectedError, "Not connected." - try: - if self.ssl: - self.ssl.write(string + "\r\n") - else: - self.socket.send(string + "\r\n") - if DEBUG: - print "TO SERVER:", string - except socket.error, x: - # Ouch! - self.disconnect("Connection reset by peer.") - - def squit(self, server, comment=""): - """Send an SQUIT command.""" - self.send_raw("SQUIT %s%s" % (server, comment and (" :" + comment))) - - def stats(self, statstype, server=""): - """Send a STATS command.""" - self.send_raw("STATS %s%s" % (statstype, server and (" " + server))) - - def time(self, server=""): - """Send a TIME command.""" - self.send_raw("TIME" + (server and (" " + server))) - - def topic(self, channel, new_topic=None): - """Send a TOPIC command.""" - if new_topic is None: - self.send_raw("TOPIC " + channel) - else: - self.send_raw("TOPIC %s :%s" % (channel, new_topic)) - - def trace(self, target=""): - """Send a TRACE command.""" - self.send_raw("TRACE" + (target and (" " + target))) - - def user(self, username, realname): - """Send a USER command.""" - self.send_raw("USER %s 0 * :%s" % (username, realname)) - - def userhost(self, nicks): - """Send a USERHOST command.""" - self.send_raw("USERHOST " + ",".join(nicks)) - - def users(self, server=""): - """Send a USERS command.""" - self.send_raw("USERS" + (server and (" " + server))) - - def version(self, server=""): - """Send a VERSION command.""" - self.send_raw("VERSION" + (server and (" " + server))) - - def wallops(self, text): - """Send a WALLOPS command.""" - self.send_raw("WALLOPS :" + text) - - def who(self, target="", op=""): - """Send a WHO command.""" - self.send_raw("WHO%s%s" % (target and (" " + target), op and (" o"))) - - def whois(self, targets): - """Send a WHOIS command.""" - self.send_raw("WHOIS " + ",".join(targets)) - - def whowas(self, nick, max="", server=""): - """Send a WHOWAS command.""" - self.send_raw("WHOWAS %s%s%s" % (nick, - max and (" " + max), - server and (" " + server))) - -class DCCConnectionError(IRCError): - pass - - -class DCCConnection(Connection): - """This class represents a DCC connection. - - DCCConnection objects are instantiated by calling the dcc - method on an IRC object. - """ - def __init__(self, irclibobj, dcctype): - Connection.__init__(self, irclibobj) - self.connected = 0 - self.passive = 0 - self.dcctype = dcctype - self.peeraddress = None - self.peerport = None - - def connect(self, address, port): - """Connect/reconnect to a DCC peer. - - Arguments: - address -- Host/IP address of the peer. - - port -- The port number to connect to. - - Returns the DCCConnection object. - """ - self.peeraddress = socket.gethostbyname(address) - self.peerport = port - self.socket = None - self.previous_buffer = "" - self.handlers = {} - self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.passive = 0 - try: - self.socket.connect((self.peeraddress, self.peerport)) - except socket.error, x: - raise DCCConnectionError, "Couldn't connect to socket: %s" % x - self.connected = 1 - if self.irclibobj.fn_to_add_socket: - self.irclibobj.fn_to_add_socket(self.socket) - return self - - def listen(self): - """Wait for a connection/reconnection from a DCC peer. - - Returns the DCCConnection object. - - The local IP address and port are available as - self.localaddress and self.localport. After connection from a - peer, the peer address and port are available as - self.peeraddress and self.peerport. - """ - self.previous_buffer = "" - self.handlers = {} - self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.passive = 1 - try: - self.socket.bind((socket.gethostbyname(socket.gethostname()), 0)) - self.localaddress, self.localport = self.socket.getsockname() - self.socket.listen(10) - except socket.error, x: - raise DCCConnectionError, "Couldn't bind socket: %s" % x - return self - - def disconnect(self, message=""): - """Hang up the connection and close the object. - - Arguments: - - message -- Quit message. - """ - if not self.connected: - return - - self.connected = 0 - try: - self.socket.close() - except socket.error, x: - pass - self.socket = None - self.irclibobj._handle_event( - self, - Event("dcc_disconnect", self.peeraddress, "", [message])) - self.irclibobj._remove_connection(self) - - def process_data(self): - """[Internal]""" - - if self.passive and not self.connected: - conn, (self.peeraddress, self.peerport) = self.socket.accept() - self.socket.close() - self.socket = conn - self.connected = 1 - if DEBUG: - print "DCC connection from %s:%d" % ( - self.peeraddress, self.peerport) - self.irclibobj._handle_event( - self, - Event("dcc_connect", self.peeraddress, None, None)) - return - - try: - new_data = self.socket.recv(2**14) - except socket.error, x: - # The server hung up. - self.disconnect("Connection reset by peer") - return - if not new_data: - # Read nothing: connection must be down. - self.disconnect("Connection reset by peer") - return - - if self.dcctype == "chat": - # The specification says lines are terminated with LF, but - # it seems safer to handle CR LF terminations too. - chunks = _linesep_regexp.split(self.previous_buffer + new_data) - - # Save the last, unfinished line. - self.previous_buffer = chunks[-1] - if len(self.previous_buffer) > 2**14: - # Bad peer! Naughty peer! - self.disconnect() - return - chunks = chunks[:-1] - else: - chunks = [new_data] - - command = "dccmsg" - prefix = self.peeraddress - target = None - for chunk in chunks: - if DEBUG: - print "FROM PEER:", chunk - arguments = [chunk] - if DEBUG: - print "command: %s, source: %s, target: %s, arguments: %s" % ( - command, prefix, target, arguments) - self.irclibobj._handle_event( - self, - Event(command, prefix, target, arguments)) - - def _get_socket(self): - """[Internal]""" - return self.socket - - def privmsg(self, string): - """Send data to DCC peer. - - The string will be padded with appropriate LF if it's a DCC - CHAT session. - """ - try: - self.socket.send(string) - if self.dcctype == "chat": - self.socket.send("\n") - if DEBUG: - print "TO PEER: %s\n" % string - except socket.error, x: - # Ouch! - self.disconnect("Connection reset by peer.") - -class SimpleIRCClient: - """A simple single-server IRC client class. - - This is an example of an object-oriented wrapper of the IRC - framework. A real IRC client can be made by subclassing this - class and adding appropriate methods. - - The method on_join will be called when a "join" event is created - (which is done when the server sends a JOIN messsage/command), - on_privmsg will be called for "privmsg" events, and so on. The - handler methods get two arguments: the connection object (same as - self.connection) and the event object. - - Instance attributes that can be used by sub classes: - - ircobj -- The IRC instance. - - connection -- The ServerConnection instance. - - dcc_connections -- A list of DCCConnection instances. - """ - def __init__(self): - self.ircobj = IRC() - self.connection = self.ircobj.server() - self.dcc_connections = [] - self.ircobj.add_global_handler("all_events", self._dispatcher, -10) - self.ircobj.add_global_handler("dcc_disconnect", self._dcc_disconnect, -10) - - def _dispatcher(self, c, e): - """[Internal]""" - m = "on_" + e.eventtype() - if hasattr(self, m): - getattr(self, m)(c, e) - - def _dcc_disconnect(self, c, e): - self.dcc_connections.remove(c) - - def connect(self, server, port, nickname, password=None, username=None, - ircname=None, localaddress="", localport=0, ssl=False, ipv6=False): - """Connect/reconnect to a server. - - Arguments: - - server -- Server name. - - port -- Port number. - - nickname -- The nickname. - - password -- Password (if any). - - username -- The username. - - ircname -- The IRC name. - - localaddress -- Bind the connection to a specific local IP address. - - localport -- Bind the connection to a specific local port. - - ssl -- Enable support for ssl. - - ipv6 -- Enable support for ipv6. - - This function can be called to reconnect a closed connection. - """ - self.connection.connect(server, port, nickname, - password, username, ircname, - localaddress, localport, ssl, ipv6) - - def dcc_connect(self, address, port, dcctype="chat"): - """Connect to a DCC peer. - - Arguments: - - address -- IP address of the peer. - - port -- Port to connect to. - - Returns a DCCConnection instance. - """ - dcc = self.ircobj.dcc(dcctype) - self.dcc_connections.append(dcc) - dcc.connect(address, port) - return dcc - - def dcc_listen(self, dcctype="chat"): - """Listen for connections from a DCC peer. - - Returns a DCCConnection instance. - """ - dcc = self.ircobj.dcc(dcctype) - self.dcc_connections.append(dcc) - dcc.listen() - return dcc - - def start(self): - """Start the IRC client.""" - self.ircobj.process_forever() - - -class Event: - """Class representing an IRC event.""" - def __init__(self, eventtype, source, target, arguments=None): - """Constructor of Event objects. - - Arguments: - - eventtype -- A string describing the event. - - source -- The originator of the event (a nick mask or a server). - - target -- The target of the event (a nick or a channel). - - arguments -- Any event specific arguments. - """ - self._eventtype = eventtype - self._source = source - self._target = target - if arguments: - self._arguments = arguments - else: - self._arguments = [] - - def eventtype(self): - """Get the event type.""" - return self._eventtype - - def source(self): - """Get the event source.""" - return self._source - - def target(self): - """Get the event target.""" - return self._target - - def arguments(self): - """Get the event arguments.""" - return self._arguments - -_LOW_LEVEL_QUOTE = "\020" -_CTCP_LEVEL_QUOTE = "\134" -_CTCP_DELIMITER = "\001" - -_low_level_mapping = { - "0": "\000", - "n": "\n", - "r": "\r", - _LOW_LEVEL_QUOTE: _LOW_LEVEL_QUOTE -} - -_low_level_regexp = re.compile(_LOW_LEVEL_QUOTE + "(.)") - -def mask_matches(nick, mask): - """Check if a nick matches a mask. - - Returns true if the nick matches, otherwise false. - """ - nick = irc_lower(nick) - mask = irc_lower(mask) - mask = mask.replace("\\", "\\\\") - for ch in ".$|[](){}+": - mask = mask.replace(ch, "\\" + ch) - mask = mask.replace("?", ".") - mask = mask.replace("*", ".*") - r = re.compile(mask, re.IGNORECASE) - return r.match(nick) - -_special = "-[]\\`^{}" -nick_characters = string.ascii_letters + string.digits + _special -_ircstring_translation = string.maketrans(string.ascii_uppercase + "[]\\^", - string.ascii_lowercase + "{}|~") - -def irc_lower(s): - """Returns a lowercased string. - - The definition of lowercased comes from the IRC specification (RFC - 1459). - """ - return s.translate(_ircstring_translation) - -def _ctcp_dequote(message): - """[Internal] Dequote a message according to CTCP specifications. - - The function returns a list where each element can be either a - string (normal message) or a tuple of one or two strings (tagged - messages). If a tuple has only one element (ie is a singleton), - that element is the tag; otherwise the tuple has two elements: the - tag and the data. - - Arguments: - - message -- The message to be decoded. - """ - - def _low_level_replace(match_obj): - ch = match_obj.group(1) - - # If low_level_mapping doesn't have the character as key, we - # should just return the character. - return _low_level_mapping.get(ch, ch) - - if _LOW_LEVEL_QUOTE in message: - # Yup, there was a quote. Release the dequoter, man! - message = _low_level_regexp.sub(_low_level_replace, message) - - if _CTCP_DELIMITER not in message: - return [message] - else: - # Split it into parts. (Does any IRC client actually *use* - # CTCP stacking like this?) - chunks = message.split(_CTCP_DELIMITER) - - messages = [] - i = 0 - while i < len(chunks)-1: - # Add message if it's non-empty. - if len(chunks[i]) > 0: - messages.append(chunks[i]) - - if i < len(chunks)-2: - # Aye! CTCP tagged data ahead! - messages.append(tuple(chunks[i+1].split(" ", 1))) - - i = i + 2 - - if len(chunks) % 2 == 0: - # Hey, a lonely _CTCP_DELIMITER at the end! This means - # that the last chunk, including the delimiter, is a - # normal message! (This is according to the CTCP - # specification.) - messages.append(_CTCP_DELIMITER + chunks[-1]) - - return messages - -def is_channel(string): - """Check if a string is a channel name. - - Returns true if the argument is a channel name, otherwise false. - """ - return string and string[0] in "#&+!" - -def ip_numstr_to_quad(num): - """Convert an IP number as an integer given in ASCII - representation (e.g. '3232235521') to an IP address string - (e.g. '192.168.0.1').""" - n = long(num) - p = map(str, map(int, [n >> 24 & 0xFF, n >> 16 & 0xFF, - n >> 8 & 0xFF, n & 0xFF])) - return ".".join(p) - -def ip_quad_to_numstr(quad): - """Convert an IP address string (e.g. '192.168.0.1') to an IP - number as an integer given in ASCII representation - (e.g. '3232235521').""" - p = map(long, quad.split(".")) - s = str((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) - if s[-1] == "L": - s = s[:-1] - return s - -def nm_to_n(s): - """Get the nick part of a nickmask. - - (The source of an Event is a nickmask.) - """ - return s.split("!")[0] - -def nm_to_uh(s): - """Get the userhost part of a nickmask. - - (The source of an Event is a nickmask.) - """ - return s.split("!")[1] - -def nm_to_h(s): - """Get the host part of a nickmask. - - (The source of an Event is a nickmask.) - """ - return s.split("@")[1] - -def nm_to_u(s): - """Get the user part of a nickmask. - - (The source of an Event is a nickmask.) - """ - s = s.split("!")[1] - return s.split("@")[0] - -def parse_nick_modes(mode_string): - """Parse a nick mode string. - - The function returns a list of lists with three members: sign, - mode and argument. The sign is \"+\" or \"-\". The argument is - always None. - - Example: - - >>> irclib.parse_nick_modes(\"+ab-c\") - [['+', 'a', None], ['+', 'b', None], ['-', 'c', None]] - """ - - return _parse_modes(mode_string, "") - -def parse_channel_modes(mode_string): - """Parse a channel mode string. - - The function returns a list of lists with three members: sign, - mode and argument. The sign is \"+\" or \"-\". The argument is - None if mode isn't one of \"b\", \"k\", \"l\", \"v\" or \"o\". - - Example: - - >>> irclib.parse_channel_modes(\"+ab-c foo\") - [['+', 'a', None], ['+', 'b', 'foo'], ['-', 'c', None]] - """ - - return _parse_modes(mode_string, "bklvo") - -def _parse_modes(mode_string, unary_modes=""): - """[Internal]""" - modes = [] - arg_count = 0 - - # State variable. - sign = "" - - a = mode_string.split() - if len(a) == 0: - return [] - else: - mode_part, args = a[0], a[1:] - - if mode_part[0] not in "+-": - return [] - for ch in mode_part: - if ch in "+-": - sign = ch - elif ch == " ": - collecting_arguments = 1 - elif ch in unary_modes: - if len(args) >= arg_count + 1: - modes.append([sign, ch, args[arg_count]]) - arg_count = arg_count + 1 - else: - modes.append([sign, ch, None]) - else: - modes.append([sign, ch, None]) - return modes - -def _ping_ponger(connection, event): - """[Internal]""" - connection.pong(event.target()) - -# Numeric table mostly stolen from the Perl IRC module (Net::IRC). -numeric_events = { - "001": "welcome", - "002": "yourhost", - "003": "created", - "004": "myinfo", - "005": "featurelist", # XXX - "200": "tracelink", - "201": "traceconnecting", - "202": "tracehandshake", - "203": "traceunknown", - "204": "traceoperator", - "205": "traceuser", - "206": "traceserver", - "207": "traceservice", - "208": "tracenewtype", - "209": "traceclass", - "210": "tracereconnect", - "211": "statslinkinfo", - "212": "statscommands", - "213": "statscline", - "214": "statsnline", - "215": "statsiline", - "216": "statskline", - "217": "statsqline", - "218": "statsyline", - "219": "endofstats", - "221": "umodeis", - "231": "serviceinfo", - "232": "endofservices", - "233": "service", - "234": "servlist", - "235": "servlistend", - "241": "statslline", - "242": "statsuptime", - "243": "statsoline", - "244": "statshline", - "250": "luserconns", - "251": "luserclient", - "252": "luserop", - "253": "luserunknown", - "254": "luserchannels", - "255": "luserme", - "256": "adminme", - "257": "adminloc1", - "258": "adminloc2", - "259": "adminemail", - "261": "tracelog", - "262": "endoftrace", - "263": "tryagain", - "265": "n_local", - "266": "n_global", - "300": "none", - "301": "away", - "302": "userhost", - "303": "ison", - "305": "unaway", - "306": "nowaway", - "311": "whoisuser", - "312": "whoisserver", - "313": "whoisoperator", - "314": "whowasuser", - "315": "endofwho", - "316": "whoischanop", - "317": "whoisidle", - "318": "endofwhois", - "319": "whoischannels", - "321": "liststart", - "322": "list", - "323": "listend", - "324": "channelmodeis", - "329": "channelcreate", - "331": "notopic", - "332": "currenttopic", - "333": "topicinfo", - "341": "inviting", - "342": "summoning", - "346": "invitelist", - "347": "endofinvitelist", - "348": "exceptlist", - "349": "endofexceptlist", - "351": "version", - "352": "whoreply", - "353": "namreply", - "361": "killdone", - "362": "closing", - "363": "closeend", - "364": "links", - "365": "endoflinks", - "366": "endofnames", - "367": "banlist", - "368": "endofbanlist", - "369": "endofwhowas", - "371": "info", - "372": "motd", - "373": "infostart", - "374": "endofinfo", - "375": "motdstart", - "376": "endofmotd", - "377": "motd2", # 1997-10-16 -- tkil - "381": "youreoper", - "382": "rehashing", - "384": "myportis", - "391": "time", - "392": "usersstart", - "393": "users", - "394": "endofusers", - "395": "nousers", - "401": "nosuchnick", - "402": "nosuchserver", - "403": "nosuchchannel", - "404": "cannotsendtochan", - "405": "toomanychannels", - "406": "wasnosuchnick", - "407": "toomanytargets", - "409": "noorigin", - "411": "norecipient", - "412": "notexttosend", - "413": "notoplevel", - "414": "wildtoplevel", - "421": "unknowncommand", - "422": "nomotd", - "423": "noadmininfo", - "424": "fileerror", - "431": "nonicknamegiven", - "432": "erroneusnickname", # Thiss iz how its speld in thee RFC. - "433": "nicknameinuse", - "436": "nickcollision", - "437": "unavailresource", # "Nick temporally unavailable" - "441": "usernotinchannel", - "442": "notonchannel", - "443": "useronchannel", - "444": "nologin", - "445": "summondisabled", - "446": "usersdisabled", - "451": "notregistered", - "461": "needmoreparams", - "462": "alreadyregistered", - "463": "nopermforhost", - "464": "passwdmismatch", - "465": "yourebannedcreep", # I love this one... - "466": "youwillbebanned", - "467": "keyset", - "471": "channelisfull", - "472": "unknownmode", - "473": "inviteonlychan", - "474": "bannedfromchan", - "475": "badchannelkey", - "476": "badchanmask", - "477": "nochanmodes", # "Channel doesn't support modes" - "478": "banlistfull", - "481": "noprivileges", - "482": "chanoprivsneeded", - "483": "cantkillserver", - "484": "restricted", # Connection is restricted - "485": "uniqopprivsneeded", - "491": "nooperhost", - "492": "noservicehost", - "501": "umodeunknownflag", - "502": "usersdontmatch", -} - -generated_events = [ - # Generated events - "dcc_connect", - "dcc_disconnect", - "dccmsg", - "disconnect", - "ctcp", - "ctcpreply", -] - -protocol_events = [ - # IRC protocol events - "error", - "join", - "kick", - "mode", - "part", - "ping", - "privmsg", - "privnotice", - "pubmsg", - "pubnotice", - "quit", - "invite", - "pong", -] - -all_events = generated_events + protocol_events + numeric_events.values() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py deleted file mode 100644 index 4bb20aa..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py +++ /dev/null @@ -1,140 +0,0 @@ -__all__ = [ - 'AbstractBasicAuthHandler', - 'AbstractDigestAuthHandler', - 'BaseHandler', - 'Browser', - 'BrowserStateError', - 'CacheFTPHandler', - 'ContentTooShortError', - 'Cookie', - 'CookieJar', - 'CookiePolicy', - 'DefaultCookiePolicy', - 'DefaultFactory', - 'FTPHandler', - 'Factory', - 'FileCookieJar', - 'FileHandler', - 'FormNotFoundError', - 'FormsFactory', - 'HTTPBasicAuthHandler', - 'HTTPCookieProcessor', - 'HTTPDefaultErrorHandler', - 'HTTPDigestAuthHandler', - 'HTTPEquivProcessor', - 'HTTPError', - 'HTTPErrorProcessor', - 'HTTPHandler', - 'HTTPPasswordMgr', - 'HTTPPasswordMgrWithDefaultRealm', - 'HTTPProxyPasswordMgr', - 'HTTPRedirectDebugProcessor', - 'HTTPRedirectHandler', - 'HTTPRefererProcessor', - 'HTTPRefreshProcessor', - 'HTTPRequestUpgradeProcessor', - 'HTTPResponseDebugProcessor', - 'HTTPRobotRulesProcessor', - 'HTTPSClientCertMgr', - 'HTTPSHandler', - 'HeadParser', - 'History', - 'LWPCookieJar', - 'Link', - 'LinkNotFoundError', - 'LinksFactory', - 'LoadError', - 'MSIECookieJar', - 'MozillaCookieJar', - 'OpenerDirector', - 'OpenerFactory', - 'ParseError', - 'ProxyBasicAuthHandler', - 'ProxyDigestAuthHandler', - 'ProxyHandler', - 'Request', - 'ResponseUpgradeProcessor', - 'RobotExclusionError', - 'RobustFactory', - 'RobustFormsFactory', - 'RobustLinksFactory', - 'RobustTitleFactory', - 'SeekableProcessor', - 'SeekableResponseOpener', - 'TitleFactory', - 'URLError', - 'USE_BARE_EXCEPT', - 'UnknownHandler', - 'UserAgent', - 'UserAgentBase', - 'XHTMLCompatibleHeadParser', - '__version__', - 'build_opener', - 'install_opener', - 'lwp_cookie_str', - 'make_response', - 'request_host', - 'response_seek_wrapper', # XXX deprecate in public interface? - 'seek_wrapped_response' # XXX should probably use this internally in place of response_seek_wrapper() - 'str2time', - 'urlopen', - 'urlretrieve'] - -import logging -import sys - -from _mechanize import __version__ - -# high-level stateful browser-style interface -from _mechanize import \ - Browser, History, \ - BrowserStateError, LinkNotFoundError, FormNotFoundError - -# configurable URL-opener interface -from _useragent import UserAgentBase, UserAgent -from _html import \ - ParseError, \ - Link, \ - Factory, DefaultFactory, RobustFactory, \ - FormsFactory, LinksFactory, TitleFactory, \ - RobustFormsFactory, RobustLinksFactory, RobustTitleFactory - -# urllib2 work-alike interface (part from mechanize, part from urllib2) -# This is a superset of the urllib2 interface. -from _urllib2 import * - -# misc -from _opener import ContentTooShortError, OpenerFactory, urlretrieve -from _util import http2time as str2time -from _response import \ - response_seek_wrapper, seek_wrapped_response, make_response -from _http import HeadParser -try: - from _http import XHTMLCompatibleHeadParser -except ImportError: - pass - -# cookies -from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \ - CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \ - effective_request_host -from _lwpcookiejar import LWPCookieJar, lwp_cookie_str -# 2.4 raises SyntaxError due to generator / try/finally use -if sys.version_info[:2] > (2,4): - try: - import sqlite3 - except ImportError: - pass - else: - from _firefox3cookiejar import Firefox3CookieJar -from _mozillacookiejar import MozillaCookieJar -from _msiecookiejar import MSIECookieJar - -# If you hate the idea of turning bugs into warnings, do: -# import mechanize; mechanize.USE_BARE_EXCEPT = False -USE_BARE_EXCEPT = True - -logger = logging.getLogger("mechanize") -if logger.level is logging.NOTSET: - logger.setLevel(logging.CRITICAL) -del logger diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py deleted file mode 100644 index 232f7d8..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py +++ /dev/null @@ -1,522 +0,0 @@ -"""HTTP Authentication and Proxy support. - -All but HTTPProxyPasswordMgr come from Python 2.5. - - -Copyright 2006 John J. Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -import base64 -import copy -import os -import posixpath -import random -import re -import time -import urlparse - -try: - import hashlib -except ImportError: - import md5 - import sha - def sha1_digest(bytes): - return sha.new(bytes).hexdigest() - def md5_digest(bytes): - return md5.new(bytes).hexdigest() -else: - def sha1_digest(bytes): - return hashlib.sha1(bytes).hexdigest() - def md5_digest(bytes): - return hashlib.md5(bytes).hexdigest() - -from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list -from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \ - splitport - - -def _parse_proxy(proxy): - """Return (scheme, user, password, host/port) given a URL or an authority. - - If a URL is supplied, it must have an authority (host:port) component. - According to RFC 3986, having an authority component means the URL must - have two slashes after the scheme: - - >>> _parse_proxy('file:/ftp.example.com/') - Traceback (most recent call last): - ValueError: proxy URL with no authority: 'file:/ftp.example.com/' - - The first three items of the returned tuple may be None. - - Examples of authority parsing: - - >>> _parse_proxy('proxy.example.com') - (None, None, None, 'proxy.example.com') - >>> _parse_proxy('proxy.example.com:3128') - (None, None, None, 'proxy.example.com:3128') - - The authority component may optionally include userinfo (assumed to be - username:password): - - >>> _parse_proxy('joe:password@proxy.example.com') - (None, 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('joe:password@proxy.example.com:3128') - (None, 'joe', 'password', 'proxy.example.com:3128') - - Same examples, but with URLs instead: - - >>> _parse_proxy('http://proxy.example.com/') - ('http', None, None, 'proxy.example.com') - >>> _parse_proxy('http://proxy.example.com:3128/') - ('http', None, None, 'proxy.example.com:3128') - >>> _parse_proxy('http://joe:password@proxy.example.com/') - ('http', 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('http://joe:password@proxy.example.com:3128') - ('http', 'joe', 'password', 'proxy.example.com:3128') - - Everything after the authority is ignored: - - >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') - ('ftp', 'joe', 'password', 'proxy.example.com') - - Test for no trailing '/' case: - - >>> _parse_proxy('http://joe:password@proxy.example.com') - ('http', 'joe', 'password', 'proxy.example.com') - - """ - scheme, r_scheme = splittype(proxy) - if not r_scheme.startswith("/"): - # authority - scheme = None - authority = proxy - else: - # URL - if not r_scheme.startswith("//"): - raise ValueError("proxy URL with no authority: %r" % proxy) - # We have an authority, so for RFC 3986-compliant URLs (by ss 3. - # and 3.3.), path is empty or starts with '/' - end = r_scheme.find("/", 2) - if end == -1: - end = None - authority = r_scheme[2:end] - userinfo, hostport = splituser(authority) - if userinfo is not None: - user, password = splitpasswd(userinfo) - else: - user = password = None - return scheme, user, password, hostport - -class ProxyHandler(BaseHandler): - # Proxies must be in front - handler_order = 100 - - def __init__(self, proxies=None): - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'has_key'), "proxies must be a mapping" - self.proxies = proxies - for type, url in proxies.items(): - setattr(self, '%s_open' % type, - lambda r, proxy=url, type=type, meth=self.proxy_open: \ - meth(r, proxy, type)) - - def proxy_open(self, req, proxy, type): - orig_type = req.get_type() - proxy_type, user, password, hostport = _parse_proxy(proxy) - if proxy_type is None: - proxy_type = orig_type - if user and password: - user_pass = '%s:%s' % (unquote(user), unquote(password)) - creds = base64.encodestring(user_pass).strip() - req.add_header('Proxy-authorization', 'Basic ' + creds) - hostport = unquote(hostport) - req.set_proxy(hostport, proxy_type) - if orig_type == proxy_type: - # let other handlers take care of it - return None - else: - # need to start over, because the other handlers don't - # grok the proxy's URL type - # e.g. if we have a constructor arg proxies like so: - # {'http': 'ftp://proxy.example.com'}, we may end up turning - # a request for http://acme.example.com/a into one for - # ftp://proxy.example.com/a - return self.parent.open(req) - -class HTTPPasswordMgr: - - def __init__(self): - self.passwd = {} - - def add_password(self, realm, uri, user, passwd): - # uri could be a single URI or a sequence - if isinstance(uri, basestring): - uri = [uri] - if not realm in self.passwd: - self.passwd[realm] = {} - for default_port in True, False: - reduced_uri = tuple( - [self.reduce_uri(u, default_port) for u in uri]) - self.passwd[realm][reduced_uri] = (user, passwd) - - def find_user_password(self, realm, authuri): - domains = self.passwd.get(realm, {}) - for default_port in True, False: - reduced_authuri = self.reduce_uri(authuri, default_port) - for uris, authinfo in domains.iteritems(): - for uri in uris: - if self.is_suburi(uri, reduced_authuri): - return authinfo - return None, None - - def reduce_uri(self, uri, default_port=True): - """Accept authority or URI and extract only the authority and path.""" - # note HTTP URLs do not have a userinfo component - parts = urlparse.urlsplit(uri) - if parts[1]: - # URI - scheme = parts[0] - authority = parts[1] - path = parts[2] or '/' - else: - # host or host:port - scheme = None - authority = uri - path = '/' - host, port = splitport(authority) - if default_port and port is None and scheme is not None: - dport = {"http": 80, - "https": 443, - }.get(scheme) - if dport is not None: - authority = "%s:%d" % (host, dport) - return authority, path - - def is_suburi(self, base, test): - """Check if test is below base in a URI tree - - Both args must be URIs in reduced form. - """ - if base == test: - return True - if base[0] != test[0]: - return False - common = posixpath.commonprefix((base[1], test[1])) - if len(common) == len(base[1]): - return True - return False - - -class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): - - def find_user_password(self, realm, authuri): - user, password = HTTPPasswordMgr.find_user_password(self, realm, - authuri) - if user is not None: - return user, password - return HTTPPasswordMgr.find_user_password(self, None, authuri) - - -class AbstractBasicAuthHandler: - - rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I) - - # XXX there can actually be multiple auth-schemes in a - # www-authenticate header. should probably be a lot more careful - # in parsing them to extract multiple alternatives - - def __init__(self, password_mgr=None): - if password_mgr is None: - password_mgr = HTTPPasswordMgr() - self.passwd = password_mgr - self.add_password = self.passwd.add_password - - def http_error_auth_reqed(self, authreq, host, req, headers): - # host may be an authority (without userinfo) or a URL with an - # authority - # XXX could be multiple headers - authreq = headers.get(authreq, None) - if authreq: - mo = AbstractBasicAuthHandler.rx.search(authreq) - if mo: - scheme, realm = mo.groups() - if scheme.lower() == 'basic': - return self.retry_http_basic_auth(host, req, realm) - - def retry_http_basic_auth(self, host, req, realm): - user, pw = self.passwd.find_user_password(realm, host) - if pw is not None: - raw = "%s:%s" % (user, pw) - auth = 'Basic %s' % base64.encodestring(raw).strip() - if req.headers.get(self.auth_header, None) == auth: - return None - newreq = copy.copy(req) - newreq.add_header(self.auth_header, auth) - newreq.visit = False - return self.parent.open(newreq) - else: - return None - - -class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - - auth_header = 'Authorization' - - def http_error_401(self, req, fp, code, msg, headers): - url = req.get_full_url() - return self.http_error_auth_reqed('www-authenticate', - url, req, headers) - - -class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - - auth_header = 'Proxy-authorization' - - def http_error_407(self, req, fp, code, msg, headers): - # http_error_auth_reqed requires that there is no userinfo component in - # authority. Assume there isn't one, since urllib2 does not (and - # should not, RFC 3986 s. 3.2.1) support requests for URLs containing - # userinfo. - authority = req.get_host() - return self.http_error_auth_reqed('proxy-authenticate', - authority, req, headers) - - -def randombytes(n): - """Return n random bytes.""" - # Use /dev/urandom if it is available. Fall back to random module - # if not. It might be worthwhile to extend this function to use - # other platform-specific mechanisms for getting random bytes. - if os.path.exists("/dev/urandom"): - f = open("/dev/urandom") - s = f.read(n) - f.close() - return s - else: - L = [chr(random.randrange(0, 256)) for i in range(n)] - return "".join(L) - -class AbstractDigestAuthHandler: - # Digest authentication is specified in RFC 2617. - - # XXX The client does not inspect the Authentication-Info header - # in a successful response. - - # XXX It should be possible to test this implementation against - # a mock server that just generates a static set of challenges. - - # XXX qop="auth-int" supports is shaky - - def __init__(self, passwd=None): - if passwd is None: - passwd = HTTPPasswordMgr() - self.passwd = passwd - self.add_password = self.passwd.add_password - self.retried = 0 - self.nonce_count = 0 - - def reset_retry_count(self): - self.retried = 0 - - def http_error_auth_reqed(self, auth_header, host, req, headers): - authreq = headers.get(auth_header, None) - if self.retried > 5: - # Don't fail endlessly - if we failed once, we'll probably - # fail a second time. Hm. Unless the Password Manager is - # prompting for the information. Crap. This isn't great - # but it's better than the current 'repeat until recursion - # depth exceeded' approach <wink> - raise HTTPError(req.get_full_url(), 401, "digest auth failed", - headers, None) - else: - self.retried += 1 - if authreq: - scheme = authreq.split()[0] - if scheme.lower() == 'digest': - return self.retry_http_digest_auth(req, authreq) - - def retry_http_digest_auth(self, req, auth): - token, challenge = auth.split(' ', 1) - chal = parse_keqv_list(parse_http_list(challenge)) - auth = self.get_authorization(req, chal) - if auth: - auth_val = 'Digest %s' % auth - if req.headers.get(self.auth_header, None) == auth_val: - return None - newreq = copy.copy(req) - newreq.add_unredirected_header(self.auth_header, auth_val) - newreq.visit = False - return self.parent.open(newreq) - - def get_cnonce(self, nonce): - # The cnonce-value is an opaque - # quoted string value provided by the client and used by both client - # and server to avoid chosen plaintext attacks, to provide mutual - # authentication, and to provide some message integrity protection. - # This isn't a fabulous effort, but it's probably Good Enough. - dig = sha1_digest("%s:%s:%s:%s" % (self.nonce_count, nonce, - time.ctime(), randombytes(8))) - return dig[:16] - - def get_authorization(self, req, chal): - try: - realm = chal['realm'] - nonce = chal['nonce'] - qop = chal.get('qop') - algorithm = chal.get('algorithm', 'MD5') - # mod_digest doesn't send an opaque, even though it isn't - # supposed to be optional - opaque = chal.get('opaque', None) - except KeyError: - return None - - H, KD = self.get_algorithm_impls(algorithm) - if H is None: - return None - - user, pw = self.passwd.find_user_password(realm, req.get_full_url()) - if user is None: - return None - - # XXX not implemented yet - if req.has_data(): - entdig = self.get_entity_digest(req.get_data(), chal) - else: - entdig = None - - A1 = "%s:%s:%s" % (user, realm, pw) - A2 = "%s:%s" % (req.get_method(), - # XXX selector: what about proxies and full urls - req.get_selector()) - if qop == 'auth': - self.nonce_count += 1 - ncvalue = '%08x' % self.nonce_count - cnonce = self.get_cnonce(nonce) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) - respdig = KD(H(A1), noncebit) - elif qop is None: - respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) - else: - # XXX handle auth-int. - pass - - # XXX should the partial digests be encoded too? - - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (user, realm, nonce, req.get_selector(), - respdig) - if opaque: - base += ', opaque="%s"' % opaque - if entdig: - base += ', digest="%s"' % entdig - base += ', algorithm="%s"' % algorithm - if qop: - base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) - return base - - def get_algorithm_impls(self, algorithm): - # lambdas assume digest modules are imported at the top level - if algorithm == 'MD5': - H = md5_digest - elif algorithm == 'SHA': - H = sha1_digest - # XXX MD5-sess - KD = lambda s, d: H("%s:%s" % (s, d)) - return H, KD - - def get_entity_digest(self, data, chal): - # XXX not implemented yet - return None - - -class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): - """An authentication protocol defined by RFC 2069 - - Digest authentication improves on basic authentication because it - does not transmit passwords in the clear. - """ - - auth_header = 'Authorization' - handler_order = 490 - - def http_error_401(self, req, fp, code, msg, headers): - host = urlparse.urlparse(req.get_full_url())[1] - retry = self.http_error_auth_reqed('www-authenticate', - host, req, headers) - self.reset_retry_count() - return retry - - -class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): - - auth_header = 'Proxy-Authorization' - handler_order = 490 - - def http_error_407(self, req, fp, code, msg, headers): - host = req.get_host() - retry = self.http_error_auth_reqed('proxy-authenticate', - host, req, headers) - self.reset_retry_count() - return retry - - -# XXX ugly implementation, should probably not bother deriving -class HTTPProxyPasswordMgr(HTTPPasswordMgr): - # has default realm and host/port - def add_password(self, realm, uri, user, passwd): - # uri could be a single URI or a sequence - if uri is None or isinstance(uri, basestring): - uris = [uri] - else: - uris = uri - passwd_by_domain = self.passwd.setdefault(realm, {}) - for uri in uris: - for default_port in True, False: - reduced_uri = self.reduce_uri(uri, default_port) - passwd_by_domain[reduced_uri] = (user, passwd) - - def find_user_password(self, realm, authuri): - attempts = [(realm, authuri), (None, authuri)] - # bleh, want default realm to take precedence over default - # URI/authority, hence this outer loop - for default_uri in False, True: - for realm, authuri in attempts: - authinfo_by_domain = self.passwd.get(realm, {}) - for default_port in True, False: - reduced_authuri = self.reduce_uri(authuri, default_port) - for uri, authinfo in authinfo_by_domain.iteritems(): - if uri is None and not default_uri: - continue - if self.is_suburi(uri, reduced_authuri): - return authinfo - user, password = None, None - - if user is not None: - break - return user, password - - def reduce_uri(self, uri, default_port=True): - if uri is None: - return None - return HTTPPasswordMgr.reduce_uri(self, uri, default_port) - - def is_suburi(self, base, test): - if base is None: - # default to the proxy's host/port - hostport, path = test - base = (hostport, "/") - return HTTPPasswordMgr.is_suburi(self, base, test) - - -class HTTPSClientCertMgr(HTTPPasswordMgr): - # implementation inheritance: this is not a proper subclass - def add_key_cert(self, uri, key_file, cert_file): - self.add_password(None, uri, key_file, cert_file) - def find_key_cert(self, authuri): - return HTTPPasswordMgr.find_user_password(self, None, authuri) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py deleted file mode 100644 index 268b305..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py +++ /dev/null @@ -1,1080 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -v2.1.1 -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance -into a tree representation. It provides methods and Pythonic idioms -that make it easy to search and modify the tree. - -A well-formed XML/HTML document will yield a well-formed data -structure. An ill-formed XML/HTML document will yield a -correspondingly ill-formed data structure. If your document is only -locally well-formed, you can use this library to find and process the -well-formed part of it. The BeautifulSoup class has heuristics for -obtaining a sensible parse tree in the face of common HTML errors. - -Beautiful Soup has no external dependencies. It works with Python 2.2 -and up. - -Beautiful Soup defines classes for four different parsing strategies: - - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific - language that kind of looks like XML. - - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid - or invalid. - - * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML - that trips up BeautifulSoup. - - * BeautifulSOAP, for making it easier to parse XML documents that use - lots of subelements containing a single string, where you'd prefer - they put that string into an attribute (such as SOAP messages). - -You can subclass BeautifulStoneSoup or BeautifulSoup to create a -parsing strategy specific to an XML schema or a particular bizarre -HTML document. Typically your subclass would just override -SELF_CLOSING_TAGS and/or NESTABLE_TAGS. -""" #" -from __future__ import generators - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "2.1.1" -__date__ = "$Date: 2004/10/18 00:14:20 $" -__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" -__license__ = "PSF" - -from sgmllib import SGMLParser, SGMLParseError -import types -import re -import sgmllib - -#This code makes Beautiful Soup able to parse XML with namespaces -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') - -class NullType(object): - - """Similar to NoneType with a corresponding singleton instance - 'Null' that, unlike None, accepts any message and returns itself. - - Examples: - >>> Null("send", "a", "message")("and one more", - ... "and what you get still") is Null - True - """ - - def __new__(cls): return Null - def __call__(self, *args, **kwargs): return Null -## def __getstate__(self, *args): return Null - def __getattr__(self, attr): return Null - def __getitem__(self, item): return Null - def __setattr__(self, attr, value): pass - def __setitem__(self, item, value): pass - def __len__(self): return 0 - # FIXME: is this a python bug? otherwise ``for x in Null: pass`` - # never terminates... - def __iter__(self): return iter([]) - def __contains__(self, item): return False - def __repr__(self): return "Null" -Null = object.__new__(NullType) - -class PageElement: - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def setup(self, parent=Null, previous=Null): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent - self.previous = previous - self.next = Null - self.previousSibling = Null - self.nextSibling = Null - if self.parent and self.parent.contents: - self.previousSibling = self.parent.contents[-1] - self.previousSibling.nextSibling = self - - def findNext(self, name=None, attrs={}, text=None): - """Returns the first item that matches the given criteria and - appears after this Tag in the document.""" - return self._first(self.fetchNext, name, attrs, text) - firstNext = findNext - - def fetchNext(self, name=None, attrs={}, text=None, limit=None): - """Returns all items that match the given criteria and appear - before after Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.nextGenerator) - - def findNextSibling(self, name=None, attrs={}, text=None): - """Returns the closest sibling to this Tag that matches the - given criteria and appears after this Tag in the document.""" - return self._first(self.fetchNextSiblings, name, attrs, text) - firstNextSibling = findNextSibling - - def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None): - """Returns the siblings of this Tag that match the given - criteria and appear after this Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator) - - def findPrevious(self, name=None, attrs={}, text=None): - """Returns the first item that matches the given criteria and - appears before this Tag in the document.""" - return self._first(self.fetchPrevious, name, attrs, text) - - def fetchPrevious(self, name=None, attrs={}, text=None, limit=None): - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.previousGenerator) - firstPrevious = findPrevious - - def findPreviousSibling(self, name=None, attrs={}, text=None): - """Returns the closest sibling to this Tag that matches the - given criteria and appears before this Tag in the document.""" - return self._first(self.fetchPreviousSiblings, name, attrs, text) - firstPreviousSibling = findPreviousSibling - - def fetchPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None): - """Returns the siblings of this Tag that match the given - criteria and appear before this Tag in the document.""" - return self._fetch(name, attrs, text, limit, - self.previousSiblingGenerator) - - def findParent(self, name=None, attrs={}): - """Returns the closest parent of this Tag that matches the given - criteria.""" - r = Null - l = self.fetchParents(name, attrs, 1) - if l: - r = l[0] - return r - firstParent = findParent - - def fetchParents(self, name=None, attrs={}, limit=None): - """Returns the parents of this Tag that match the given - criteria.""" - return self._fetch(name, attrs, None, limit, self.parentGenerator) - - #These methods do the real heavy lifting. - - def _first(self, method, name, attrs, text): - r = Null - l = method(name, attrs, text, 1) - if l: - r = l[0] - return r - - def _fetch(self, name, attrs, text, limit, generator): - "Iterates over a generator looking for things that match." - if not hasattr(attrs, 'items'): - attrs = {'class' : attrs} - - results = [] - g = generator() - while True: - try: - i = g.next() - except StopIteration: - break - found = None - if isinstance(i, Tag): - if not text: - if not name or self._matches(i, name): - match = True - for attr, matchAgainst in attrs.items(): - check = i.get(attr) - if not self._matches(check, matchAgainst): - match = False - break - if match: - found = i - elif text: - if self._matches(i, text): - found = i - if found: - results.append(found) - if limit and len(results) >= limit: - break - return results - - #Generators that can be used to navigate starting from both - #NavigableTexts and Tags. - def nextGenerator(self): - i = self - while i: - i = i.next - yield i - - def nextSiblingGenerator(self): - i = self - while i: - i = i.nextSibling - yield i - - def previousGenerator(self): - i = self - while i: - i = i.previous - yield i - - def previousSiblingGenerator(self): - i = self - while i: - i = i.previousSibling - yield i - - def parentGenerator(self): - i = self - while i: - i = i.parent - yield i - - def _matches(self, chunk, howToMatch): - #print 'looking for %s in %s' % (howToMatch, chunk) - # - # If given a list of items, return true if the list contains a - # text element that matches. - if isList(chunk) and not isinstance(chunk, Tag): - for tag in chunk: - if isinstance(tag, NavigableText) and self._matches(tag, howToMatch): - return True - return False - if callable(howToMatch): - return howToMatch(chunk) - if isinstance(chunk, Tag): - #Custom match methods take the tag as an argument, but all other - #ways of matching match the tag name as a string - chunk = chunk.name - #Now we know that chunk is a string - if not isinstance(chunk, basestring): - chunk = str(chunk) - if hasattr(howToMatch, 'match'): - # It's a regexp object. - return howToMatch.search(chunk) - if isList(howToMatch): - return chunk in howToMatch - if hasattr(howToMatch, 'items'): - return howToMatch.has_key(chunk) - #It's just a string - return str(howToMatch) == chunk - -class NavigableText(PageElement): - - def __getattr__(self, attr): - "For backwards compatibility, text.string gives you text" - if attr == 'string': - return self - else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - -class NavigableString(str, NavigableText): - pass - -class NavigableUnicodeString(unicode, NavigableText): - pass - -class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" - - def __init__(self, name, attrs=None, parent=Null, previous=Null): - "Basic constructor." - self.name = name - if attrs == None: - attrs = [] - self.attrs = attrs - self.contents = [] - self.setup(parent, previous) - self.hidden = False - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or - the value given for 'default' if it doesn't have that - attribute.""" - return self._getAttrMap().get(key, default) - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, - and throws an exception if it's not there.""" - return self._getAttrMap()[key] - - def __iter__(self): - "Iterating over a tag iterates over its contents." - return iter(self.contents) - - def __len__(self): - "The length of a tag is the length of its list of contents." - return len(self.contents) - - def __contains__(self, x): - return x in self.contents - - def __nonzero__(self): - "A tag is non-None even if it has no contents." - return True - - def __setitem__(self, key, value): - """Setting tag[key] sets the value of the 'key' attribute for the - tag.""" - self._getAttrMap() - self.attrMap[key] = value - found = False - for i in range(0, len(self.attrs)): - if self.attrs[i][0] == key: - self.attrs[i] = (key, value) - found = True - if not found: - self.attrs.append((key, value)) - self._getAttrMap()[key] = value - - def __delitem__(self, key): - "Deleting tag[key] deletes all 'key' attributes for the tag." - for item in self.attrs: - if item[0] == key: - self.attrs.remove(item) - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() - if self.attrMap.has_key(key): - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - fetch() method. Eg. tag('a') returns a list of all the A tags - found within this tag.""" - return apply(self.fetch, args, kwargs) - - def __getattr__(self, tag): - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: - return self.first(tag[:-3]) - elif tag.find('__') != 0: - return self.first(tag) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, - and the same contents (recursively) as the given tag. - - NOTE: right now this will return false if two tags have the - same attributes in a different order. Should this be fixed?""" - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: - return False - return True - - def __ne__(self, other): - """Returns true iff this tag is not identical to the other tag, - as defined in __eq__.""" - return not self == other - - def __repr__(self): - """Renders this tag as a string.""" - return str(self) - - def __unicode__(self): - return self.__str__(1) - - def __str__(self, needUnicode=None, showStructureIndent=None): - """Returns a string or Unicode representation of this tag and - its contents. - - NOTE: since Python's HTML parser consumes whitespace, this - method is not certain to reproduce the whitespace present in - the original string.""" - - attrs = [] - if self.attrs: - for key, val in self.attrs: - attrs.append('%s="%s"' % (key, val)) - close = '' - closeTag = '' - if self.isSelfClosing(): - close = ' /' - else: - closeTag = '</%s>' % self.name - indentIncrement = None - if showStructureIndent != None: - indentIncrement = showStructureIndent - if not self.hidden: - indentIncrement += 1 - contents = self.renderContents(indentIncrement, needUnicode=needUnicode) - if showStructureIndent: - space = '\n%s' % (' ' * showStructureIndent) - if self.hidden: - s = contents - else: - s = [] - attributeString = '' - if attrs: - attributeString = ' ' + ' '.join(attrs) - if showStructureIndent: - s.append(space) - s.append('<%s%s%s>' % (self.name, attributeString, close)) - s.append(contents) - if closeTag and showStructureIndent != None: - s.append(space) - s.append(closeTag) - s = ''.join(s) - isUnicode = type(s) == types.UnicodeType - if needUnicode and not isUnicode: - s = unicode(s) - elif isUnicode and needUnicode==False: - s = str(s) - return s - - def prettify(self, needUnicode=None): - return self.__str__(needUnicode, showStructureIndent=True) - - def renderContents(self, showStructureIndent=None, needUnicode=None): - """Renders the contents of this tag as a (possibly Unicode) - string.""" - s=[] - for c in self: - text = None - if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType: - text = unicode(c) - elif isinstance(c, Tag): - s.append(c.__str__(needUnicode, showStructureIndent)) - elif needUnicode: - text = unicode(c) - else: - text = str(c) - if text: - if showStructureIndent != None: - if text[-1] == '\n': - text = text[:-1] - s.append(text) - return ''.join(s) - - #Soup methods - - def firstText(self, text, recursive=True): - """Convenience method to retrieve the first piece of text matching the - given criteria. 'text' can be a string, a regular expression object, - a callable that takes a string and returns whether or not the - string 'matches', etc.""" - return self.first(recursive=recursive, text=text) - - def fetchText(self, text, recursive=True, limit=None): - """Convenience method to retrieve all pieces of text matching the - given criteria. 'text' can be a string, a regular expression object, - a callable that takes a string and returns whether or not the - string 'matches', etc.""" - return self.fetch(recursive=recursive, text=text, limit=limit) - - def first(self, name=None, attrs={}, recursive=True, text=None): - """Return only the first child of this - Tag matching the given criteria.""" - r = Null - l = self.fetch(name, attrs, recursive, text, 1) - if l: - r = l[0] - return r - findChild = first - - def fetch(self, name=None, attrs={}, recursive=True, text=None, - limit=None): - """Extracts a list of Tag objects that match the given - criteria. You can specify the name of the Tag and any - attributes you want the Tag to have. - - The value of a key-value pair in the 'attrs' map can be a - string, a list of strings, a regular expression object, or a - callable that takes a string and returns whether or not the - string matches for some custom definition of 'matches'. The - same is true of the tag name.""" - generator = self.recursiveChildGenerator - if not recursive: - generator = self.childGenerator - return self._fetch(name, attrs, text, limit, generator) - fetchChildren = fetch - - #Utility methods - - def isSelfClosing(self): - """Returns true iff this is a self-closing tag as defined in the HTML - standard. - - TODO: This is specific to BeautifulSoup and its subclasses, but it's - used by __str__""" - return self.name in BeautifulSoup.SELF_CLOSING_TAGS - - def append(self, tag): - """Appends the given tag to the contents of this tag.""" - self.contents.append(tag) - - #Private methods - - def _getAttrMap(self): - """Initializes a map representation of this tag's attributes, - if not already initialized.""" - if not getattr(self, 'attrMap'): - self.attrMap = {} - for (key, value) in self.attrs: - self.attrMap[key] = value - return self.attrMap - - #Generator methods - def childGenerator(self): - for i in range(0, len(self.contents)): - yield self.contents[i] - raise StopIteration - - def recursiveChildGenerator(self): - stack = [(self, 0)] - while stack: - tag, start = stack.pop() - if isinstance(tag, Tag): - for i in range(start, len(tag.contents)): - a = tag.contents[i] - yield a - if isinstance(a, Tag) and tag.contents: - if i < len(tag.contents) - 1: - stack.append((tag, i+1)) - stack.append((a, 0)) - break - raise StopIteration - - -def isList(l): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return hasattr(l, '__iter__') \ - or (type(l) in (types.ListType, types.TupleType)) - -def buildTagMap(default, *args): - """Turns a list of maps, lists, or scalars into a single map. - Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out - of lists and partial maps.""" - built = {} - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. - for k,v in portion.items(): - built[k] = v - elif isList(portion): - #It's a list. Map each item to the default. - for k in portion: - built[k] = default - else: - #It's a scalar. Map it to the default. - built[portion] = default - return built - -class BeautifulStoneSoup(Tag, SGMLParser): - - """This class contains the basic parser and fetch code. It defines - a parser that knows nothing about tag behavior except for the - following: - - You can't close a tag without closing all the tags it encloses. - That is, "<foo><bar></foo>" actually means - "<foo><bar></bar></foo>". - - [Another possible explanation is "<foo><bar /></foo>", but since - this class defines no SELF_CLOSING_TAGS, it will never use that - explanation.] - - This class is useful for parsing XML or made-up markup languages, - or when BeautifulSoup makes an assumption counter to what you were - expecting.""" - - SELF_CLOSING_TAGS = {} - NESTABLE_TAGS = {} - RESET_NESTING_TAGS = {} - QUOTE_TAGS = {} - - #As a public service we will by default silently replace MS smart quotes - #and similar characters with their HTML or ASCII equivalents. - MS_CHARS = { '\x80' : '€', - '\x81' : ' ', - '\x82' : '‚', - '\x83' : 'ƒ', - '\x84' : '„', - '\x85' : '…', - '\x86' : '†', - '\x87' : '‡', - '\x88' : '⁁', - '\x89' : '%', - '\x8A' : 'Š', - '\x8B' : '<', - '\x8C' : 'Œ', - '\x8D' : '?', - '\x8E' : 'Z', - '\x8F' : '?', - '\x90' : '?', - '\x91' : '‘', - '\x92' : '’', - '\x93' : '“', - '\x94' : '”', - '\x95' : '•', - '\x96' : '–', - '\x97' : '—', - '\x98' : '˜', - '\x99' : '™', - '\x9a' : 'š', - '\x9b' : '>', - '\x9c' : 'œ', - '\x9d' : '?', - '\x9e' : 'z', - '\x9f' : 'Ÿ',} - - PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda(x):x.group(1) + ' />'), - (re.compile('<!\s+([^<>]*)>'), - lambda(x):'<!' + x.group(1) + '>'), - (re.compile("([\x80-\x9f])"), - lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1))) - ] - - ROOT_TAG_NAME = '[document]' - - def __init__(self, text=None, avoidParserProblems=True, - initialTextIsEverything=True): - """Initialize this as the 'root tag' and feed in any text to - the parser. - - NOTE about avoidParserProblems: sgmllib will process most bad - HTML, and BeautifulSoup has tricks for dealing with some HTML - that kills sgmllib, but Beautiful Soup can nonetheless choke - or lose data if your data uses self-closing tags or - declarations incorrectly. By default, Beautiful Soup sanitizes - its input to avoid the vast majority of these problems. The - problems are relatively rare, even in bad HTML, so feel free - to pass in False to avoidParserProblems if they don't apply to - you, and you'll get better performance. The only reason I have - this turned on by default is so I don't get so many tech - support questions. - - The two most common instances of invalid HTML that will choke - sgmllib are fixed by the default parser massage techniques: - - <br/> (No space between name of closing tag and tag close) - <! --Comment--> (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - Tag.__init__(self, self.ROOT_TAG_NAME) - if avoidParserProblems \ - and not isList(avoidParserProblems): - avoidParserProblems = self.PARSER_MASSAGE - self.avoidParserProblems = avoidParserProblems - SGMLParser.__init__(self) - self.quoteStack = [] - self.hidden = 1 - self.reset() - if hasattr(text, 'read'): - #It's a file-type object. - text = text.read() - if text: - self.feed(text) - if initialTextIsEverything: - self.done() - - def __getattr__(self, methodName): - """This method routes method call requests to either the SGMLParser - superclass or the Tag superclass, depending on the method name.""" - if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ - or methodName.find('do_') == 0: - return SGMLParser.__getattr__(self, methodName) - elif methodName.find('__') != 0: - return Tag.__getattr__(self, methodName) - else: - raise AttributeError - - def feed(self, text): - if self.avoidParserProblems: - for fix, m in self.avoidParserProblems: - text = fix.sub(m, text) - SGMLParser.feed(self, text) - - def done(self): - """Called when you're done parsing, so that the unclosed tags can be - correctly processed.""" - self.endData() #NEW - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def reset(self): - SGMLParser.reset(self) - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableText): - self.currentTag.string = self.currentTag.contents[0] - - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self): - currentData = ''.join(self.currentData) - if currentData: - if not currentData.strip(): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - c = NavigableString - if type(currentData) == types.UnicodeType: - c = NavigableUnicodeString - o = c(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - self.currentData = [] - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: - <p>Foo<b>Bar<p> should pop to 'p', not 'b'. - <p>Foo<table>Bar<p> should pop to 'table', not 'p'. - <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'. - <p>Foo<b>Bar<p> should pop to 'p', not 'b'. - - <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. - <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' - <td><tr><td> *<td>* should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - #Non-nestable tags get popped to the top or to their - #last occurance. - popTo = name - break - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag - #that causes nesting to reset, pop up to but not - #including that tag. - - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - #print "Start tag %s" % name - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" % name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - if not name in self.SELF_CLOSING_TAGS and not selfClosing: - self._smartPop(name) - tag = Tag(name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or name in self.SELF_CLOSING_TAGS: - self.popTag() - if name in self.QUOTE_TAGS: - #print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - - def unknown_endtag(self, name): - if self.quoteStack and self.quoteStack[-1] != name: - #This is not a real end tag. - #print "</%s> is not real!" % name - self.handle_data('</%s>' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def handle_pi(self, text): - "Propagate processing instructions right through." - self.handle_data("<?%s>" % text) - - def handle_comment(self, text): - "Propagate comments right through." - self.handle_data("<!--%s-->" % text) - - def handle_charref(self, ref): - "Propagate char refs right through." - self.handle_data('&#%s;' % ref) - - def handle_entityref(self, ref): - "Propagate entity refs right through." - self.handle_data('&%s;' % ref) - - def handle_decl(self, data): - "Propagate DOCTYPEs and the like right through." - self.handle_data('<!%s>' % data) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as regular data.""" - j = None - if self.rawdata[i:i+9] == '<![CDATA[': - k = self.rawdata.find(']]>', i) - if k == -1: - k = len(self.rawdata) - self.handle_data(self.rawdata[i+9:k]) - j = k+3 - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a <p> tag should implicitly close the previous <p> tag. - - <p>Para1<p>Para2 - should be transformed into: - <p>Para1</p><p>Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a <blockquote> tag should _not_ implicitly close the previous - <blockquote> tag. - - Alice said: <blockquote>Bob said: <blockquote>Blah - should NOT be transformed into: - Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a <tr> tag should - implicitly close the previous <tr> tag within the same <table>, - but not close a <tr> tag in another table. - - <table><tr>Blah<tr>Blah - should be transformed into: - <table><tr>Blah</tr><tr>Blah - but, - <tr>Blah<table><tr>Blah - should NOT be transformed into - <tr>Blah<table></tr><tr>Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup before writing your own - subclass.""" - - SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - QUOTE_TAGS = {'script': None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] - - #According to the HTML standard, these block tags can contain - #another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] - - #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } - - #Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - } - - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] - - #If one of these tags is encountered, all tags up to the next tag of - #this type are popped. - RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. For instance, consider this fragment: - - <b>Foo<b>Bar</b></b> - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "<b>Foo<b>Bar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '</b></b>' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. - - It's much more common for someone to forget to close (eg.) a 'b' - tag than to actually use nested 'b' tags, and the BeautifulSoup - class handles the common case. This class handles the - not-co-common case: where you can't believe someone wrote what - they did, but it's valid HTML and BeautifulSoup screwed up by - assuming it wouldn't be. - - If this doesn't do what you need, try subclassing this class or - BeautifulSoup, and providing your own list of NESTABLE_TAGS.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag - name, and the value is the string child. An example should give - the flavor of the change: - - <foo><bar>baz</bar></foo> - => - <foo bar="baz"><bar>baz</bar></foo> - - You can then access fooTag['bar'] instead of fooTag.barTag.string. - - This is, of course, useful for scraping structures that tend to - use subelements instead of attributes, such as SOAP messages. Note - that it modifies its input, so don't print the modified version - out. - - I'm not sure how many people really want to use this class; let me - know if you do. Mainly I like the name.""" - - def popTag(self): - if len(self.tagStack) > 1: - tag = self.tagStack[-1] - parent = self.tagStack[-2] - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableText) and - not parent.attrMap.has_key(tag.name)): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -#Enterprise class names! It has come to our attention that some people -#think the names of the Beautiful Soup parser classes are too silly -#and "unprofessional" for use in enterprise screen-scraping. We feel -#your pain! For such-minded folk, the Beautiful Soup Consortium And -#All-Night Kosher Bakery recommends renaming this file to -#"RobustParser.py" (or, in cases of extreme enterprisitude, -#"RobustParserBeanInterface.class") and using the following -#enterprise-friendly class aliases: -class RobustXMLParser(BeautifulStoneSoup): - pass -class RobustHTMLParser(BeautifulSoup): - pass -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): - pass -class SimplifyingSOAPParser(BeautifulSOAP): - pass - -### - - -#By default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulStoneSoup(sys.stdin.read()) - print soup.prettify() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py deleted file mode 100644 index caeb82b..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py +++ /dev/null @@ -1,1707 +0,0 @@ -"""HTTP cookie handling for web clients. - -This module originally developed from my port of Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -Comments to John J Lee <jjl@pobox.com>. - - -Copyright 2002-2006 John J Lee <jjl@pobox.com> -Copyright 1997-1999 Gisle Aas (original libwww-perl code) -Copyright 2002-2003 Johnny Lee (original MSIE Perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import sys, re, copy, time, urllib, types, logging -try: - import threading - _threading = threading; del threading -except ImportError: - import dummy_threading - _threading = dummy_threading; del dummy_threading - -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") -DEFAULT_HTTP_PORT = "80" - -from _headersutil import split_header_words, parse_ns_headers -from _util import isstringlike -import _rfc3986 - -debug = logging.getLogger("mechanize.cookies").debug - - -def reraise_unmasked_exceptions(unmasked=()): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. - # This function re-raises some exceptions we don't want to trap. - import mechanize, warnings - if not mechanize.USE_BARE_EXCEPT: - raise - unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError) - etype = sys.exc_info()[0] - if issubclass(etype, unmasked): - raise - # swallowed an exception - import traceback, StringIO - f = StringIO.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2) - - -IPV4_RE = re.compile(r"\.\d+$") -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - return not (IPV4_RE.search(text) or - text == "" or - text[0] == "." or text[-1] == ".") - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. - A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - has_form_nb = not (i == -1 or i == 0) - return ( - has_form_nb and - B.startswith(".") and - is_HDN(B[1:]) - ) - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - return not IPV4_RE.search(text) - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$") -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = _rfc3986.urlsplit(url)[1] - if host is None: - host = request.get_header("Host", "") - # remove port, if present - return cut_port_re.sub("", host, 1) - -def request_host_lc(request): - return request_host(request).lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name).""" - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def eff_request_host_lc(request): - req_host, erhn = eff_request_host(request) - return req_host.lower(), erhn.lower() - -def effective_request_host(request): - """Return the effective request-host, as defined by RFC 2965.""" - return eff_request_host(request)[1] - -def request_path(request): - """request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - path, query, frag = _rfc3986.urlsplit(url)[2:] - path = escape_path(path) - req_path = _rfc3986.urlunsplit((None, None, path, query, frag)) - if not req_path.startswith("/"): - req_path = "/"+req_path - return req_path - -def request_port(request): - host = request.get_host() - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -def request_is_unverifiable(request): - try: - return request.is_unverifiable() - except AttributeError: - if hasattr(request, "unverifiable"): - return request.unverifiable - else: - raise - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). -HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - if isinstance(path, types.UnicodeType): - path = path.encode("utf-8") - path = urllib.quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. - - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host_lc(request) - # the origin request's request-host was stuffed into request by - # _urllib2_support.AbstractHTTPHandler - return not domain_match(req_host, reach(request.origin_req_host)) - - -class Cookie: - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - version: integer; - name: string; - value: string (may be None); - port: string; None indicates no attribute was supplied (eg. "Port", rather - than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list - string (eg. "80,8080") - port_specified: boolean; true if a value was supplied with the Port - cookie-attribute - domain: string; - domain_specified: boolean; true if Domain was explicitly set - domain_initial_dot: boolean; true if Domain as set in HTTP header by server - started with a dot (yes, this really is necessary!) - path: string; - path_specified: boolean; true if Path was explicitly set - secure: boolean; true if should only be returned over secure connection - expires: integer; seconds since epoch (RFC 2965 cookies should calculate - this value from the Max-Age attribute) - discard: boolean, true if this is a session cookie; (if no expires value, - this should be true) - comment: string; - comment_url: string; - rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not - Set-Cookie2:) header, but had a version cookie-attribute of 1 - rest: mapping of other cookie-attributes - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. - - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. - self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return self._rest.has_key(name) - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - def nonstandard_attr_keys(self): - return self._rest.keys() - - def is_expired(self, now=None): - if now is None: now = time.time() - return (self.expires is not None) and (self.expires <= now) - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "<Cookie %s for %s>" % (namevalue, limit) - - def __repr__(self): - args = [] - for name in ["version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ]: - attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy: - """Defines which cookies get accepted from and returned to server. - - May also modify cookies. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - As well as implementing set_ok and return_ok, implementations of this - interface must also supply the following attributes, indicating which - protocols should be used, and how. These can be read and set at any time, - though whether that makes complete sense from the protocol point of view is - doubtful. - - Public attributes: - - netscape: implement netscape protocol - rfc2965: implement RFC 2965 protocol - rfc2109_as_netscape: - WARNING: This argument will change or go away if is not accepted into - the Python standard library in this form! - If true, treat RFC 2109 cookies as though they were Netscape cookies. The - default is for this attribute to be None, which means treat 2109 cookies - as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is, - by default), and as Netscape cookies otherwise. - hide_cookie2: don't add Cookie2 header to requests (the presence of - this header indicates to the server that we understand RFC 2965 - cookies) - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - cookie: mechanize.Cookie object - request: object implementing the interface defined by - CookieJar.extract_cookies.__doc__ - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server. - - cookie: mechanize.Cookie object - request: object implementing the interface defined by - CookieJar.add_cookie_header.__doc__ - - """ - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - - This is here as an optimization, to remove the need for checking every - cookie with a particular domain (which may involve reading many files). - The default implementations of domain_return_ok and path_return_ok - (return True) leave all the work to return_ok. - - If domain_return_ok returns true for the cookie domain, path_return_ok - is called for the cookie path. Otherwise, path_return_ok and return_ok - are never called for that cookie domain. If path_return_ok returns - true, return_ok is called with the Cookie object itself for a full - check. Otherwise, return_ok is never called for that cookie path. - - Note that domain_return_ok is called for every *cookie* domain, not - just for the *request* domain. For example, the function might be - called with both ".acme.com" and "www.acme.com" if the request domain - is "www.acme.com". The same goes for path_return_ok. - - For argument documentation, see the docstring for return_ok. - - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. - - See the docstring for domain_return_ok. - - """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies. - - Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is - switched off by default. - - The easiest way to provide your own policy is to override this class and - call its methods in your overriden implementations before adding your own - additional checks. - - import mechanize - class MyCookiePolicy(mechanize.DefaultCookiePolicy): - def set_ok(self, cookie, request): - if not mechanize.DefaultCookiePolicy.set_ok( - self, cookie, request): - return False - if i_dont_want_to_store_this_cookie(): - return False - return True - - In addition to the features required to implement the CookiePolicy - interface, this class allows you to block and allow domains from setting - and receiving cookies. There are also some strictness switches that allow - you to tighten up the rather loose Netscape protocol rules a little bit (at - the cost of blocking some benign cookies). - - A domain blacklist and whitelist is provided (both off by default). Only - domains not in the blacklist and present in the whitelist (if the whitelist - is active) participate in cookie setting and returning. Use the - blocked_domains constructor argument, and blocked_domains and - set_blocked_domains methods (and the corresponding argument and methods for - allowed_domains). If you set a whitelist, you can turn it off again by - setting it to None. - - Domains in block or allow lists that do not start with a dot must - string-compare equal. For example, "acme.com" matches a blacklist entry of - "acme.com", but "www.acme.com" does not. Domains that do start with a dot - are matched by more specific domains too. For example, both "www.acme.com" - and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does - not). IP addresses are an exception, and must match exactly. For example, - if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is - blocked, but 193.168.1.2 is not. - - Additional Public Attributes: - - General strictness switches - - strict_domain: don't allow sites to set two-component domains with - country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc. - This is far from perfect and isn't guaranteed to work! - - RFC 2965 protocol strictness switches - - strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable - transactions (usually, an unverifiable transaction is one resulting from - a redirect or an image hosted on another site); if this is false, cookies - are NEVER blocked on the basis of verifiability - - Netscape protocol strictness switches - - strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions - even to Netscape cookies - strict_ns_domain: flags indicating how strict to be with domain-matching - rules for Netscape cookies: - DomainStrictNoDots: when setting cookies, host prefix must not contain a - dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because - www.foo contains a dot) - DomainStrictNonDomain: cookies that did not explicitly specify a Domain - cookie-attribute can only be returned to a domain that string-compares - equal to the domain that set the cookie (eg. rockets.acme.com won't - be returned cookies from acme.com that had no Domain cookie-attribute) - DomainRFC2965Match: when setting cookies, require a full RFC 2965 - domain-match - DomainLiberal and DomainStrict are the most useful combinations of the - above flags, for convenience - strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that - have names starting with '$' - strict_ns_set_path: don't allow setting cookies whose path doesn't - path-match request URI - - """ - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - # WARNING: this argument will change or go away if is not - # accepted into the Python standard library in this form! - # default, ie. treat 2109 as netscape iff not rfc2965 - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """ - Constructor arguments should be used as keyword arguments only. - - blocked_domains: sequence of domain names that we never accept cookies - from, nor return cookies to - allowed_domains: if not None, this is a sequence of the only domains - for which we accept and return cookies - - For other arguments, see CookiePolicy.__doc__ and - DefaultCookiePolicy.__doc__.. - - """ - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override set_ok, be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - debug(" - checking cookie %s", cookie) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. - debug(" Set-Cookie2 without version attribute (%s)", cookie) - return False - if cookie.version > 0 and not self.rfc2965: - debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request_is_unverifiable(request) and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_countrycode_domain(self, cookie, request): - """Return False if explicit cookie domain is not acceptable. - - Called by set_ok_domain, for convenience of overriding by - subclasses. - - """ - if cookie.domain_specified and self.strict_domain: - domain = cookie.domain - # since domain was specified, we know that: - assert domain.startswith(".") - if domain.count(".") == 2: - # domain like .foo.bar - i = domain.rfind(".") - tld = domain[i+1:] - sld = domain[1:i] - if (sld.lower() in [ - "co", "ac", - "com", "edu", "org", "net", "gov", "mil", "int", - "aero", "biz", "cat", "coop", "info", "jobs", "mobi", - "museum", "name", "pro", "travel", - ] and - len(tld) == 2): - # domain like .co.uk - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - debug(" domain %s is not in user allow-list", cookie.domain) - return False - if not self.set_ok_countrycode_domain(cookie, request): - debug(" country-code second level domain %s", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host_lc(request) - domain = cookie.domain - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - debug(" effective request-host %s (even with added " - "initial dot) does not end end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def return_ok(self, cookie, request): - """ - If you override return_ok, be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to return). - - """ - # Path has already been checked by path_return_ok, and domain blocking - # done by domain_return_ok. - debug(" - checking cookie %s", cookie) - - for n in ("version", "verifiability", "secure", "expires", "port", - "domain"): - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request_is_unverifiable(request) and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.get_type() != "https": - debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host_lc(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of domain. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - - # Munge req_host and erhn to always start with a dot, so as to err on - # the side of letting cookies through. - dotted_req_host, dotted_erhn = eff_request_host_lc(request) - if not dotted_req_host.startswith("."): - dotted_req_host = "."+dotted_req_host - if not dotted_erhn.startswith("."): - dotted_erhn = "."+dotted_erhn - if not (dotted_req_host.endswith(domain) or - dotted_erhn.endswith(domain)): - #debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = adict.keys() - keys.sort() - return map(adict.get, keys) - -class MappingIterator: - """Iterates over nested mapping, depth-first, in sorted order by key.""" - def __init__(self, mapping): - self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack - - def __iter__(self): return self - - def next(self): - # this is hairy because of lack of generators - while 1: - try: - vals, i, prev_item = self._s.pop() - except IndexError: - raise StopIteration() - if i < len(vals): - item = vals[i] - i = i + 1 - self._s.append((vals, i, prev_item)) - try: - item.items - except AttributeError: - # non-mapping - break - else: - # mapping - self._s.append((vals_sorted_by_key(item), 0, item)) - continue - return item - - -# Used as second parameter to dict.get method, to distinguish absent -# dict key from one with a None value. -class Absent: pass - -class CookieJar: - """Collection of HTTP cookies. - - You may not need to know about this class: try mechanize.urlopen(). - - The major methods are extract_cookies and add_cookie_header; these are all - you are likely to need. - - CookieJar supports the iterator protocol: - - for cookie in cookiejar: - # do something with cookie - - Methods: - - add_cookie_header(request) - extract_cookies(response, request) - get_policy() - set_policy(policy) - cookies_for_request(request) - make_cookies(response, request) - set_cookie_if_ok(cookie, request) - set_cookie(cookie) - clear_session_cookies() - clear_expired_cookies() - clear(domain=None, path=None, name=None) - - Public attributes - - policy: CookiePolicy object - - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - def __init__(self, policy=None): - """ - See CookieJar.__doc__ for argument documentation. - - """ - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies = {} - - # for __getitem__ iteration in pre-2.2 Pythons - self._prev_getitem_index = 0 - - def get_policy(self): - return self._policy - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - debug(" not returning cookie") - continue - debug(" it's a match") - cookies.append(cookie) - return cookies - - def cookies_for_request(self, request): - """Return a list of cookies to be returned to server. - - The returned list of cookie instances is sorted in the order they - should appear in the Cookie: header for return to the server. - - See add_cookie_header.__doc__ for the interface required of the - request argument. - - New in version 0.1.10 - - """ - self._policy._now = self._now = int(time.time()) - cookies = self._cookies_for_request(request) - # add cookies in order of most specific (i.e. longest) path first - def decreasing_size(a, b): return cmp(len(b.path), len(a.path)) - cookies.sort(decreasing_size) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - # this method still exists (alongside cookies_for_request) because it - # is part of an implied protected interface for subclasses of cookiejar - # XXX document that implied interface, or provide another way of - # implementing cookiejars than subclassing - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - The $Version attribute is also added when appropriate (currently only - once per request). - - >>> jar = CookieJar() - >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False, - ... "example.com", False, False, - ... "/", False, False, None, True, - ... None, None, {}) - >>> jar._cookie_attrs([ns_cookie]) - ['foo="bar"'] - >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False, - ... ".example.com", True, False, - ... "/", False, False, None, True, - ... None, None, {}) - >>> jar._cookie_attrs([rfc2965_cookie]) - ['$Version=1', 'foo=bar', '$Domain="example.com"'] - - """ - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib2.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. - - The request object (usually a urllib2.Request instance) must support - the methods get_full_url, get_host, is_unverifiable, get_type, - has_header, get_header, header_items and add_unredirected_header, as - documented by urllib2, and the port attribute (the port number). - Actually, RequestUpgradeProcessor will automatically upgrade your - Request object to one with has_header, get_header, header_items and - add_unredirected_header, if it lacks those methods, for compatibility - with pre-2.4 versions of urllib2. - - """ - debug("add_cookie_header") - cookies = self.cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header("Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if self._policy.rfc2965 and not self._policy.hide_cookie2: - for cookie in cookies: - if cookie.version != 1 and not request.has_header("Cookie2"): - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). - max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if standard.has_key(k): - # only first value is significant - continue - if k == "domain": - if v is None: - debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - if v is None: - debug(" missing value for max-age attribute") - bad_cookie = True - break - try: - v = int(v) - except ValueError: - debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age is a - # is a request to discard (old and new) cookie, though. - k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ["port", "comment", "commenturl"]): - debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host_lc(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - if self._policy.rfc2109_as_netscape is None: - rfc2109_as_netscape = not self._policy.rfc2965 - else: - rfc2109_as_netscape = self._policy.rfc2109_as_netscape - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_netscape: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def _make_cookies(self, response, request): - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.getheaders("Set-Cookie2") - ns_hdrs = headers.getheaders("Set-Cookie") - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except: - reraise_unmasked_exceptions() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except: - reraise_unmasked_exceptions() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return not lookup.has_key(key) - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object. - - See extract_cookies.__doc__ for the interface required of the - response and request arguments. - - """ - self._policy._now = self._now = int(time.time()) - return [cookie for cookie in self._make_cookies(response, request) - if cookie.expires is None or not cookie.expires <= self._now] - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so. - - cookie: mechanize.Cookie instance - request: see extract_cookies.__doc__ for the required interface - - """ - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set. - - cookie: mechanize.Cookie instance - """ - c = self._cookies - if not c.has_key(cookie.domain): c[cookie.domain] = {} - c2 = c[cookie.domain] - if not c2.has_key(cookie.path): c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - - def extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request. - - Look for allowable Set-Cookie: and Set-Cookie2: headers in the response - object passed as argument. Any of these headers that are found are - used to update the state of the object (subject to the policy.set_ok - method's approval). - - The response object (usually be the result of a call to - mechanize.urlopen, or similar) should support an info method, which - returns a mimetools.Message object (in fact, the 'mimetools.Message - object' may be any object that provides a getheaders method). - - The request object (usually a urllib2.Request instance) must support - the methods get_full_url, get_type, get_host, and is_unverifiable, as - documented by urllib2, and the port attribute (the port number). The - request is used to set default values for cookie-attributes as well as - for checking that the cookie is OK to be set. - - """ - debug("extract_cookies: %s", response.info()) - self._policy._now = self._now = int(time.time()) - - for cookie in self._make_cookies(response, request): - if cookie.expires is not None and cookie.expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(cookie.domain, cookie.path, cookie.name) - except KeyError: - pass - debug("Expiring cookie, domain='%s', path='%s', name='%s'", - cookie.domain, cookie.path, cookie.name) - elif self._policy.set_ok(cookie, request): - debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Discards all cookies held by object which had either no Max-Age or - Expires cookie-attribute or an explicit Discard cookie-attribute, or - which otherwise have ended up with a true discard attribute. For - interactive browsers, the end of a session usually corresponds to - closing the browser window. - - Note that the save method won't save session cookies anyway, unless you - ask otherwise by passing a true ignore_discard argument. - - """ - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - - def clear_expired_cookies(self): - """Discard all expired cookies. - - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the save - method won't save expired cookies anyway (unless you ask otherwise by - passing a true ignore_expires argument). - - """ - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - - def __getitem__(self, i): - if i == 0: - self._getitem_iterator = self.__iter__() - elif self._prev_getitem_index != i-1: raise IndexError( - "CookieJar.__getitem__ only supports sequential iteration") - self._prev_getitem_index = i - try: - return self._getitem_iterator.next() - except StopIteration: - raise IndexError() - - def __iter__(self): - return MappingIterator(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - -class LoadError(Exception): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file. - - Additional methods - - save(filename=None, ignore_discard=False, ignore_expires=False) - load(filename=None, ignore_discard=False, ignore_expires=False) - revert(filename=None, ignore_discard=False, ignore_expires=False) - - Additional public attributes - - filename: filename for loading and saving cookies - - Additional public readable attributes - - delayload: request that cookies are lazily loaded from disk; this is only - a hint since this only affects performance, not behaviour (unless the - cookies on disk are changing); a CookieJar object may ignore it (in fact, - only MSIECookieJar lazily loads cookies at the moment) - - """ - - def __init__(self, filename=None, delayload=False, policy=None): - """ - See FileCookieJar.__doc__ for argument documentation. - - Cookies are NOT loaded from the named file until either the load or - revert method is called. - - """ - CookieJar.__init__(self, policy) - if filename is not None and not isstringlike(filename): - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file. - - filename: name of file in which to save cookies - ignore_discard: save even cookies set to be discarded - ignore_expires: save even cookies that have expired - - The file is overwritten if it already exists, thus wiping all its - cookies. Saved cookies can be restored later using the load or revert - methods. If filename is not specified, self.filename is used; if - self.filename is None, ValueError is raised. - - """ - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file. - - Old cookies are kept unless overwritten by newly loaded ones. - - Arguments are as for .save(). - - If filename is not specified, self.filename is used; if self.filename - is None, ValueError is raised. The named file must be in the format - understood by the class, or LoadError will be raised. This format will - be identical to that written by the save method, unless the load format - is not sufficiently well understood (as is the case for MSIECookieJar). - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py deleted file mode 100644 index 596b114..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging - -from urllib2 import BaseHandler -from _response import response_seek_wrapper - - -class HTTPResponseDebugProcessor(BaseHandler): - handler_order = 900 # before redirections, after everything else - - def http_response(self, request, response): - if not hasattr(response, "seek"): - response = response_seek_wrapper(response) - info = logging.getLogger("mechanize.http_responses").info - try: - info(response.read()) - finally: - response.seek(0) - info("*****************************************************") - return response - - https_response = http_response - -class HTTPRedirectDebugProcessor(BaseHandler): - def http_request(self, request): - if hasattr(request, "redirect_dict"): - info = logging.getLogger("mechanize.http_redirects").info - info("redirecting to %s", request.get_full_url()) - return request diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py deleted file mode 100644 index db662a8..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py +++ /dev/null @@ -1,60 +0,0 @@ -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO -import mimetools -import os -import socket -import urllib -from urllib2 import BaseHandler, URLError - - -class FileHandler(BaseHandler): - # Use local file or FTP depending on form of URL - def file_open(self, req): - url = req.get_selector() - if url[:2] == '//' and url[2:3] != '/': - req.type = 'ftp' - return self.parent.open(req) - else: - return self.open_local_file(req) - - # names for the localhost - names = None - def get_names(self): - if FileHandler.names is None: - try: - FileHandler.names = (socket.gethostbyname('localhost'), - socket.gethostbyname(socket.gethostname())) - except socket.gaierror: - FileHandler.names = (socket.gethostbyname('localhost'),) - return FileHandler.names - - # not entirely sure what the rules are here - def open_local_file(self, req): - try: - import email.utils as emailutils - except ImportError: - import email.Utils as emailutils - import mimetypes - host = req.get_host() - file = req.get_selector() - localfile = urllib.url2pathname(file) - try: - stats = os.stat(localfile) - size = stats.st_size - modified = emailutils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(file)[0] - headers = mimetools.Message(StringIO( - 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - if host: - host, port = urllib.splitport(host) - if not host or \ - (not port and socket.gethostbyname(host) in self.get_names()): - return urllib.addinfourl(open(localfile, 'rb'), - headers, 'file:'+file) - except OSError, msg: - # urllib2 users shouldn't expect OSErrors coming from urlopen() - raise URLError(msg) - raise URLError('file not on local host') diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py deleted file mode 100644 index 34fe979..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py +++ /dev/null @@ -1,249 +0,0 @@ -"""Firefox 3 "cookies.sqlite" cookie persistence. - -Copyright 2008 John J Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import logging -import time -import sqlite3 - -from _clientcookie import CookieJar, Cookie, MappingIterator -from _util import isstringlike, experimental -debug = logging.getLogger("mechanize.cookies").debug - - -class Firefox3CookieJar(CookieJar): - - """Firefox 3 cookie jar. - - The cookies are stored in Firefox 3's "cookies.sqlite" format. - - Constructor arguments: - - filename: filename of cookies.sqlite (typically found at the top level - of a firefox profile directory) - autoconnect: as a convenience, connect to the SQLite cookies database at - Firefox3CookieJar construction time (default True) - policy: an object satisfying the mechanize.CookiePolicy interface - - Note that this is NOT a FileCookieJar, and there are no .load(), - .save() or .restore() methods. The database is in sync with the - cookiejar object's state after each public method call. - - Following Firefox's own behaviour, session cookies are never saved to - the database. - - The file is created, and an sqlite database written to it, if it does - not already exist. The moz_cookies database table is created if it does - not already exist. - """ - - # XXX - # handle DatabaseError exceptions - # add a FileCookieJar (explicit .save() / .revert() / .load() methods) - - def __init__(self, filename, autoconnect=True, policy=None): - experimental("Firefox3CookieJar is experimental code") - CookieJar.__init__(self, policy) - if filename is not None and not isstringlike(filename): - raise ValueError("filename must be string-like") - self.filename = filename - self._conn = None - if autoconnect: - self.connect() - - def connect(self): - self._conn = sqlite3.connect(self.filename) - self._conn.isolation_level = "DEFERRED" - self._create_table_if_necessary() - - def close(self): - self._conn.close() - - def _transaction(self, func): - try: - cur = self._conn.cursor() - try: - result = func(cur) - finally: - cur.close() - except: - self._conn.rollback() - raise - else: - self._conn.commit() - return result - - def _execute(self, query, params=()): - return self._transaction(lambda cur: cur.execute(query, params)) - - def _query(self, query, params=()): - # XXX should we bother with a transaction? - cur = self._conn.cursor() - try: - cur.execute(query, params) - for row in cur.fetchall(): - yield row - finally: - cur.close() - - def _create_table_if_necessary(self): - self._execute("""\ -CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT, - value TEXT, host TEXT, path TEXT,expiry INTEGER, - lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""") - - def _cookie_from_row(self, row): - (pk, name, value, domain, path, expires, - last_accessed, secure, http_only) = row - - version = 0 - domain = domain.encode("ascii", "ignore") - path = path.encode("ascii", "ignore") - name = name.encode("ascii", "ignore") - value = value.encode("ascii", "ignore") - secure = bool(secure) - - # last_accessed isn't a cookie attribute, so isn't added to rest - rest = {} - if http_only: - rest["HttpOnly"] = None - - if name == "": - name = value - value = None - - initial_dot = domain.startswith(".") - domain_specified = initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - return Cookie(version, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - rest) - - def clear(self, domain=None, path=None, name=None): - CookieJar.clear(self, domain, path, name) - where_parts = [] - sql_params = [] - if domain is not None: - where_parts.append("host = ?") - sql_params.append(domain) - if path is not None: - where_parts.append("path = ?") - sql_params.append(path) - if name is not None: - where_parts.append("name = ?") - sql_params.append(name) - where = " AND ".join(where_parts) - if where: - where = " WHERE " + where - def clear(cur): - cur.execute("DELETE FROM moz_cookies%s" % where, - tuple(sql_params)) - self._transaction(clear) - - def _row_from_cookie(self, cookie, cur): - expires = cookie.expires - if cookie.discard: - expires = "" - - domain = unicode(cookie.domain) - path = unicode(cookie.path) - name = unicode(cookie.name) - value = unicode(cookie.value) - secure = bool(int(cookie.secure)) - - if value is None: - value = name - name = "" - - last_accessed = int(time.time()) - http_only = cookie.has_nonstandard_attr("HttpOnly") - - query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""") - pk = query.fetchone()[0] - if pk is None: - pk = 1 - - return (pk, name, value, domain, path, expires, - last_accessed, secure, http_only) - - def set_cookie(self, cookie): - if cookie.discard: - CookieJar.set_cookie(self, cookie) - return - - def set_cookie(cur): - # XXX - # is this RFC 2965-correct? - # could this do an UPDATE instead? - row = self._row_from_cookie(cookie, cur) - name, unused, domain, path = row[1:5] - cur.execute("""\ -DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""", - (domain, path, name)) - cur.execute("""\ -INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) -""", row) - self._transaction(set_cookie) - - def __iter__(self): - # session (non-persistent) cookies - for cookie in MappingIterator(self._cookies): - yield cookie - # persistent cookies - for row in self._query("""\ -SELECT * FROM moz_cookies ORDER BY name, path, host"""): - yield self._cookie_from_row(row) - - def _cookies_for_request(self, request): - session_cookies = CookieJar._cookies_for_request(self, request) - def get_cookies(cur): - query = cur.execute("SELECT host from moz_cookies") - domains = [row[0] for row in query.fetchmany()] - cookies = [] - for domain in domains: - cookies += self._persistent_cookies_for_domain(domain, - request, cur) - return cookies - persistent_coookies = self._transaction(get_cookies) - return session_cookies + persistent_coookies - - def _persistent_cookies_for_domain(self, domain, request, cur): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - query = cur.execute("""\ -SELECT * from moz_cookies WHERE host = ? ORDER BY path""", - (domain,)) - cookies = [self._cookie_from_row(row) for row in query.fetchmany()] - last_path = None - r = [] - for cookie in cookies: - if (cookie.path != last_path and - not self._policy.path_return_ok(cookie.path, request)): - last_path = cookie.path - continue - if not self._policy.return_ok(cookie, request): - debug(" not returning cookie") - continue - debug(" it's a match") - r.append(cookie) - return r diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py deleted file mode 100644 index 26c2743..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py +++ /dev/null @@ -1,103 +0,0 @@ -import urllib2 -from cStringIO import StringIO -import _response - -# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library -class GzipConsumer: - - def __init__(self, consumer): - self.__consumer = consumer - self.__decoder = None - self.__data = "" - - def __getattr__(self, key): - return getattr(self.__consumer, key) - - def feed(self, data): - if self.__decoder is None: - # check if we have a full gzip header - data = self.__data + data - try: - i = 10 - flag = ord(data[3]) - if flag & 4: # extra - x = ord(data[i]) + 256*ord(data[i+1]) - i = i + 2 + x - if flag & 8: # filename - while ord(data[i]): - i = i + 1 - i = i + 1 - if flag & 16: # comment - while ord(data[i]): - i = i + 1 - i = i + 1 - if flag & 2: # crc - i = i + 2 - if len(data) < i: - raise IndexError("not enough data") - if data[:3] != "\x1f\x8b\x08": - raise IOError("invalid gzip data") - data = data[i:] - except IndexError: - self.__data = data - return # need more data - import zlib - self.__data = "" - self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS) - data = self.__decoder.decompress(data) - if data: - self.__consumer.feed(data) - - def close(self): - if self.__decoder: - data = self.__decoder.flush() - if data: - self.__consumer.feed(data) - self.__consumer.close() - - -# -------------------------------------------------------------------- - -# the rest of this module is John Lee's stupid code, not -# Fredrik's nice code :-) - -class stupid_gzip_consumer: - def __init__(self): self.data = [] - def feed(self, data): self.data.append(data) - -class stupid_gzip_wrapper(_response.closeable_response): - def __init__(self, response): - self._response = response - - c = stupid_gzip_consumer() - gzc = GzipConsumer(c) - gzc.feed(response.read()) - self.__data = StringIO("".join(c.data)) - - def read(self, size=-1): - return self.__data.read(size) - def readline(self, size=-1): - return self.__data.readline(size) - def readlines(self, sizehint=-1): - return self.__data.readlines(sizehint) - - def __getattr__(self, name): - # delegate unknown methods/attributes - return getattr(self._response, name) - -class HTTPGzipProcessor(urllib2.BaseHandler): - handler_order = 200 # response processing before HTTPEquivProcessor - - def http_request(self, request): - request.add_header("Accept-Encoding", "gzip") - return request - - def http_response(self, request, response): - # post-process response - enc_hdrs = response.info().getheaders("Content-encoding") - for enc_hdr in enc_hdrs: - if ("gzip" in enc_hdr) or ("compress" in enc_hdr): - return stupid_gzip_wrapper(response) - return response - - https_response = http_response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py deleted file mode 100644 index 49ba5de..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py +++ /dev/null @@ -1,232 +0,0 @@ -"""Utility functions for HTTP header value parsing and construction. - -Copyright 1997-1998, Gisle Aas -Copyright 2002-2006, John J. Lee - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import os, re -from types import StringType -from types import UnicodeType -STRING_TYPES = StringType, UnicodeType - -from _util import http2time -import _rfc3986 - -def is_html(ct_headers, url, allow_xhtml=False): - """ - ct_headers: Sequence of Content-Type headers - url: Response URL - - """ - if not ct_headers: - # guess - ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1] - html_exts = [".htm", ".html"] - if allow_xhtml: - html_exts += [".xhtml"] - return ext in html_exts - # use first header - ct = split_header_words(ct_headers)[0][0][0] - html_types = ["text/html"] - if allow_xhtml: - html_types += [ - "text/xhtml", "text/xml", - "application/xml", "application/xhtml+xml", - ] - return ct in html_types - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -token_re = re.compile(r"^\s*([^=\s;,]+)") -quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -value_re = re.compile(r"^\s*=\s*([^\s;,]*)") -escape_re = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1*<any CHAR except CTLs or separators> - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = <any TEXT except <">> - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. - - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert type(header_values) not in STRING_TYPES - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = token_re.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = quoted_value_re.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = escape_re.sub(r"\1", value) - else: - m = value_re.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -join_escape_re = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. - - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = join_escape_re.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - if k is None: # Netscape cookies may have no name - k = v - else: - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def strip_quotes(text): - if text.startswith('"'): - text = text[1:] - if text.endswith('"'): - text = text[:-1] - return text - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. - - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "version", "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - params = re.split(r";\s*", ns_header) - for ii in range(len(params)): - param = params[ii] - param = param.rstrip() - if param == "": continue - if "=" not in param: - k, v = param, None - else: - k, v = re.split(r"\s*=\s*", param, 1) - k = k.lstrip() - if ii != 0: - lc = k.lower() - if lc in known_attrs: - k = lc - if k == "version": - # This is an RFC 2109 cookie. - v = strip_quotes(v) - version_set = True - if k == "expires": - # convert expires date to seconds since epoch - v = http2time(strip_quotes(v)) # None if invalid - pairs.append((k, v)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -def _test(): - import doctest, _headersutil - return doctest.testmod(_headersutil) - -if __name__ == "__main__": - _test() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py deleted file mode 100644 index 5da0815..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py +++ /dev/null @@ -1,631 +0,0 @@ -"""HTML handling. - -Copyright 2003-2006 John J. Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -import re, copy, htmlentitydefs -import sgmllib, ClientForm - -import _request -from _headersutil import split_header_words, is_html as _is_html -import _rfc3986 - -DEFAULT_ENCODING = "latin-1" - -COMPRESS_RE = re.compile(r"\s+") - - -# the base classe is purely for backwards compatibility -class ParseError(ClientForm.ParseError): pass - - -class CachingGeneratorFunction(object): - """Caching wrapper around a no-arguments iterable.""" - - def __init__(self, iterable): - self._cache = [] - # wrap iterable to make it non-restartable (otherwise, repeated - # __call__ would give incorrect results) - self._iterator = iter(iterable) - - def __call__(self): - cache = self._cache - for item in cache: - yield item - for item in self._iterator: - cache.append(item) - yield item - - -class EncodingFinder: - def __init__(self, default_encoding): - self._default_encoding = default_encoding - def encoding(self, response): - # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV - # headers may be in the response. HTTP-EQUIV headers come last, - # so try in order from first to last. - for ct in response.info().getheaders("content-type"): - for k, v in split_header_words([ct])[0]: - if k == "charset": - return v - return self._default_encoding - -class ResponseTypeFinder: - def __init__(self, allow_xhtml): - self._allow_xhtml = allow_xhtml - def is_html(self, response, encoding): - ct_hdrs = response.info().getheaders("content-type") - url = response.geturl() - # XXX encoding - return _is_html(ct_hdrs, url, self._allow_xhtml) - - -# idea for this argument-processing trick is from Peter Otten -class Args: - def __init__(self, args_map): - self.dictionary = dict(args_map) - def __getattr__(self, key): - try: - return self.dictionary[key] - except KeyError: - return getattr(self.__class__, key) - -def form_parser_args( - select_default=False, - form_parser_class=None, - request_class=None, - backwards_compat=False, - ): - return Args(locals()) - - -class Link: - def __init__(self, base_url, url, text, tag, attrs): - assert None not in [url, tag, attrs] - self.base_url = base_url - self.absolute_url = _rfc3986.urljoin(base_url, url) - self.url, self.text, self.tag, self.attrs = url, text, tag, attrs - def __cmp__(self, other): - try: - for name in "url", "text", "tag", "attrs": - if getattr(self, name) != getattr(other, name): - return -1 - except AttributeError: - return -1 - return 0 - def __repr__(self): - return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % ( - self.base_url, self.url, self.text, self.tag, self.attrs) - - -class LinksFactory: - - def __init__(self, - link_parser_class=None, - link_class=Link, - urltags=None, - ): - import _pullparser - if link_parser_class is None: - link_parser_class = _pullparser.TolerantPullParser - self.link_parser_class = link_parser_class - self.link_class = link_class - if urltags is None: - urltags = { - "a": "href", - "area": "href", - "frame": "src", - "iframe": "src", - } - self.urltags = urltags - self._response = None - self._encoding = None - - def set_response(self, response, base_url, encoding): - self._response = response - self._encoding = encoding - self._base_url = base_url - - def links(self): - """Return an iterator that provides links of the document.""" - response = self._response - encoding = self._encoding - base_url = self._base_url - p = self.link_parser_class(response, encoding=encoding) - - try: - for token in p.tags(*(self.urltags.keys()+["base"])): - if token.type == "endtag": - continue - if token.data == "base": - base_href = dict(token.attrs).get("href") - if base_href is not None: - base_url = base_href - continue - attrs = dict(token.attrs) - tag = token.data - name = attrs.get("name") - text = None - # XXX use attr_encoding for ref'd doc if that doc does not - # provide one by other means - #attr_encoding = attrs.get("charset") - url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL? - if not url: - # Probably an <A NAME="blah"> link or <AREA NOHREF...>. - # For our purposes a link is something with a URL, so - # ignore this. - continue - - url = _rfc3986.clean_url(url, encoding) - if tag == "a": - if token.type != "startendtag": - # hmm, this'd break if end tag is missing - text = p.get_compressed_text(("endtag", tag)) - # but this doesn't work for eg. - # <a href="blah"><b>Andy</b></a> - #text = p.get_compressed_text() - - yield Link(base_url, url, text, tag, token.attrs) - except sgmllib.SGMLParseError, exc: - raise ParseError(exc) - -class FormsFactory: - - """Makes a sequence of objects satisfying ClientForm.HTMLForm interface. - - After calling .forms(), the .global_form attribute is a form object - containing all controls not a descendant of any FORM element. - - For constructor argument docs, see ClientForm.ParseResponse - argument docs. - - """ - - def __init__(self, - select_default=False, - form_parser_class=None, - request_class=None, - backwards_compat=False, - ): - import ClientForm - self.select_default = select_default - if form_parser_class is None: - form_parser_class = ClientForm.FormParser - self.form_parser_class = form_parser_class - if request_class is None: - request_class = _request.Request - self.request_class = request_class - self.backwards_compat = backwards_compat - self._response = None - self.encoding = None - self.global_form = None - - def set_response(self, response, encoding): - self._response = response - self.encoding = encoding - self.global_form = None - - def forms(self): - import ClientForm - encoding = self.encoding - try: - forms = ClientForm.ParseResponseEx( - self._response, - select_default=self.select_default, - form_parser_class=self.form_parser_class, - request_class=self.request_class, - encoding=encoding, - _urljoin=_rfc3986.urljoin, - _urlparse=_rfc3986.urlsplit, - _urlunparse=_rfc3986.urlunsplit, - ) - except ClientForm.ParseError, exc: - raise ParseError(exc) - self.global_form = forms[0] - return forms[1:] - -class TitleFactory: - def __init__(self): - self._response = self._encoding = None - - def set_response(self, response, encoding): - self._response = response - self._encoding = encoding - - def _get_title_text(self, parser): - import _pullparser - text = [] - tok = None - while 1: - try: - tok = parser.get_token() - except _pullparser.NoMoreTokensError: - break - if tok.type == "data": - text.append(str(tok)) - elif tok.type == "entityref": - t = unescape("&%s;" % tok.data, - parser._entitydefs, parser.encoding) - text.append(t) - elif tok.type == "charref": - t = unescape_charref(tok.data, parser.encoding) - text.append(t) - elif tok.type in ["starttag", "endtag", "startendtag"]: - tag_name = tok.data - if tok.type == "endtag" and tag_name == "title": - break - text.append(str(tok)) - return COMPRESS_RE.sub(" ", "".join(text).strip()) - - def title(self): - import _pullparser - p = _pullparser.TolerantPullParser( - self._response, encoding=self._encoding) - try: - try: - p.get_tag("title") - except _pullparser.NoMoreTokensError: - return None - else: - return self._get_title_text(p) - except sgmllib.SGMLParseError, exc: - raise ParseError(exc) - - -def unescape(data, entities, encoding): - if data is None or "&" not in data: - return data - - def replace_entities(match): - ent = match.group() - if ent[1] == "#": - return unescape_charref(ent[2:-1], encoding) - - repl = entities.get(ent[1:-1]) - if repl is not None: - repl = unichr(repl) - if type(repl) != type(""): - try: - repl = repl.encode(encoding) - except UnicodeError: - repl = ent - else: - repl = ent - return repl - - return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data) - -def unescape_charref(data, encoding): - name, base = data, 10 - if name.startswith("x"): - name, base= name[1:], 16 - uc = unichr(int(name, base)) - if encoding is None: - return uc - else: - try: - repl = uc.encode(encoding) - except UnicodeError: - repl = "&#%s;" % data - return repl - - -# bizarre import gymnastics for bundled BeautifulSoup -import _beautifulsoup -import ClientForm -RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes( - _beautifulsoup.BeautifulSoup, _beautifulsoup.ICantBelieveItsBeautifulSoup - ) -# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-( -sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") - -class MechanizeBs(_beautifulsoup.BeautifulSoup): - _entitydefs = htmlentitydefs.name2codepoint - # don't want the magic Microsoft-char workaround - PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda(x):x.group(1) + ' />'), - (re.compile('<!\s+([^<>]*)>'), - lambda(x):'<!' + x.group(1) + '>') - ] - - def __init__(self, encoding, text=None, avoidParserProblems=True, - initialTextIsEverything=True): - self._encoding = encoding - _beautifulsoup.BeautifulSoup.__init__( - self, text, avoidParserProblems, initialTextIsEverything) - - def handle_charref(self, ref): - t = unescape("&#%s;"%ref, self._entitydefs, self._encoding) - self.handle_data(t) - def handle_entityref(self, ref): - t = unescape("&%s;"%ref, self._entitydefs, self._encoding) - self.handle_data(t) - def unescape_attrs(self, attrs): - escaped_attrs = [] - for key, val in attrs: - val = unescape(val, self._entitydefs, self._encoding) - escaped_attrs.append((key, val)) - return escaped_attrs - -class RobustLinksFactory: - - compress_re = COMPRESS_RE - - def __init__(self, - link_parser_class=None, - link_class=Link, - urltags=None, - ): - if link_parser_class is None: - link_parser_class = MechanizeBs - self.link_parser_class = link_parser_class - self.link_class = link_class - if urltags is None: - urltags = { - "a": "href", - "area": "href", - "frame": "src", - "iframe": "src", - } - self.urltags = urltags - self._bs = None - self._encoding = None - self._base_url = None - - def set_soup(self, soup, base_url, encoding): - self._bs = soup - self._base_url = base_url - self._encoding = encoding - - def links(self): - import _beautifulsoup - bs = self._bs - base_url = self._base_url - encoding = self._encoding - gen = bs.recursiveChildGenerator() - for ch in bs.recursiveChildGenerator(): - if (isinstance(ch, _beautifulsoup.Tag) and - ch.name in self.urltags.keys()+["base"]): - link = ch - attrs = bs.unescape_attrs(link.attrs) - attrs_dict = dict(attrs) - if link.name == "base": - base_href = attrs_dict.get("href") - if base_href is not None: - base_url = base_href - continue - url_attr = self.urltags[link.name] - url = attrs_dict.get(url_attr) - if not url: - continue - url = _rfc3986.clean_url(url, encoding) - text = link.fetchText(lambda t: True) - if not text: - # follow _pullparser's weird behaviour rigidly - if link.name == "a": - text = "" - else: - text = None - else: - text = self.compress_re.sub(" ", " ".join(text).strip()) - yield Link(base_url, url, text, link.name, attrs) - - -class RobustFormsFactory(FormsFactory): - def __init__(self, *args, **kwds): - args = form_parser_args(*args, **kwds) - if args.form_parser_class is None: - args.form_parser_class = RobustFormParser - FormsFactory.__init__(self, **args.dictionary) - - def set_response(self, response, encoding): - self._response = response - self.encoding = encoding - - -class RobustTitleFactory: - def __init__(self): - self._bs = self._encoding = None - - def set_soup(self, soup, encoding): - self._bs = soup - self._encoding = encoding - - def title(self): - import _beautifulsoup - title = self._bs.first("title") - if title == _beautifulsoup.Null: - return None - else: - inner_html = "".join([str(node) for node in title.contents]) - return COMPRESS_RE.sub(" ", inner_html.strip()) - - -class Factory: - """Factory for forms, links, etc. - - This interface may expand in future. - - Public methods: - - set_request_class(request_class) - set_response(response) - forms() - links() - - Public attributes: - - Note that accessing these attributes may raise ParseError. - - encoding: string specifying the encoding of response if it contains a text - document (this value is left unspecified for documents that do not have - an encoding, e.g. an image file) - is_html: true if response contains an HTML document (XHTML may be - regarded as HTML too) - title: page title, or None if no title or not HTML - global_form: form object containing all controls that are not descendants - of any FORM element, or None if the forms_factory does not support - supplying a global form - - """ - - LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"] - - def __init__(self, forms_factory, links_factory, title_factory, - encoding_finder=EncodingFinder(DEFAULT_ENCODING), - response_type_finder=ResponseTypeFinder(allow_xhtml=False), - ): - """ - - Pass keyword arguments only. - - default_encoding: character encoding to use if encoding cannot be - determined (or guessed) from the response. You should turn on - HTTP-EQUIV handling if you want the best chance of getting this right - without resorting to this default. The default value of this - parameter (currently latin-1) may change in future. - - """ - self._forms_factory = forms_factory - self._links_factory = links_factory - self._title_factory = title_factory - self._encoding_finder = encoding_finder - self._response_type_finder = response_type_finder - - self.set_response(None) - - def set_request_class(self, request_class): - """Set urllib2.Request class. - - ClientForm.HTMLForm instances returned by .forms() will return - instances of this class when .click()ed. - - """ - self._forms_factory.request_class = request_class - - def set_response(self, response): - """Set response. - - The response must either be None or implement the same interface as - objects returned by urllib2.urlopen(). - - """ - self._response = response - self._forms_genf = self._links_genf = None - self._get_title = None - for name in self.LAZY_ATTRS: - try: - delattr(self, name) - except AttributeError: - pass - - def __getattr__(self, name): - if name not in self.LAZY_ATTRS: - return getattr(self.__class__, name) - - if name == "encoding": - self.encoding = self._encoding_finder.encoding( - copy.copy(self._response)) - return self.encoding - elif name == "is_html": - self.is_html = self._response_type_finder.is_html( - copy.copy(self._response), self.encoding) - return self.is_html - elif name == "title": - if self.is_html: - self.title = self._title_factory.title() - else: - self.title = None - return self.title - elif name == "global_form": - self.forms() - return self.global_form - - def forms(self): - """Return iterable over ClientForm.HTMLForm-like objects. - - Raises mechanize.ParseError on failure. - """ - # this implementation sets .global_form as a side-effect, for benefit - # of __getattr__ impl - if self._forms_genf is None: - try: - self._forms_genf = CachingGeneratorFunction( - self._forms_factory.forms()) - except: # XXXX define exception! - self.set_response(self._response) - raise - self.global_form = getattr( - self._forms_factory, "global_form", None) - return self._forms_genf() - - def links(self): - """Return iterable over mechanize.Link-like objects. - - Raises mechanize.ParseError on failure. - """ - if self._links_genf is None: - try: - self._links_genf = CachingGeneratorFunction( - self._links_factory.links()) - except: # XXXX define exception! - self.set_response(self._response) - raise - return self._links_genf() - -class DefaultFactory(Factory): - """Based on sgmllib.""" - def __init__(self, i_want_broken_xhtml_support=False): - Factory.__init__( - self, - forms_factory=FormsFactory(), - links_factory=LinksFactory(), - title_factory=TitleFactory(), - response_type_finder=ResponseTypeFinder( - allow_xhtml=i_want_broken_xhtml_support), - ) - - def set_response(self, response): - Factory.set_response(self, response) - if response is not None: - self._forms_factory.set_response( - copy.copy(response), self.encoding) - self._links_factory.set_response( - copy.copy(response), response.geturl(), self.encoding) - self._title_factory.set_response( - copy.copy(response), self.encoding) - -class RobustFactory(Factory): - """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is - DefaultFactory. - - """ - def __init__(self, i_want_broken_xhtml_support=False, - soup_class=None): - Factory.__init__( - self, - forms_factory=RobustFormsFactory(), - links_factory=RobustLinksFactory(), - title_factory=RobustTitleFactory(), - response_type_finder=ResponseTypeFinder( - allow_xhtml=i_want_broken_xhtml_support), - ) - if soup_class is None: - soup_class = MechanizeBs - self._soup_class = soup_class - - def set_response(self, response): - Factory.set_response(self, response) - if response is not None: - data = response.read() - soup = self._soup_class(self.encoding, data) - self._forms_factory.set_response( - copy.copy(response), self.encoding) - self._links_factory.set_soup( - soup, response.geturl(), self.encoding) - self._title_factory.set_soup(soup, self.encoding) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py deleted file mode 100644 index 1b80e2b..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py +++ /dev/null @@ -1,758 +0,0 @@ -"""HTTP related handlers. - -Note that some other HTTP handlers live in more specific modules: _auth.py, -_gzip.py, etc. - - -Copyright 2002-2006 John J Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import time, htmlentitydefs, logging, socket, \ - urllib2, urllib, httplib, sgmllib -from urllib2 import URLError, HTTPError, BaseHandler -from cStringIO import StringIO - -from _clientcookie import CookieJar -from _headersutil import is_html -from _html import unescape, unescape_charref -from _request import Request -from _response import closeable_response, response_seek_wrapper -import _rfc3986 -import _sockettimeout - -debug = logging.getLogger("mechanize").debug -debug_robots = logging.getLogger("mechanize.robots").debug - -# monkeypatch urllib2.HTTPError to show URL -## def urllib2_str(self): -## return 'HTTP Error %s: %s (%s)' % ( -## self.code, self.msg, self.geturl()) -## urllib2.HTTPError.__str__ = urllib2_str - - -CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes -DEFAULT_ENCODING = 'latin-1' - - -try: - socket._fileobject("fake socket", close=True) -except TypeError: - # python <= 2.4 - create_readline_wrapper = socket._fileobject -else: - def create_readline_wrapper(fh): - return socket._fileobject(fh, close=True) - - -# This adds "refresh" to the list of redirectables and provides a redirection -# algorithm that doesn't go into a loop in the presence of cookies -# (Python 2.4 has this new algorithm, 2.3 doesn't). -class HTTPRedirectHandler(BaseHandler): - # maximum number of redirections to any single URL - # this is needed because of the state that cookies introduce - max_repeats = 4 - # maximum total number of redirections (regardless of URL) before - # assuming we're in a loop - max_redirections = 10 - - # Implementation notes: - - # To avoid the server sending us into an infinite loop, the request - # object needs to track what URLs we have already seen. Do this by - # adding a handler-specific attribute to the Request object. The value - # of the dict is used to count the number of times the same URL has - # been visited. This is needed because visiting the same URL twice - # does not necessarily imply a loop, thanks to state introduced by - # cookies. - - # Always unhandled redirection codes: - # 300 Multiple Choices: should not handle this here. - # 304 Not Modified: no need to handle here: only of interest to caches - # that do conditional GETs - # 305 Use Proxy: probably not worth dealing with here - # 306 Unused: what was this for in the previous versions of protocol?? - - def redirect_request(self, newurl, req, fp, code, msg, headers): - """Return a Request or None in response to a redirect. - - This is called by the http_error_30x methods when a redirection - response is received. If a redirection should take place, return a - new Request to allow http_error_30x to perform the redirect; - otherwise, return None to indicate that an HTTPError should be - raised. - - """ - if code in (301, 302, 303, "refresh") or \ - (code == 307 and not req.has_data()): - # Strictly (according to RFC 2616), 301 or 302 in response to - # a POST MUST NOT cause a redirection without confirmation - # from the user (of urllib2, in this case). In practice, - # essentially all clients do redirect in this case, so we do - # the same. - # XXX really refresh redirections should be visiting; tricky to - # fix, so this will wait until post-stable release - new = Request(newurl, - headers=req.headers, - origin_req_host=req.get_origin_req_host(), - unverifiable=True, - visit=False, - ) - new._origin_req = getattr(req, "_origin_req", req) - return new - else: - raise HTTPError(req.get_full_url(), code, msg, headers, fp) - - def http_error_302(self, req, fp, code, msg, headers): - # Some servers (incorrectly) return multiple Location headers - # (so probably same goes for URI). Use first header. - if headers.has_key('location'): - newurl = headers.getheaders('location')[0] - elif headers.has_key('uri'): - newurl = headers.getheaders('uri')[0] - else: - return - newurl = _rfc3986.clean_url(newurl, "latin-1") - newurl = _rfc3986.urljoin(req.get_full_url(), newurl) - - # XXX Probably want to forget about the state of the current - # request, although that might interact poorly with other - # handlers that also use handler-specific request attributes - new = self.redirect_request(newurl, req, fp, code, msg, headers) - if new is None: - return - - # loop detection - # .redirect_dict has a key url if url was previously visited. - if hasattr(req, 'redirect_dict'): - visited = new.redirect_dict = req.redirect_dict - if (visited.get(newurl, 0) >= self.max_repeats or - len(visited) >= self.max_redirections): - raise HTTPError(req.get_full_url(), code, - self.inf_msg + msg, headers, fp) - else: - visited = new.redirect_dict = req.redirect_dict = {} - visited[newurl] = visited.get(newurl, 0) + 1 - - # Don't close the fp until we are sure that we won't use it - # with HTTPError. - fp.read() - fp.close() - - return self.parent.open(new) - - http_error_301 = http_error_303 = http_error_307 = http_error_302 - http_error_refresh = http_error_302 - - inf_msg = "The HTTP server returned a redirect error that would " \ - "lead to an infinite loop.\n" \ - "The last 30x error message was:\n" - - -# XXX would self.reset() work, instead of raising this exception? -class EndOfHeadError(Exception): pass -class AbstractHeadParser: - # only these elements are allowed in or before HEAD of document - head_elems = ("html", "head", - "title", "base", - "script", "style", "meta", "link", "object") - _entitydefs = htmlentitydefs.name2codepoint - _encoding = DEFAULT_ENCODING - - def __init__(self): - self.http_equiv = [] - - def start_meta(self, attrs): - http_equiv = content = None - for key, value in attrs: - if key == "http-equiv": - http_equiv = self.unescape_attr_if_required(value) - elif key == "content": - content = self.unescape_attr_if_required(value) - if http_equiv is not None and content is not None: - self.http_equiv.append((http_equiv, content)) - - def end_head(self): - raise EndOfHeadError() - - def handle_entityref(self, name): - #debug("%s", name) - self.handle_data(unescape( - '&%s;' % name, self._entitydefs, self._encoding)) - - def handle_charref(self, name): - #debug("%s", name) - self.handle_data(unescape_charref(name, self._encoding)) - - def unescape_attr(self, name): - #debug("%s", name) - return unescape(name, self._entitydefs, self._encoding) - - def unescape_attrs(self, attrs): - #debug("%s", attrs) - escaped_attrs = {} - for key, val in attrs.items(): - escaped_attrs[key] = self.unescape_attr(val) - return escaped_attrs - - def unknown_entityref(self, ref): - self.handle_data("&%s;" % ref) - - def unknown_charref(self, ref): - self.handle_data("&#%s;" % ref) - - -try: - import HTMLParser -except ImportError: - pass -else: - class XHTMLCompatibleHeadParser(AbstractHeadParser, - HTMLParser.HTMLParser): - def __init__(self): - HTMLParser.HTMLParser.__init__(self) - AbstractHeadParser.__init__(self) - - def handle_starttag(self, tag, attrs): - if tag not in self.head_elems: - raise EndOfHeadError() - try: - method = getattr(self, 'start_' + tag) - except AttributeError: - try: - method = getattr(self, 'do_' + tag) - except AttributeError: - pass # unknown tag - else: - method(attrs) - else: - method(attrs) - - def handle_endtag(self, tag): - if tag not in self.head_elems: - raise EndOfHeadError() - try: - method = getattr(self, 'end_' + tag) - except AttributeError: - pass # unknown tag - else: - method() - - def unescape(self, name): - # Use the entitydefs passed into constructor, not - # HTMLParser.HTMLParser's entitydefs. - return self.unescape_attr(name) - - def unescape_attr_if_required(self, name): - return name # HTMLParser.HTMLParser already did it - -class HeadParser(AbstractHeadParser, sgmllib.SGMLParser): - - def _not_called(self): - assert False - - def __init__(self): - sgmllib.SGMLParser.__init__(self) - AbstractHeadParser.__init__(self) - - def handle_starttag(self, tag, method, attrs): - if tag not in self.head_elems: - raise EndOfHeadError() - if tag == "meta": - method(attrs) - - def unknown_starttag(self, tag, attrs): - self.handle_starttag(tag, self._not_called, attrs) - - def handle_endtag(self, tag, method): - if tag in self.head_elems: - method() - else: - raise EndOfHeadError() - - def unescape_attr_if_required(self, name): - return self.unescape_attr(name) - -def parse_head(fileobj, parser): - """Return a list of key, value pairs.""" - while 1: - data = fileobj.read(CHUNK) - try: - parser.feed(data) - except EndOfHeadError: - break - if len(data) != CHUNK: - # this should only happen if there is no HTML body, or if - # CHUNK is big - break - return parser.http_equiv - -class HTTPEquivProcessor(BaseHandler): - """Append META HTTP-EQUIV headers to regular HTTP headers.""" - - handler_order = 300 # before handlers that look at HTTP headers - - def __init__(self, head_parser_class=HeadParser, - i_want_broken_xhtml_support=False, - ): - self.head_parser_class = head_parser_class - self._allow_xhtml = i_want_broken_xhtml_support - - def http_response(self, request, response): - if not hasattr(response, "seek"): - response = response_seek_wrapper(response) - http_message = response.info() - url = response.geturl() - ct_hdrs = http_message.getheaders("content-type") - if is_html(ct_hdrs, url, self._allow_xhtml): - try: - try: - html_headers = parse_head(response, - self.head_parser_class()) - finally: - response.seek(0) - except (HTMLParser.HTMLParseError, - sgmllib.SGMLParseError): - pass - else: - for hdr, val in html_headers: - # add a header - http_message.dict[hdr.lower()] = val - text = hdr + ": " + val - for line in text.split("\n"): - http_message.headers.append(line + "\n") - return response - - https_response = http_response - -class HTTPCookieProcessor(BaseHandler): - """Handle HTTP cookies. - - Public attributes: - - cookiejar: CookieJar instance - - """ - def __init__(self, cookiejar=None): - if cookiejar is None: - cookiejar = CookieJar() - self.cookiejar = cookiejar - - def http_request(self, request): - self.cookiejar.add_cookie_header(request) - return request - - def http_response(self, request, response): - self.cookiejar.extract_cookies(response, request) - return response - - https_request = http_request - https_response = http_response - -try: - import robotparser -except ImportError: - pass -else: - class MechanizeRobotFileParser(robotparser.RobotFileParser): - - def __init__(self, url='', opener=None): - robotparser.RobotFileParser.__init__(self, url) - self._opener = opener - self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT - - def set_opener(self, opener=None): - import _opener - if opener is None: - opener = _opener.OpenerDirector() - self._opener = opener - - def set_timeout(self, timeout): - self._timeout = timeout - - def read(self): - """Reads the robots.txt URL and feeds it to the parser.""" - if self._opener is None: - self.set_opener() - req = Request(self.url, unverifiable=True, visit=False, - timeout=self._timeout) - try: - f = self._opener.open(req) - except HTTPError, f: - pass - except (IOError, socket.error, OSError), exc: - debug_robots("ignoring error opening %r: %s" % - (self.url, exc)) - return - lines = [] - line = f.readline() - while line: - lines.append(line.strip()) - line = f.readline() - status = f.code - if status == 401 or status == 403: - self.disallow_all = True - debug_robots("disallow all") - elif status >= 400: - self.allow_all = True - debug_robots("allow all") - elif status == 200 and lines: - debug_robots("parse lines") - self.parse(lines) - - class RobotExclusionError(urllib2.HTTPError): - def __init__(self, request, *args): - apply(urllib2.HTTPError.__init__, (self,)+args) - self.request = request - - class HTTPRobotRulesProcessor(BaseHandler): - # before redirections, after everything else - handler_order = 800 - - try: - from httplib import HTTPMessage - except: - from mimetools import Message - http_response_class = Message - else: - http_response_class = HTTPMessage - - def __init__(self, rfp_class=MechanizeRobotFileParser): - self.rfp_class = rfp_class - self.rfp = None - self._host = None - - def http_request(self, request): - scheme = request.get_type() - if scheme not in ["http", "https"]: - # robots exclusion only applies to HTTP - return request - - if request.get_selector() == "/robots.txt": - # /robots.txt is always OK to fetch - return request - - host = request.get_host() - - # robots.txt requests don't need to be allowed by robots.txt :-) - origin_req = getattr(request, "_origin_req", None) - if (origin_req is not None and - origin_req.get_selector() == "/robots.txt" and - origin_req.get_host() == host - ): - return request - - if host != self._host: - self.rfp = self.rfp_class() - try: - self.rfp.set_opener(self.parent) - except AttributeError: - debug("%r instance does not support set_opener" % - self.rfp.__class__) - self.rfp.set_url(scheme+"://"+host+"/robots.txt") - self.rfp.set_timeout(request.timeout) - self.rfp.read() - self._host = host - - ua = request.get_header("User-agent", "") - if self.rfp.can_fetch(ua, request.get_full_url()): - return request - else: - # XXX This should really have raised URLError. Too late now... - msg = "request disallowed by robots.txt" - raise RobotExclusionError( - request, - request.get_full_url(), - 403, msg, - self.http_response_class(StringIO()), StringIO(msg)) - - https_request = http_request - -class HTTPRefererProcessor(BaseHandler): - """Add Referer header to requests. - - This only makes sense if you use each RefererProcessor for a single - chain of requests only (so, for example, if you use a single - HTTPRefererProcessor to fetch a series of URLs extracted from a single - page, this will break). - - There's a proper implementation of this in mechanize.Browser. - - """ - def __init__(self): - self.referer = None - - def http_request(self, request): - if ((self.referer is not None) and - not request.has_header("Referer")): - request.add_unredirected_header("Referer", self.referer) - return request - - def http_response(self, request, response): - self.referer = response.geturl() - return response - - https_request = http_request - https_response = http_response - - -def clean_refresh_url(url): - # e.g. Firefox 1.5 does (something like) this - if ((url.startswith('"') and url.endswith('"')) or - (url.startswith("'") and url.endswith("'"))): - url = url[1:-1] - return _rfc3986.clean_url(url, "latin-1") # XXX encoding - -def parse_refresh_header(refresh): - """ - >>> parse_refresh_header("1; url=http://example.com/") - (1.0, 'http://example.com/') - >>> parse_refresh_header("1; url='http://example.com/'") - (1.0, 'http://example.com/') - >>> parse_refresh_header("1") - (1.0, None) - >>> parse_refresh_header("blah") - Traceback (most recent call last): - ValueError: invalid literal for float(): blah - - """ - - ii = refresh.find(";") - if ii != -1: - pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] - jj = newurl_spec.find("=") - key = None - if jj != -1: - key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] - newurl = clean_refresh_url(newurl) - if key is None or key.strip().lower() != "url": - raise ValueError() - else: - pause, newurl = float(refresh), None - return pause, newurl - -class HTTPRefreshProcessor(BaseHandler): - """Perform HTTP Refresh redirections. - - Note that if a non-200 HTTP code has occurred (for example, a 30x - redirect), this processor will do nothing. - - By default, only zero-time Refresh headers are redirected. Use the - max_time attribute / constructor argument to allow Refresh with longer - pauses. Use the honor_time attribute / constructor argument to control - whether the requested pause is honoured (with a time.sleep()) or - skipped in favour of immediate redirection. - - Public attributes: - - max_time: see above - honor_time: see above - - """ - handler_order = 1000 - - def __init__(self, max_time=0, honor_time=True): - self.max_time = max_time - self.honor_time = honor_time - self._sleep = time.sleep - - def http_response(self, request, response): - code, msg, hdrs = response.code, response.msg, response.info() - - if code == 200 and hdrs.has_key("refresh"): - refresh = hdrs.getheaders("refresh")[0] - try: - pause, newurl = parse_refresh_header(refresh) - except ValueError: - debug("bad Refresh header: %r" % refresh) - return response - - if newurl is None: - newurl = response.geturl() - if (self.max_time is None) or (pause <= self.max_time): - if pause > 1E-3 and self.honor_time: - self._sleep(pause) - hdrs["location"] = newurl - # hardcoded http is NOT a bug - response = self.parent.error( - "http", request, response, - "refresh", msg, hdrs) - else: - debug("Refresh header ignored: %r" % refresh) - - return response - - https_response = http_response - -class HTTPErrorProcessor(BaseHandler): - """Process HTTP error responses. - - The purpose of this handler is to to allow other response processors a - look-in by removing the call to parent.error() from - AbstractHTTPHandler. - - For non-200 error codes, this just passes the job on to the - Handler.<proto>_error_<code> methods, via the OpenerDirector.error - method. Eventually, urllib2.HTTPDefaultErrorHandler will raise an - HTTPError if no other handler handles the error. - - """ - handler_order = 1000 # after all other processors - - def http_response(self, request, response): - code, msg, hdrs = response.code, response.msg, response.info() - - if code != 200: - # hardcoded http is NOT a bug - response = self.parent.error( - "http", request, response, code, msg, hdrs) - - return response - - https_response = http_response - - -class HTTPDefaultErrorHandler(BaseHandler): - def http_error_default(self, req, fp, code, msg, hdrs): - # why these error methods took the code, msg, headers args in the first - # place rather than a response object, I don't know, but to avoid - # multiple wrapping, we're discarding them - - if isinstance(fp, urllib2.HTTPError): - response = fp - else: - response = urllib2.HTTPError( - req.get_full_url(), code, msg, hdrs, fp) - assert code == response.code - assert msg == response.msg - assert hdrs == response.hdrs - raise response - - -class AbstractHTTPHandler(BaseHandler): - - def __init__(self, debuglevel=0): - self._debuglevel = debuglevel - - def set_http_debuglevel(self, level): - self._debuglevel = level - - def do_request_(self, request): - host = request.get_host() - if not host: - raise URLError('no host given') - - if request.has_data(): # POST - data = request.get_data() - if not request.has_header('Content-type'): - request.add_unredirected_header( - 'Content-type', - 'application/x-www-form-urlencoded') - if not request.has_header('Content-length'): - request.add_unredirected_header( - 'Content-length', '%d' % len(data)) - - scheme, sel = urllib.splittype(request.get_selector()) - sel_host, sel_path = urllib.splithost(sel) - if not request.has_header('Host'): - request.add_unredirected_header('Host', sel_host or host) - for name, value in self.parent.addheaders: - name = name.capitalize() - if not request.has_header(name): - request.add_unredirected_header(name, value) - - return request - - def do_open(self, http_class, req): - """Return an addinfourl object for the request, using http_class. - - http_class must implement the HTTPConnection API from httplib. - The addinfourl return value is a file-like object. It also - has methods and attributes including: - - info(): return a mimetools.Message object for the headers - - geturl(): return the original request URL - - code: HTTP status code - """ - host_port = req.get_host() - if not host_port: - raise URLError('no host given') - - try: - h = http_class(host_port, timeout=req.timeout) - except TypeError: - # Python < 2.6, no per-connection timeout support - h = http_class(host_port) - h.set_debuglevel(self._debuglevel) - - headers = dict(req.headers) - headers.update(req.unredirected_hdrs) - # We want to make an HTTP/1.1 request, but the addinfourl - # class isn't prepared to deal with a persistent connection. - # It will try to read all remaining data from the socket, - # which will block while the server waits for the next request. - # So make sure the connection gets closed after the (only) - # request. - headers["Connection"] = "close" - headers = dict( - [(name.title(), val) for name, val in headers.items()]) - try: - h.request(req.get_method(), req.get_selector(), req.data, headers) - r = h.getresponse() - except socket.error, err: # XXX what error? - raise URLError(err) - - # Pick apart the HTTPResponse object to get the addinfourl - # object initialized properly. - - # Wrap the HTTPResponse object in socket's file object adapter - # for Windows. That adapter calls recv(), so delegate recv() - # to read(). This weird wrapping allows the returned object to - # have readline() and readlines() methods. - - # XXX It might be better to extract the read buffering code - # out of socket._fileobject() and into a base class. - - r.recv = r.read - fp = create_readline_wrapper(r) - - resp = closeable_response(fp, r.msg, req.get_full_url(), - r.status, r.reason) - return resp - - -class HTTPHandler(AbstractHTTPHandler): - def http_open(self, req): - return self.do_open(httplib.HTTPConnection, req) - - http_request = AbstractHTTPHandler.do_request_ - -if hasattr(httplib, 'HTTPS'): - - class HTTPSConnectionFactory: - def __init__(self, key_file, cert_file): - self._key_file = key_file - self._cert_file = cert_file - def __call__(self, hostport): - return httplib.HTTPSConnection( - hostport, - key_file=self._key_file, cert_file=self._cert_file) - - class HTTPSHandler(AbstractHTTPHandler): - def __init__(self, client_cert_manager=None): - AbstractHTTPHandler.__init__(self) - self.client_cert_manager = client_cert_manager - - def https_open(self, req): - if self.client_cert_manager is not None: - key_file, cert_file = self.client_cert_manager.find_key_cert( - req.get_full_url()) - conn_factory = HTTPSConnectionFactory(key_file, cert_file) - else: - conn_factory = httplib.HTTPSConnection - return self.do_open(conn_factory, req) - - https_request = AbstractHTTPHandler.do_request_ diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py deleted file mode 100644 index f8d49cf..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py +++ /dev/null @@ -1,185 +0,0 @@ -"""Load / save to libwww-perl (LWP) format files. - -Actually, the format is slightly extended from that used by LWP's -(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information -not recorded by LWP. - -It uses the version string "2.0", though really there isn't an LWP Cookies -2.0 format. This indicates that there is extra information in here -(domain_dot and port_spec) while still being compatible with libwww-perl, -I hope. - -Copyright 2002-2006 John J Lee <jjl@pobox.com> -Copyright 1997-1999 Gisle Aas (original libwww-perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import time, re, logging - -from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ - MISSING_FILENAME_TEXT, LoadError -from _headersutil import join_header_words, split_header_words -from _util import iso2time, time2isoz - -debug = logging.getLogger("mechanize").debug - - -def lwp_cookie_str(cookie): - """Return string representation of Cookie in an the LWP cookie file format. - - Actually, the format is extended a bit -- see module docstring. - - """ - h = [(cookie.name, cookie.value), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - if cookie.path_specified: h.append(("path_spec", None)) - if cookie.port_specified: h.append(("port_spec", None)) - if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - if cookie.rfc2109: h.append(("rfc2109", None)) - - keys = cookie.nonstandard_attr_keys() - keys.sort() - for k in keys: - h.append((k, str(cookie.get_nonstandard_attr(k)))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - -class LWPCookieJar(FileCookieJar): - """ - The LWPCookieJar saves a sequence of"Set-Cookie3" lines. - "Set-Cookie3" is the format used by the libwww-perl libary, not known - to be compatible with any browser, but which is easy to read and - doesn't lose information about RFC 2965 cookies. - - Additional methods - - as_lwp_str(ignore_discard=True, ignore_expired=True) - - """ - - magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" - - def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\n"-separated "Set-Cookie3" headers. - - ignore_discard and ignore_expires: see docstring for FileCookieJar.save - - """ - now = time.time() - r = [] - for cookie in self: - if not ignore_discard and cookie.discard: - debug(" Not saving %s: marked for discard", cookie.name) - continue - if not ignore_expires and cookie.is_expired(now): - debug(" Not saving %s: expired", cookie.name) - continue - r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) - return "\n".join(r+[""]) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - debug("Saving LWP cookies file") - # There really isn't an LWP Cookies 2.0 format, but this indicates - # that there is extra information in here (domain_dot and - # port_spec) while still being compatible with libwww-perl, I hope. - f.write("#LWP-Cookies-2.0\n") - f.write(self.as_lwp_str(ignore_discard, ignore_expires)) - finally: - f.close() - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - magic = f.readline() - if not re.search(self.magic_re, magic): - msg = "%s does not seem to contain cookies" % filename - raise LoadError(msg) - - now = time.time() - - header = "Set-Cookie3:" - boolean_attrs = ("port_spec", "path_spec", "domain_dot", - "secure", "discard", "rfc2109") - value_attrs = ("version", - "port", "path", "domain", - "expires", - "comment", "commenturl") - - try: - while 1: - line = f.readline() - if line == "": break - if not line.startswith(header): - continue - line = line[len(header):].strip() - - for data in split_header_words([line]): - name, value = data[0] - standard = {} - rest = {} - for k in boolean_attrs: - standard[k] = False - for k, v in data[1:]: - if k is not None: - lc = k.lower() - else: - lc = None - # don't lose case distinction for unknown fields - if (lc in value_attrs) or (lc in boolean_attrs): - k = lc - if k in boolean_attrs: - if v is None: v = True - standard[k] = v - elif k in value_attrs: - standard[k] = v - else: - rest[k] = v - - h = standard.get - expires = h("expires") - discard = h("discard") - if expires is not None: - expires = iso2time(expires) - if expires is None: - discard = True - domain = h("domain") - domain_specified = domain.startswith(".") - c = Cookie(h("version"), name, value, - h("port"), h("port_spec"), - domain, domain_specified, h("domain_dot"), - h("path"), h("path_spec"), - h("secure"), - expires, - discard, - h("comment"), - h("commenturl"), - rest, - h("rfc2109"), - ) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - except: - reraise_unmasked_exceptions((IOError,)) - raise LoadError("invalid Set-Cookie3 format file %s" % filename) - diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py deleted file mode 100644 index ad729c9..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py +++ /dev/null @@ -1,676 +0,0 @@ -"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize. - -Copyright 2003-2006 John J. Lee <jjl@pobox.com> -Copyright 2003 Andy Lester (original Perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -import urllib2, copy, re, os, urllib - - -from _html import DefaultFactory -import _response -import _request -import _rfc3986 -import _sockettimeout -from _useragent import UserAgentBase - -__version__ = (0, 1, 11, None, None) # 0.1.11 - -class BrowserStateError(Exception): pass -class LinkNotFoundError(Exception): pass -class FormNotFoundError(Exception): pass - - -def sanepathname2url(path): - urlpath = urllib.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... - return urlpath - - -class History: - """ - - Though this will become public, the implied interface is not yet stable. - - """ - def __init__(self): - self._history = [] # LIFO - def add(self, request, response): - self._history.append((request, response)) - def back(self, n, _response): - response = _response # XXX move Browser._response into this class? - while n > 0 or response is None: - try: - request, response = self._history.pop() - except IndexError: - raise BrowserStateError("already at start of history") - n -= 1 - return request, response - def clear(self): - del self._history[:] - def close(self): - for request, response in self._history: - if response is not None: - response.close() - del self._history[:] - - -class HTTPRefererProcessor(urllib2.BaseHandler): - def http_request(self, request): - # See RFC 2616 14.36. The only times we know the source of the - # request URI has a URI associated with it are redirect, and - # Browser.click() / Browser.submit() / Browser.follow_link(). - # Otherwise, it's the user's job to add any Referer header before - # .open()ing. - if hasattr(request, "redirect_dict"): - request = self.parent._add_referer_header( - request, origin_request=False) - return request - - https_request = http_request - - -class Browser(UserAgentBase): - """Browser-like class with support for history, forms and links. - - BrowserStateError is raised whenever the browser is in the wrong state to - complete the requested operation - eg., when .back() is called when the - browser history is empty, or when .follow_link() is called when the current - response does not contain HTML data. - - Public attributes: - - request: current request (mechanize.Request or urllib2.Request) - form: currently selected form (see .select_form()) - - """ - - handler_classes = copy.copy(UserAgentBase.handler_classes) - handler_classes["_referer"] = HTTPRefererProcessor - default_features = copy.copy(UserAgentBase.default_features) - default_features.append("_referer") - - def __init__(self, - factory=None, - history=None, - request_class=None, - ): - """ - - Only named arguments should be passed to this constructor. - - factory: object implementing the mechanize.Factory interface. - history: object implementing the mechanize.History interface. Note - this interface is still experimental and may change in future. - request_class: Request class to use. Defaults to mechanize.Request - by default for Pythons older than 2.4, urllib2.Request otherwise. - - The Factory and History objects passed in are 'owned' by the Browser, - so they should not be shared across Browsers. In particular, - factory.set_response() should not be called except by the owning - Browser itself. - - Note that the supplied factory's request_class is overridden by this - constructor, to ensure only one Request class is used. - - """ - self._handle_referer = True - - if history is None: - history = History() - self._history = history - - if request_class is None: - if not hasattr(urllib2.Request, "add_unredirected_header"): - request_class = _request.Request - else: - request_class = urllib2.Request # Python >= 2.4 - - if factory is None: - factory = DefaultFactory() - factory.set_request_class(request_class) - self._factory = factory - self.request_class = request_class - - self.request = None - self._set_response(None, False) - - # do this last to avoid __getattr__ problems - UserAgentBase.__init__(self) - - def close(self): - UserAgentBase.close(self) - if self._response is not None: - self._response.close() - if self._history is not None: - self._history.close() - self._history = None - - # make use after .close easy to spot - self.form = None - self.request = self._response = None - self.request = self.response = self.set_response = None - self.geturl = self.reload = self.back = None - self.clear_history = self.set_cookie = self.links = self.forms = None - self.viewing_html = self.encoding = self.title = None - self.select_form = self.click = self.submit = self.click_link = None - self.follow_link = self.find_link = None - - def set_handle_referer(self, handle): - """Set whether to add Referer header to each request.""" - self._set_handler("_referer", handle) - self._handle_referer = bool(handle) - - def _add_referer_header(self, request, origin_request=True): - if self.request is None: - return request - scheme = request.get_type() - original_scheme = self.request.get_type() - if scheme not in ["http", "https"]: - return request - if not origin_request and not self.request.has_header("Referer"): - return request - - if (self._handle_referer and - original_scheme in ["http", "https"] and - not (original_scheme == "https" and scheme != "https")): - # strip URL fragment (RFC 2616 14.36) - parts = _rfc3986.urlsplit(self.request.get_full_url()) - parts = parts[:-1]+(None,) - referer = _rfc3986.urlunsplit(parts) - request.add_unredirected_header("Referer", referer) - return request - - def open_novisit(self, url, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - """Open a URL without visiting it. - - Browser state (including request, response, history, forms and links) - is left unchanged by calling this function. - - The interface is the same as for .open(). - - This is useful for things like fetching images. - - See also .retrieve(). - - """ - return self._mech_open(url, data, visit=False, timeout=timeout) - - def open(self, url, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - return self._mech_open(url, data, timeout=timeout) - - def _mech_open(self, url, data=None, update_history=True, visit=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - try: - url.get_full_url - except AttributeError: - # string URL -- convert to absolute URL if required - scheme, authority = _rfc3986.urlsplit(url)[:2] - if scheme is None: - # relative URL - if self._response is None: - raise BrowserStateError( - "can't fetch relative reference: " - "not viewing any document") - url = _rfc3986.urljoin(self._response.geturl(), url) - - request = self._request(url, data, visit, timeout) - visit = request.visit - if visit is None: - visit = True - - if visit: - self._visit_request(request, update_history) - - success = True - try: - response = UserAgentBase.open(self, request, data) - except urllib2.HTTPError, error: - success = False - if error.fp is None: # not a response - raise - response = error -## except (IOError, socket.error, OSError), error: -## # Yes, urllib2 really does raise all these :-(( -## # See test_urllib2.py for examples of socket.gaierror and OSError, -## # plus note that FTPHandler raises IOError. -## # XXX I don't seem to have an example of exactly socket.error being -## # raised, only socket.gaierror... -## # I don't want to start fixing these here, though, since this is a -## # subclass of OpenerDirector, and it would break old code. Even in -## # Python core, a fix would need some backwards-compat. hack to be -## # acceptable. -## raise - - if visit: - self._set_response(response, False) - response = copy.copy(self._response) - elif response is not None: - response = _response.upgrade_response(response) - - if not success: - raise response - return response - - def __str__(self): - text = [] - text.append("<%s " % self.__class__.__name__) - if self._response: - text.append("visiting %s" % self._response.geturl()) - else: - text.append("(not visiting a URL)") - if self.form: - text.append("\n selected form:\n %s\n" % str(self.form)) - text.append(">") - return "".join(text) - - def response(self): - """Return a copy of the current response. - - The returned object has the same interface as the object returned by - .open() (or urllib2.urlopen()). - - """ - return copy.copy(self._response) - - def open_local_file(self, filename): - path = sanepathname2url(os.path.abspath(filename)) - url = 'file://'+path - return self.open(url) - - def set_response(self, response): - """Replace current response with (a copy of) response. - - response may be None. - - This is intended mostly for HTML-preprocessing. - """ - self._set_response(response, True) - - def _set_response(self, response, close_current): - # sanity check, necessary but far from sufficient - if not (response is None or - (hasattr(response, "info") and hasattr(response, "geturl") and - hasattr(response, "read") - ) - ): - raise ValueError("not a response object") - - self.form = None - if response is not None: - response = _response.upgrade_response(response) - if close_current and self._response is not None: - self._response.close() - self._response = response - self._factory.set_response(response) - - def visit_response(self, response, request=None): - """Visit the response, as if it had been .open()ed. - - Unlike .set_response(), this updates history rather than replacing the - current response. - """ - if request is None: - request = _request.Request(response.geturl()) - self._visit_request(request, True) - self._set_response(response, False) - - def _visit_request(self, request, update_history): - if self._response is not None: - self._response.close() - if self.request is not None and update_history: - self._history.add(self.request, self._response) - self._response = None - # we want self.request to be assigned even if UserAgentBase.open - # fails - self.request = request - - def geturl(self): - """Get URL of current document.""" - if self._response is None: - raise BrowserStateError("not viewing any document") - return self._response.geturl() - - def reload(self): - """Reload current document, and return response object.""" - if self.request is None: - raise BrowserStateError("no URL has yet been .open()ed") - if self._response is not None: - self._response.close() - return self._mech_open(self.request, update_history=False) - - def back(self, n=1): - """Go back n steps in history, and return response object. - - n: go back this number of steps (default 1 step) - - """ - if self._response is not None: - self._response.close() - self.request, response = self._history.back(n, self._response) - self.set_response(response) - if not response.read_complete: - return self.reload() - return copy.copy(response) - - def clear_history(self): - self._history.clear() - - def set_cookie(self, cookie_string): - """Request to set a cookie. - - Note that it is NOT necessary to call this method under ordinary - circumstances: cookie handling is normally entirely automatic. The - intended use case is rather to simulate the setting of a cookie by - client script in a web page (e.g. JavaScript). In that case, use of - this method is necessary because mechanize currently does not support - JavaScript, VBScript, etc. - - The cookie is added in the same way as if it had arrived with the - current response, as a result of the current request. This means that, - for example, if it is not appropriate to set the cookie based on the - current request, no cookie will be set. - - The cookie will be returned automatically with subsequent responses - made by the Browser instance whenever that's appropriate. - - cookie_string should be a valid value of the Set-Cookie header. - - For example: - - browser.set_cookie( - "sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT") - - Currently, this method does not allow for adding RFC 2986 cookies. - This limitation will be lifted if anybody requests it. - - """ - if self._response is None: - raise BrowserStateError("not viewing any document") - if self.request.get_type() not in ["http", "https"]: - raise BrowserStateError("can't set cookie for non-HTTP/HTTPS " - "transactions") - cookiejar = self._ua_handlers["_cookies"].cookiejar - response = self.response() # copy - headers = response.info() - headers["Set-cookie"] = cookie_string - cookiejar.extract_cookies(response, self.request) - - def links(self, **kwds): - """Return iterable over links (mechanize.Link objects).""" - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - links = self._factory.links() - if kwds: - return self._filter_links(links, **kwds) - else: - return links - - def forms(self): - """Return iterable over forms. - - The returned form objects implement the ClientForm.HTMLForm interface. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - return self._factory.forms() - - def global_form(self): - """Return the global form object, or None if the factory implementation - did not supply one. - - The "global" form object contains all controls that are not descendants - of any FORM element. - - The returned form object implements the ClientForm.HTMLForm interface. - - This is a separate method since the global form is not regarded as part - of the sequence of forms in the document -- mostly for - backwards-compatibility. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - return self._factory.global_form - - def viewing_html(self): - """Return whether the current response contains HTML data.""" - if self._response is None: - raise BrowserStateError("not viewing any document") - return self._factory.is_html - - def encoding(self): - if self._response is None: - raise BrowserStateError("not viewing any document") - return self._factory.encoding - - def title(self): - r"""Return title, or None if there is no title element in the document. - - Treatment of any tag children of attempts to follow Firefox and IE - (currently, tags are preserved). - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - return self._factory.title - - def select_form(self, name=None, predicate=None, nr=None): - """Select an HTML form for input. - - This is a bit like giving a form the "input focus" in a browser. - - If a form is selected, the Browser object supports the HTMLForm - interface, so you can call methods like .set_value(), .set(), and - .click(). - - Another way to select a form is to assign to the .form attribute. The - form assigned should be one of the objects returned by the .forms() - method. - - At least one of the name, predicate and nr arguments must be supplied. - If no matching form is found, mechanize.FormNotFoundError is raised. - - If name is specified, then the form must have the indicated name. - - If predicate is specified, then the form must match that function. The - predicate function is passed the HTMLForm as its single argument, and - should return a boolean value indicating whether the form matched. - - nr, if supplied, is the sequence number of the form (where 0 is the - first). Note that control 0 is the first form matching all the other - arguments (if supplied); it is not necessarily the first control in the - form. The "global form" (consisting of all form controls not contained - in any FORM element) is considered not to be part of this sequence and - to have no name, so will not be matched unless both name and nr are - None. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - if (name is None) and (predicate is None) and (nr is None): - raise ValueError( - "at least one argument must be supplied to specify form") - - global_form = self._factory.global_form - if nr is None and name is None and \ - predicate is not None and predicate(global_form): - self.form = global_form - return - - orig_nr = nr - for form in self.forms(): - if name is not None and name != form.name: - continue - if predicate is not None and not predicate(form): - continue - if nr: - nr -= 1 - continue - self.form = form - break # success - else: - # failure - description = [] - if name is not None: description.append("name '%s'" % name) - if predicate is not None: - description.append("predicate %s" % predicate) - if orig_nr is not None: description.append("nr %d" % orig_nr) - description = ", ".join(description) - raise FormNotFoundError("no form matching "+description) - - def click(self, *args, **kwds): - """See ClientForm.HTMLForm.click for documentation.""" - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - request = self.form.click(*args, **kwds) - return self._add_referer_header(request) - - def submit(self, *args, **kwds): - """Submit current form. - - Arguments are as for ClientForm.HTMLForm.click(). - - Return value is same as for Browser.open(). - - """ - return self.open(self.click(*args, **kwds)) - - def click_link(self, link=None, **kwds): - """Find a link and return a Request object for it. - - Arguments are as for .find_link(), except that a link may be supplied - as the first argument. - - """ - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - if not link: - link = self.find_link(**kwds) - else: - if kwds: - raise ValueError( - "either pass a Link, or keyword arguments, not both") - request = self.request_class(link.absolute_url) - return self._add_referer_header(request) - - def follow_link(self, link=None, **kwds): - """Find a link and .open() it. - - Arguments are as for .click_link(). - - Return value is same as for Browser.open(). - - """ - return self.open(self.click_link(link, **kwds)) - - def find_link(self, **kwds): - """Find a link in current page. - - Links are returned as mechanize.Link objects. - - # Return third link that .search()-matches the regexp "python" - # (by ".search()-matches", I mean that the regular expression method - # .search() is used, rather than .match()). - find_link(text_regex=re.compile("python"), nr=2) - - # Return first http link in the current page that points to somewhere - # on python.org whose link text (after tags have been removed) is - # exactly "monty python". - find_link(text="monty python", - url_regex=re.compile("http.*python.org")) - - # Return first link with exactly three HTML attributes. - find_link(predicate=lambda link: len(link.attrs) == 3) - - Links include anchors (<a>), image maps (<area>), and frames (<frame>, - <iframe>). - - All arguments must be passed by keyword, not position. Zero or more - arguments may be supplied. In order to find a link, all arguments - supplied must match. - - If a matching link is not found, mechanize.LinkNotFoundError is raised. - - text: link text between link tags: eg. <a href="blah">this bit</a> (as - returned by pullparser.get_compressed_text(), ie. without tags but - with opening tags "textified" as per the pullparser docs) must compare - equal to this argument, if supplied - text_regex: link text between tag (as defined above) must match the - regular expression object or regular expression string passed as this - argument, if supplied - name, name_regex: as for text and text_regex, but matched against the - name HTML attribute of the link tag - url, url_regex: as for text and text_regex, but matched against the - URL of the link tag (note this matches against Link.url, which is a - relative or absolute URL according to how it was written in the HTML) - tag: element name of opening tag, eg. "a" - predicate: a function taking a Link object as its single argument, - returning a boolean result, indicating whether the links - nr: matches the nth link that matches all other criteria (default 0) - - """ - try: - return self._filter_links(self._factory.links(), **kwds).next() - except StopIteration: - raise LinkNotFoundError() - - def __getattr__(self, name): - # pass through ClientForm / DOMForm methods and attributes - form = self.__dict__.get("form") - if form is None: - raise AttributeError( - "%s instance has no attribute %s (perhaps you forgot to " - ".select_form()?)" % (self.__class__, name)) - return getattr(form, name) - - def _filter_links(self, links, - text=None, text_regex=None, - name=None, name_regex=None, - url=None, url_regex=None, - tag=None, - predicate=None, - nr=0 - ): - if not self.viewing_html(): - raise BrowserStateError("not viewing HTML") - - found_links = [] - orig_nr = nr - - for link in links: - if url is not None and url != link.url: - continue - if url_regex is not None and not re.search(url_regex, link.url): - continue - if (text is not None and - (link.text is None or text != link.text)): - continue - if (text_regex is not None and - (link.text is None or not re.search(text_regex, link.text))): - continue - if name is not None and name != dict(link.attrs).get("name"): - continue - if name_regex is not None: - link_name = dict(link.attrs).get("name") - if link_name is None or not re.search(name_regex, link_name): - continue - if tag is not None and tag != link.tag: - continue - if predicate is not None and not predicate(link): - continue - if nr: - nr -= 1 - continue - yield link - nr = orig_nr diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py deleted file mode 100644 index 51e81bb..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Mozilla / Netscape cookie loading / saving. - -Copyright 2002-2006 John J Lee <jjl@pobox.com> -Copyright 1997-1999 Gisle Aas (original libwww-perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import re, time, logging - -from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ - MISSING_FILENAME_TEXT, LoadError -debug = logging.getLogger("ClientCookie").debug - - -class MozillaCookieJar(FileCookieJar): - """ - - WARNING: you may want to backup your browser's cookies file if you use - this class to save cookies. I *think* it works, but there have been - bugs in the past! - - This class differs from CookieJar only in the format it uses to save and - load cookies to and from a file. This class uses the Mozilla/Netscape - `cookies.txt' format. lynx uses this file format, too. - - Don't expect cookies saved while the browser is running to be noticed by - the browser (in fact, Mozilla on unix will overwrite your saved cookies if - you change them on disk while it's running; on Windows, you probably can't - save at all while the browser is running). - - Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to - Netscape cookies on saving. - - In particular, the cookie version and port number information is lost, - together with information about whether or not Path, Port and Discard were - specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the - domain as set in the HTTP header started with a dot (yes, I'm aware some - domains in Netscape files start with a dot and some don't -- trust me, you - really don't want to know any more about this). - - Note that though Mozilla and Netscape use the same format, they use - slightly different headers. The class saves cookies using the Netscape - header by default (Mozilla can cope with that). - - """ - magic_re = "#( Netscape)? HTTP Cookie File" - header = """\ - # Netscape HTTP Cookie File - # http://www.netscape.com/newsref/std/cookie_spec.html - # This is a generated file! Do not edit. - -""" - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - now = time.time() - - magic = f.readline() - if not re.search(self.magic_re, magic): - f.close() - raise LoadError( - "%s does not look like a Netscape format cookies file" % - filename) - - try: - while 1: - line = f.readline() - if line == "": break - - # last field may be absent, so keep any trailing tab - if line.endswith("\n"): line = line[:-1] - - # skip comments and blank lines XXX what is $ for? - if (line.strip().startswith("#") or - line.strip().startswith("$") or - line.strip() == ""): - continue - - domain, domain_specified, path, secure, expires, name, value = \ - line.split("\t", 6) - secure = (secure == "TRUE") - domain_specified = (domain_specified == "TRUE") - if name == "": - name = value - value = None - - initial_dot = domain.startswith(".") - if domain_specified != initial_dot: - raise LoadError("domain and domain specified flag don't " - "match in %s: %s" % (filename, line)) - - discard = False - if expires == "": - expires = None - discard = True - - # assume path_specified is false - c = Cookie(0, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - {}) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except: - reraise_unmasked_exceptions((IOError, LoadError)) - raise LoadError("invalid Netscape format file %s: %s" % - (filename, line)) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - debug("Saving Netscape cookies.txt file") - f.write(self.header) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - debug(" Not saving %s: marked for discard", cookie.name) - continue - if not ignore_expires and cookie.is_expired(now): - debug(" Not saving %s: expired", cookie.name) - continue - if cookie.secure: secure = "TRUE" - else: secure = "FALSE" - if cookie.domain.startswith("."): initial_dot = "TRUE" - else: initial_dot = "FALSE" - if cookie.expires is not None: - expires = str(cookie.expires) - else: - expires = "" - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas cookielib regards it as a - # cookie with no value. - name = "" - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - "\t".join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value])+ - "\n") - finally: - f.close() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py deleted file mode 100644 index 1057811..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py +++ /dev/null @@ -1,388 +0,0 @@ -"""Microsoft Internet Explorer cookie loading on Windows. - -Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code) -Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -# XXX names and comments are not great here - -import os, re, time, struct, logging -if os.name == "nt": - import _winreg - -from _clientcookie import FileCookieJar, CookieJar, Cookie, \ - MISSING_FILENAME_TEXT, LoadError - -debug = logging.getLogger("mechanize").debug - - -def regload(path, leaf): - key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0, - _winreg.KEY_ALL_ACCESS) - try: - value = _winreg.QueryValueEx(key, leaf)[0] - except WindowsError: - value = None - return value - -WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME - -def epoch_time_offset_from_win32_filetime(filetime): - """Convert from win32 filetime to seconds-since-epoch value. - - MSIE stores create and expire times as Win32 FILETIME, which is 64 - bits of 100 nanosecond intervals since Jan 01 1601. - - mechanize expects time in 32-bit value expressed in seconds since the - epoch (Jan 01 1970). - - """ - if filetime < WIN32_EPOCH: - raise ValueError("filetime (%d) is before epoch (%d)" % - (filetime, WIN32_EPOCH)) - - return divmod((filetime - WIN32_EPOCH), 10000000L)[0] - -def binary_to_char(c): return "%02X" % ord(c) -def binary_to_str(d): return "".join(map(binary_to_char, list(d))) - -class MSIEBase: - magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*") - padding = "\x0d\xf0\xad\x0b" - - msie_domain_re = re.compile(r"^([^/]+)(/.*)$") - cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?" - "(.+\@[\x21-\xFF]+\.txt)") - - # path under HKEY_CURRENT_USER from which to get location of index.dat - reg_path = r"software\microsoft\windows" \ - r"\currentversion\explorer\shell folders" - reg_key = "Cookies" - - def __init__(self): - self._delayload_domains = {} - - def _delayload_domain(self, domain): - # if necessary, lazily load cookies for this domain - delayload_info = self._delayload_domains.get(domain) - if delayload_info is not None: - cookie_file, ignore_discard, ignore_expires = delayload_info - try: - self.load_cookie_data(cookie_file, - ignore_discard, ignore_expires) - except (LoadError, IOError): - debug("error reading cookie file, skipping: %s", cookie_file) - else: - del self._delayload_domains[domain] - - def _load_cookies_from_file(self, filename): - debug("Loading MSIE cookies file: %s", filename) - cookies = [] - - cookies_fh = open(filename) - - try: - while 1: - key = cookies_fh.readline() - if key == "": break - - rl = cookies_fh.readline - def getlong(rl=rl): return long(rl().rstrip()) - def getstr(rl=rl): return rl().rstrip() - - key = key.rstrip() - value = getstr() - domain_path = getstr() - flags = getlong() # 0x2000 bit is for secure I think - lo_expire = getlong() - hi_expire = getlong() - lo_create = getlong() - hi_create = getlong() - sep = getstr() - - if "" in (key, value, domain_path, flags, hi_expire, lo_expire, - hi_create, lo_create, sep) or (sep != "*"): - break - - m = self.msie_domain_re.search(domain_path) - if m: - domain = m.group(1) - path = m.group(2) - - cookies.append({"KEY": key, "VALUE": value, - "DOMAIN": domain, "PATH": path, - "FLAGS": flags, "HIXP": hi_expire, - "LOXP": lo_expire, "HICREATE": hi_create, - "LOCREATE": lo_create}) - finally: - cookies_fh.close() - - return cookies - - def load_cookie_data(self, filename, - ignore_discard=False, ignore_expires=False): - """Load cookies from file containing actual cookie data. - - Old cookies are kept unless overwritten by newly loaded ones. - - You should not call this method if the delayload attribute is set. - - I think each of these files contain all cookies for one user, domain, - and path. - - filename: file containing cookies -- usually found in a file like - C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt - - """ - now = int(time.time()) - - cookie_data = self._load_cookies_from_file(filename) - - for cookie in cookie_data: - flags = cookie["FLAGS"] - secure = ((flags & 0x2000) != 0) - filetime = (cookie["HIXP"] << 32) + cookie["LOXP"] - expires = epoch_time_offset_from_win32_filetime(filetime) - if expires < now: - discard = True - else: - discard = False - domain = cookie["DOMAIN"] - initial_dot = domain.startswith(".") - if initial_dot: - domain_specified = True - else: - # MSIE 5 does not record whether the domain cookie-attribute - # was specified. - # Assuming it wasn't is conservative, because with strict - # domain matching this will match less frequently; with regular - # Netscape tail-matching, this will match at exactly the same - # times that domain_specified = True would. It also means we - # don't have to prepend a dot to achieve consistency with our - # own & Mozilla's domain-munging scheme. - domain_specified = False - - # assume path_specified is false - # XXX is there other stuff in here? -- eg. comment, commentURL? - c = Cookie(0, - cookie["KEY"], cookie["VALUE"], - None, False, - domain, domain_specified, initial_dot, - cookie["PATH"], False, - secure, - expires, - discard, - None, - None, - {"flags": flags}) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - CookieJar.set_cookie(self, c) - - def load_from_registry(self, ignore_discard=False, ignore_expires=False, - username=None): - """ - username: only required on win9x - - """ - cookies_dir = regload(self.reg_path, self.reg_key) - filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT")) - self.load(filename, ignore_discard, ignore_expires, username) - - def _really_load(self, index, filename, ignore_discard, ignore_expires, - username): - now = int(time.time()) - - if username is None: - username = os.environ['USERNAME'].lower() - - cookie_dir = os.path.dirname(filename) - - data = index.read(256) - if len(data) != 256: - raise LoadError("%s file is too short" % filename) - - # Cookies' index.dat file starts with 32 bytes of signature - # followed by an offset to the first record, stored as a little- - # endian DWORD. - sig, size, data = data[:32], data[32:36], data[36:] - size = struct.unpack("<L", size)[0] - - # check that sig is valid - if not self.magic_re.match(sig) or size != 0x4000: - raise LoadError("%s ['%s' %s] does not seem to contain cookies" % - (str(filename), sig, size)) - - # skip to start of first record - index.seek(size, 0) - - sector = 128 # size of sector in bytes - - while 1: - data = "" - - # Cookies are usually in two contiguous sectors, so read in two - # sectors and adjust if not a Cookie. - to_read = 2 * sector - d = index.read(to_read) - if len(d) != to_read: - break - data = data + d - - # Each record starts with a 4-byte signature and a count - # (little-endian DWORD) of sectors for the record. - sig, size, data = data[:4], data[4:8], data[8:] - size = struct.unpack("<L", size)[0] - - to_read = (size - 2) * sector - -## from urllib import quote -## print "data", quote(data) -## print "sig", quote(sig) -## print "size in sectors", size -## print "size in bytes", size*sector -## print "size in units of 16 bytes", (size*sector) / 16 -## print "size to read in bytes", to_read -## print - - if sig != "URL ": - assert sig in ("HASH", "LEAK", \ - self.padding, "\x00\x00\x00\x00"), \ - "unrecognized MSIE index.dat record: %s" % \ - binary_to_str(sig) - if sig == "\x00\x00\x00\x00": - # assume we've got all the cookies, and stop - break - if sig == self.padding: - continue - # skip the rest of this record - assert to_read >= 0 - if size != 2: - assert to_read != 0 - index.seek(to_read, 1) - continue - - # read in rest of record if necessary - if size > 2: - more_data = index.read(to_read) - if len(more_data) != to_read: break - data = data + more_data - - cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username + - "(%s\@[\x21-\xFF]+\.txt)" % username) - m = re.search(cookie_re, data, re.I) - if m: - cookie_file = os.path.join(cookie_dir, m.group(2)) - if not self.delayload: - try: - self.load_cookie_data(cookie_file, - ignore_discard, ignore_expires) - except (LoadError, IOError): - debug("error reading cookie file, skipping: %s", - cookie_file) - else: - domain = m.group(1) - i = domain.find("/") - if i != -1: - domain = domain[:i] - - self._delayload_domains[domain] = ( - cookie_file, ignore_discard, ignore_expires) - - -class MSIECookieJar(MSIEBase, FileCookieJar): - """FileCookieJar that reads from the Windows MSIE cookies database. - - MSIECookieJar can read the cookie files of Microsoft Internet Explorer - (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and - Windows 98. Other configurations may also work, but are untested. Saving - cookies in MSIE format is NOT supported. If you save cookies, they'll be - in the usual Set-Cookie3 format, which you can read back in using an - instance of the plain old CookieJar class. Don't save using the same - filename that you loaded cookies from, because you may succeed in - clobbering your MSIE cookies index file! - - You should be able to have LWP share Internet Explorer's cookies like - this (note you need to supply a username to load_from_registry if you're on - Windows 9x or Windows ME): - - cj = MSIECookieJar(delayload=1) - # find cookies index file in registry and load cookies from it - cj.load_from_registry() - opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj)) - response = opener.open("http://example.com/") - - Iterating over a delayloaded MSIECookieJar instance will not cause any - cookies to be read from disk. To force reading of all cookies from disk, - call read_all_cookies. Note that the following methods iterate over self: - clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__ - and as_string. - - Additional methods: - - load_from_registry(ignore_discard=False, ignore_expires=False, - username=None) - load_cookie_data(filename, ignore_discard=False, ignore_expires=False) - read_all_cookies() - - """ - def __init__(self, filename=None, delayload=False, policy=None): - MSIEBase.__init__(self) - FileCookieJar.__init__(self, filename, delayload, policy) - - def set_cookie(self, cookie): - if self.delayload: - self._delayload_domain(cookie.domain) - CookieJar.set_cookie(self, cookie) - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - domains = self._cookies.copy() - domains.update(self._delayload_domains) - domains = domains.keys() - - cookies = [] - for domain in domains: - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookies_for_domain(self, domain, request): - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - if self.delayload: - self._delayload_domain(domain) - return CookieJar._cookies_for_domain(self, domain, request) - - def read_all_cookies(self): - """Eagerly read in all cookies.""" - if self.delayload: - for domain in self._delayload_domains.keys(): - self._delayload_domain(domain) - - def load(self, filename, ignore_discard=False, ignore_expires=False, - username=None): - """Load cookies from an MSIE 'index.dat' cookies index file. - - filename: full path to cookie index file - username: only required on win9x - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - index = open(filename, "rb") - - try: - self._really_load(index, filename, ignore_discard, ignore_expires, - username) - finally: - index.close() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py deleted file mode 100644 index d94eacf..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py +++ /dev/null @@ -1,436 +0,0 @@ -"""Integration with Python standard library module urllib2: OpenerDirector -class. - -Copyright 2004-2006 John J Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import os, urllib2, bisect, httplib, types, tempfile -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading -try: - set -except NameError: - import sets - set = sets.Set - -import _file -import _http -from _request import Request -import _response -import _rfc3986 -import _sockettimeout -import _upgrade -from _util import isstringlike - - -class ContentTooShortError(urllib2.URLError): - def __init__(self, reason, result): - urllib2.URLError.__init__(self, reason) - self.result = result - - -def set_request_attr(req, name, value, default): - try: - getattr(req, name) - except AttributeError: - setattr(req, name, default) - if value is not default: - setattr(req, name, value) - - -class OpenerDirector(urllib2.OpenerDirector): - def __init__(self): - urllib2.OpenerDirector.__init__(self) - # really none of these are (sanely) public -- the lack of initial - # underscore on some is just due to following urllib2 - self.process_response = {} - self.process_request = {} - self._any_request = {} - self._any_response = {} - self._handler_index_valid = True - self._tempfiles = [] - - def add_handler(self, handler): - if handler in self.handlers: - return - # XXX why does self.handlers need to be sorted? - bisect.insort(self.handlers, handler) - handler.add_parent(self) - self._handler_index_valid = False - - def _maybe_reindex_handlers(self): - if self._handler_index_valid: - return - - handle_error = {} - handle_open = {} - process_request = {} - process_response = {} - any_request = set() - any_response = set() - unwanted = [] - - for handler in self.handlers: - added = False - for meth in dir(handler): - if meth in ["redirect_request", "do_open", "proxy_open"]: - # oops, coincidental match - continue - - if meth == "any_request": - any_request.add(handler) - added = True - continue - elif meth == "any_response": - any_response.add(handler) - added = True - continue - - ii = meth.find("_") - scheme = meth[:ii] - condition = meth[ii+1:] - - if condition.startswith("error"): - jj = meth[ii+1:].find("_") + ii + 1 - kind = meth[jj+1:] - try: - kind = int(kind) - except ValueError: - pass - lookup = handle_error.setdefault(scheme, {}) - elif condition == "open": - kind = scheme - lookup = handle_open - elif condition == "request": - kind = scheme - lookup = process_request - elif condition == "response": - kind = scheme - lookup = process_response - else: - continue - - lookup.setdefault(kind, set()).add(handler) - added = True - - if not added: - unwanted.append(handler) - - for handler in unwanted: - self.handlers.remove(handler) - - # sort indexed methods - # XXX could be cleaned up - for lookup in [process_request, process_response]: - for scheme, handlers in lookup.iteritems(): - lookup[scheme] = handlers - for scheme, lookup in handle_error.iteritems(): - for code, handlers in lookup.iteritems(): - handlers = list(handlers) - handlers.sort() - lookup[code] = handlers - for scheme, handlers in handle_open.iteritems(): - handlers = list(handlers) - handlers.sort() - handle_open[scheme] = handlers - - # cache the indexes - self.handle_error = handle_error - self.handle_open = handle_open - self.process_request = process_request - self.process_response = process_response - self._any_request = any_request - self._any_response = any_response - - def _request(self, url_or_req, data, visit, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - if isstringlike(url_or_req): - req = Request(url_or_req, data, visit=visit, timeout=timeout) - else: - # already a urllib2.Request or mechanize.Request instance - req = url_or_req - if data is not None: - req.add_data(data) - # XXX yuck - set_request_attr(req, "visit", visit, None) - set_request_attr(req, "timeout", timeout, - _sockettimeout._GLOBAL_DEFAULT_TIMEOUT) - return req - - def open(self, fullurl, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - req = self._request(fullurl, data, None, timeout) - req_scheme = req.get_type() - - self._maybe_reindex_handlers() - - # pre-process request - # XXX should we allow a Processor to change the URL scheme - # of the request? - request_processors = set(self.process_request.get(req_scheme, [])) - request_processors.update(self._any_request) - request_processors = list(request_processors) - request_processors.sort() - for processor in request_processors: - for meth_name in ["any_request", req_scheme+"_request"]: - meth = getattr(processor, meth_name, None) - if meth: - req = meth(req) - - # In Python >= 2.4, .open() supports processors already, so we must - # call ._open() instead. - urlopen = getattr(urllib2.OpenerDirector, "_open", - urllib2.OpenerDirector.open) - response = urlopen(self, req, data) - - # post-process response - response_processors = set(self.process_response.get(req_scheme, [])) - response_processors.update(self._any_response) - response_processors = list(response_processors) - response_processors.sort() - for processor in response_processors: - for meth_name in ["any_response", req_scheme+"_response"]: - meth = getattr(processor, meth_name, None) - if meth: - response = meth(req, response) - - return response - - def error(self, proto, *args): - if proto in ['http', 'https']: - # XXX http[s] protocols are special-cased - dict = self.handle_error['http'] # https is not different than http - proto = args[2] # YUCK! - meth_name = 'http_error_%s' % proto - http_err = 1 - orig_args = args - else: - dict = self.handle_error - meth_name = proto + '_error' - http_err = 0 - args = (dict, proto, meth_name) + args - result = apply(self._call_chain, args) - if result: - return result - - if http_err: - args = (dict, 'default', 'http_error_default') + orig_args - return apply(self._call_chain, args) - - BLOCK_SIZE = 1024*8 - def retrieve(self, fullurl, filename=None, reporthook=None, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - """Returns (filename, headers). - - For remote objects, the default filename will refer to a temporary - file. Temporary files are removed when the OpenerDirector.close() - method is called. - - For file: URLs, at present the returned filename is None. This may - change in future. - - If the actual number of bytes read is less than indicated by the - Content-Length header, raises ContentTooShortError (a URLError - subclass). The exception's .result attribute contains the (filename, - headers) that would have been returned. - - """ - req = self._request(fullurl, data, False, timeout) - scheme = req.get_type() - fp = self.open(req) - headers = fp.info() - if filename is None and scheme == 'file': - # XXX req.get_selector() seems broken here, return None, - # pending sanity :-/ - return None, headers - #return urllib.url2pathname(req.get_selector()), headers - if filename: - tfp = open(filename, 'wb') - else: - path = _rfc3986.urlsplit(req.get_full_url())[2] - suffix = os.path.splitext(path)[1] - fd, filename = tempfile.mkstemp(suffix) - self._tempfiles.append(filename) - tfp = os.fdopen(fd, 'wb') - - result = filename, headers - bs = self.BLOCK_SIZE - size = -1 - read = 0 - blocknum = 0 - if reporthook: - if "content-length" in headers: - size = int(headers["Content-Length"]) - reporthook(blocknum, bs, size) - while 1: - block = fp.read(bs) - if block == "": - break - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - fp.close() - tfp.close() - del fp - del tfp - - # raise exception if actual size does not match content-length header - if size >= 0 and read < size: - raise ContentTooShortError( - "retrieval incomplete: " - "got only %i out of %i bytes" % (read, size), - result - ) - - return result - - def close(self): - urllib2.OpenerDirector.close(self) - - # make it very obvious this object is no longer supposed to be used - self.open = self.error = self.retrieve = self.add_handler = None - - if self._tempfiles: - for filename in self._tempfiles: - try: - os.unlink(filename) - except OSError: - pass - del self._tempfiles[:] - - -def wrapped_open(urlopen, process_response_object, fullurl, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - success = True - try: - response = urlopen(fullurl, data, timeout) - except urllib2.HTTPError, error: - success = False - if error.fp is None: # not a response - raise - response = error - - if response is not None: - response = process_response_object(response) - - if not success: - raise response - return response - -class ResponseProcessingOpener(OpenerDirector): - - def open(self, fullurl, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - def bound_open(fullurl, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - return OpenerDirector.open(self, fullurl, data, timeout) - return wrapped_open( - bound_open, self.process_response_object, fullurl, data, timeout) - - def process_response_object(self, response): - return response - - -class SeekableResponseOpener(ResponseProcessingOpener): - def process_response_object(self, response): - return _response.seek_wrapped_response(response) - - -class OpenerFactory: - """This class's interface is quite likely to change.""" - - default_classes = [ - # handlers - urllib2.ProxyHandler, - urllib2.UnknownHandler, - _http.HTTPHandler, # derived from new AbstractHTTPHandler - _http.HTTPDefaultErrorHandler, - _http.HTTPRedirectHandler, # bugfixed - urllib2.FTPHandler, - _file.FileHandler, - # processors - _upgrade.HTTPRequestUpgradeProcessor, - _http.HTTPCookieProcessor, - _http.HTTPErrorProcessor, - ] - if hasattr(httplib, 'HTTPS'): - default_classes.append(_http.HTTPSHandler) - handlers = [] - replacement_handlers = [] - - def __init__(self, klass=OpenerDirector): - self.klass = klass - - def build_opener(self, *handlers): - """Create an opener object from a list of handlers and processors. - - The opener will use several default handlers and processors, including - support for HTTP and FTP. - - If any of the handlers passed as arguments are subclasses of the - default handlers, the default handlers will not be used. - - """ - opener = self.klass() - default_classes = list(self.default_classes) - skip = [] - for klass in default_classes: - for check in handlers: - if type(check) == types.ClassType: - if issubclass(check, klass): - skip.append(klass) - elif type(check) == types.InstanceType: - if isinstance(check, klass): - skip.append(klass) - for klass in skip: - default_classes.remove(klass) - - for klass in default_classes: - opener.add_handler(klass()) - for h in handlers: - if type(h) == types.ClassType: - h = h() - opener.add_handler(h) - - return opener - - -build_opener = OpenerFactory().build_opener - -_opener = None -urlopen_lock = _threading.Lock() -def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - global _opener - if _opener is None: - urlopen_lock.acquire() - try: - if _opener is None: - _opener = build_opener() - finally: - urlopen_lock.release() - return _opener.open(url, data, timeout) - -def urlretrieve(url, filename=None, reporthook=None, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - global _opener - if _opener is None: - urlopen_lock.acquire() - try: - if _opener is None: - _opener = build_opener() - finally: - urlopen_lock.release() - return _opener.retrieve(url, filename, reporthook, data, timeout) - -def install_opener(opener): - global _opener - _opener = opener diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py deleted file mode 100644 index 4d8d9d3..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py +++ /dev/null @@ -1,390 +0,0 @@ -"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser. - -Examples - -This program extracts all links from a document. It will print one -line for each link, containing the URL and the textual description -between the <A>...</A> tags: - -import pullparser, sys -f = file(sys.argv[1]) -p = pullparser.PullParser(f) -for token in p.tags("a"): - if token.type == "endtag": continue - url = dict(token.attrs).get("href", "-") - text = p.get_compressed_text(endat=("endtag", "a")) - print "%s\t%s" % (url, text) - -This program extracts the <TITLE> from the document: - -import pullparser, sys -f = file(sys.argv[1]) -p = pullparser.PullParser(f) -if p.get_tag("title"): - title = p.get_compressed_text() - print "Title: %s" % title - - -Copyright 2003-2006 John J. Lee <jjl@pobox.com> -Copyright 1998-2001 Gisle Aas (original libwww-perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses. - -""" - -import re, htmlentitydefs -import sgmllib, HTMLParser -from xml.sax import saxutils - -from _html import unescape, unescape_charref - - -class NoMoreTokensError(Exception): pass - -class Token: - """Represents an HTML tag, declaration, processing instruction etc. - - Behaves as both a tuple-like object (ie. iterable) and has attributes - .type, .data and .attrs. - - >>> t = Token("starttag", "a", [("href", "http://www.python.org/")]) - >>> t == ("starttag", "a", [("href", "http://www.python.org/")]) - True - >>> (t.type, t.data) == ("starttag", "a") - True - >>> t.attrs == [("href", "http://www.python.org/")] - True - - Public attributes - - type: one of "starttag", "endtag", "startendtag", "charref", "entityref", - "data", "comment", "decl", "pi", after the corresponding methods of - HTMLParser.HTMLParser - data: For a tag, the tag name; otherwise, the relevant data carried by the - tag, as a string - attrs: list of (name, value) pairs representing HTML attributes - (or None if token does not represent an opening tag) - - """ - def __init__(self, type, data, attrs=None): - self.type = type - self.data = data - self.attrs = attrs - def __iter__(self): - return iter((self.type, self.data, self.attrs)) - def __eq__(self, other): - type, data, attrs = other - if (self.type == type and - self.data == data and - self.attrs == attrs): - return True - else: - return False - def __ne__(self, other): return not self.__eq__(other) - def __repr__(self): - args = ", ".join(map(repr, [self.type, self.data, self.attrs])) - return self.__class__.__name__+"(%s)" % args - - def __str__(self): - """ - >>> print Token("starttag", "br") - <br> - >>> print Token("starttag", "a", - ... [("href", "http://www.python.org/"), ("alt", '"foo"')]) - <a href="http://www.python.org/" alt='"foo"'> - >>> print Token("startendtag", "br") - <br /> - >>> print Token("startendtag", "br", [("spam", "eggs")]) - <br spam="eggs" /> - >>> print Token("endtag", "p") - </p> - >>> print Token("charref", "38") - & - >>> print Token("entityref", "amp") - & - >>> print Token("data", "foo\\nbar") - foo - bar - >>> print Token("comment", "Life is a bowl\\nof cherries.") - <!--Life is a bowl - of cherries.--> - >>> print Token("decl", "decl") - <!decl> - >>> print Token("pi", "pi") - <?pi> - """ - if self.attrs is not None: - attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for - k, v in self.attrs]) - else: - attrs = "" - if self.type == "starttag": - return "<%s%s>" % (self.data, attrs) - elif self.type == "startendtag": - return "<%s%s />" % (self.data, attrs) - elif self.type == "endtag": - return "</%s>" % self.data - elif self.type == "charref": - return "&#%s;" % self.data - elif self.type == "entityref": - return "&%s;" % self.data - elif self.type == "data": - return self.data - elif self.type == "comment": - return "<!--%s-->" % self.data - elif self.type == "decl": - return "<!%s>" % self.data - elif self.type == "pi": - return "<?%s>" % self.data - assert False - - -def iter_until_exception(fn, exception, *args, **kwds): - while 1: - try: - yield fn(*args, **kwds) - except exception: - raise StopIteration - - -class _AbstractParser: - chunk = 1024 - compress_re = re.compile(r"\s+") - def __init__(self, fh, textify={"img": "alt", "applet": "alt"}, - encoding="ascii", entitydefs=None): - """ - fh: file-like object (only a .read() method is required) from which to - read HTML to be parsed - textify: mapping used by .get_text() and .get_compressed_text() methods - to represent opening tags as text - encoding: encoding used to encode numeric character references by - .get_text() and .get_compressed_text() ("ascii" by default) - - entitydefs: mapping like {"amp": "&", ...} containing HTML entity - definitions (a sensible default is used). This is used to unescape - entities in .get_text() (and .get_compressed_text()) and attribute - values. If the encoding can not represent the character, the entity - reference is left unescaped. Note that entity references (both - numeric - e.g. { or ઼ - and non-numeric - e.g. &) are - unescaped in attribute values and the return value of .get_text(), but - not in data outside of tags. Instead, entity references outside of - tags are represented as tokens. This is a bit odd, it's true :-/ - - If the element name of an opening tag matches a key in the textify - mapping then that tag is converted to text. The corresponding value is - used to specify which tag attribute to obtain the text from. textify - maps from element names to either: - - - an HTML attribute name, in which case the HTML attribute value is - used as its text value along with the element name in square - brackets (eg."alt text goes here[IMG]", or, if the alt attribute - were missing, just "[IMG]") - - a callable object (eg. a function) which takes a Token and returns - the string to be used as its text value - - If textify has no key for an element name, nothing is substituted for - the opening tag. - - Public attributes: - - encoding and textify: see above - - """ - self._fh = fh - self._tokenstack = [] # FIFO - self.textify = textify - self.encoding = encoding - if entitydefs is None: - entitydefs = htmlentitydefs.name2codepoint - self._entitydefs = entitydefs - - def __iter__(self): return self - - def tags(self, *names): - return iter_until_exception(self.get_tag, NoMoreTokensError, *names) - - def tokens(self, *tokentypes): - return iter_until_exception(self.get_token, NoMoreTokensError, - *tokentypes) - - def next(self): - try: - return self.get_token() - except NoMoreTokensError: - raise StopIteration() - - def get_token(self, *tokentypes): - """Pop the next Token object from the stack of parsed tokens. - - If arguments are given, they are taken to be token types in which the - caller is interested: tokens representing other elements will be - skipped. Element names must be given in lower case. - - Raises NoMoreTokensError. - - """ - while 1: - while self._tokenstack: - token = self._tokenstack.pop(0) - if tokentypes: - if token.type in tokentypes: - return token - else: - return token - data = self._fh.read(self.chunk) - if not data: - raise NoMoreTokensError() - self.feed(data) - - def unget_token(self, token): - """Push a Token back onto the stack.""" - self._tokenstack.insert(0, token) - - def get_tag(self, *names): - """Return the next Token that represents an opening or closing tag. - - If arguments are given, they are taken to be element names in which the - caller is interested: tags representing other elements will be skipped. - Element names must be given in lower case. - - Raises NoMoreTokensError. - - """ - while 1: - tok = self.get_token() - if tok.type not in ["starttag", "endtag", "startendtag"]: - continue - if names: - if tok.data in names: - return tok - else: - return tok - - def get_text(self, endat=None): - """Get some text. - - endat: stop reading text at this tag (the tag is included in the - returned text); endtag is a tuple (type, name) where type is - "starttag", "endtag" or "startendtag", and name is the element name of - the tag (element names must be given in lower case) - - If endat is not given, .get_text() will stop at the next opening or - closing tag, or when there are no more tokens (no exception is raised). - Note that .get_text() includes the text representation (if any) of the - opening tag, but pushes the opening tag back onto the stack. As a - result, if you want to call .get_text() again, you need to call - .get_tag() first (unless you want an empty string returned when you - next call .get_text()). - - Entity references are translated using the value of the entitydefs - constructor argument (a mapping from names to characters like that - provided by the standard module htmlentitydefs). Named entity - references that are not in this mapping are left unchanged. - - The textify attribute is used to translate opening tags into text: see - the class docstring. - - """ - text = [] - tok = None - while 1: - try: - tok = self.get_token() - except NoMoreTokensError: - # unget last token (not the one we just failed to get) - if tok: self.unget_token(tok) - break - if tok.type == "data": - text.append(tok.data) - elif tok.type == "entityref": - t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding) - text.append(t) - elif tok.type == "charref": - t = unescape_charref(tok.data, self.encoding) - text.append(t) - elif tok.type in ["starttag", "endtag", "startendtag"]: - tag_name = tok.data - if tok.type in ["starttag", "startendtag"]: - alt = self.textify.get(tag_name) - if alt is not None: - if callable(alt): - text.append(alt(tok)) - elif tok.attrs is not None: - for k, v in tok.attrs: - if k == alt: - text.append(v) - text.append("[%s]" % tag_name.upper()) - if endat is None or endat == (tok.type, tag_name): - self.unget_token(tok) - break - return "".join(text) - - def get_compressed_text(self, *args, **kwds): - """ - As .get_text(), but collapses each group of contiguous whitespace to a - single space character, and removes all initial and trailing - whitespace. - - """ - text = self.get_text(*args, **kwds) - text = text.strip() - return self.compress_re.sub(" ", text) - - def handle_startendtag(self, tag, attrs): - self._tokenstack.append(Token("startendtag", tag, attrs)) - def handle_starttag(self, tag, attrs): - self._tokenstack.append(Token("starttag", tag, attrs)) - def handle_endtag(self, tag): - self._tokenstack.append(Token("endtag", tag)) - def handle_charref(self, name): - self._tokenstack.append(Token("charref", name)) - def handle_entityref(self, name): - self._tokenstack.append(Token("entityref", name)) - def handle_data(self, data): - self._tokenstack.append(Token("data", data)) - def handle_comment(self, data): - self._tokenstack.append(Token("comment", data)) - def handle_decl(self, decl): - self._tokenstack.append(Token("decl", decl)) - def unknown_decl(self, data): - # XXX should this call self.error instead? - #self.error("unknown declaration: " + `data`) - self._tokenstack.append(Token("decl", data)) - def handle_pi(self, data): - self._tokenstack.append(Token("pi", data)) - - def unescape_attr(self, name): - return unescape(name, self._entitydefs, self.encoding) - def unescape_attrs(self, attrs): - escaped_attrs = [] - for key, val in attrs: - escaped_attrs.append((key, self.unescape_attr(val))) - return escaped_attrs - -class PullParser(_AbstractParser, HTMLParser.HTMLParser): - def __init__(self, *args, **kwds): - HTMLParser.HTMLParser.__init__(self) - _AbstractParser.__init__(self, *args, **kwds) - def unescape(self, name): - # Use the entitydefs passed into constructor, not - # HTMLParser.HTMLParser's entitydefs. - return self.unescape_attr(name) - -class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser): - def __init__(self, *args, **kwds): - sgmllib.SGMLParser.__init__(self) - _AbstractParser.__init__(self, *args, **kwds) - def unknown_starttag(self, tag, attrs): - attrs = self.unescape_attrs(attrs) - self._tokenstack.append(Token("starttag", tag, attrs)) - def unknown_endtag(self, tag): - self._tokenstack.append(Token("endtag", tag)) - - -def _test(): - import doctest, _pullparser - return doctest.testmod(_pullparser) - -if __name__ == "__main__": - _test() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py deleted file mode 100644 index 7824441..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Integration with Python standard library module urllib2: Request class. - -Copyright 2004-2006 John J Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import urllib2, urllib, logging - -from _clientcookie import request_host_lc -import _rfc3986 -import _sockettimeout - -warn = logging.getLogger("mechanize").warning - - -class Request(urllib2.Request): - def __init__(self, url, data=None, headers={}, - origin_req_host=None, unverifiable=False, visit=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - # In mechanize 0.2, the interpretation of a unicode url argument will - # change: A unicode url argument will be interpreted as an IRI, and a - # bytestring as a URI. For now, we accept unicode or bytestring. We - # don't insist that the value is always a URI (specifically, must only - # contain characters which are legal), because that might break working - # code (who knows what bytes some servers want to see, especially with - # browser plugins for internationalised URIs). - if not _rfc3986.is_clean_uri(url): - warn("url argument is not a URI " - "(contains illegal characters) %r" % url) - urllib2.Request.__init__(self, url, data, headers) - self.selector = None - self.unredirected_hdrs = {} - self.visit = visit - self.timeout = timeout - - # All the terminology below comes from RFC 2965. - self.unverifiable = unverifiable - # Set request-host of origin transaction. - # The origin request-host is needed in order to decide whether - # unverifiable sub-requests (automatic redirects, images embedded - # in HTML, etc.) are to third-party hosts. If they are, the - # resulting transactions might need to be conducted with cookies - # turned off. - if origin_req_host is None: - origin_req_host = request_host_lc(self) - self.origin_req_host = origin_req_host - - def get_selector(self): - return urllib.splittag(self.__r_host)[0] - - def get_origin_req_host(self): - return self.origin_req_host - - def is_unverifiable(self): - return self.unverifiable - - def add_unredirected_header(self, key, val): - """Add a header that will not be added to a redirected request.""" - self.unredirected_hdrs[key.capitalize()] = val - - def has_header(self, header_name): - """True iff request has named header (regular or unredirected).""" - return (header_name in self.headers or - header_name in self.unredirected_hdrs) - - def get_header(self, header_name, default=None): - return self.headers.get( - header_name, - self.unredirected_hdrs.get(header_name, default)) - - def header_items(self): - hdrs = self.unredirected_hdrs.copy() - hdrs.update(self.headers) - return hdrs.items() - - def __str__(self): - return "<Request for %s>" % self.get_full_url() - - def get_method(self): - if self.has_data(): - return "POST" - else: - return "GET" diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py deleted file mode 100644 index fad9b57..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py +++ /dev/null @@ -1,527 +0,0 @@ -"""Response classes. - -The seek_wrapper code is not used if you're using UserAgent with -.set_seekable_responses(False), or if you're using the urllib2-level interface -without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is -instantiated by some handlers (AbstractHTTPHandler), but the closeable_response -interface is only depended upon by Browser-level code. Function -upgrade_response is only used if you're using Browser or -ResponseUpgradeProcessor. - - -Copyright 2006 John J. Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -import copy, mimetools -from cStringIO import StringIO -import urllib2 - - -def len_of_seekable(file_): - # this function exists because evaluation of len(file_.getvalue()) on every - # .read() from seek_wrapper would be O(N**2) in number of .read()s - pos = file_.tell() - file_.seek(0, 2) # to end - try: - return file_.tell() - finally: - file_.seek(pos) - - -# XXX Andrew Dalke kindly sent me a similar class in response to my request on -# comp.lang.python, which I then proceeded to lose. I wrote this class -# instead, but I think he's released his code publicly since, could pinch the -# tests from it, at least... - -# For testing seek_wrapper invariant (note that -# test_urllib2.HandlerTest.test_seekable is expected to fail when this -# invariant checking is turned on). The invariant checking is done by module -# ipdc, which is available here: -# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834 -## from ipdbc import ContractBase -## class seek_wrapper(ContractBase): -class seek_wrapper: - """Adds a seek method to a file object. - - This is only designed for seeking on readonly file-like objects. - - Wrapped file-like object must have a read method. The readline method is - only supported if that method is present on the wrapped object. The - readlines method is always supported. xreadlines and iteration are - supported only for Python 2.2 and above. - - Public attributes: - - wrapped: the wrapped file object - is_closed: true iff .close() has been called - - WARNING: All other attributes of the wrapped object (ie. those that are not - one of wrapped, read, readline, readlines, xreadlines, __iter__ and next) - are passed through unaltered, which may or may not make sense for your - particular file object. - - """ - # General strategy is to check that cache is full enough, then delegate to - # the cache (self.__cache, which is a cStringIO.StringIO instance). A seek - # position (self.__pos) is maintained independently of the cache, in order - # that a single cache may be shared between multiple seek_wrapper objects. - # Copying using module copy shares the cache in this way. - - def __init__(self, wrapped): - self.wrapped = wrapped - self.__read_complete_state = [False] - self.__is_closed_state = [False] - self.__have_readline = hasattr(self.wrapped, "readline") - self.__cache = StringIO() - self.__pos = 0 # seek position - - def invariant(self): - # The end of the cache is always at the same place as the end of the - # wrapped file (though the .tell() method is not required to be present - # on wrapped file). - return self.wrapped.tell() == len(self.__cache.getvalue()) - - def close(self): - self.wrapped.close() - self.is_closed = True - - def __getattr__(self, name): - if name == "is_closed": - return self.__is_closed_state[0] - elif name == "read_complete": - return self.__read_complete_state[0] - - wrapped = self.__dict__.get("wrapped") - if wrapped: - return getattr(wrapped, name) - - return getattr(self.__class__, name) - - def __setattr__(self, name, value): - if name == "is_closed": - self.__is_closed_state[0] = bool(value) - elif name == "read_complete": - if not self.is_closed: - self.__read_complete_state[0] = bool(value) - else: - self.__dict__[name] = value - - def seek(self, offset, whence=0): - assert whence in [0,1,2] - - # how much data, if any, do we need to read? - if whence == 2: # 2: relative to end of *wrapped* file - if offset < 0: raise ValueError("negative seek offset") - # since we don't know yet where the end of that file is, we must - # read everything - to_read = None - else: - if whence == 0: # 0: absolute - if offset < 0: raise ValueError("negative seek offset") - dest = offset - else: # 1: relative to current position - pos = self.__pos - if pos < offset: - raise ValueError("seek to before start of file") - dest = pos + offset - end = len_of_seekable(self.__cache) - to_read = dest - end - if to_read < 0: - to_read = 0 - - if to_read != 0: - self.__cache.seek(0, 2) - if to_read is None: - assert whence == 2 - self.__cache.write(self.wrapped.read()) - self.read_complete = True - self.__pos = self.__cache.tell() - offset - else: - data = self.wrapped.read(to_read) - if not data: - self.read_complete = True - else: - self.__cache.write(data) - # Don't raise an exception even if we've seek()ed past the end - # of .wrapped, since fseek() doesn't complain in that case. - # Also like fseek(), pretend we have seek()ed past the end, - # i.e. not: - #self.__pos = self.__cache.tell() - # but rather: - self.__pos = dest - else: - self.__pos = dest - - def tell(self): - return self.__pos - - def __copy__(self): - cpy = self.__class__(self.wrapped) - cpy.__cache = self.__cache - cpy.__read_complete_state = self.__read_complete_state - cpy.__is_closed_state = self.__is_closed_state - return cpy - - def get_data(self): - pos = self.__pos - try: - self.seek(0) - return self.read(-1) - finally: - self.__pos = pos - - def read(self, size=-1): - pos = self.__pos - end = len_of_seekable(self.__cache) - available = end - pos - - # enough data already cached? - if size <= available and size != -1: - self.__cache.seek(pos) - self.__pos = pos+size - return self.__cache.read(size) - - # no, so read sufficient data from wrapped file and cache it - self.__cache.seek(0, 2) - if size == -1: - self.__cache.write(self.wrapped.read()) - self.read_complete = True - else: - to_read = size - available - assert to_read > 0 - data = self.wrapped.read(to_read) - if not data: - self.read_complete = True - else: - self.__cache.write(data) - self.__cache.seek(pos) - - data = self.__cache.read(size) - self.__pos = self.__cache.tell() - assert self.__pos == pos + len(data) - return data - - def readline(self, size=-1): - if not self.__have_readline: - raise NotImplementedError("no readline method on wrapped object") - - # line we're about to read might not be complete in the cache, so - # read another line first - pos = self.__pos - self.__cache.seek(0, 2) - data = self.wrapped.readline() - if not data: - self.read_complete = True - else: - self.__cache.write(data) - self.__cache.seek(pos) - - data = self.__cache.readline() - if size != -1: - r = data[:size] - self.__pos = pos+size - else: - r = data - self.__pos = pos+len(data) - return r - - def readlines(self, sizehint=-1): - pos = self.__pos - self.__cache.seek(0, 2) - self.__cache.write(self.wrapped.read()) - self.read_complete = True - self.__cache.seek(pos) - data = self.__cache.readlines(sizehint) - self.__pos = self.__cache.tell() - return data - - def __iter__(self): return self - def next(self): - line = self.readline() - if line == "": raise StopIteration - return line - - xreadlines = __iter__ - - def __repr__(self): - return ("<%s at %s whose wrapped object = %r>" % - (self.__class__.__name__, hex(abs(id(self))), self.wrapped)) - - -class response_seek_wrapper(seek_wrapper): - - """ - Supports copying response objects and setting response body data. - - """ - - def __init__(self, wrapped): - seek_wrapper.__init__(self, wrapped) - self._headers = self.wrapped.info() - - def __copy__(self): - cpy = seek_wrapper.__copy__(self) - # copy headers from delegate - cpy._headers = copy.copy(self.info()) - return cpy - - # Note that .info() and .geturl() (the only two urllib2 response methods - # that are not implemented by seek_wrapper) must be here explicitly rather - # than by seek_wrapper's __getattr__ delegation) so that the nasty - # dynamically-created HTTPError classes in get_seek_wrapper_class() get the - # wrapped object's implementation, and not HTTPError's. - - def info(self): - return self._headers - - def geturl(self): - return self.wrapped.geturl() - - def set_data(self, data): - self.seek(0) - self.read() - self.close() - cache = self._seek_wrapper__cache = StringIO() - cache.write(data) - self.seek(0) - - -class eoffile: - # file-like object that always claims to be at end-of-file... - def read(self, size=-1): return "" - def readline(self, size=-1): return "" - def __iter__(self): return self - def next(self): return "" - def close(self): pass - -class eofresponse(eoffile): - def __init__(self, url, headers, code, msg): - self._url = url - self._headers = headers - self.code = code - self.msg = msg - def geturl(self): return self._url - def info(self): return self._headers - - -class closeable_response: - """Avoids unnecessarily clobbering urllib.addinfourl methods on .close(). - - Only supports responses returned by mechanize.HTTPHandler. - - After .close(), the following methods are supported: - - .read() - .readline() - .info() - .geturl() - .__iter__() - .next() - .close() - - and the following attributes are supported: - - .code - .msg - - Also supports pickling (but the stdlib currently does something to prevent - it: http://python.org/sf/1144636). - - """ - # presence of this attr indicates is useable after .close() - closeable_response = None - - def __init__(self, fp, headers, url, code, msg): - self._set_fp(fp) - self._headers = headers - self._url = url - self.code = code - self.msg = msg - - def _set_fp(self, fp): - self.fp = fp - self.read = self.fp.read - self.readline = self.fp.readline - if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines - if hasattr(self.fp, "fileno"): - self.fileno = self.fp.fileno - else: - self.fileno = lambda: None - self.__iter__ = self.fp.__iter__ - self.next = self.fp.next - - def __repr__(self): - return '<%s at %s whose fp = %r>' % ( - self.__class__.__name__, hex(abs(id(self))), self.fp) - - def info(self): - return self._headers - - def geturl(self): - return self._url - - def close(self): - wrapped = self.fp - wrapped.close() - new_wrapped = eofresponse( - self._url, self._headers, self.code, self.msg) - self._set_fp(new_wrapped) - - def __getstate__(self): - # There are three obvious options here: - # 1. truncate - # 2. read to end - # 3. close socket, pickle state including read position, then open - # again on unpickle and use Range header - # XXXX um, 4. refuse to pickle unless .close()d. This is better, - # actually ("errors should never pass silently"). Pickling doesn't - # work anyway ATM, because of http://python.org/sf/1144636 so fix - # this later - - # 2 breaks pickle protocol, because one expects the original object - # to be left unscathed by pickling. 3 is too complicated and - # surprising (and too much work ;-) to happen in a sane __getstate__. - # So we do 1. - - state = self.__dict__.copy() - new_wrapped = eofresponse( - self._url, self._headers, self.code, self.msg) - state["wrapped"] = new_wrapped - return state - -def test_response(data='test data', headers=[], - url="http://example.com/", code=200, msg="OK"): - return make_response(data, headers, url, code, msg) - -def test_html_response(data='test data', headers=[], - url="http://example.com/", code=200, msg="OK"): - headers += [("Content-type", "text/html")] - return make_response(data, headers, url, code, msg) - -def make_response(data, headers, url, code, msg): - """Convenient factory for objects implementing response interface. - - data: string containing response body data - headers: sequence of (name, value) pairs - url: URL of response - code: integer response code (e.g. 200) - msg: string response code message (e.g. "OK") - - """ - mime_headers = make_headers(headers) - r = closeable_response(StringIO(data), mime_headers, url, code, msg) - return response_seek_wrapper(r) - - -def make_headers(headers): - """ - headers: sequence of (name, value) pairs - """ - hdr_text = [] - for name_value in headers: - hdr_text.append("%s: %s" % name_value) - return mimetools.Message(StringIO("\n".join(hdr_text))) - - -# Rest of this module is especially horrible, but needed, at least until fork -# urllib2. Even then, may want to preseve urllib2 compatibility. - -def get_seek_wrapper_class(response): - # in order to wrap response objects that are also exceptions, we must - # dynamically subclass the exception :-((( - if (isinstance(response, urllib2.HTTPError) and - not hasattr(response, "seek")): - if response.__class__.__module__ == "__builtin__": - exc_class_name = response.__class__.__name__ - else: - exc_class_name = "%s.%s" % ( - response.__class__.__module__, response.__class__.__name__) - - class httperror_seek_wrapper(response_seek_wrapper, response.__class__): - # this only derives from HTTPError in order to be a subclass -- - # the HTTPError behaviour comes from delegation - - _exc_class_name = exc_class_name - - def __init__(self, wrapped): - response_seek_wrapper.__init__(self, wrapped) - # be compatible with undocumented HTTPError attributes :-( - self.hdrs = wrapped.info() - self.filename = wrapped.geturl() - - def __repr__(self): - return ( - "<%s (%s instance) at %s " - "whose wrapped object = %r>" % ( - self.__class__.__name__, self._exc_class_name, - hex(abs(id(self))), self.wrapped) - ) - wrapper_class = httperror_seek_wrapper - else: - wrapper_class = response_seek_wrapper - return wrapper_class - -def seek_wrapped_response(response): - """Return a copy of response that supports seekable response interface. - - Accepts responses from both mechanize and urllib2 handlers. - - Copes with both oridinary response instances and HTTPError instances (which - can't be simply wrapped due to the requirement of preserving the exception - base class). - """ - if not hasattr(response, "seek"): - wrapper_class = get_seek_wrapper_class(response) - response = wrapper_class(response) - assert hasattr(response, "get_data") - return response - -def upgrade_response(response): - """Return a copy of response that supports Browser response interface. - - Browser response interface is that of "seekable responses" - (response_seek_wrapper), plus the requirement that responses must be - useable after .close() (closeable_response). - - Accepts responses from both mechanize and urllib2 handlers. - - Copes with both ordinary response instances and HTTPError instances (which - can't be simply wrapped due to the requirement of preserving the exception - base class). - """ - wrapper_class = get_seek_wrapper_class(response) - if hasattr(response, "closeable_response"): - if not hasattr(response, "seek"): - response = wrapper_class(response) - assert hasattr(response, "get_data") - return copy.copy(response) - - # a urllib2 handler constructed the response, i.e. the response is an - # urllib.addinfourl or a urllib2.HTTPError, instead of a - # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler - try: - code = response.code - except AttributeError: - code = None - try: - msg = response.msg - except AttributeError: - msg = None - - # may have already-.read() data from .seek() cache - data = None - get_data = getattr(response, "get_data", None) - if get_data: - data = get_data() - - response = closeable_response( - response.fp, response.info(), response.geturl(), code, msg) - response = wrapper_class(response) - if data: - response.set_data(data) - return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py deleted file mode 100644 index 1bb5021..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py +++ /dev/null @@ -1,241 +0,0 @@ -"""RFC 3986 URI parsing and relative reference resolution / absolutization. - -(aka splitting and joining) - -Copyright 2006 John J. Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM. - -import re, urllib - -## def chr_range(a, b): -## return "".join(map(chr, range(ord(a), ord(b)+1))) - -## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" -## "abcdefghijklmnopqrstuvwxyz" -## "0123456789" -## "-_.~") -## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]" -## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%' -# this re matches any character that's not in URI_CHARS -BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]") - - -def clean_url(url, encoding): - # percent-encode illegal URI characters - # Trying to come up with test cases for this gave me a headache, revisit - # when do switch to unicode. - # Somebody else's comments (lost the attribution): -## - IE will return you the url in the encoding you send it -## - Mozilla/Firefox will send you latin-1 if there's no non latin-1 -## characters in your link. It will send you utf-8 however if there are... - if type(url) == type(""): - url = url.decode(encoding, "replace") - url = url.strip() - # for second param to urllib.quote(), we want URI_CHARS, minus the - # 'always_safe' characters that urllib.quote() never percent-encodes - return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~") - -def is_clean_uri(uri): - """ - >>> is_clean_uri("ABC!") - True - >>> is_clean_uri(u"ABC!") - True - >>> is_clean_uri("ABC|") - False - >>> is_clean_uri(u"ABC|") - False - >>> is_clean_uri("http://example.com/0") - True - >>> is_clean_uri(u"http://example.com/0") - True - """ - # note module re treats bytestrings as through they were decoded as latin-1 - # so this function accepts both unicode and bytestrings - return not bool(BAD_URI_CHARS_RE.search(uri)) - - -SPLIT_MATCH = re.compile( - r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match -def urlsplit(absolute_uri): - """Return scheme, authority, path, query, fragment.""" - match = SPLIT_MATCH(absolute_uri) - if match: - g = match.groups() - return g[1], g[3], g[4], g[6], g[8] - -def urlunsplit(parts): - scheme, authority, path, query, fragment = parts - r = [] - append = r.append - if scheme is not None: - append(scheme) - append(":") - if authority is not None: - append("//") - append(authority) - append(path) - if query is not None: - append("?") - append(query) - if fragment is not None: - append("#") - append(fragment) - return "".join(r) - -def urljoin(base_uri, uri_reference): - return urlunsplit(urljoin_parts(urlsplit(base_uri), - urlsplit(uri_reference))) - -# oops, this doesn't do the same thing as the literal translation -# from the RFC below -## import posixpath -## def urljoin_parts(base_parts, reference_parts): -## scheme, authority, path, query, fragment = base_parts -## rscheme, rauthority, rpath, rquery, rfragment = reference_parts - -## # compute target URI path -## if rpath == "": -## tpath = path -## else: -## tpath = rpath -## if not tpath.startswith("/"): -## tpath = merge(authority, path, tpath) -## tpath = posixpath.normpath(tpath) - -## if rscheme is not None: -## return (rscheme, rauthority, tpath, rquery, rfragment) -## elif rauthority is not None: -## return (scheme, rauthority, tpath, rquery, rfragment) -## elif rpath == "": -## if rquery is not None: -## tquery = rquery -## else: -## tquery = query -## return (scheme, authority, tpath, tquery, rfragment) -## else: -## return (scheme, authority, tpath, rquery, rfragment) - -def urljoin_parts(base_parts, reference_parts): - scheme, authority, path, query, fragment = base_parts - rscheme, rauthority, rpath, rquery, rfragment = reference_parts - - if rscheme == scheme: - rscheme = None - - if rscheme is not None: - tscheme, tauthority, tpath, tquery = ( - rscheme, rauthority, remove_dot_segments(rpath), rquery) - else: - if rauthority is not None: - tauthority, tpath, tquery = ( - rauthority, remove_dot_segments(rpath), rquery) - else: - if rpath == "": - tpath = path - if rquery is not None: - tquery = rquery - else: - tquery = query - else: - if rpath.startswith("/"): - tpath = remove_dot_segments(rpath) - else: - tpath = merge(authority, path, rpath) - tpath = remove_dot_segments(tpath) - tquery = rquery - tauthority = authority - tscheme = scheme - tfragment = rfragment - return (tscheme, tauthority, tpath, tquery, tfragment) - -# um, something *vaguely* like this is what I want, but I have to generate -# lots of test cases first, if only to understand what it is that -# remove_dot_segments really does... -## def remove_dot_segments(path): -## if path == '': -## return '' -## comps = path.split('/') -## new_comps = [] -## for comp in comps: -## if comp in ['.', '']: -## if not new_comps or new_comps[-1]: -## new_comps.append('') -## continue -## if comp != '..': -## new_comps.append(comp) -## elif new_comps: -## new_comps.pop() -## return '/'.join(new_comps) - - -def remove_dot_segments(path): - r = [] - while path: - # A - if path.startswith("../"): - path = path[3:] - continue - if path.startswith("./"): - path = path[2:] - continue - # B - if path.startswith("/./"): - path = path[2:] - continue - if path == "/.": - path = "/" - continue - # C - if path.startswith("/../"): - path = path[3:] - if r: - r.pop() - continue - if path == "/..": - path = "/" - if r: - r.pop() - continue - # D - if path == ".": - path = path[1:] - continue - if path == "..": - path = path[2:] - continue - # E - start = 0 - if path.startswith("/"): - start = 1 - ii = path.find("/", start) - if ii < 0: - ii = None - r.append(path[:ii]) - if ii is None: - break - path = path[ii:] - return "".join(r) - -def merge(base_authority, base_path, ref_path): - # XXXX Oddly, the sample Perl implementation of this by Roy Fielding - # doesn't even take base_authority as a parameter, despite the wording in - # the RFC suggesting otherwise. Perhaps I'm missing some obvious identity. - #if base_authority is not None and base_path == "": - if base_path == "": - return "/" + ref_path - ii = base_path.rfind("/") - if ii >= 0: - return base_path[:ii+1] + ref_path - return ref_path - -if __name__ == "__main__": - import doctest - doctest.testmod() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py deleted file mode 100644 index 4086d52..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py +++ /dev/null @@ -1,16 +0,0 @@ -from urllib2 import BaseHandler -from _util import deprecation -from _response import response_seek_wrapper - - -class SeekableProcessor(BaseHandler): - """Deprecated: Make responses seekable.""" - - def __init__(self): - deprecation( - "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable") - - def any_response(self, request, response): - if not hasattr(response, "seek"): - return response_seek_wrapper(response) - return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py deleted file mode 100644 index c22b734..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py +++ /dev/null @@ -1,6 +0,0 @@ -import socket - -try: - _GLOBAL_DEFAULT_TIMEOUT = socket._GLOBAL_DEFAULT_TIMEOUT -except AttributeError: - _GLOBAL_DEFAULT_TIMEOUT = object() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py deleted file mode 100644 index a13cca3..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py +++ /dev/null @@ -1,73 +0,0 @@ -import shutil -import tempfile -import unittest - - -class SetupStack(object): - - def __init__(self): - self._on_teardown = [] - - def add_teardown(self, teardown): - self._on_teardown.append(teardown) - - def tear_down(self): - for func in reversed(self._on_teardown): - func() - - -class TearDownConvenience(object): - - def __init__(self, setup_stack=None): - self._own_setup_stack = setup_stack is None - if setup_stack is None: - setup_stack = SetupStack() - self._setup_stack = setup_stack - - # only call this convenience method if no setup_stack was supplied to c'tor - def tear_down(self): - assert self._own_setup_stack - self._setup_stack.tear_down() - - -class TempDirMaker(TearDownConvenience): - - def make_temp_dir(self): - temp_dir = tempfile.mkdtemp(prefix="tmp-%s-" % self.__class__.__name__) - def tear_down(): - shutil.rmtree(temp_dir) - self._setup_stack.add_teardown(tear_down) - return temp_dir - - -class MonkeyPatcher(TearDownConvenience): - - def monkey_patch(self, obj, name, value): - orig_value = getattr(obj, name) - setattr(obj, name, value) - def reverse_patch(): - setattr(obj, name, orig_value) - self._setup_stack.add_teardown(reverse_patch) - - -class TestCase(unittest.TestCase): - - def setUp(self): - self._setup_stack = SetupStack() - - def tearDown(self): - self._setup_stack.tear_down() - - def make_temp_dir(self, *args, **kwds): - return TempDirMaker(self._setup_stack).make_temp_dir(*args, **kwds) - - def monkey_patch(self, *args, **kwds): - return MonkeyPatcher(self._setup_stack).monkey_patch(*args, **kwds) - - def assert_contains(self, container, containee): - self.assertTrue(containee in container, "%r not in %r" % - (containee, container)) - - def assert_less_than(self, got, expected): - self.assertTrue(got < expected, "%r >= %r" % - (got, expected)) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py deleted file mode 100644 index df59c01..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib2 import BaseHandler - -from _request import Request -from _response import upgrade_response -from _util import deprecation - - -class HTTPRequestUpgradeProcessor(BaseHandler): - # upgrade urllib2.Request to this module's Request - # yuck! - handler_order = 0 # before anything else - - def http_request(self, request): - if not hasattr(request, "add_unredirected_header"): - newrequest = Request(request.get_full_url(), request.data, - request.headers) - try: newrequest.origin_req_host = request.origin_req_host - except AttributeError: pass - try: newrequest.unverifiable = request.unverifiable - except AttributeError: pass - try: newrequest.visit = request.visit - except AttributeError: pass - request = newrequest - return request - - https_request = http_request - - -class ResponseUpgradeProcessor(BaseHandler): - # upgrade responses to be .close()able without becoming unusable - handler_order = 0 # before anything else - - def __init__(self): - deprecation( - "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable") - - def any_response(self, request, response): - if not hasattr(response, 'closeable_response'): - response = upgrade_response(response) - return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py deleted file mode 100644 index cbb761b..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py +++ /dev/null @@ -1,55 +0,0 @@ -# urllib2 work-alike interface -# ...from urllib2... -from urllib2 import \ - URLError, \ - HTTPError, \ - BaseHandler, \ - UnknownHandler, \ - FTPHandler, \ - CacheFTPHandler -# ...and from mechanize -from _auth import \ - HTTPPasswordMgr, \ - HTTPPasswordMgrWithDefaultRealm, \ - AbstractBasicAuthHandler, \ - AbstractDigestAuthHandler, \ - HTTPProxyPasswordMgr, \ - ProxyHandler, \ - ProxyBasicAuthHandler, \ - ProxyDigestAuthHandler, \ - HTTPBasicAuthHandler, \ - HTTPDigestAuthHandler, \ - HTTPSClientCertMgr -from _debug import \ - HTTPResponseDebugProcessor, \ - HTTPRedirectDebugProcessor -from _file import \ - FileHandler -# crap ATM -## from _gzip import \ -## HTTPGzipProcessor -from _http import \ - HTTPHandler, \ - HTTPDefaultErrorHandler, \ - HTTPRedirectHandler, \ - HTTPEquivProcessor, \ - HTTPCookieProcessor, \ - HTTPRefererProcessor, \ - HTTPRefreshProcessor, \ - HTTPErrorProcessor, \ - HTTPRobotRulesProcessor, \ - RobotExclusionError -import httplib -if hasattr(httplib, 'HTTPS'): - from _http import HTTPSHandler -del httplib -from _opener import OpenerDirector, \ - SeekableResponseOpener, \ - build_opener, install_opener, urlopen -from _request import \ - Request -from _seek import \ - SeekableProcessor -from _upgrade import \ - HTTPRequestUpgradeProcessor, \ - ResponseUpgradeProcessor diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py deleted file mode 100644 index 723f87c..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py +++ /dev/null @@ -1,352 +0,0 @@ -"""Convenient HTTP UserAgent class. - -This is a subclass of urllib2.OpenerDirector. - - -Copyright 2003-2006 John J. Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -import warnings - -import _auth -import _gzip -import _opener -import _response -import _sockettimeout -import _urllib2 - - -class UserAgentBase(_opener.OpenerDirector): - """Convenient user-agent class. - - Do not use .add_handler() to add a handler for something already dealt with - by this code. - - The only reason at present for the distinction between UserAgent and - UserAgentBase is so that classes that depend on .seek()able responses - (e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass - UserAgent exposes a .set_seekable_responses() method that allows switching - off the adding of a .seek() method to responses. - - Public attributes: - - addheaders: list of (name, value) pairs specifying headers to send with - every request, unless they are overridden in the Request instance. - - >>> ua = UserAgentBase() - >>> ua.addheaders = [ - ... ("User-agent", "Mozilla/5.0 (compatible)"), - ... ("From", "responsible.person@example.com")] - - """ - - handler_classes = { - # scheme handlers - "http": _urllib2.HTTPHandler, - # CacheFTPHandler is buggy, at least in 2.3, so we don't use it - "ftp": _urllib2.FTPHandler, - "file": _urllib2.FileHandler, - - # other handlers - "_unknown": _urllib2.UnknownHandler, - # HTTP{S,}Handler depend on HTTPErrorProcessor too - "_http_error": _urllib2.HTTPErrorProcessor, - "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor, - "_http_default_error": _urllib2.HTTPDefaultErrorHandler, - - # feature handlers - "_basicauth": _urllib2.HTTPBasicAuthHandler, - "_digestauth": _urllib2.HTTPDigestAuthHandler, - "_redirect": _urllib2.HTTPRedirectHandler, - "_cookies": _urllib2.HTTPCookieProcessor, - "_refresh": _urllib2.HTTPRefreshProcessor, - "_equiv": _urllib2.HTTPEquivProcessor, - "_proxy": _urllib2.ProxyHandler, - "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler, - "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler, - "_robots": _urllib2.HTTPRobotRulesProcessor, - "_gzip": _gzip.HTTPGzipProcessor, # experimental! - - # debug handlers - "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor, - "_debug_response_body": _urllib2.HTTPResponseDebugProcessor, - } - - default_schemes = ["http", "ftp", "file"] - default_others = ["_unknown", "_http_error", "_http_request_upgrade", - "_http_default_error", - ] - default_features = ["_redirect", "_cookies", - "_refresh", "_equiv", - "_basicauth", "_digestauth", - "_proxy", "_proxy_basicauth", "_proxy_digestauth", - "_robots", - ] - if hasattr(_urllib2, 'HTTPSHandler'): - handler_classes["https"] = _urllib2.HTTPSHandler - default_schemes.append("https") - - def __init__(self): - _opener.OpenerDirector.__init__(self) - - ua_handlers = self._ua_handlers = {} - for scheme in (self.default_schemes+ - self.default_others+ - self.default_features): - klass = self.handler_classes[scheme] - ua_handlers[scheme] = klass() - for handler in ua_handlers.itervalues(): - self.add_handler(handler) - - # Yuck. - # Ensure correct default constructor args were passed to - # HTTPRefreshProcessor and HTTPEquivProcessor. - if "_refresh" in ua_handlers: - self.set_handle_refresh(True) - if "_equiv" in ua_handlers: - self.set_handle_equiv(True) - # Ensure default password managers are installed. - pm = ppm = None - if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers: - pm = _urllib2.HTTPPasswordMgrWithDefaultRealm() - if ("_proxy_basicauth" in ua_handlers or - "_proxy_digestauth" in ua_handlers): - ppm = _auth.HTTPProxyPasswordMgr() - self.set_password_manager(pm) - self.set_proxy_password_manager(ppm) - # set default certificate manager - if "https" in ua_handlers: - cm = _urllib2.HTTPSClientCertMgr() - self.set_client_cert_manager(cm) - - def close(self): - _opener.OpenerDirector.close(self) - self._ua_handlers = None - - # XXX -## def set_timeout(self, timeout): -## self._timeout = timeout -## def set_http_connection_cache(self, conn_cache): -## self._http_conn_cache = conn_cache -## def set_ftp_connection_cache(self, conn_cache): -## # XXX ATM, FTP has cache as part of handler; should it be separate? -## self._ftp_conn_cache = conn_cache - - def set_handled_schemes(self, schemes): - """Set sequence of URL scheme (protocol) strings. - - For example: ua.set_handled_schemes(["http", "ftp"]) - - If this fails (with ValueError) because you've passed an unknown - scheme, the set of handled schemes will not be changed. - - """ - want = {} - for scheme in schemes: - if scheme.startswith("_"): - raise ValueError("not a scheme '%s'" % scheme) - if scheme not in self.handler_classes: - raise ValueError("unknown scheme '%s'") - want[scheme] = None - - # get rid of scheme handlers we don't want - for scheme, oldhandler in self._ua_handlers.items(): - if scheme.startswith("_"): continue # not a scheme handler - if scheme not in want: - self._replace_handler(scheme, None) - else: - del want[scheme] # already got it - # add the scheme handlers that are missing - for scheme in want.keys(): - self._set_handler(scheme, True) - - def set_cookiejar(self, cookiejar): - """Set a mechanize.CookieJar, or None.""" - self._set_handler("_cookies", obj=cookiejar) - - # XXX could use Greg Stein's httpx for some of this instead? - # or httplib2?? - def set_proxies(self, proxies): - """Set a dictionary mapping URL scheme to proxy specification, or None. - - e.g. {"http": "joe:password@myproxy.example.com:3128", - "ftp": "proxy.example.com"} - - """ - self._set_handler("_proxy", obj=proxies) - - def add_password(self, url, user, password, realm=None): - self._password_manager.add_password(realm, url, user, password) - def add_proxy_password(self, user, password, hostport=None, realm=None): - self._proxy_password_manager.add_password( - realm, hostport, user, password) - - def add_client_certificate(self, url, key_file, cert_file): - """Add an SSL client certificate, for HTTPS client auth. - - key_file and cert_file must be filenames of the key and certificate - files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS - 12) file to PEM format: - - openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem - openssl pkcs12 -nocerts -in cert.p12 -out key.pem - - - Note that client certificate password input is very inflexible ATM. At - the moment this seems to be console only, which is presumably the - default behaviour of libopenssl. In future mechanize may support - third-party libraries that (I assume) allow more options here. - - """ - self._client_cert_manager.add_key_cert(url, key_file, cert_file) - - # the following are rarely useful -- use add_password / add_proxy_password - # instead - def set_password_manager(self, password_manager): - """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None.""" - self._password_manager = password_manager - self._set_handler("_basicauth", obj=password_manager) - self._set_handler("_digestauth", obj=password_manager) - def set_proxy_password_manager(self, password_manager): - """Set a mechanize.HTTPProxyPasswordMgr, or None.""" - self._proxy_password_manager = password_manager - self._set_handler("_proxy_basicauth", obj=password_manager) - self._set_handler("_proxy_digestauth", obj=password_manager) - def set_client_cert_manager(self, cert_manager): - """Set a mechanize.HTTPClientCertMgr, or None.""" - self._client_cert_manager = cert_manager - handler = self._ua_handlers["https"] - handler.client_cert_manager = cert_manager - - # these methods all take a boolean parameter - def set_handle_robots(self, handle): - """Set whether to observe rules from robots.txt.""" - self._set_handler("_robots", handle) - def set_handle_redirect(self, handle): - """Set whether to handle HTTP 30x redirections.""" - self._set_handler("_redirect", handle) - def set_handle_refresh(self, handle, max_time=None, honor_time=True): - """Set whether to handle HTTP Refresh headers.""" - self._set_handler("_refresh", handle, constructor_kwds= - {"max_time": max_time, "honor_time": honor_time}) - def set_handle_equiv(self, handle, head_parser_class=None): - """Set whether to treat HTML http-equiv headers like HTTP headers. - - Response objects may be .seek()able if this is set (currently returned - responses are, raised HTTPError exception responses are not). - - """ - if head_parser_class is not None: - constructor_kwds = {"head_parser_class": head_parser_class} - else: - constructor_kwds={} - self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds) - def set_handle_gzip(self, handle): - """Handle gzip transfer encoding. - - """ - if handle: - warnings.warn( - "gzip transfer encoding is experimental!", stacklevel=2) - self._set_handler("_gzip", handle) - def set_debug_redirects(self, handle): - """Log information about HTTP redirects (including refreshes). - - Logging is performed using module logging. The logger name is - "mechanize.http_redirects". To actually print some debug output, - eg: - - import sys, logging - logger = logging.getLogger("mechanize.http_redirects") - logger.addHandler(logging.StreamHandler(sys.stdout)) - logger.setLevel(logging.INFO) - - Other logger names relevant to this module: - - "mechanize.http_responses" - "mechanize.cookies" (or "cookielib" if running Python 2.4) - - To turn on everything: - - import sys, logging - logger = logging.getLogger("mechanize") - logger.addHandler(logging.StreamHandler(sys.stdout)) - logger.setLevel(logging.INFO) - - """ - self._set_handler("_debug_redirect", handle) - def set_debug_responses(self, handle): - """Log HTTP response bodies. - - See docstring for .set_debug_redirects() for details of logging. - - Response objects may be .seek()able if this is set (currently returned - responses are, raised HTTPError exception responses are not). - - """ - self._set_handler("_debug_response_body", handle) - def set_debug_http(self, handle): - """Print HTTP headers to sys.stdout.""" - level = int(bool(handle)) - for scheme in "http", "https": - h = self._ua_handlers.get(scheme) - if h is not None: - h.set_http_debuglevel(level) - - def _set_handler(self, name, handle=None, obj=None, - constructor_args=(), constructor_kwds={}): - if handle is None: - handle = obj is not None - if handle: - handler_class = self.handler_classes[name] - if obj is not None: - newhandler = handler_class(obj) - else: - newhandler = handler_class( - *constructor_args, **constructor_kwds) - else: - newhandler = None - self._replace_handler(name, newhandler) - - def _replace_handler(self, name, newhandler=None): - # first, if handler was previously added, remove it - if name is not None: - handler = self._ua_handlers.get(name) - if handler: - try: - self.handlers.remove(handler) - except ValueError: - pass - # then add the replacement, if any - if newhandler is not None: - self.add_handler(newhandler) - self._ua_handlers[name] = newhandler - - -class UserAgent(UserAgentBase): - - def __init__(self): - UserAgentBase.__init__(self) - self._seekable = False - - def set_seekable_responses(self, handle): - """Make response objects .seek()able.""" - self._seekable = bool(handle) - - def open(self, fullurl, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - if self._seekable: - def bound_open(fullurl, data=None, - timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): - return UserAgentBase.open(self, fullurl, data, timeout) - response = _opener.wrapped_open( - bound_open, _response.seek_wrapped_response, fullurl, data, - timeout) - else: - response = UserAgentBase.open(self, fullurl, data) - return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py deleted file mode 100644 index dcdefa9..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py +++ /dev/null @@ -1,291 +0,0 @@ -"""Utility functions and date/time routines. - - Copyright 2002-2006 John J Lee <jjl@pobox.com> - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import re, time, warnings - - -class ExperimentalWarning(UserWarning): - pass - -def experimental(message): - warnings.warn(message, ExperimentalWarning, stacklevel=3) -def hide_experimental_warnings(): - warnings.filterwarnings("ignore", category=ExperimentalWarning) -def reset_experimental_warnings(): - warnings.filterwarnings("default", category=ExperimentalWarning) - -def deprecation(message): - warnings.warn(message, DeprecationWarning, stacklevel=3) -def hide_deprecations(): - warnings.filterwarnings("ignore", category=DeprecationWarning) -def reset_deprecations(): - warnings.filterwarnings("default", category=DeprecationWarning) - - -def isstringlike(x): - try: x+"" - except: return False - else: return True - -## def caller(): -## try: -## raise SyntaxError -## except: -## import sys -## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name - - -from calendar import timegm - -# Date/time conversion routines for formats used by the HTTP protocol. - -EPOCH = 1970 -def my_timegm(tt): - year, month, mday, hour, min, sec = tt[:6] - if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and - (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): - return timegm(tt) - else: - return None - -days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -months_lower = [] -for month in months: months_lower.append(month.lower()) - - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec = time.gmtime(t)[:6] - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - year, mon, mday, hour, min, sec) - -def time2netscape(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like this: - - Wed, DD-Mon-YYYY HH:MM:SS GMT - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] - return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( - days[wday], mday, months[mon-1], year, hour, min, sec) - - -UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} - -timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") -def offset_from_tz_string(tz): - offset = None - if UTC_ZONES.has_key(tz): - offset = 0 - else: - m = timezone_re.search(tz) - if m: - offset = 3600 * int(m.group(2)) - if m.group(3): - offset = offset + 60 * int(m.group(3)) - if m.group(1) == '-': - offset = -offset - return offset - -def _str2time(day, mon, yr, hr, min, sec, tz): - # translate month name to number - # month numbers start with 1 (January) - try: - mon = months_lower.index(mon.lower())+1 - except ValueError: - # maybe it's already a number - try: - imon = int(mon) - except ValueError: - return None - if 1 <= imon <= 12: - mon = imon - else: - return None - - # make sure clock elements are defined - if hr is None: hr = 0 - if min is None: min = 0 - if sec is None: sec = 0 - - yr = int(yr) - day = int(day) - hr = int(hr) - min = int(min) - sec = int(sec) - - if yr < 1000: - # find "obvious" year - cur_yr = time.localtime(time.time())[0] - m = cur_yr % 100 - tmp = yr - yr = yr + cur_yr - m - m = m - tmp - if abs(m) > 50: - if m > 0: yr = yr + 100 - else: yr = yr - 100 - - # convert UTC time tuple to seconds since epoch (not timezone-adjusted) - t = my_timegm((yr, mon, day, hr, min, sec, tz)) - - if t is not None: - # adjust time using timezone string, to get absolute time since epoch - if tz is None: - tz = "UTC" - tz = tz.upper() - offset = offset_from_tz_string(tz) - if offset is None: - return None - t = t - offset - - return t - - -strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") -wkday_re = re.compile( - r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) -loose_http_re = re.compile( - r"""^ - (\d\d?) # day - (?:\s+|[-\/]) - (\w+) # month - (?:\s+|[-\/]) - (\d+) # year - (?: - (?:\s+|:) # separator before clock - (\d\d?):(\d\d) # hour:min - (?::(\d\d))? # optional seconds - )? # optional clock - \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone - \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X) -def http2time(text): - """Returns time in seconds since epoch of time represented by a string. - - Return value is an integer. - - None is returned if the format of str is unrecognized, the time is outside - the representable range, or the timezone string is not recognized. If the - string contains no timezone, UTC is assumed. - - The timezone in the string may be numerical (like "-0800" or "+0100") or a - string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the - timezone strings equivalent to UTC (zero offset) are known to the function. - - The function loosely parses the following formats: - - Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format - Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format - Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format - 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) - 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) - 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) - - The parser ignores leading and trailing whitespace. The time may be - absent. - - If the year is given with only 2 digits, the function will select the - century that makes the year closest to the current date. - - """ - # fast exit for strictly conforming string - m = strict_re.search(text) - if m: - g = m.groups() - mon = months_lower.index(g[1].lower()) + 1 - tt = (int(g[2]), mon, int(g[0]), - int(g[3]), int(g[4]), float(g[5])) - return my_timegm(tt) - - # No, we need some messy parsing... - - # clean up - text = text.lstrip() - text = wkday_re.sub("", text, 1) # Useless weekday - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = loose_http_re.search(text) - if m is not None: - day, mon, yr, hr, min, sec, tz = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - - -iso_re = re.compile( - """^ - (\d{4}) # year - [-\/]? - (\d\d?) # numerical month - [-\/]? - (\d\d?) # day - (?: - (?:\s+|[-:Tt]) # separator before clock - (\d\d?):?(\d\d) # hour:min - (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) - )? # optional clock - \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X) -def iso2time(text): - """ - As for http2time, but parses the ISO 8601 formats: - - 1994-02-03 14:15:29 -0100 -- ISO 8601 format - 1994-02-03 14:15:29 -- zone is optional - 1994-02-03 -- only date - 1994-02-03T14:15:29 -- Use T as separator - 19940203T141529Z -- ISO 8601 compact format - 19940203 -- only date - - """ - # clean up - text = text.lstrip() - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = iso_re.search(text) - if m is not None: - # XXX there's an extra bit of the timezone I'm ignoring here: is - # this the right thing to do? - yr, mon, day, hr, min, sec, tz, _ = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py deleted file mode 100755 index c319370..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py +++ /dev/null @@ -1,1254 +0,0 @@ -#!/usr/bin/python -# pep8.py - Check Python source code formatting, according to PEP 8 -# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net> -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -""" -Check Python source code formatting, according to PEP 8: -http://www.python.org/dev/peps/pep-0008/ - -For usage and a list of options, try this: -$ python pep8.py -h - -This program and its regression test suite live here: -http://github.com/jcrocholl/pep8 - -Groups of errors and warnings: -E errors -W warnings -100 indentation -200 whitespace -300 blank lines -400 imports -500 line length -600 deprecation -700 statements - -You can add checks to this program by writing plugins. Each plugin is -a simple function that is called for each line of source code, either -physical or logical. - -Physical line: -- Raw line of text from the input file. - -Logical line: -- Multi-line statements converted to a single line. -- Stripped left and right. -- Contents of strings replaced with 'xxx' of same length. -- Comments removed. - -The check function requests physical or logical lines by the name of -the first argument: - -def maximum_line_length(physical_line) -def extraneous_whitespace(logical_line) -def blank_lines(logical_line, blank_lines, indent_level, line_number) - -The last example above demonstrates how check plugins can request -additional information with extra arguments. All attributes of the -Checker object are available. Some examples: - -lines: a list of the raw lines from the input file -tokens: the tokens that contribute to this logical line -line_number: line number in the input file -blank_lines: blank lines before this one -indent_char: first indentation character in this file (' ' or '\t') -indent_level: indentation (with tabs expanded to multiples of 8) -previous_indent_level: indentation on previous line -previous_logical: previous logical line - -The docstring of each check function shall be the relevant part of -text from PEP 8. It is printed if the user enables --show-pep8. -Several docstrings contain examples directly from the PEP 8 document. - -Okay: spam(ham[1], {eggs: 2}) -E201: spam( ham[1], {eggs: 2}) - -These examples are verified automatically when pep8.py is run with the ---doctest option. You can add examples for your own check functions. -The format is simple: "Okay" or error/warning code followed by colon -and space, the rest of the line is example source code. If you put 'r' -before the docstring, you can use \n for newline, \t for tab and \s -for space. - -""" - -__version__ = '0.5.0' - -import os -import sys -import re -import time -import inspect -import tokenize -from optparse import OptionParser -from keyword import iskeyword -from fnmatch import fnmatch - -DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git' -DEFAULT_IGNORE = ['E24'] - -INDENT_REGEX = re.compile(r'([ \t]*)') -RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)') -SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)') -ERRORCODE_REGEX = re.compile(r'[EW]\d{3}') -E301NOT_REGEX = re.compile(r'class |def |u?r?["\']') - -WHITESPACE = ' \t' - -BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>', - '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=', - '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<'] -UNARY_OPERATORS = ['**', '*', '+', '-'] -OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS - -options = None -args = None - - -############################################################################## -# Plugins (check functions) for physical lines -############################################################################## - - -def tabs_or_spaces(physical_line, indent_char): - r""" - Never mix tabs and spaces. - - The most popular way of indenting Python is with spaces only. The - second-most popular way is with tabs only. Code indented with a mixture - of tabs and spaces should be converted to using spaces exclusively. When - invoking the Python command line interpreter with the -t option, it issues - warnings about code that illegally mixes tabs and spaces. When using -tt - these warnings become errors. These options are highly recommended! - - Okay: if a == 0:\n a = 1\n b = 1 - E101: if a == 0:\n a = 1\n\tb = 1 - """ - indent = INDENT_REGEX.match(physical_line).group(1) - for offset, char in enumerate(indent): - if char != indent_char: - return offset, "E101 indentation contains mixed spaces and tabs" - - -def tabs_obsolete(physical_line): - r""" - For new projects, spaces-only are strongly recommended over tabs. Most - editors have features that make this easy to do. - - Okay: if True:\n return - W191: if True:\n\treturn - """ - indent = INDENT_REGEX.match(physical_line).group(1) - if indent.count('\t'): - return indent.index('\t'), "W191 indentation contains tabs" - - -def trailing_whitespace(physical_line): - """ - JCR: Trailing whitespace is superfluous. - - Okay: spam(1) - W291: spam(1)\s - """ - physical_line = physical_line.rstrip('\n') # chr(10), newline - physical_line = physical_line.rstrip('\r') # chr(13), carriage return - physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L - stripped = physical_line.rstrip() - if physical_line != stripped: - return len(stripped), "W291 trailing whitespace" - - -def trailing_blank_lines(physical_line, lines, line_number): - r""" - JCR: Trailing blank lines are superfluous. - - Okay: spam(1) - W391: spam(1)\n - """ - if physical_line.strip() == '' and line_number == len(lines): - return 0, "W391 blank line at end of file" - - -def missing_newline(physical_line): - """ - JCR: The last line should have a newline. - """ - if physical_line.rstrip() == physical_line: - return len(physical_line), "W292 no newline at end of file" - - -def maximum_line_length(physical_line): - """ - Limit all lines to a maximum of 79 characters. - - There are still many devices around that are limited to 80 character - lines; plus, limiting windows to 80 characters makes it possible to have - several windows side-by-side. The default wrapping on such devices looks - ugly. Therefore, please limit all lines to a maximum of 79 characters. - For flowing long blocks of text (docstrings or comments), limiting the - length to 72 characters is recommended. - """ - length = len(physical_line.rstrip()) - if length > 79: - return 79, "E501 line too long (%d characters)" % length - - -############################################################################## -# Plugins (check functions) for logical lines -############################################################################## - - -def blank_lines(logical_line, blank_lines, indent_level, line_number, - previous_logical, blank_lines_before_comment): - r""" - Separate top-level function and class definitions with two blank lines. - - Method definitions inside a class are separated by a single blank line. - - Extra blank lines may be used (sparingly) to separate groups of related - functions. Blank lines may be omitted between a bunch of related - one-liners (e.g. a set of dummy implementations). - - Use blank lines in functions, sparingly, to indicate logical sections. - - Okay: def a():\n pass\n\n\ndef b():\n pass - Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass - - E301: class Foo:\n b = 0\n def bar():\n pass - E302: def a():\n pass\n\ndef b(n):\n pass - E303: def a():\n pass\n\n\n\ndef b(n):\n pass - E303: def a():\n\n\n\n pass - E304: @decorator\n\ndef a():\n pass - """ - if line_number == 1: - return # Don't expect blank lines before the first line - max_blank_lines = max(blank_lines, blank_lines_before_comment) - if previous_logical.startswith('@'): - if max_blank_lines: - return 0, "E304 blank lines found after function decorator" - elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2): - return 0, "E303 too many blank lines (%d)" % max_blank_lines - elif (logical_line.startswith('def ') or - logical_line.startswith('class ') or - logical_line.startswith('@')): - if indent_level: - if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)): - return 0, "E301 expected 1 blank line, found 0" - elif max_blank_lines != 2: - return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines - - -def extraneous_whitespace(logical_line): - """ - Avoid extraneous whitespace in the following situations: - - - Immediately inside parentheses, brackets or braces. - - - Immediately before a comma, semicolon, or colon. - - Okay: spam(ham[1], {eggs: 2}) - E201: spam( ham[1], {eggs: 2}) - E201: spam(ham[ 1], {eggs: 2}) - E201: spam(ham[1], { eggs: 2}) - E202: spam(ham[1], {eggs: 2} ) - E202: spam(ham[1 ], {eggs: 2}) - E202: spam(ham[1], {eggs: 2 }) - - E203: if x == 4: print x, y; x, y = y , x - E203: if x == 4: print x, y ; x, y = y, x - E203: if x == 4 : print x, y; x, y = y, x - """ - line = logical_line - for char in '([{': - found = line.find(char + ' ') - if found > -1: - return found + 1, "E201 whitespace after '%s'" % char - for char in '}])': - found = line.find(' ' + char) - if found > -1 and line[found - 1] != ',': - return found, "E202 whitespace before '%s'" % char - for char in ',;:': - found = line.find(' ' + char) - if found > -1: - return found, "E203 whitespace before '%s'" % char - - -def missing_whitespace(logical_line): - """ - JCR: Each comma, semicolon or colon should be followed by whitespace. - - Okay: [a, b] - Okay: (3,) - Okay: a[1:4] - Okay: a[:4] - Okay: a[1:] - Okay: a[1:4:2] - E231: ['a','b'] - E231: foo(bar,baz) - """ - line = logical_line - for index in range(len(line) - 1): - char = line[index] - if char in ',;:' and line[index + 1] not in WHITESPACE: - before = line[:index] - if char == ':' and before.count('[') > before.count(']'): - continue # Slice syntax, no space required - if char == ',' and line[index + 1] == ')': - continue # Allow tuple with only one element: (3,) - return index, "E231 missing whitespace after '%s'" % char - - -def indentation(logical_line, previous_logical, indent_char, - indent_level, previous_indent_level): - r""" - Use 4 spaces per indentation level. - - For really old code that you don't want to mess up, you can continue to - use 8-space tabs. - - Okay: a = 1 - Okay: if a == 0:\n a = 1 - E111: a = 1 - - Okay: for item in items:\n pass - E112: for item in items:\npass - - Okay: a = 1\nb = 2 - E113: a = 1\n b = 2 - """ - if indent_char == ' ' and indent_level % 4: - return 0, "E111 indentation is not a multiple of four" - indent_expect = previous_logical.endswith(':') - if indent_expect and indent_level <= previous_indent_level: - return 0, "E112 expected an indented block" - if indent_level > previous_indent_level and not indent_expect: - return 0, "E113 unexpected indentation" - - -def whitespace_before_parameters(logical_line, tokens): - """ - Avoid extraneous whitespace in the following situations: - - - Immediately before the open parenthesis that starts the argument - list of a function call. - - - Immediately before the open parenthesis that starts an indexing or - slicing. - - Okay: spam(1) - E211: spam (1) - - Okay: dict['key'] = list[index] - E211: dict ['key'] = list[index] - E211: dict['key'] = list [index] - """ - prev_type = tokens[0][0] - prev_text = tokens[0][1] - prev_end = tokens[0][3] - for index in range(1, len(tokens)): - token_type, text, start, end, line = tokens[index] - if (token_type == tokenize.OP and - text in '([' and - start != prev_end and - prev_type == tokenize.NAME and - (index < 2 or tokens[index - 2][1] != 'class') and - (not iskeyword(prev_text))): - return prev_end, "E211 whitespace before '%s'" % text - prev_type = token_type - prev_text = text - prev_end = end - - -def whitespace_around_operator(logical_line): - """ - Avoid extraneous whitespace in the following situations: - - - More than one space around an assignment (or other) operator to - align it with another. - - Okay: a = 12 + 3 - E221: a = 4 + 5 - E222: a = 4 + 5 - E223: a = 4\t+ 5 - E224: a = 4 +\t5 - """ - line = logical_line - for operator in OPERATORS: - found = line.find(' ' + operator) - if found > -1: - return found, "E221 multiple spaces before operator" - found = line.find(operator + ' ') - if found > -1: - return found, "E222 multiple spaces after operator" - found = line.find('\t' + operator) - if found > -1: - return found, "E223 tab before operator" - found = line.find(operator + '\t') - if found > -1: - return found, "E224 tab after operator" - - -def missing_whitespace_around_operator(logical_line, tokens): - r""" - - Always surround these binary operators with a single space on - either side: assignment (=), augmented assignment (+=, -= etc.), - comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), - Booleans (and, or, not). - - - Use spaces around arithmetic operators. - - Okay: i = i + 1 - Okay: submitted += 1 - Okay: x = x * 2 - 1 - Okay: hypot2 = x * x + y * y - Okay: c = (a + b) * (a - b) - Okay: foo(bar, key='word', *args, **kwargs) - Okay: baz(**kwargs) - Okay: negative = -1 - Okay: spam(-1) - Okay: alpha[:-i] - Okay: if not -5 < x < +5:\n pass - Okay: lambda *args, **kw: (args, kw) - - E225: i=i+1 - E225: submitted +=1 - E225: x = x*2 - 1 - E225: hypot2 = x*x + y*y - E225: c = (a+b) * (a-b) - E225: c = alpha -4 - E225: z = x **y - """ - parens = 0 - need_space = False - prev_type = tokenize.OP - prev_text = prev_end = None - for token_type, text, start, end, line in tokens: - if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): - # ERRORTOKEN is triggered by backticks in Python 3000 - continue - if text in ('(', 'lambda'): - parens += 1 - elif text == ')': - parens -= 1 - if need_space: - if start == prev_end: - return prev_end, "E225 missing whitespace around operator" - need_space = False - elif token_type == tokenize.OP: - if text == '=' and parens: - # Allow keyword args or defaults: foo(bar=None). - pass - elif text in BINARY_OPERATORS: - need_space = True - elif text in UNARY_OPERATORS: - if ((prev_type != tokenize.OP or prev_text in '}])') and not - (prev_type == tokenize.NAME and iskeyword(prev_text))): - # Allow unary operators: -123, -x, +1. - # Allow argument unpacking: foo(*args, **kwargs). - need_space = True - if need_space and start == prev_end: - return prev_end, "E225 missing whitespace around operator" - prev_type = token_type - prev_text = text - prev_end = end - - -def whitespace_around_comma(logical_line): - """ - Avoid extraneous whitespace in the following situations: - - - More than one space around an assignment (or other) operator to - align it with another. - - JCR: This should also be applied around comma etc. - Note: these checks are disabled by default - - Okay: a = (1, 2) - E241: a = (1, 2) - E242: a = (1,\t2) - """ - line = logical_line - for separator in ',;:': - found = line.find(separator + ' ') - if found > -1: - return found + 1, "E241 multiple spaces after '%s'" % separator - found = line.find(separator + '\t') - if found > -1: - return found + 1, "E242 tab after '%s'" % separator - - -def whitespace_around_named_parameter_equals(logical_line): - """ - Don't use spaces around the '=' sign when used to indicate a - keyword argument or a default parameter value. - - Okay: def complex(real, imag=0.0): - Okay: return magic(r=real, i=imag) - Okay: boolean(a == b) - Okay: boolean(a != b) - Okay: boolean(a <= b) - Okay: boolean(a >= b) - - E251: def complex(real, imag = 0.0): - E251: return magic(r = real, i = imag) - """ - parens = 0 - window = ' ' - equal_ok = ['==', '!=', '<=', '>='] - - for pos, c in enumerate(logical_line): - window = window[1:] + c - if parens: - if window[0] in WHITESPACE and window[1] == '=': - if window[1:] not in equal_ok: - issue = "E251 no spaces around keyword / parameter equals" - return pos, issue - if window[2] in WHITESPACE and window[1] == '=': - if window[:2] not in equal_ok: - issue = "E251 no spaces around keyword / parameter equals" - return pos, issue - if c == '(': - parens += 1 - elif c == ')': - parens -= 1 - - -def whitespace_before_inline_comment(logical_line, tokens): - """ - Separate inline comments by at least two spaces. - - An inline comment is a comment on the same line as a statement. Inline - comments should be separated by at least two spaces from the statement. - They should start with a # and a single space. - - Okay: x = x + 1 # Increment x - Okay: x = x + 1 # Increment x - E261: x = x + 1 # Increment x - E262: x = x + 1 #Increment x - E262: x = x + 1 # Increment x - """ - prev_end = (0, 0) - for token_type, text, start, end, line in tokens: - if token_type == tokenize.NL: - continue - if token_type == tokenize.COMMENT: - if not line[:start[1]].strip(): - continue - if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: - return (prev_end, - "E261 at least two spaces before inline comment") - if (len(text) > 1 and text.startswith('# ') - or not text.startswith('# ')): - return start, "E262 inline comment should start with '# '" - else: - prev_end = end - - -def imports_on_separate_lines(logical_line): - r""" - Imports should usually be on separate lines. - - Okay: import os\nimport sys - E401: import sys, os - - Okay: from subprocess import Popen, PIPE - Okay: from myclas import MyClass - Okay: from foo.bar.yourclass import YourClass - Okay: import myclass - Okay: import foo.bar.yourclass - """ - line = logical_line - if line.startswith('import '): - found = line.find(',') - if found > -1: - return found, "E401 multiple imports on one line" - - -def compound_statements(logical_line): - r""" - Compound statements (multiple statements on the same line) are - generally discouraged. - - While sometimes it's okay to put an if/for/while with a small body - on the same line, never do this for multi-clause statements. Also - avoid folding such long lines! - - Okay: if foo == 'blah':\n do_blah_thing() - Okay: do_one() - Okay: do_two() - Okay: do_three() - - E701: if foo == 'blah': do_blah_thing() - E701: for x in lst: total += x - E701: while t < 10: t = delay() - E701: if foo == 'blah': do_blah_thing() - E701: else: do_non_blah_thing() - E701: try: something() - E701: finally: cleanup() - E701: if foo == 'blah': one(); two(); three() - - E702: do_one(); do_two(); do_three() - """ - line = logical_line - found = line.find(':') - if -1 < found < len(line) - 1: - before = line[:found] - if (before.count('{') <= before.count('}') and # {'a': 1} (dict) - before.count('[') <= before.count(']') and # [1:2] (slice) - not re.search(r'\blambda\b', before)): # lambda x: x - return found, "E701 multiple statements on one line (colon)" - found = line.find(';') - if -1 < found: - return found, "E702 multiple statements on one line (semicolon)" - - -def python_3000_has_key(logical_line): - """ - The {}.has_key() method will be removed in the future version of - Python. Use the 'in' operation instead, like: - d = {"a": 1, "b": 2} - if "b" in d: - print d["b"] - """ - pos = logical_line.find('.has_key(') - if pos > -1: - return pos, "W601 .has_key() is deprecated, use 'in'" - - -def python_3000_raise_comma(logical_line): - """ - When raising an exception, use "raise ValueError('message')" - instead of the older form "raise ValueError, 'message'". - - The paren-using form is preferred because when the exception arguments - are long or include string formatting, you don't need to use line - continuation characters thanks to the containing parentheses. The older - form will be removed in Python 3000. - """ - match = RAISE_COMMA_REGEX.match(logical_line) - if match: - return match.start(1), "W602 deprecated form of raising exception" - - -def python_3000_not_equal(logical_line): - """ - != can also be written <>, but this is an obsolete usage kept for - backwards compatibility only. New code should always use !=. - The older syntax is removed in Python 3000. - """ - pos = logical_line.find('<>') - if pos > -1: - return pos, "W603 '<>' is deprecated, use '!='" - - -def python_3000_backticks(logical_line): - """ - Backticks are removed in Python 3000. - Use repr() instead. - """ - pos = logical_line.find('`') - if pos > -1: - return pos, "W604 backticks are deprecated, use 'repr()'" - - -############################################################################## -# Helper functions -############################################################################## - - -def expand_indent(line): - """ - Return the amount of indentation. - Tabs are expanded to the next multiple of 8. - - >>> expand_indent(' ') - 4 - >>> expand_indent('\\t') - 8 - >>> expand_indent(' \\t') - 8 - >>> expand_indent(' \\t') - 8 - >>> expand_indent(' \\t') - 16 - """ - result = 0 - for char in line: - if char == '\t': - result = result // 8 * 8 + 8 - elif char == ' ': - result += 1 - else: - break - return result - - -def mute_string(text): - """ - Replace contents with 'xxx' to prevent syntax matching. - - >>> mute_string('"abc"') - '"xxx"' - >>> mute_string("'''abc'''") - "'''xxx'''" - >>> mute_string("r'abc'") - "r'xxx'" - """ - start = 1 - end = len(text) - 1 - # String modifiers (e.g. u or r) - if text.endswith('"'): - start += text.index('"') - elif text.endswith("'"): - start += text.index("'") - # Triple quotes - if text.endswith('"""') or text.endswith("'''"): - start += 2 - end -= 2 - return text[:start] + 'x' * (end - start) + text[end:] - - -def message(text): - """Print a message.""" - # print >> sys.stderr, options.prog + ': ' + text - # print >> sys.stderr, text - print(text) - - -############################################################################## -# Framework to run all checks -############################################################################## - - -def find_checks(argument_name): - """ - Find all globally visible functions where the first argument name - starts with argument_name. - """ - checks = [] - for name, function in globals().items(): - if not inspect.isfunction(function): - continue - args = inspect.getargspec(function)[0] - if args and args[0].startswith(argument_name): - codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '') - for code in codes or ['']: - if not code or not ignore_code(code): - checks.append((name, function, args)) - break - checks.sort() - return checks - - -class Checker(object): - """ - Load a Python source file, tokenize it, check coding style. - """ - - def __init__(self, filename): - if filename: - self.filename = filename - try: - self.lines = open(filename).readlines() - except UnicodeDecodeError: - # Errors may occur with non-UTF8 files in Python 3000 - self.lines = open(filename, errors='replace').readlines() - else: - self.filename = 'stdin' - self.lines = [] - options.counters['physical lines'] = \ - options.counters.get('physical lines', 0) + len(self.lines) - - def readline(self): - """ - Get the next line from the input buffer. - """ - self.line_number += 1 - if self.line_number > len(self.lines): - return '' - return self.lines[self.line_number - 1] - - def readline_check_physical(self): - """ - Check and return the next physical line. This method can be - used to feed tokenize.generate_tokens. - """ - line = self.readline() - if line: - self.check_physical(line) - return line - - def run_check(self, check, argument_names): - """ - Run a check plugin. - """ - arguments = [] - for name in argument_names: - arguments.append(getattr(self, name)) - return check(*arguments) - - def check_physical(self, line): - """ - Run all physical checks on a raw input line. - """ - self.physical_line = line - if self.indent_char is None and len(line) and line[0] in ' \t': - self.indent_char = line[0] - for name, check, argument_names in options.physical_checks: - result = self.run_check(check, argument_names) - if result is not None: - offset, text = result - self.report_error(self.line_number, offset, text, check) - - def build_tokens_line(self): - """ - Build a logical line from tokens. - """ - self.mapping = [] - logical = [] - length = 0 - previous = None - for token in self.tokens: - token_type, text = token[0:2] - if token_type in (tokenize.COMMENT, tokenize.NL, - tokenize.INDENT, tokenize.DEDENT, - tokenize.NEWLINE): - continue - if token_type == tokenize.STRING: - text = mute_string(text) - if previous: - end_line, end = previous[3] - start_line, start = token[2] - if end_line != start_line: # different row - if self.lines[end_line - 1][end - 1] not in '{[(': - logical.append(' ') - length += 1 - elif end != start: # different column - fill = self.lines[end_line - 1][end:start] - logical.append(fill) - length += len(fill) - self.mapping.append((length, token)) - logical.append(text) - length += len(text) - previous = token - self.logical_line = ''.join(logical) - assert self.logical_line.lstrip() == self.logical_line - assert self.logical_line.rstrip() == self.logical_line - - def check_logical(self): - """ - Build a line from tokens and run all logical checks on it. - """ - options.counters['logical lines'] = \ - options.counters.get('logical lines', 0) + 1 - self.build_tokens_line() - first_line = self.lines[self.mapping[0][1][2][0] - 1] - indent = first_line[:self.mapping[0][1][2][1]] - self.previous_indent_level = self.indent_level - self.indent_level = expand_indent(indent) - if options.verbose >= 2: - print(self.logical_line[:80].rstrip()) - for name, check, argument_names in options.logical_checks: - if options.verbose >= 3: - print(' ', name) - result = self.run_check(check, argument_names) - if result is not None: - offset, text = result - if isinstance(offset, tuple): - original_number, original_offset = offset - else: - for token_offset, token in self.mapping: - if offset >= token_offset: - original_number = token[2][0] - original_offset = (token[2][1] - + offset - token_offset) - self.report_error(original_number, original_offset, - text, check) - self.previous_logical = self.logical_line - - def check_all(self): - """ - Run all checks on the input file. - """ - self.file_errors = 0 - self.line_number = 0 - self.indent_char = None - self.indent_level = 0 - self.previous_logical = '' - self.blank_lines = 0 - self.blank_lines_before_comment = 0 - self.tokens = [] - parens = 0 - for token in tokenize.generate_tokens(self.readline_check_physical): - # print(tokenize.tok_name[token[0]], repr(token)) - self.tokens.append(token) - token_type, text = token[0:2] - if token_type == tokenize.OP and text in '([{': - parens += 1 - if token_type == tokenize.OP and text in '}])': - parens -= 1 - if token_type == tokenize.NEWLINE and not parens: - self.check_logical() - self.blank_lines = 0 - self.blank_lines_before_comment = 0 - self.tokens = [] - if token_type == tokenize.NL and not parens: - if len(self.tokens) <= 1: - # The physical line contains only this token. - self.blank_lines += 1 - self.tokens = [] - if token_type == tokenize.COMMENT: - source_line = token[4] - token_start = token[2][1] - if source_line[:token_start].strip() == '': - self.blank_lines_before_comment = max(self.blank_lines, - self.blank_lines_before_comment) - self.blank_lines = 0 - if text.endswith('\n') and not parens: - # The comment also ends a physical line. This works around - # Python < 2.6 behaviour, which does not generate NL after - # a comment which is on a line by itself. - self.tokens = [] - return self.file_errors - - def report_error(self, line_number, offset, text, check): - """ - Report an error, according to options. - """ - if options.quiet == 1 and not self.file_errors: - message(self.filename) - self.file_errors += 1 - code = text[:4] - options.counters[code] = options.counters.get(code, 0) + 1 - options.messages[code] = text[5:] - if options.quiet: - return - if options.testsuite: - basename = os.path.basename(self.filename) - if basename[:4] != code: - return # Don't care about other errors or warnings - if 'not' not in basename: - return # Don't print the expected error message - if ignore_code(code): - return - if options.counters[code] == 1 or options.repeat: - message("%s:%s:%d: %s" % - (self.filename, line_number, offset + 1, text)) - if options.show_source: - line = self.lines[line_number - 1] - message(line.rstrip()) - message(' ' * offset + '^') - if options.show_pep8: - message(check.__doc__.lstrip('\n').rstrip()) - - -def input_file(filename): - """ - Run all checks on a Python source file. - """ - if excluded(filename): - return {} - if options.verbose: - message('checking ' + filename) - files_counter_before = options.counters.get('files', 0) - if options.testsuite: # Keep showing errors for multiple tests - options.counters = {} - options.counters['files'] = files_counter_before + 1 - errors = Checker(filename).check_all() - if options.testsuite: # Check if the expected error was found - basename = os.path.basename(filename) - code = basename[:4] - count = options.counters.get(code, 0) - if count == 0 and 'not' not in basename: - message("%s: error %s not found" % (filename, code)) - - -def input_dir(dirname): - """ - Check all Python source files in this directory and all subdirectories. - """ - dirname = dirname.rstrip('/') - if excluded(dirname): - return - for root, dirs, files in os.walk(dirname): - if options.verbose: - message('directory ' + root) - options.counters['directories'] = \ - options.counters.get('directories', 0) + 1 - dirs.sort() - for subdir in dirs: - if excluded(subdir): - dirs.remove(subdir) - files.sort() - for filename in files: - if filename_match(filename): - input_file(os.path.join(root, filename)) - - -def excluded(filename): - """ - Check if options.exclude contains a pattern that matches filename. - """ - basename = os.path.basename(filename) - for pattern in options.exclude: - if fnmatch(basename, pattern): - # print basename, 'excluded because it matches', pattern - return True - - -def filename_match(filename): - """ - Check if options.filename contains a pattern that matches filename. - If options.filename is unspecified, this always returns True. - """ - if not options.filename: - return True - for pattern in options.filename: - if fnmatch(filename, pattern): - return True - - -def ignore_code(code): - """ - Check if options.ignore contains a prefix of the error code. - If options.select contains a prefix of the error code, do not ignore it. - """ - for select in options.select: - if code.startswith(select): - return False - for ignore in options.ignore: - if code.startswith(ignore): - return True - - -def get_error_statistics(): - """Get error statistics.""" - return get_statistics("E") - - -def get_warning_statistics(): - """Get warning statistics.""" - return get_statistics("W") - - -def get_statistics(prefix=''): - """ - Get statistics for message codes that start with the prefix. - - prefix='' matches all errors and warnings - prefix='E' matches all errors - prefix='W' matches all warnings - prefix='E4' matches all errors that have to do with imports - """ - stats = [] - keys = list(options.messages.keys()) - keys.sort() - for key in keys: - if key.startswith(prefix): - stats.append('%-7s %s %s' % - (options.counters[key], key, options.messages[key])) - return stats - - -def get_count(prefix=''): - """Return the total count of errors and warnings.""" - keys = list(options.messages.keys()) - count = 0 - for key in keys: - if key.startswith(prefix): - count += options.counters[key] - return count - - -def print_statistics(prefix=''): - """Print overall statistics (number of errors and warnings).""" - for line in get_statistics(prefix): - print(line) - - -def print_benchmark(elapsed): - """ - Print benchmark numbers. - """ - print('%-7.2f %s' % (elapsed, 'seconds elapsed')) - keys = ['directories', 'files', - 'logical lines', 'physical lines'] - for key in keys: - if key in options.counters: - print('%-7d %s per second (%d total)' % ( - options.counters[key] / elapsed, key, - options.counters[key])) - - -def selftest(): - """ - Test all check functions with test cases in docstrings. - """ - count_passed = 0 - count_failed = 0 - checks = options.physical_checks + options.logical_checks - for name, check, argument_names in checks: - for line in check.__doc__.splitlines(): - line = line.lstrip() - match = SELFTEST_REGEX.match(line) - if match is None: - continue - code, source = match.groups() - checker = Checker(None) - for part in source.split(r'\n'): - part = part.replace(r'\t', '\t') - part = part.replace(r'\s', ' ') - checker.lines.append(part + '\n') - options.quiet = 2 - options.counters = {} - checker.check_all() - error = None - if code == 'Okay': - if len(options.counters) > 1: - codes = [key for key in options.counters.keys() - if key != 'logical lines'] - error = "incorrectly found %s" % ', '.join(codes) - elif options.counters.get(code, 0) == 0: - error = "failed to find %s" % code - if not error: - count_passed += 1 - else: - count_failed += 1 - if len(checker.lines) == 1: - print("pep8.py: %s: %s" % - (error, checker.lines[0].rstrip())) - else: - print("pep8.py: %s:" % error) - for line in checker.lines: - print(line.rstrip()) - if options.verbose: - print("%d passed and %d failed." % (count_passed, count_failed)) - if count_failed: - print("Test failed.") - else: - print("Test passed.") - - -def process_options(arglist=None): - """ - Process options passed either via arglist or via command line args. - """ - global options, args - parser = OptionParser(version=__version__, - usage="%prog [options] input ...") - parser.add_option('-v', '--verbose', default=0, action='count', - help="print status messages, or debug with -vv") - parser.add_option('-q', '--quiet', default=0, action='count', - help="report only file names, or nothing with -qq") - parser.add_option('-r', '--repeat', action='store_true', - help="show all occurrences of the same error") - parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, - help="exclude files or directories which match these " - "comma separated patterns (default: %s)" % - DEFAULT_EXCLUDE) - parser.add_option('--filename', metavar='patterns', default='*.py', - help="when parsing directories, only check filenames " - "matching these comma separated patterns (default: " - "*.py)") - parser.add_option('--select', metavar='errors', default='', - help="select errors and warnings (e.g. E,W6)") - parser.add_option('--ignore', metavar='errors', default='', - help="skip errors and warnings (e.g. E4,W)") - parser.add_option('--show-source', action='store_true', - help="show source code for each error") - parser.add_option('--show-pep8', action='store_true', - help="show text of PEP 8 for each error") - parser.add_option('--statistics', action='store_true', - help="count errors and warnings") - parser.add_option('--count', action='store_true', - help="print total number of errors and warnings " - "to standard error and set exit code to 1 if " - "total is not null") - parser.add_option('--benchmark', action='store_true', - help="measure processing speed") - parser.add_option('--testsuite', metavar='dir', - help="run regression tests from dir") - parser.add_option('--doctest', action='store_true', - help="run doctest on myself") - options, args = parser.parse_args(arglist) - if options.testsuite: - args.append(options.testsuite) - if len(args) == 0 and not options.doctest: - parser.error('input not specified') - options.prog = os.path.basename(sys.argv[0]) - options.exclude = options.exclude.split(',') - for index in range(len(options.exclude)): - options.exclude[index] = options.exclude[index].rstrip('/') - if options.filename: - options.filename = options.filename.split(',') - if options.select: - options.select = options.select.split(',') - else: - options.select = [] - if options.ignore: - options.ignore = options.ignore.split(',') - elif options.select: - # Ignore all checks which are not explicitly selected - options.ignore = [''] - elif options.testsuite or options.doctest: - # For doctest and testsuite, all checks are required - options.ignore = [] - else: - # The default choice: ignore controversial checks - options.ignore = DEFAULT_IGNORE - options.physical_checks = find_checks('physical_line') - options.logical_checks = find_checks('logical_line') - options.counters = {} - options.messages = {} - return options, args - - -def _main(): - """ - Parse options and run checks on Python source. - """ - options, args = process_options() - if options.doctest: - import doctest - doctest.testmod(verbose=options.verbose) - selftest() - start_time = time.time() - for path in args: - if os.path.isdir(path): - input_dir(path) - else: - input_file(path) - elapsed = time.time() - start_time - if options.statistics: - print_statistics() - if options.benchmark: - print_benchmark(elapsed) - if options.count: - count = get_count() - if count: - sys.stderr.write(str(count) + '\n') - sys.exit(1) - - -if __name__ == '__main__': - _main() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url deleted file mode 100644 index 8098dbc..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url +++ /dev/null @@ -1 +0,0 @@ -http://webkit-rietveld.googlecode.com/svn/trunk/static/upload.py
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py deleted file mode 100644 index c1e4c6d..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py deleted file mode 100755 index e91060f..0000000 --- a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py +++ /dev/null @@ -1,1702 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2007 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tool for uploading diffs from a version control system to the codereview app. - -Usage summary: upload.py [options] [-- diff_options] - -Diff options are passed to the diff command of the underlying system. - -Supported version control systems: - Git - Mercurial - Subversion - -It is important for Git/Mercurial users to specify a tree/node/branch to diff -against by using the '--rev' option. -""" -# This code is derived from appcfg.py in the App Engine SDK (open source), -# and from ASPN recipe #146306. - -import ConfigParser -import cookielib -import fnmatch -import getpass -import logging -import mimetypes -import optparse -import os -import re -import socket -import subprocess -import sys -import urllib -import urllib2 -import urlparse - -# The md5 module was deprecated in Python 2.5. -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -try: - import readline -except ImportError: - pass - -# The logging verbosity: -# 0: Errors only. -# 1: Status messages. -# 2: Info logs. -# 3: Debug logs. -verbosity = 1 - -# Max size of patch or base file. -MAX_UPLOAD_SIZE = 900 * 1024 - -# Constants for version control names. Used by GuessVCSName. -VCS_GIT = "Git" -VCS_MERCURIAL = "Mercurial" -VCS_SUBVERSION = "Subversion" -VCS_UNKNOWN = "Unknown" - -# whitelist for non-binary filetypes which do not start with "text/" -# .mm (Objective-C) shows up as application/x-freemind on my Linux box. -TEXT_MIMETYPES = ['application/javascript', 'application/x-javascript', - 'application/xml', 'application/x-freemind'] - -VCS_ABBREVIATIONS = { - VCS_MERCURIAL.lower(): VCS_MERCURIAL, - "hg": VCS_MERCURIAL, - VCS_SUBVERSION.lower(): VCS_SUBVERSION, - "svn": VCS_SUBVERSION, - VCS_GIT.lower(): VCS_GIT, -} - -# The result of parsing Subversion's [auto-props] setting. -svn_auto_props_map = None - -def GetEmail(prompt): - """Prompts the user for their email address and returns it. - - The last used email address is saved to a file and offered up as a suggestion - to the user. If the user presses enter without typing in anything the last - used email address is used. If the user enters a new address, it is saved - for next time we prompt. - - """ - last_email_file_name = os.path.expanduser("~/.last_codereview_email_address") - last_email = "" - if os.path.exists(last_email_file_name): - try: - last_email_file = open(last_email_file_name, "r") - last_email = last_email_file.readline().strip("\n") - last_email_file.close() - prompt += " [%s]" % last_email - except IOError, e: - pass - email = raw_input(prompt + ": ").strip() - if email: - try: - last_email_file = open(last_email_file_name, "w") - last_email_file.write(email) - last_email_file.close() - except IOError, e: - pass - else: - email = last_email - return email - - -def StatusUpdate(msg): - """Print a status message to stdout. - - If 'verbosity' is greater than 0, print the message. - - Args: - msg: The string to print. - """ - if verbosity > 0: - print msg - - -def ErrorExit(msg): - """Print an error message to stderr and exit.""" - print >>sys.stderr, msg - sys.exit(1) - - -class ClientLoginError(urllib2.HTTPError): - """Raised to indicate there was an error authenticating with ClientLogin.""" - - def __init__(self, url, code, msg, headers, args): - urllib2.HTTPError.__init__(self, url, code, msg, headers, None) - self.args = args - self.reason = args["Error"] - - -class AbstractRpcServer(object): - """Provides a common interface for a simple RPC server.""" - - def __init__(self, host, auth_function, host_override=None, extra_headers={}, - save_cookies=False): - """Creates a new HttpRpcServer. - - Args: - host: The host to send requests to. - auth_function: A function that takes no arguments and returns an - (email, password) tuple when called. Will be called if authentication - is required. - host_override: The host header to send to the server (defaults to host). - extra_headers: A dict of extra headers to append to every request. - save_cookies: If True, save the authentication cookies to local disk. - If False, use an in-memory cookiejar instead. Subclasses must - implement this functionality. Defaults to False. - """ - self.host = host - self.host_override = host_override - self.auth_function = auth_function - self.authenticated = False - self.extra_headers = extra_headers - self.save_cookies = save_cookies - self.opener = self._GetOpener() - if self.host_override: - logging.info("Server: %s; Host: %s", self.host, self.host_override) - else: - logging.info("Server: %s", self.host) - - def _GetOpener(self): - """Returns an OpenerDirector for making HTTP requests. - - Returns: - A urllib2.OpenerDirector object. - """ - raise NotImplementedError() - - def _CreateRequest(self, url, data=None): - """Creates a new urllib request.""" - logging.debug("Creating request for: '%s' with payload:\n%s", url, data) - req = urllib2.Request(url, data=data) - if self.host_override: - req.add_header("Host", self.host_override) - for key, value in self.extra_headers.iteritems(): - req.add_header(key, value) - return req - - def _GetAuthToken(self, email, password): - """Uses ClientLogin to authenticate the user, returning an auth token. - - Args: - email: The user's email address - password: The user's password - - Raises: - ClientLoginError: If there was an error authenticating with ClientLogin. - HTTPError: If there was some other form of HTTP error. - - Returns: - The authentication token returned by ClientLogin. - """ - account_type = "GOOGLE" - if self.host.endswith(".google.com"): - # Needed for use inside Google. - account_type = "HOSTED" - req = self._CreateRequest( - url="https://www.google.com/accounts/ClientLogin", - data=urllib.urlencode({ - "Email": email, - "Passwd": password, - "service": "ah", - "source": "rietveld-codereview-upload", - "accountType": account_type, - }), - ) - try: - response = self.opener.open(req) - response_body = response.read() - response_dict = dict(x.split("=") - for x in response_body.split("\n") if x) - return response_dict["Auth"] - except urllib2.HTTPError, e: - if e.code == 403: - body = e.read() - response_dict = dict(x.split("=", 1) for x in body.split("\n") if x) - raise ClientLoginError(req.get_full_url(), e.code, e.msg, - e.headers, response_dict) - else: - raise - - def _GetAuthCookie(self, auth_token): - """Fetches authentication cookies for an authentication token. - - Args: - auth_token: The authentication token returned by ClientLogin. - - Raises: - HTTPError: If there was an error fetching the authentication cookies. - """ - # This is a dummy value to allow us to identify when we're successful. - continue_location = "http://localhost/" - args = {"continue": continue_location, "auth": auth_token} - req = self._CreateRequest("http://%s/_ah/login?%s" % - (self.host, urllib.urlencode(args))) - try: - response = self.opener.open(req) - except urllib2.HTTPError, e: - response = e - if (response.code != 302 or - response.info()["location"] != continue_location): - raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg, - response.headers, response.fp) - self.authenticated = True - - def _Authenticate(self): - """Authenticates the user. - - The authentication process works as follows: - 1) We get a username and password from the user - 2) We use ClientLogin to obtain an AUTH token for the user - (see http://code.google.com/apis/accounts/AuthForInstalledApps.html). - 3) We pass the auth token to /_ah/login on the server to obtain an - authentication cookie. If login was successful, it tries to redirect - us to the URL we provided. - - If we attempt to access the upload API without first obtaining an - authentication cookie, it returns a 401 response (or a 302) and - directs us to authenticate ourselves with ClientLogin. - """ - for i in range(3): - credentials = self.auth_function() - try: - auth_token = self._GetAuthToken(credentials[0], credentials[1]) - except ClientLoginError, e: - if e.reason == "BadAuthentication": - print >>sys.stderr, "Invalid username or password." - continue - if e.reason == "CaptchaRequired": - print >>sys.stderr, ( - "Please go to\n" - "https://www.google.com/accounts/DisplayUnlockCaptcha\n" - "and verify you are a human. Then try again.") - break - if e.reason == "NotVerified": - print >>sys.stderr, "Account not verified." - break - if e.reason == "TermsNotAgreed": - print >>sys.stderr, "User has not agreed to TOS." - break - if e.reason == "AccountDeleted": - print >>sys.stderr, "The user account has been deleted." - break - if e.reason == "AccountDisabled": - print >>sys.stderr, "The user account has been disabled." - break - if e.reason == "ServiceDisabled": - print >>sys.stderr, ("The user's access to the service has been " - "disabled.") - break - if e.reason == "ServiceUnavailable": - print >>sys.stderr, "The service is not available; try again later." - break - raise - self._GetAuthCookie(auth_token) - return - - def Send(self, request_path, payload=None, - content_type="application/octet-stream", - timeout=None, - **kwargs): - """Sends an RPC and returns the response. - - Args: - request_path: The path to send the request to, eg /api/appversion/create. - payload: The body of the request, or None to send an empty request. - content_type: The Content-Type header to use. - timeout: timeout in seconds; default None i.e. no timeout. - (Note: for large requests on OS X, the timeout doesn't work right.) - kwargs: Any keyword arguments are converted into query string parameters. - - Returns: - The response body, as a string. - """ - # TODO: Don't require authentication. Let the server say - # whether it is necessary. - if not self.authenticated: - self._Authenticate() - - old_timeout = socket.getdefaulttimeout() - socket.setdefaulttimeout(timeout) - try: - tries = 0 - while True: - tries += 1 - args = dict(kwargs) - url = "http://%s%s" % (self.host, request_path) - if args: - url += "?" + urllib.urlencode(args) - req = self._CreateRequest(url=url, data=payload) - req.add_header("Content-Type", content_type) - try: - f = self.opener.open(req) - response = f.read() - f.close() - return response - except urllib2.HTTPError, e: - if tries > 3: - raise - elif e.code == 401 or e.code == 302: - self._Authenticate() -## elif e.code >= 500 and e.code < 600: -## # Server Error - try again. -## continue - else: - raise - finally: - socket.setdefaulttimeout(old_timeout) - - -class HttpRpcServer(AbstractRpcServer): - """Provides a simplified RPC-style interface for HTTP requests.""" - - def _Authenticate(self): - """Save the cookie jar after authentication.""" - super(HttpRpcServer, self)._Authenticate() - if self.save_cookies: - StatusUpdate("Saving authentication cookies to %s" % self.cookie_file) - self.cookie_jar.save() - - def _GetOpener(self): - """Returns an OpenerDirector that supports cookies and ignores redirects. - - Returns: - A urllib2.OpenerDirector object. - """ - opener = urllib2.OpenerDirector() - opener.add_handler(urllib2.ProxyHandler()) - opener.add_handler(urllib2.UnknownHandler()) - opener.add_handler(urllib2.HTTPHandler()) - opener.add_handler(urllib2.HTTPDefaultErrorHandler()) - opener.add_handler(urllib2.HTTPSHandler()) - opener.add_handler(urllib2.HTTPErrorProcessor()) - if self.save_cookies: - self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies") - self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file) - if os.path.exists(self.cookie_file): - try: - self.cookie_jar.load() - self.authenticated = True - StatusUpdate("Loaded authentication cookies from %s" % - self.cookie_file) - except (cookielib.LoadError, IOError): - # Failed to load cookies - just ignore them. - pass - else: - # Create an empty cookie file with mode 600 - fd = os.open(self.cookie_file, os.O_CREAT, 0600) - os.close(fd) - # Always chmod the cookie file - os.chmod(self.cookie_file, 0600) - else: - # Don't save cookies across runs of update.py. - self.cookie_jar = cookielib.CookieJar() - opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar)) - return opener - - -parser = optparse.OptionParser(usage="%prog [options] [-- diff_options]") -parser.add_option("-y", "--assume_yes", action="store_true", - dest="assume_yes", default=False, - help="Assume that the answer to yes/no questions is 'yes'.") -# Logging -group = parser.add_option_group("Logging options") -group.add_option("-q", "--quiet", action="store_const", const=0, - dest="verbose", help="Print errors only.") -group.add_option("-v", "--verbose", action="store_const", const=2, - dest="verbose", default=1, - help="Print info level logs (default).") -group.add_option("--noisy", action="store_const", const=3, - dest="verbose", help="Print all logs.") -# Review server -group = parser.add_option_group("Review server options") -group.add_option("-s", "--server", action="store", dest="server", - default="codereview.appspot.com", - metavar="SERVER", - help=("The server to upload to. The format is host[:port]. " - "Defaults to '%default'.")) -group.add_option("-e", "--email", action="store", dest="email", - metavar="EMAIL", default=None, - help="The username to use. Will prompt if omitted.") -group.add_option("-H", "--host", action="store", dest="host", - metavar="HOST", default=None, - help="Overrides the Host header sent with all RPCs.") -group.add_option("--no_cookies", action="store_false", - dest="save_cookies", default=True, - help="Do not save authentication cookies to local disk.") -# Issue -group = parser.add_option_group("Issue options") -group.add_option("-d", "--description", action="store", dest="description", - metavar="DESCRIPTION", default=None, - help="Optional description when creating an issue.") -group.add_option("-f", "--description_file", action="store", - dest="description_file", metavar="DESCRIPTION_FILE", - default=None, - help="Optional path of a file that contains " - "the description when creating an issue.") -group.add_option("-r", "--reviewers", action="store", dest="reviewers", - metavar="REVIEWERS", default=None, - help="Add reviewers (comma separated email addresses).") -group.add_option("--cc", action="store", dest="cc", - metavar="CC", default=None, - help="Add CC (comma separated email addresses).") -group.add_option("--private", action="store_true", dest="private", - default=False, - help="Make the issue restricted to reviewers and those CCed") -# Upload options -group = parser.add_option_group("Patch options") -group.add_option("-m", "--message", action="store", dest="message", - metavar="MESSAGE", default=None, - help="A message to identify the patch. " - "Will prompt if omitted.") -group.add_option("-i", "--issue", type="int", action="store", - metavar="ISSUE", default=None, - help="Issue number to which to add. Defaults to new issue.") -group.add_option("--base_url", action="store", dest="base_url", default=None, - help="Base repository URL (listed as \"Base URL\" when " - "viewing issue). If omitted, will be guessed automatically " - "for SVN repos and left blank for others.") -group.add_option("--download_base", action="store_true", - dest="download_base", default=False, - help="Base files will be downloaded by the server " - "(side-by-side diffs may not work on files with CRs).") -group.add_option("--rev", action="store", dest="revision", - metavar="REV", default=None, - help="Base revision/branch/tree to diff against. Use " - "rev1:rev2 range to review already committed changeset.") -group.add_option("--send_mail", action="store_true", - dest="send_mail", default=False, - help="Send notification email to reviewers.") -group.add_option("--vcs", action="store", dest="vcs", - metavar="VCS", default=None, - help=("Version control system (optional, usually upload.py " - "already guesses the right VCS).")) -group.add_option("--emulate_svn_auto_props", action="store_true", - dest="emulate_svn_auto_props", default=False, - help=("Emulate Subversion's auto properties feature.")) - - -def GetRpcServer(server, email=None, host_override=None, save_cookies=True): - """Returns an instance of an AbstractRpcServer. - - Args: - server: String containing the review server URL. - email: String containing user's email address. - host_override: If not None, string containing an alternate hostname to use - in the host header. - save_cookies: Whether authentication cookies should be saved to disk. - - Returns: - A new AbstractRpcServer, on which RPC calls can be made. - """ - - rpc_server_class = HttpRpcServer - - def GetUserCredentials(): - """Prompts the user for a username and password.""" - if email is None: - email = GetEmail("Email (login for uploading to %s)" % server) - password = getpass.getpass("Password for %s: " % email) - return (email, password) - - # If this is the dev_appserver, use fake authentication. - host = (host_override or server).lower() - if host == "localhost" or host.startswith("localhost:"): - if email is None: - email = "test@example.com" - logging.info("Using debug user %s. Override with --email" % email) - server = rpc_server_class( - server, - lambda: (email, "password"), - host_override=host_override, - extra_headers={"Cookie": - 'dev_appserver_login="%s:False"' % email}, - save_cookies=save_cookies) - # Don't try to talk to ClientLogin. - server.authenticated = True - return server - - return rpc_server_class(server, - GetUserCredentials, - host_override=host_override, - save_cookies=save_cookies) - - -def EncodeMultipartFormData(fields, files): - """Encode form fields for multipart/form-data. - - Args: - fields: A sequence of (name, value) elements for regular form fields. - files: A sequence of (name, filename, value) elements for data to be - uploaded as files. - Returns: - (content_type, body) ready for httplib.HTTP instance. - - Source: - http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 - """ - BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-' - CRLF = '\r\n' - lines = [] - for (key, value) in fields: - lines.append('--' + BOUNDARY) - lines.append('Content-Disposition: form-data; name="%s"' % key) - lines.append('') - lines.append(value) - for (key, filename, value) in files: - lines.append('--' + BOUNDARY) - lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' % - (key, filename)) - lines.append('Content-Type: %s' % GetContentType(filename)) - lines.append('') - lines.append(value) - lines.append('--' + BOUNDARY + '--') - lines.append('') - body = CRLF.join(lines) - content_type = 'multipart/form-data; boundary=%s' % BOUNDARY - return content_type, body - - -def GetContentType(filename): - """Helper to guess the content-type from the filename.""" - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' - - -# Use a shell for subcommands on Windows to get a PATH search. -use_shell = sys.platform.startswith("win") - -def RunShellWithReturnCode(command, print_output=False, - universal_newlines=True, - env=os.environ): - """Executes a command and returns the output from stdout and the return code. - - Args: - command: Command to execute. - print_output: If True, the output is printed to stdout. - If False, both stdout and stderr are ignored. - universal_newlines: Use universal_newlines flag (default: True). - - Returns: - Tuple (output, return code) - """ - logging.info("Running %s", command) - p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - shell=use_shell, universal_newlines=universal_newlines, - env=env) - if print_output: - output_array = [] - while True: - line = p.stdout.readline() - if not line: - break - print line.strip("\n") - output_array.append(line) - output = "".join(output_array) - else: - output = p.stdout.read() - p.wait() - errout = p.stderr.read() - if print_output and errout: - print >>sys.stderr, errout - p.stdout.close() - p.stderr.close() - return output, p.returncode - - -def RunShell(command, silent_ok=False, universal_newlines=True, - print_output=False, env=os.environ): - data, retcode = RunShellWithReturnCode(command, print_output, - universal_newlines, env) - if retcode: - ErrorExit("Got error status from %s:\n%s" % (command, data)) - if not silent_ok and not data: - ErrorExit("No output from %s" % command) - return data - - -class VersionControlSystem(object): - """Abstract base class providing an interface to the VCS.""" - - def __init__(self, options): - """Constructor. - - Args: - options: Command line options. - """ - self.options = options - - def GenerateDiff(self, args): - """Return the current diff as a string. - - Args: - args: Extra arguments to pass to the diff command. - """ - raise NotImplementedError( - "abstract method -- subclass %s must override" % self.__class__) - - def GetUnknownFiles(self): - """Return a list of files unknown to the VCS.""" - raise NotImplementedError( - "abstract method -- subclass %s must override" % self.__class__) - - def CheckForUnknownFiles(self): - """Show an "are you sure?" prompt if there are unknown files.""" - unknown_files = self.GetUnknownFiles() - if unknown_files: - print "The following files are not added to version control:" - for line in unknown_files: - print line - prompt = "Are you sure to continue?(y/N) " - answer = raw_input(prompt).strip() - if answer != "y": - ErrorExit("User aborted") - - def GetBaseFile(self, filename): - """Get the content of the upstream version of a file. - - Returns: - A tuple (base_content, new_content, is_binary, status) - base_content: The contents of the base file. - new_content: For text files, this is empty. For binary files, this is - the contents of the new file, since the diff output won't contain - information to reconstruct the current file. - is_binary: True iff the file is binary. - status: The status of the file. - """ - - raise NotImplementedError( - "abstract method -- subclass %s must override" % self.__class__) - - - def GetBaseFiles(self, diff): - """Helper that calls GetBase file for each file in the patch. - - Returns: - A dictionary that maps from filename to GetBaseFile's tuple. Filenames - are retrieved based on lines that start with "Index:" or - "Property changes on:". - """ - files = {} - for line in diff.splitlines(True): - if line.startswith('Index:') or line.startswith('Property changes on:'): - unused, filename = line.split(':', 1) - # On Windows if a file has property changes its filename uses '\' - # instead of '/'. - filename = filename.strip().replace('\\', '/') - files[filename] = self.GetBaseFile(filename) - return files - - - def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options, - files): - """Uploads the base files (and if necessary, the current ones as well).""" - - def UploadFile(filename, file_id, content, is_binary, status, is_base): - """Uploads a file to the server.""" - file_too_large = False - if is_base: - type = "base" - else: - type = "current" - if len(content) > MAX_UPLOAD_SIZE: - print ("Not uploading the %s file for %s because it's too large." % - (type, filename)) - file_too_large = True - content = "" - checksum = md5(content).hexdigest() - if options.verbose > 0 and not file_too_large: - print "Uploading %s file for %s" % (type, filename) - url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id) - form_fields = [("filename", filename), - ("status", status), - ("checksum", checksum), - ("is_binary", str(is_binary)), - ("is_current", str(not is_base)), - ] - if file_too_large: - form_fields.append(("file_too_large", "1")) - if options.email: - form_fields.append(("user", options.email)) - ctype, body = EncodeMultipartFormData(form_fields, - [("data", filename, content)]) - response_body = rpc_server.Send(url, body, - content_type=ctype) - if not response_body.startswith("OK"): - StatusUpdate(" --> %s" % response_body) - sys.exit(1) - - patches = dict() - [patches.setdefault(v, k) for k, v in patch_list] - for filename in patches.keys(): - base_content, new_content, is_binary, status = files[filename] - file_id_str = patches.get(filename) - if file_id_str.find("nobase") != -1: - base_content = None - file_id_str = file_id_str[file_id_str.rfind("_") + 1:] - file_id = int(file_id_str) - if base_content != None: - UploadFile(filename, file_id, base_content, is_binary, status, True) - if new_content != None: - UploadFile(filename, file_id, new_content, is_binary, status, False) - - def IsImage(self, filename): - """Returns true if the filename has an image extension.""" - mimetype = mimetypes.guess_type(filename)[0] - if not mimetype: - return False - return mimetype.startswith("image/") - - def IsBinary(self, filename): - """Returns true if the guessed mimetyped isnt't in text group.""" - mimetype = mimetypes.guess_type(filename)[0] - if not mimetype: - return False # e.g. README, "real" binaries usually have an extension - # special case for text files which don't start with text/ - if mimetype in TEXT_MIMETYPES: - return False - return not mimetype.startswith("text/") - - -class SubversionVCS(VersionControlSystem): - """Implementation of the VersionControlSystem interface for Subversion.""" - - def __init__(self, options): - super(SubversionVCS, self).__init__(options) - if self.options.revision: - match = re.match(r"(\d+)(:(\d+))?", self.options.revision) - if not match: - ErrorExit("Invalid Subversion revision %s." % self.options.revision) - self.rev_start = match.group(1) - self.rev_end = match.group(3) - else: - self.rev_start = self.rev_end = None - # Cache output from "svn list -r REVNO dirname". - # Keys: dirname, Values: 2-tuple (ouput for start rev and end rev). - self.svnls_cache = {} - # Base URL is required to fetch files deleted in an older revision. - # Result is cached to not guess it over and over again in GetBaseFile(). - required = self.options.download_base or self.options.revision is not None - self.svn_base = self._GuessBase(required) - - def GuessBase(self, required): - """Wrapper for _GuessBase.""" - return self.svn_base - - def _GuessBase(self, required): - """Returns the SVN base URL. - - Args: - required: If true, exits if the url can't be guessed, otherwise None is - returned. - """ - info = RunShell(["svn", "info"]) - for line in info.splitlines(): - words = line.split() - if len(words) == 2 and words[0] == "URL:": - url = words[1] - scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) - username, netloc = urllib.splituser(netloc) - if username: - logging.info("Removed username from base URL") - if netloc.endswith("svn.python.org"): - if netloc == "svn.python.org": - if path.startswith("/projects/"): - path = path[9:] - elif netloc != "pythondev@svn.python.org": - ErrorExit("Unrecognized Python URL: %s" % url) - base = "http://svn.python.org/view/*checkout*%s/" % path - logging.info("Guessed Python base = %s", base) - elif netloc.endswith("svn.collab.net"): - if path.startswith("/repos/"): - path = path[6:] - base = "http://svn.collab.net/viewvc/*checkout*%s/" % path - logging.info("Guessed CollabNet base = %s", base) - elif netloc.endswith(".googlecode.com"): - path = path + "/" - base = urlparse.urlunparse(("http", netloc, path, params, - query, fragment)) - logging.info("Guessed Google Code base = %s", base) - else: - path = path + "/" - base = urlparse.urlunparse((scheme, netloc, path, params, - query, fragment)) - logging.info("Guessed base = %s", base) - return base - if required: - ErrorExit("Can't find URL in output from svn info") - return None - - def GenerateDiff(self, args): - cmd = ["svn", "diff"] - if self.options.revision: - cmd += ["-r", self.options.revision] - cmd.extend(args) - data = RunShell(cmd) - count = 0 - for line in data.splitlines(): - if line.startswith("Index:") or line.startswith("Property changes on:"): - count += 1 - logging.info(line) - if not count: - ErrorExit("No valid patches found in output from svn diff") - return data - - def _CollapseKeywords(self, content, keyword_str): - """Collapses SVN keywords.""" - # svn cat translates keywords but svn diff doesn't. As a result of this - # behavior patching.PatchChunks() fails with a chunk mismatch error. - # This part was originally written by the Review Board development team - # who had the same problem (http://reviews.review-board.org/r/276/). - # Mapping of keywords to known aliases - svn_keywords = { - # Standard keywords - 'Date': ['Date', 'LastChangedDate'], - 'Revision': ['Revision', 'LastChangedRevision', 'Rev'], - 'Author': ['Author', 'LastChangedBy'], - 'HeadURL': ['HeadURL', 'URL'], - 'Id': ['Id'], - - # Aliases - 'LastChangedDate': ['LastChangedDate', 'Date'], - 'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'], - 'LastChangedBy': ['LastChangedBy', 'Author'], - 'URL': ['URL', 'HeadURL'], - } - - def repl(m): - if m.group(2): - return "$%s::%s$" % (m.group(1), " " * len(m.group(3))) - return "$%s$" % m.group(1) - keywords = [keyword - for name in keyword_str.split(" ") - for keyword in svn_keywords.get(name, [])] - return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content) - - def GetUnknownFiles(self): - status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True) - unknown_files = [] - for line in status.split("\n"): - if line and line[0] == "?": - unknown_files.append(line) - return unknown_files - - def ReadFile(self, filename): - """Returns the contents of a file.""" - file = open(filename, 'rb') - result = "" - try: - result = file.read() - finally: - file.close() - return result - - def GetStatus(self, filename): - """Returns the status of a file.""" - if not self.options.revision: - status = RunShell(["svn", "status", "--ignore-externals", filename]) - if not status: - ErrorExit("svn status returned no output for %s" % filename) - status_lines = status.splitlines() - # If file is in a cl, the output will begin with - # "\n--- Changelist 'cl_name':\n". See - # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt - if (len(status_lines) == 3 and - not status_lines[0] and - status_lines[1].startswith("--- Changelist")): - status = status_lines[2] - else: - status = status_lines[0] - # If we have a revision to diff against we need to run "svn list" - # for the old and the new revision and compare the results to get - # the correct status for a file. - else: - dirname, relfilename = os.path.split(filename) - if dirname not in self.svnls_cache: - cmd = ["svn", "list", "-r", self.rev_start, dirname or "."] - out, returncode = RunShellWithReturnCode(cmd) - if returncode: - ErrorExit("Failed to get status for %s." % filename) - old_files = out.splitlines() - args = ["svn", "list"] - if self.rev_end: - args += ["-r", self.rev_end] - cmd = args + [dirname or "."] - out, returncode = RunShellWithReturnCode(cmd) - if returncode: - ErrorExit("Failed to run command %s" % cmd) - self.svnls_cache[dirname] = (old_files, out.splitlines()) - old_files, new_files = self.svnls_cache[dirname] - if relfilename in old_files and relfilename not in new_files: - status = "D " - elif relfilename in old_files and relfilename in new_files: - status = "M " - else: - status = "A " - return status - - def GetBaseFile(self, filename): - status = self.GetStatus(filename) - base_content = None - new_content = None - - # If a file is copied its status will be "A +", which signifies - # "addition-with-history". See "svn st" for more information. We need to - # upload the original file or else diff parsing will fail if the file was - # edited. - if status[0] == "A" and status[3] != "+": - # We'll need to upload the new content if we're adding a binary file - # since diff's output won't contain it. - mimetype = RunShell(["svn", "propget", "svn:mime-type", filename], - silent_ok=True) - base_content = "" - is_binary = bool(mimetype) and not mimetype.startswith("text/") - if is_binary and self.IsImage(filename): - new_content = self.ReadFile(filename) - elif (status[0] in ("M", "D", "R") or - (status[0] == "A" and status[3] == "+") or # Copied file. - (status[0] == " " and status[1] == "M")): # Property change. - args = [] - if self.options.revision: - url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) - else: - # Don't change filename, it's needed later. - url = filename - args += ["-r", "BASE"] - cmd = ["svn"] + args + ["propget", "svn:mime-type", url] - mimetype, returncode = RunShellWithReturnCode(cmd) - if returncode: - # File does not exist in the requested revision. - # Reset mimetype, it contains an error message. - mimetype = "" - get_base = False - is_binary = bool(mimetype) and not mimetype.startswith("text/") - if status[0] == " ": - # Empty base content just to force an upload. - base_content = "" - elif is_binary: - if self.IsImage(filename): - get_base = True - if status[0] == "M": - if not self.rev_end: - new_content = self.ReadFile(filename) - else: - url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end) - new_content = RunShell(["svn", "cat", url], - universal_newlines=True, silent_ok=True) - else: - base_content = "" - else: - get_base = True - - if get_base: - if is_binary: - universal_newlines = False - else: - universal_newlines = True - if self.rev_start: - # "svn cat -r REV delete_file.txt" doesn't work. cat requires - # the full URL with "@REV" appended instead of using "-r" option. - url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) - base_content = RunShell(["svn", "cat", url], - universal_newlines=universal_newlines, - silent_ok=True) - else: - base_content = RunShell(["svn", "cat", filename], - universal_newlines=universal_newlines, - silent_ok=True) - if not is_binary: - args = [] - if self.rev_start: - url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) - else: - url = filename - args += ["-r", "BASE"] - cmd = ["svn"] + args + ["propget", "svn:keywords", url] - keywords, returncode = RunShellWithReturnCode(cmd) - if keywords and not returncode: - base_content = self._CollapseKeywords(base_content, keywords) - else: - StatusUpdate("svn status returned unexpected output: %s" % status) - sys.exit(1) - return base_content, new_content, is_binary, status[0:5] - - -class GitVCS(VersionControlSystem): - """Implementation of the VersionControlSystem interface for Git.""" - - def __init__(self, options): - super(GitVCS, self).__init__(options) - # Map of filename -> (hash before, hash after) of base file. - # Hashes for "no such file" are represented as None. - self.hashes = {} - # Map of new filename -> old filename for renames. - self.renames = {} - - def GenerateDiff(self, extra_args): - # This is more complicated than svn's GenerateDiff because we must convert - # the diff output to include an svn-style "Index:" line as well as record - # the hashes of the files, so we can upload them along with our diff. - - # Special used by git to indicate "no such content". - NULL_HASH = "0"*40 - - extra_args = extra_args[:] - if self.options.revision: - extra_args = [self.options.revision] + extra_args - - # --no-ext-diff is broken in some versions of Git, so try to work around - # this by overriding the environment (but there is still a problem if the - # git config key "diff.external" is used). - env = os.environ.copy() - if 'GIT_EXTERNAL_DIFF' in env: del env['GIT_EXTERNAL_DIFF'] - gitdiff = RunShell(["git", "diff", "--no-ext-diff", "--full-index", "-M"] - + extra_args, env=env) - - def IsFileNew(filename): - return filename in self.hashes and self.hashes[filename][0] is None - - def AddSubversionPropertyChange(filename): - """Add svn's property change information into the patch if given file is - new file. - - We use Subversion's auto-props setting to retrieve its property. - See http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2 for - Subversion's [auto-props] setting. - """ - if self.options.emulate_svn_auto_props and IsFileNew(filename): - svnprops = GetSubversionPropertyChanges(filename) - if svnprops: - svndiff.append("\n" + svnprops + "\n") - - svndiff = [] - filecount = 0 - filename = None - for line in gitdiff.splitlines(): - match = re.match(r"diff --git a/(.*) b/(.*)$", line) - if match: - # Add auto property here for previously seen file. - if filename is not None: - AddSubversionPropertyChange(filename) - filecount += 1 - # Intentionally use the "after" filename so we can show renames. - filename = match.group(2) - svndiff.append("Index: %s\n" % filename) - if match.group(1) != match.group(2): - self.renames[match.group(2)] = match.group(1) - else: - # The "index" line in a git diff looks like this (long hashes elided): - # index 82c0d44..b2cee3f 100755 - # We want to save the left hash, as that identifies the base file. - match = re.match(r"index (\w+)\.\.(\w+)", line) - if match: - before, after = (match.group(1), match.group(2)) - if before == NULL_HASH: - before = None - if after == NULL_HASH: - after = None - self.hashes[filename] = (before, after) - svndiff.append(line + "\n") - if not filecount: - ErrorExit("No valid patches found in output from git diff") - # Add auto property for the last seen file. - assert filename is not None - AddSubversionPropertyChange(filename) - return "".join(svndiff) - - def GetUnknownFiles(self): - status = RunShell(["git", "ls-files", "--exclude-standard", "--others"], - silent_ok=True) - return status.splitlines() - - def GetFileContent(self, file_hash, is_binary): - """Returns the content of a file identified by its git hash.""" - data, retcode = RunShellWithReturnCode(["git", "show", file_hash], - universal_newlines=not is_binary) - if retcode: - ErrorExit("Got error status from 'git show %s'" % file_hash) - return data - - def GetBaseFile(self, filename): - hash_before, hash_after = self.hashes.get(filename, (None,None)) - base_content = None - new_content = None - is_binary = self.IsBinary(filename) - status = None - - if filename in self.renames: - status = "A +" # Match svn attribute name for renames. - if filename not in self.hashes: - # If a rename doesn't change the content, we never get a hash. - base_content = RunShell(["git", "show", "HEAD:" + filename]) - elif not hash_before: - status = "A" - base_content = "" - elif not hash_after: - status = "D" - else: - status = "M" - - is_image = self.IsImage(filename) - - # Grab the before/after content if we need it. - # We should include file contents if it's text or it's an image. - if not is_binary or is_image: - # Grab the base content if we don't have it already. - if base_content is None and hash_before: - base_content = self.GetFileContent(hash_before, is_binary) - # Only include the "after" file if it's an image; otherwise it - # it is reconstructed from the diff. - if is_image and hash_after: - new_content = self.GetFileContent(hash_after, is_binary) - - return (base_content, new_content, is_binary, status) - - -class MercurialVCS(VersionControlSystem): - """Implementation of the VersionControlSystem interface for Mercurial.""" - - def __init__(self, options, repo_dir): - super(MercurialVCS, self).__init__(options) - # Absolute path to repository (we can be in a subdir) - self.repo_dir = os.path.normpath(repo_dir) - # Compute the subdir - cwd = os.path.normpath(os.getcwd()) - assert cwd.startswith(self.repo_dir) - self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/") - if self.options.revision: - self.base_rev = self.options.revision - else: - self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip() - - def _GetRelPath(self, filename): - """Get relative path of a file according to the current directory, - given its logical path in the repo.""" - assert filename.startswith(self.subdir), (filename, self.subdir) - return filename[len(self.subdir):].lstrip(r"\/") - - def GenerateDiff(self, extra_args): - # If no file specified, restrict to the current subdir - extra_args = extra_args or ["."] - cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args - data = RunShell(cmd, silent_ok=True) - svndiff = [] - filecount = 0 - for line in data.splitlines(): - m = re.match("diff --git a/(\S+) b/(\S+)", line) - if m: - # Modify line to make it look like as it comes from svn diff. - # With this modification no changes on the server side are required - # to make upload.py work with Mercurial repos. - # NOTE: for proper handling of moved/copied files, we have to use - # the second filename. - filename = m.group(2) - svndiff.append("Index: %s" % filename) - svndiff.append("=" * 67) - filecount += 1 - logging.info(line) - else: - svndiff.append(line) - if not filecount: - ErrorExit("No valid patches found in output from hg diff") - return "\n".join(svndiff) + "\n" - - def GetUnknownFiles(self): - """Return a list of files unknown to the VCS.""" - args = [] - status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."], - silent_ok=True) - unknown_files = [] - for line in status.splitlines(): - st, fn = line.split(" ", 1) - if st == "?": - unknown_files.append(fn) - return unknown_files - - def GetBaseFile(self, filename): - # "hg status" and "hg cat" both take a path relative to the current subdir - # rather than to the repo root, but "hg diff" has given us the full path - # to the repo root. - base_content = "" - new_content = None - is_binary = False - oldrelpath = relpath = self._GetRelPath(filename) - # "hg status -C" returns two lines for moved/copied files, one otherwise - out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath]) - out = out.splitlines() - # HACK: strip error message about missing file/directory if it isn't in - # the working copy - if out[0].startswith('%s: ' % relpath): - out = out[1:] - if len(out) > 1: - # Moved/copied => considered as modified, use old filename to - # retrieve base contents - oldrelpath = out[1].strip() - status = "M" - else: - status, _ = out[0].split(' ', 1) - if ":" in self.base_rev: - base_rev = self.base_rev.split(":", 1)[0] - else: - base_rev = self.base_rev - if status != "A": - base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], - silent_ok=True) - is_binary = "\0" in base_content # Mercurial's heuristic - if status != "R": - new_content = open(relpath, "rb").read() - is_binary = is_binary or "\0" in new_content - if is_binary and base_content: - # Fetch again without converting newlines - base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], - silent_ok=True, universal_newlines=False) - if not is_binary or not self.IsImage(relpath): - new_content = None - return base_content, new_content, is_binary, status - - -# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync. -def SplitPatch(data): - """Splits a patch into separate pieces for each file. - - Args: - data: A string containing the output of svn diff. - - Returns: - A list of 2-tuple (filename, text) where text is the svn diff output - pertaining to filename. - """ - patches = [] - filename = None - diff = [] - for line in data.splitlines(True): - new_filename = None - if line.startswith('Index:'): - unused, new_filename = line.split(':', 1) - new_filename = new_filename.strip() - elif line.startswith('Property changes on:'): - unused, temp_filename = line.split(':', 1) - # When a file is modified, paths use '/' between directories, however - # when a property is modified '\' is used on Windows. Make them the same - # otherwise the file shows up twice. - temp_filename = temp_filename.strip().replace('\\', '/') - if temp_filename != filename: - # File has property changes but no modifications, create a new diff. - new_filename = temp_filename - if new_filename: - if filename and diff: - patches.append((filename, ''.join(diff))) - filename = new_filename - diff = [line] - continue - if diff is not None: - diff.append(line) - if filename and diff: - patches.append((filename, ''.join(diff))) - return patches - - -def UploadSeparatePatches(issue, rpc_server, patchset, data, options): - """Uploads a separate patch for each file in the diff output. - - Returns a list of [patch_key, filename] for each file. - """ - patches = SplitPatch(data) - rv = [] - for patch in patches: - if len(patch[1]) > MAX_UPLOAD_SIZE: - print ("Not uploading the patch for " + patch[0] + - " because the file is too large.") - continue - form_fields = [("filename", patch[0])] - if not options.download_base: - form_fields.append(("content_upload", "1")) - files = [("data", "data.diff", patch[1])] - ctype, body = EncodeMultipartFormData(form_fields, files) - url = "/%d/upload_patch/%d" % (int(issue), int(patchset)) - print "Uploading patch for " + patch[0] - response_body = rpc_server.Send(url, body, content_type=ctype) - lines = response_body.splitlines() - if not lines or lines[0] != "OK": - StatusUpdate(" --> %s" % response_body) - sys.exit(1) - rv.append([lines[1], patch[0]]) - return rv - - -def GuessVCSName(): - """Helper to guess the version control system. - - This examines the current directory, guesses which VersionControlSystem - we're using, and returns an string indicating which VCS is detected. - - Returns: - A pair (vcs, output). vcs is a string indicating which VCS was detected - and is one of VCS_GIT, VCS_MERCURIAL, VCS_SUBVERSION, or VCS_UNKNOWN. - output is a string containing any interesting output from the vcs - detection routine, or None if there is nothing interesting. - """ - # Mercurial has a command to get the base directory of a repository - # Try running it, but don't die if we don't have hg installed. - # NOTE: we try Mercurial first as it can sit on top of an SVN working copy. - try: - out, returncode = RunShellWithReturnCode(["hg", "root"]) - if returncode == 0: - return (VCS_MERCURIAL, out.strip()) - except OSError, (errno, message): - if errno != 2: # ENOENT -- they don't have hg installed. - raise - - # Subversion has a .svn in all working directories. - if os.path.isdir('.svn'): - logging.info("Guessed VCS = Subversion") - return (VCS_SUBVERSION, None) - - # Git has a command to test if you're in a git tree. - # Try running it, but don't die if we don't have git installed. - try: - out, returncode = RunShellWithReturnCode(["git", "rev-parse", - "--is-inside-work-tree"]) - if returncode == 0: - return (VCS_GIT, None) - except OSError, (errno, message): - if errno != 2: # ENOENT -- they don't have git installed. - raise - - return (VCS_UNKNOWN, None) - - -def GuessVCS(options): - """Helper to guess the version control system. - - This verifies any user-specified VersionControlSystem (by command line - or environment variable). If the user didn't specify one, this examines - the current directory, guesses which VersionControlSystem we're using, - and returns an instance of the appropriate class. Exit with an error - if we can't figure it out. - - Returns: - A VersionControlSystem instance. Exits if the VCS can't be guessed. - """ - vcs = options.vcs - if not vcs: - vcs = os.environ.get("CODEREVIEW_VCS") - if vcs: - v = VCS_ABBREVIATIONS.get(vcs.lower()) - if v is None: - ErrorExit("Unknown version control system %r specified." % vcs) - (vcs, extra_output) = (v, None) - else: - (vcs, extra_output) = GuessVCSName() - - if vcs == VCS_MERCURIAL: - if extra_output is None: - extra_output = RunShell(["hg", "root"]).strip() - return MercurialVCS(options, extra_output) - elif vcs == VCS_SUBVERSION: - return SubversionVCS(options) - elif vcs == VCS_GIT: - return GitVCS(options) - - ErrorExit(("Could not guess version control system. " - "Are you in a working copy directory?")) - - -def CheckReviewer(reviewer): - """Validate a reviewer -- either a nickname or an email addres. - - Args: - reviewer: A nickname or an email address. - - Calls ErrorExit() if it is an invalid email address. - """ - if "@" not in reviewer: - return # Assume nickname - parts = reviewer.split("@") - if len(parts) > 2: - ErrorExit("Invalid email address: %r" % reviewer) - assert len(parts) == 2 - if "." not in parts[1]: - ErrorExit("Invalid email address: %r" % reviewer) - - -def LoadSubversionAutoProperties(): - """Returns the content of [auto-props] section of Subversion's config file as - a dictionary. - - Returns: - A dictionary whose key-value pair corresponds the [auto-props] section's - key-value pair. - In following cases, returns empty dictionary: - - config file doesn't exist, or - - 'enable-auto-props' is not set to 'true-like-value' in [miscellany]. - """ - # Todo(hayato): Windows users might use different path for configuration file. - subversion_config = os.path.expanduser("~/.subversion/config") - if not os.path.exists(subversion_config): - return {} - config = ConfigParser.ConfigParser() - config.read(subversion_config) - if (config.has_section("miscellany") and - config.has_option("miscellany", "enable-auto-props") and - config.getboolean("miscellany", "enable-auto-props") and - config.has_section("auto-props")): - props = {} - for file_pattern in config.options("auto-props"): - props[file_pattern] = ParseSubversionPropertyValues( - config.get("auto-props", file_pattern)) - return props - else: - return {} - -def ParseSubversionPropertyValues(props): - """Parse the given property value which comes from [auto-props] section and - returns a list whose element is a (svn_prop_key, svn_prop_value) pair. - - See the following doctest for example. - - >>> ParseSubversionPropertyValues('svn:eol-style=LF') - [('svn:eol-style', 'LF')] - >>> ParseSubversionPropertyValues('svn:mime-type=image/jpeg') - [('svn:mime-type', 'image/jpeg')] - >>> ParseSubversionPropertyValues('svn:eol-style=LF;svn:executable') - [('svn:eol-style', 'LF'), ('svn:executable', '*')] - """ - key_value_pairs = [] - for prop in props.split(";"): - key_value = prop.split("=") - assert len(key_value) <= 2 - if len(key_value) == 1: - # If value is not given, use '*' as a Subversion's convention. - key_value_pairs.append((key_value[0], "*")) - else: - key_value_pairs.append((key_value[0], key_value[1])) - return key_value_pairs - - -def GetSubversionPropertyChanges(filename): - """Return a Subversion's 'Property changes on ...' string, which is used in - the patch file. - - Args: - filename: filename whose property might be set by [auto-props] config. - - Returns: - A string like 'Property changes on |filename| ...' if given |filename| - matches any entries in [auto-props] section. None, otherwise. - """ - global svn_auto_props_map - if svn_auto_props_map is None: - svn_auto_props_map = LoadSubversionAutoProperties() - - all_props = [] - for file_pattern, props in svn_auto_props_map.items(): - if fnmatch.fnmatch(filename, file_pattern): - all_props.extend(props) - if all_props: - return FormatSubversionPropertyChanges(filename, all_props) - return None - - -def FormatSubversionPropertyChanges(filename, props): - """Returns Subversion's 'Property changes on ...' strings using given filename - and properties. - - Args: - filename: filename - props: A list whose element is a (svn_prop_key, svn_prop_value) pair. - - Returns: - A string which can be used in the patch file for Subversion. - - See the following doctest for example. - - >>> print FormatSubversionPropertyChanges('foo.cc', [('svn:eol-style', 'LF')]) - Property changes on: foo.cc - ___________________________________________________________________ - Added: svn:eol-style - + LF - <BLANKLINE> - """ - prop_changes_lines = [ - "Property changes on: %s" % filename, - "___________________________________________________________________"] - for key, value in props: - prop_changes_lines.append("Added: " + key) - prop_changes_lines.append(" + " + value) - return "\n".join(prop_changes_lines) + "\n" - - -def RealMain(argv, data=None): - """The real main function. - - Args: - argv: Command line arguments. - data: Diff contents. If None (default) the diff is generated by - the VersionControlSystem implementation returned by GuessVCS(). - - Returns: - A 2-tuple (issue id, patchset id). - The patchset id is None if the base files are not uploaded by this - script (applies only to SVN checkouts). - """ - logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:" - "%(lineno)s %(message)s ")) - os.environ['LC_ALL'] = 'C' - options, args = parser.parse_args(argv[1:]) - global verbosity - verbosity = options.verbose - if verbosity >= 3: - logging.getLogger().setLevel(logging.DEBUG) - elif verbosity >= 2: - logging.getLogger().setLevel(logging.INFO) - - vcs = GuessVCS(options) - - base = options.base_url - if isinstance(vcs, SubversionVCS): - # Guessing the base field is only supported for Subversion. - # Note: Fetching base files may become deprecated in future releases. - guessed_base = vcs.GuessBase(options.download_base) - if base: - if guessed_base and base != guessed_base: - print "Using base URL \"%s\" from --base_url instead of \"%s\"" % \ - (base, guessed_base) - else: - base = guessed_base - - if not base and options.download_base: - options.download_base = True - logging.info("Enabled upload of base file") - if not options.assume_yes: - vcs.CheckForUnknownFiles() - if data is None: - data = vcs.GenerateDiff(args) - files = vcs.GetBaseFiles(data) - if verbosity >= 1: - print "Upload server:", options.server, "(change with -s/--server)" - if options.issue: - prompt = "Message describing this patch set: " - else: - prompt = "New issue subject: " - message = options.message or raw_input(prompt).strip() - if not message: - ErrorExit("A non-empty message is required") - rpc_server = GetRpcServer(options.server, - options.email, - options.host, - options.save_cookies) - form_fields = [("subject", message)] - if base: - form_fields.append(("base", base)) - if options.issue: - form_fields.append(("issue", str(options.issue))) - if options.email: - form_fields.append(("user", options.email)) - if options.reviewers: - for reviewer in options.reviewers.split(','): - CheckReviewer(reviewer) - form_fields.append(("reviewers", options.reviewers)) - if options.cc: - for cc in options.cc.split(','): - CheckReviewer(cc) - form_fields.append(("cc", options.cc)) - description = options.description - if options.description_file: - if options.description: - ErrorExit("Can't specify description and description_file") - file = open(options.description_file, 'r') - description = file.read() - file.close() - if description: - form_fields.append(("description", description)) - # Send a hash of all the base file so the server can determine if a copy - # already exists in an earlier patchset. - base_hashes = "" - for file, info in files.iteritems(): - if not info[0] is None: - checksum = md5(info[0]).hexdigest() - if base_hashes: - base_hashes += "|" - base_hashes += checksum + ":" + file - form_fields.append(("base_hashes", base_hashes)) - if options.private: - if options.issue: - print "Warning: Private flag ignored when updating an existing issue." - else: - form_fields.append(("private", "1")) - # If we're uploading base files, don't send the email before the uploads, so - # that it contains the file status. - if options.send_mail and options.download_base: - form_fields.append(("send_mail", "1")) - if not options.download_base: - form_fields.append(("content_upload", "1")) - if len(data) > MAX_UPLOAD_SIZE: - print "Patch is large, so uploading file patches separately." - uploaded_diff_file = [] - form_fields.append(("separate_patches", "1")) - else: - uploaded_diff_file = [("data", "data.diff", data)] - ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file) - response_body = rpc_server.Send("/upload", body, content_type=ctype) - patchset = None - if not options.download_base or not uploaded_diff_file: - lines = response_body.splitlines() - if len(lines) >= 2: - msg = lines[0] - patchset = lines[1].strip() - patches = [x.split(" ", 1) for x in lines[2:]] - else: - msg = response_body - else: - msg = response_body - StatusUpdate(msg) - if not response_body.startswith("Issue created.") and \ - not response_body.startswith("Issue updated."): - sys.exit(0) - issue = msg[msg.rfind("/")+1:] - - if not uploaded_diff_file: - result = UploadSeparatePatches(issue, rpc_server, patchset, data, options) - if not options.download_base: - patches = result - - if not options.download_base: - vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files) - if options.send_mail: - rpc_server.Send("/" + issue + "/mail", payload="") - return issue, patchset - - -def main(): - try: - RealMain(sys.argv) - except KeyboardInterrupt: - print - StatusUpdate("Interrupted.") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py index c21fdc6..ee8c669 100644 --- a/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py +++ b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command.py @@ -75,8 +75,35 @@ class Rollout(IRCCommand): tool.bugs.bug_url_for_bug_id(bug_id)) +class Help(IRCCommand): + def execute(self, nick, args, tool, sheriff): + return "%s: Available commands: %s" % (nick, ", ".join(commands.keys())) + + class Hi(IRCCommand): def execute(self, nick, args, tool, sheriff): quips = tool.bugs.quips() quips.append('"Only you can prevent forest fires." -- Smokey the Bear') return random.choice(quips) + + +class Eliza(IRCCommand): + therapist = None + + def __init__(self): + if not self.therapist: + import webkitpy.thirdparty.autoinstalled.eliza as eliza + Eliza.therapist = eliza.eliza() + + def execute(self, nick, args, tool, sheriff): + return "%s: %s" % (nick, self.therapist.respond(" ".join(args))) + + +# FIXME: Lame. We should have an auto-registering CommandCenter. +commands = { + "last-green-revision": LastGreenRevision, + "restart": Restart, + "rollout": Rollout, + "help": Help, + "hi": Hi, +} diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py new file mode 100644 index 0000000..7aeb6a0 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/tool/bot/irc_command_unittest.py @@ -0,0 +1,38 @@ +# Copyright (c) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import unittest + +from webkitpy.tool.bot.irc_command import * + + +class IRCCommandTest(unittest.TestCase): + def test_eliza(self): + eliza = Eliza() + eliza.execute("tom", "hi", None, None) + eliza.execute("tom", "bye", None, None) diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py b/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py index ac7a760..a1a66a1 100644 --- a/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py +++ b/WebKitTools/Scripts/webkitpy/tool/bot/queueengine.py @@ -113,7 +113,7 @@ class QueueEngine: # handled in the child process and we should just keep looping. if e.exit_code == self.handled_error_code: continue - message = "Unexpected failure when landing patch! Please file a bug against webkit-patch.\n%s" % e.message_with_output() + message = "Unexpected failure when processing patch! Please file a bug against webkit-patch.\n%s" % e.message_with_output() self._delegate.handle_unexpected_error(work_item, message) except TerminateQueue, e: log("\nTerminateQueue exception received.") diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py index 43aa9c3..de77222 100644 --- a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py +++ b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot.py @@ -52,14 +52,6 @@ class _IRCThreadTearoff(IRCBotDelegate): class SheriffIRCBot(object): - # FIXME: Lame. We should have an auto-registering CommandCenter. - commands = { - "last-green-revision": irc_command.LastGreenRevision, - "restart": irc_command.Restart, - "rollout": irc_command.Rollout, - "hi": irc_command.Hi, - } - def __init__(self, tool, sheriff): self._tool = tool self._sheriff = sheriff @@ -75,15 +67,13 @@ class SheriffIRCBot(object): tokenized_request = request.strip().split(" ") if not tokenized_request: return - command = self.commands.get(tokenized_request[0]) + command = irc_command.commands.get(tokenized_request[0]) + args = tokenized_request[1:] if not command: - self._tool.irc().post("%s: Available commands: %s" % ( - nick, ", ".join(self.commands.keys()))) - return - response = command().execute(nick, - tokenized_request[1:], - self._tool, - self._sheriff) + # Give the peoples someone to talk with. + command = irc_command.Eliza + args = tokenized_request + response = command().execute(nick, args, self._tool, self._sheriff) if response: self._tool.irc().post(response) diff --git a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py index d5116e4..21bff12 100644 --- a/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/bot/sheriffircbot_unittest.py @@ -50,9 +50,9 @@ class SheriffIRCBotTest(unittest.TestCase): expected_stderr = 'MOCK: irc.post: "Only you can prevent forest fires." -- Smokey the Bear\n' OutputCapture().assert_outputs(self, run, args=["hi"], expected_stderr=expected_stderr) - def test_bogus(self): - expected_stderr = "MOCK: irc.post: mock_nick: Available commands: rollout, hi, restart, last-green-revision\n" - OutputCapture().assert_outputs(self, run, args=["bogus"], expected_stderr=expected_stderr) + def test_help(self): + expected_stderr = "MOCK: irc.post: mock_nick: Available commands: rollout, hi, help, restart, last-green-revision\n" + OutputCapture().assert_outputs(self, run, args=["help"], expected_stderr=expected_stderr) def test_lgr(self): expected_stderr = "MOCK: irc.post: mock_nick: http://trac.webkit.org/changeset/9479\n" diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/download.py b/WebKitTools/Scripts/webkitpy/tool/commands/download.py index a283da9..a85b09a 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/download.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/download.py @@ -182,6 +182,18 @@ class BuildAttachment(AbstractPatchSequencingCommand, ProcessAttachmentsMixin): ] +class PostAttachmentToRietveld(AbstractPatchSequencingCommand, ProcessAttachmentsMixin): + name = "post-attachment-to-rietveld" + help_text = "Uploads a bugzilla attachment to rietveld" + arguments_names = "ATTACHMENTID" + main_steps = [ + steps.CleanWorkingDirectory, + steps.Update, + steps.ApplyPatch, + steps.PostCodeReview, + ] + + class AbstractPatchApplyingCommand(AbstractPatchSequencingCommand): prepare_steps = [ steps.EnsureLocalCommitIfNeeded, diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py index 08a4377..958620a 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/download_unittest.py @@ -108,6 +108,10 @@ class DownloadCommandsTest(CommandsTest): expected_stderr = "Processing 1 patch from 1 bug.\nUpdating working directory\nProcessing patch 197 from bug 42.\nBuilding WebKit\n" self.assert_execute_outputs(BuildAttachment(), [197], options=self._default_options(), expected_stderr=expected_stderr) + def test_post_attachment_to_rietveld(self): + expected_stderr = "Processing 1 patch from 1 bug.\nUpdating working directory\nProcessing patch 197 from bug 42.\nMOCK: Uploading patch to rietveld\nMOCK setting flag 'in-rietveld' to '+' on attachment '197' with comment 'None' and additional comment 'None'\n" + self.assert_execute_outputs(PostAttachmentToRietveld(), [197], options=self._default_options(), expected_stderr=expected_stderr) + def test_land_attachment(self): # FIXME: This expected result is imperfect, notice how it's seeing the same patch as still there after it thought it would have cleared the flags. expected_stderr = """Processing 1 patch from 1 bug. diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py index 27e09ba..67393d8 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem_unittest.py @@ -43,17 +43,23 @@ class EarlyWarningSytemTest(QueuesTest): string_replacemnts = { "name": ews.name, "checkout_dir": os.getcwd(), # FIXME: Use of os.getcwd() is wrong, should be scm.checkout_root + "port": ews.port_name, + "watchers": ews.watchers, } expected_stderr = { "begin_work_queue": "CAUTION: %(name)s will discard all local changes in \"%(checkout_dir)s\"\nRunning WebKit %(name)s.\n" % string_replacemnts, "handle_unexpected_error": "Mock error message\n", "next_work_item": "MOCK: update_work_items: %(name)s [103]\n" % string_replacemnts, "process_work_item": "MOCK: update_status: %(name)s Pass\n" % string_replacemnts, + "handle_script_error": "MOCK: update_status: %(name)s ScriptError error message\nMOCK bug comment: bug_id=345, cc=%(watchers)s\n--- Begin comment ---\\Attachment 1234 did not build on %(port)s:\nBuild output: http://dummy_url\n--- End comment ---\n\n" % string_replacemnts, } return expected_stderr def _test_ews(self, ews): - self.assert_queue_outputs(ews, expected_stderr=self._default_expected_stderr(ews)) + expected_exceptions = { + "handle_script_error": SystemExit, + } + self.assert_queue_outputs(ews, expected_stderr=self._default_expected_stderr(ews), expected_exceptions=expected_exceptions) # FIXME: If all EWSes are going to output the same text, we # could test them all in one method with a for loop over an array. @@ -73,4 +79,7 @@ class EarlyWarningSytemTest(QueuesTest): ews = MacEWS() expected_stderr = self._default_expected_stderr(ews) expected_stderr["process_work_item"] = "MOCK: update_status: mac-ews Error: mac-ews cannot process patches from non-committers :(\n" - self.assert_queue_outputs(ews, expected_stderr=expected_stderr) + expected_exceptions = { + "handle_script_error": SystemExit, + } + self.assert_queue_outputs(ews, expected_stderr=expected_stderr, expected_exceptions=expected_exceptions) diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queues.py b/WebKitTools/Scripts/webkitpy/tool/commands/queues.py index 08bd3aa..d14ac9e 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/queues.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/queues.py @@ -121,7 +121,7 @@ class AbstractQueue(Command, QueueEngineDelegate): @classmethod def _update_status_for_script_error(cls, tool, state, script_error, is_error=False): - message = script_error.message + message = str(script_error) if is_error: message = "Error: %s" % message output = script_error.message_with_output(output_limit=1024*1024) # 1MB @@ -289,7 +289,6 @@ class CommitQueue(AbstractPatchQueue, StepSequenceErrorHandler): self.committer_validator.reject_patch_from_commit_queue(patch.id(), message) # StepSequenceErrorHandler methods - @staticmethod def _error_message_for_bug(tool, status_id, script_error): if not script_error.output: @@ -304,6 +303,51 @@ class CommitQueue(AbstractPatchQueue, StepSequenceErrorHandler): validator.reject_patch_from_commit_queue(state["patch"].id(), cls._error_message_for_bug(tool, status_id, script_error)) +class RietveldUploadQueue(AbstractPatchQueue, StepSequenceErrorHandler): + name = "rietveld-upload-queue" + + def __init__(self): + AbstractPatchQueue.__init__(self) + + # AbstractPatchQueue methods + + def next_work_item(self): + patch_id = self.tool.bugs.queries.fetch_first_patch_from_rietveld_queue() + if patch_id: + return patch_id + self._update_status("Empty queue") + + def should_proceed_with_work_item(self, patch): + self._update_status("Uploading patch", patch) + return True + + def process_work_item(self, patch): + try: + self.run_webkit_patch(["post-attachment-to-rietveld", "--force-clean", "--non-interactive", "--parent-command=rietveld-upload-queue", patch.id()]) + self._did_pass(patch) + return True + except ScriptError, e: + if e.exit_code != QueueEngine.handled_error_code: + self._did_fail(patch) + raise e + + @classmethod + def _reject_patch(cls, tool, patch_id): + tool.bugs.set_flag_on_attachment(patch_id, "in-rietveld", "-") + + def handle_unexpected_error(self, patch, message): + log(message) + self._reject_patch(self.tool, patch.id()) + + # StepSequenceErrorHandler methods + + @classmethod + def handle_script_error(cls, tool, state, script_error): + log(script_error.message_with_output()) + cls._update_status_for_script_error(tool, state, script_error) + cls._reject_patch(tool, state["patch"].id()) + + class AbstractReviewQueue(AbstractPatchQueue, PersistentPatchCollectionDelegate, StepSequenceErrorHandler): def __init__(self, options=None): AbstractPatchQueue.__init__(self, options) diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py index a5d56da..b32dfa8 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py @@ -122,10 +122,13 @@ class CommitQueueTest(QueuesTest): # FIXME: The commit-queue warns about bad committers twice. This is due to the fact that we access Attachment.reviewer() twice and it logs each time. "next_work_item" : """Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) +MOCK setting flag 'commit-queue' to '-' on attachment '128' with comment 'Rejecting patch 128 from commit-queue.' and additional comment 'non-committer@example.com does not have committer permissions according to http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/common/config/committers.py.\n\n- If you do not have committer rights please read http://webkit.org/coding/contributing.html for instructions on how to use bugzilla flags.\n\n- If you have committer rights please correct the error in WebKitTools/Scripts/webkitpy/common/config/committers.py by adding yourself to the file (no review needed). Due to bug 30084 the commit-queue will require a restart after your change. Please contact eseidel@chromium.org to request a commit-queue restart. After restart the commit-queue will correctly respect your committer rights.' MOCK: update_work_items: commit-queue [106, 197] 2 patches in commit-queue [106, 197] """, "process_work_item" : "MOCK: update_status: commit-queue Pass\n", + "handle_unexpected_error" : "MOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'Mock error message'\n", + "handle_script_error": "MOCK: update_status: commit-queue ScriptError error message\nMOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'ScriptError error message'\n", } self.assert_queue_outputs(CommitQueue(), expected_stderr=expected_stderr) @@ -138,11 +141,14 @@ MOCK: update_work_items: commit-queue [106, 197] # FIXME: The commit-queue warns about bad committers twice. This is due to the fact that we access Attachment.reviewer() twice and it logs each time. "next_work_item" : """Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) +MOCK setting flag \'commit-queue\' to \'-\' on attachment \'128\' with comment \'Rejecting patch 128 from commit-queue.\' and additional comment \'non-committer@example.com does not have committer permissions according to http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/common/config/committers.py.\n\n- If you do not have committer rights please read http://webkit.org/coding/contributing.html for instructions on how to use bugzilla flags.\n\n- If you have committer rights please correct the error in WebKitTools/Scripts/webkitpy/common/config/committers.py by adding yourself to the file (no review needed). Due to bug 30084 the commit-queue will require a restart after your change. Please contact eseidel@chromium.org to request a commit-queue restart. After restart the commit-queue will correctly respect your committer rights.\' MOCK: update_work_items: commit-queue [106, 197] MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://build.webkit.org 1 patch in commit-queue [106] """, "process_work_item" : "MOCK: update_status: commit-queue Builders [\"Builder2\"] are red. See http://build.webkit.org\n", + "handle_unexpected_error" : "MOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'Mock error message'\n", + "handle_script_error": "MOCK: update_status: commit-queue ScriptError error message\nMOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'ScriptError error message'\n", } self.assert_queue_outputs(CommitQueue(), tool=tool, expected_stderr=expected_stderr) @@ -156,11 +162,14 @@ MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://buil # FIXME: The commit-queue warns about bad committers twice. This is due to the fact that we access Attachment.reviewer() twice and it logs each time. "next_work_item": """Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) +MOCK setting flag \'commit-queue\' to \'-\' on attachment \'128\' with comment \'Rejecting patch 128 from commit-queue.\' and additional comment \'non-committer@example.com does not have committer permissions according to http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/common/config/committers.py.\n\n- If you do not have committer rights please read http://webkit.org/coding/contributing.html for instructions on how to use bugzilla flags.\n\n- If you have committer rights please correct the error in WebKitTools/Scripts/webkitpy/common/config/committers.py by adding yourself to the file (no review needed). Due to bug 30084 the commit-queue will require a restart after your change. Please contact eseidel@chromium.org to request a commit-queue restart. After restart the commit-queue will correctly respect your committer rights.\' MOCK: update_work_items: commit-queue [106, 197] MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://build.webkit.org 1 patch in commit-queue [106] """, "process_work_item": "MOCK run_and_throw_if_fail: ['echo', '--status-host=example.com', 'land-attachment', '--force-clean', '--build', '--non-interactive', '--ignore-builders', '--build-style=both', '--quiet', 76543]\nMOCK: update_status: commit-queue Pass\n", + "handle_unexpected_error": "MOCK setting flag 'commit-queue' to '-' on attachment '76543' with comment 'Rejecting patch 76543 from commit-queue.' and additional comment 'Mock error message'\n", + "handle_script_error": "MOCK: update_status: commit-queue ScriptError error message\nMOCK setting flag 'commit-queue' to '-' on attachment '1234' with comment 'Rejecting patch 1234 from commit-queue.' and additional comment 'ScriptError error message'\n", } self.assert_queue_outputs(CommitQueue(), tool=tool, work_item=rollout_patch, expected_stderr=expected_stderr) @@ -193,6 +202,18 @@ MOCK: update_status: commit-queue Builders ["Builder2"] are red. See http://buil self.assertEqual(attachments, expected_sort) +class RietveldUploadQueueTest(QueuesTest): + def test_rietveld_upload_queue(self): + expected_stderr = { + "begin_work_queue": "CAUTION: rietveld-upload-queue will discard all local changes in \"%s\"\nRunning WebKit rietveld-upload-queue.\n" % MockSCM.fake_checkout_root, + "should_proceed_with_work_item": "MOCK: update_status: rietveld-upload-queue Uploading patch\n", + "process_work_item": "MOCK: update_status: rietveld-upload-queue Pass\n", + "handle_unexpected_error": "Mock error message\nMOCK setting flag 'in-rietveld' to '-' on attachment '1234' with comment 'None' and additional comment 'None'\n", + "handle_script_error": "ScriptError error message\nMOCK: update_status: rietveld-upload-queue ScriptError error message\nMOCK setting flag 'in-rietveld' to '-' on attachment '1234' with comment 'None' and additional comment 'None'\n", + } + self.assert_queue_outputs(RietveldUploadQueue(), expected_stderr=expected_stderr) + + class StyleQueueTest(QueuesTest): def test_style_queue(self): expected_stderr = { @@ -201,5 +222,9 @@ class StyleQueueTest(QueuesTest): "should_proceed_with_work_item": "MOCK: update_status: style-queue Checking style\n", "process_work_item" : "MOCK: update_status: style-queue Pass\n", "handle_unexpected_error" : "Mock error message\n", + "handle_script_error": "MOCK: update_status: style-queue ScriptError error message\nMOCK bug comment: bug_id=345, cc=[]\n--- Begin comment ---\\Attachment 1234 did not pass style-queue:\n\nScriptError error message\n\nIf any of these errors are false positives, please file a bug against check-webkit-style.\n--- End comment ---\n\n", + } + expected_exceptions = { + "handle_script_error": SystemExit, } - self.assert_queue_outputs(StyleQueue(), expected_stderr=expected_stderr) + self.assert_queue_outputs(StyleQueue(), expected_stderr=expected_stderr, expected_exceptions=expected_exceptions) diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py b/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py index bf7e32a..9e17c5c 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/queuestest.py @@ -30,6 +30,7 @@ import unittest from webkitpy.common.net.bugzilla import Attachment from webkitpy.common.system.outputcapture import OutputCapture +from webkitpy.common.system.executive import ScriptError from webkitpy.thirdparty.mock import Mock from webkitpy.tool.mocktool import MockTool @@ -42,6 +43,14 @@ class MockQueueEngine(object): pass +class MockPatch(): + def id(self): + return 1234 + + def bug_id(self): + return 345 + + class QueuesTest(unittest.TestCase): mock_work_item = Attachment({ "id": 1234, @@ -50,7 +59,19 @@ class QueuesTest(unittest.TestCase): "attacher_email": "adam@example.com", }, None) - def assert_queue_outputs(self, queue, args=None, work_item=None, expected_stdout=None, expected_stderr=None, options=Mock(), tool=MockTool()): + def assert_outputs(self, func, func_name, args, expected_stdout, expected_stderr, expected_exceptions): + exception = None + if expected_exceptions and func_name in expected_exceptions: + exception = expected_exceptions[func_name] + + OutputCapture().assert_outputs(self, + func, + args=args, + expected_stdout=expected_stdout.get(func_name, ""), + expected_stderr=expected_stderr.get(func_name, ""), + expected_exception=exception) + + def assert_queue_outputs(self, queue, args=None, work_item=None, expected_stdout=None, expected_stderr=None, expected_exceptions=None, options=Mock(), tool=MockTool()): if not expected_stdout: expected_stdout = {} if not expected_stderr: @@ -63,38 +84,12 @@ class QueuesTest(unittest.TestCase): queue.execute(options, args, tool, engine=MockQueueEngine) - OutputCapture().assert_outputs(self, - queue.queue_log_path, - expected_stdout=expected_stdout.get("queue_log_path", ""), - expected_stderr=expected_stderr.get("queue_log_path", "")) - OutputCapture().assert_outputs(self, - queue.work_item_log_path, - args=[work_item], - expected_stdout=expected_stdout.get("work_item_log_path", ""), - expected_stderr=expected_stderr.get("work_item_log_path", "")) - OutputCapture().assert_outputs(self, - queue.begin_work_queue, - expected_stdout=expected_stdout.get("begin_work_queue", ""), - expected_stderr=expected_stderr.get("begin_work_queue", "")) - OutputCapture().assert_outputs(self, - queue.should_continue_work_queue, - expected_stdout=expected_stdout.get("should_continue_work_queue", ""), expected_stderr=expected_stderr.get("should_continue_work_queue", "")) - OutputCapture().assert_outputs(self, - queue.next_work_item, - expected_stdout=expected_stdout.get("next_work_item", ""), - expected_stderr=expected_stderr.get("next_work_item", "")) - OutputCapture().assert_outputs(self, - queue.should_proceed_with_work_item, - args=[work_item], - expected_stdout=expected_stdout.get("should_proceed_with_work_item", ""), - expected_stderr=expected_stderr.get("should_proceed_with_work_item", "")) - OutputCapture().assert_outputs(self, - queue.process_work_item, - args=[work_item], - expected_stdout=expected_stdout.get("process_work_item", ""), - expected_stderr=expected_stderr.get("process_work_item", "")) - OutputCapture().assert_outputs(self, - queue.handle_unexpected_error, - args=[work_item, "Mock error message"], - expected_stdout=expected_stdout.get("handle_unexpected_error", ""), - expected_stderr=expected_stderr.get("handle_unexpected_error", "")) + self.assert_outputs(queue.queue_log_path, "queue_log_path", [], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.work_item_log_path, "work_item_log_path", [work_item], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.begin_work_queue, "begin_work_queue", [], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.should_continue_work_queue, "should_continue_work_queue", [], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.next_work_item, "next_work_item", [], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.should_proceed_with_work_item, "should_proceed_with_work_item", [work_item], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.process_work_item, "process_work_item", [work_item], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.handle_unexpected_error, "handle_unexpected_error", [work_item, "Mock error message"], expected_stdout, expected_stderr, expected_exceptions) + self.assert_outputs(queue.handle_script_error, "handle_script_error", [tool, {"patch": MockPatch()}, ScriptError(message="ScriptError error message", script_args="MockErrorCommand")], expected_stdout, expected_stderr, expected_exceptions) diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/upload.py b/WebKitTools/Scripts/webkitpy/tool/commands/upload.py index cf715b9..e682ca7 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/upload.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/upload.py @@ -171,7 +171,6 @@ class Post(AbstractPatchUploadingCommand): steps = [ steps.CheckStyle, steps.ConfirmDiff, - steps.PostCodeReview, steps.ObsoletePatches, steps.PostDiff, ] @@ -215,7 +214,6 @@ class Upload(AbstractPatchUploadingCommand): steps.PrepareChangeLog, steps.EditChangeLog, steps.ConfirmDiff, - steps.PostCodeReview, steps.ObsoletePatches, steps.PostDiff, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py index d52775b..8fef54a 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py @@ -56,8 +56,6 @@ class UploadCommandsTest(CommandsTest): options.request_commit = False options.review = True options.comment = None - # Rietveld upload code requires a real SCM checkout. - options.fancy_review = False options.cc = None expected_stderr = """Running check-webkit-style MOCK: user.open_url: file://... @@ -87,8 +85,6 @@ MOCK: user.open_url: http://example.com/42 options.request_commit = False options.review = True options.comment = None - # Rietveld upload code requires a real SCM checkout. - options.fancy_review = False options.cc = None expected_stderr = """Running check-webkit-style MOCK: user.open_url: file://... diff --git a/WebKitTools/Scripts/webkitpy/tool/mocktool.py b/WebKitTools/Scripts/webkitpy/tool/mocktool.py index 3934ea3..d88190f 100644 --- a/WebKitTools/Scripts/webkitpy/tool/mocktool.py +++ b/WebKitTools/Scripts/webkitpy/tool/mocktool.py @@ -86,6 +86,7 @@ _patch3 = { "name": "Patch3", "is_obsolete": False, "is_patch": True, + "in-rietveld": "?", "review": "?", "attacher_email": "eric@webkit.org", } @@ -112,6 +113,7 @@ _patch5 = { "name": "Patch5", "is_obsolete": False, "is_patch": True, + "in-rietveld": "?", "review": "+", "reviewer_email": "foo@bar.com", "attacher_email": "eric@webkit.org", @@ -125,6 +127,7 @@ _patch6 = { # Valid committer, but no reviewer. "name": "ROLLOUT of r3489", "is_obsolete": False, "is_patch": True, + "in-rietveld": "-", "commit-queue": "+", "committer_email": "foo@bar.com", "attacher_email": "eric@webkit.org", @@ -138,6 +141,7 @@ _patch7 = { # Valid review, patch is marked obsolete. "name": "Patch7", "is_obsolete": True, "is_patch": True, + "in-rietveld": "+", "review": "+", "reviewer_email": "foo@bar.com", "attacher_email": "eric@webkit.org", @@ -221,6 +225,12 @@ class MockBugzillaQueries(Mock): def fetch_patches_from_pending_commit_list(self): return sum([bug.reviewed_patches() for bug in self._all_bugs()], []) + def fetch_first_patch_from_rietveld_queue(self): + for bug in self._all_bugs(): + patches = bug.in_rietveld_queue_patches() + if len(patches): + return patches[0] + raise Exception('No patches in the rietveld queue') # FIXME: Bugzilla is the wrong Mock-point. Once we have a BugzillaNetwork # class we should mock that instead. @@ -287,6 +297,15 @@ class MockBugzilla(Mock): action_param = "&action=%s" % action return "%s/%s%s" % (self.bug_server_url, attachment_id, action_param) + def set_flag_on_attachment(self, + attachment_id, + flag_name, + flag_value, + comment_text=None, + additional_comment_text=None): + log("MOCK setting flag '%s' to '%s' on attachment '%s' with comment '%s' and additional comment '%s'" % ( + flag_name, flag_value, attachment_id, comment_text, additional_comment_text)) + def post_comment_to_bug(self, bug_id, comment_text, cc=None): log("MOCK bug comment: bug_id=%s, cc=%s\n--- Begin comment ---\%s\n--- End comment ---\n" % ( bug_id, cc, comment_text)) @@ -453,6 +472,9 @@ class MockUser(object): def confirm(self, message=None): return True + def can_open_url(self): + return True + def open_url(self, url): if url.startswith("file://"): log("MOCK: user.open_url: file://...") @@ -490,6 +512,8 @@ class MockStatusServer(object): def update_svn_revision(self, svn_revision, broken_bot): return 191 + def results_url_for_status(self, status_id): + return "http://dummy_url" class MockExecute(Mock): def __init__(self, should_log): @@ -513,6 +537,15 @@ class MockExecute(Mock): return "MOCK output of child process" +class MockRietveld(): + + def __init__(self, executive, dryrun=False): + pass + + def post(self, diff, message=None, codereview_issue=None, cc=None): + log("MOCK: Uploading patch to rietveld") + + class MockTool(): def __init__(self, log_executive=False): @@ -526,7 +559,7 @@ class MockTool(): self._checkout = MockCheckout() self.status_server = MockStatusServer() self.irc_password = "MOCK irc password" - self.codereview = Rietveld(self.executive) + self.codereview = MockRietveld(self.executive) def scm(self): return self._scm diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py b/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py index abafe63..20f8bbf 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py @@ -53,8 +53,9 @@ class AbstractStep(object): return self._port _well_known_keys = { - "diff": lambda self: self._tool.scm().create_patch(self._options.git_commit, self._options.squash), - "changelogs": lambda self: self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash), + "diff": lambda self, state: self._tool.scm().create_patch(self._options.git_commit, self._options.squash), + "changelogs": lambda self, state: self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash), + "bug_title": lambda self, state: self._tool.bugs.fetch_bug(state["bug_id"]).title(), } def cached_lookup(self, state, key, promise=None): @@ -62,7 +63,7 @@ class AbstractStep(object): return state[key] if not promise: promise = self._well_known_keys.get(key) - state[key] = promise(self) + state[key] = promise(self, state) return state[key] @classmethod diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py b/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py index 626fcf3..7e8e348 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py @@ -46,6 +46,9 @@ class ConfirmDiff(AbstractStep): ] def _show_pretty_diff(self, diff): + if not self._tool.user.can_open_url(): + return None + try: pretty_patch = PrettyPatch(self._tool.executive, self._tool.scm().checkout_root) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/options.py b/WebKitTools/Scripts/webkitpy/tool/steps/options.py index 186d292..fa36f73 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/options.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/options.py @@ -41,7 +41,6 @@ class Options(object): confirm = make_option("--no-confirm", action="store_false", dest="confirm", default=True, help="Skip confirmation steps.") description = make_option("-m", "--description", action="store", type="string", dest="description", help="Description string for the attachment (default: \"patch\")") email = make_option("--email", action="store", type="string", dest="email", help="Email address to use in ChangeLogs.") - fancy_review = make_option("--fancy-review", action="store_true", dest="fancy_review", default=False, help="(Experimental) Upload the patch to Rietveld code review tool.") force_clean = make_option("--force-clean", action="store_true", dest="force_clean", default=False, help="Clean working directory before applying patches (removes local changes and commits)") # FIXME: Make commit ranges treat each commit separately instead of squashing them into one. git_commit = make_option("--git-commit", action="store", dest="git_commit", help="Local git commit to upload/land. If a range, the commits are squashed into one.") diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py b/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py index 8397519..f9bc685 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py @@ -36,33 +36,27 @@ class PostCodeReview(AbstractStep): return AbstractStep.options() + [ Options.cc, Options.description, - Options.fancy_review, ] def run(self, state): - if not self._options.fancy_review: - return + patch = state.get("patch") + bug_id = patch.bug_id() + title = patch.name() - bug_id = state.get("bug_id") - if not bug_id: - raise ScriptError(message="Cannot upload a fancy review without a bug ID.") - - message = self._options.description - if not message: - # If we have an issue number, then the message becomes the label - # of the new patch. Otherwise, it becomes the title of the whole - # issue. - if state.get("bug_title"): - # This is the common case for the the first "upload" command. - message = state.get("bug_title") - elif bug_id: - # This is the common case for the "post" command and - # subsequent runs of the "upload" command. - message = "Code review for %s" % self._tool.bugs.bug_url_for_bug_id(bug_id) - else: - # Unreachable with our current commands, but we might hit - # this case if we support bug-less code reviews. - message = "Code review" + # If the issue already exists, then the message becomes the label + # of the new patch. Otherwise, it becomes the title of the whole + # issue. + if title: + # This is the common case for the the first "upload" command. + message = title + elif bug_id: + # This is the common case for the "post" command and + # subsequent runs of the "upload" command. + message = "Code review for %s" % self._tool.bugs.bug_url_for_bug_id(bug_id) + else: + # Unreachable with our current commands, but we might hit + # this case if we support bug-less code reviews. + message = "Code review" # Use the bug ID as the rietveld issue number. This means rietveld code reviews # when there are multiple different patches on a bug will be a bit wonky, but @@ -71,3 +65,5 @@ class PostCodeReview(AbstractStep): message=message, codereview_issue=bug_id, cc=self._options.cc) + + self._tool.bugs.set_flag_on_attachment(patch.id(), 'in-rietveld', '+') diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py index 3a5c013..59048a3 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py @@ -28,6 +28,7 @@ import os +from webkitpy.common.checkout.changelog import ChangeLog from webkitpy.common.system.executive import ScriptError from webkitpy.tool.steps.abstractstep import AbstractStep from webkitpy.tool.steps.options import Options @@ -46,8 +47,21 @@ class PrepareChangeLog(AbstractStep): Options.squash, ] + def _ensure_bug_url(self, state): + if not state.get("bug_id"): + return + bug_id = state.get("bug_id") + changelogs = self.cached_lookup(state, "changelogs") + for changelog_path in changelogs: + changelog = ChangeLog(changelog_path) + if not changelog.latest_entry().bug_id(): + changelog.set_short_description_and_bug_url( + self.cached_lookup(state, "bug_title"), + self._tool.bugs.bug_url_for_bug_id(bug_id)) + def run(self, state): if self.cached_lookup(state, "changelogs"): + self._ensure_bug_url(state) return os.chdir(self._tool.scm().checkout_root) args = [self.port().script_path("prepare-ChangeLog")] @@ -56,7 +70,7 @@ class PrepareChangeLog(AbstractStep): if self._options.email: args.append("--email=%s" % self._options.email) if self._tool.scm().should_squash(self._options.squash): - args.append("--merge-base=%s" % self._tool.scm().svn_merge_base()) + args.append("--merge-base=%s" % self._tool.scm().remote_merge_base()) if self._options.git_commit: args.append("--git-commit=%s" % self._options.git_commit) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py new file mode 100644 index 0000000..1d0db75 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog_unittest.py @@ -0,0 +1,55 @@ +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import unittest + +from webkitpy.common.checkout.changelog_unittest import ChangeLogTest +from webkitpy.common.system.outputcapture import OutputCapture +from webkitpy.thirdparty.mock import Mock +from webkitpy.tool.mocktool import MockTool +from webkitpy.tool.steps.preparechangelog import PrepareChangeLog + + +class PrepareChangeLogTest(ChangeLogTest): + def test_ensure_bug_url(self): + capture = OutputCapture() + step = PrepareChangeLog(MockTool(), Mock()) + changelog_contents = u"%s\n%s" % (self._new_entry_boilerplate, self._example_changelog) + changelog_path = self._write_tmp_file_with_contents(changelog_contents.encode("utf-8")) + state = { + "bug_title": "Example title", + "bug_id": 1234, + "changelogs": [changelog_path], + } + capture.assert_outputs(self, step.run, [state]) + actual_contents = self._read_file_contents(changelog_path, "utf-8") + expected_message = "Example title\n http://example.com/1234" + expected_contents = changelog_contents.replace("Need a short description and bug URL (OOPS!)", expected_message) + os.remove(changelog_path) + self.assertEquals(actual_contents, expected_contents) |