author     Ben Murdoch <benm@google.com>    2010-05-11 18:35:50 +0100
committer  Ben Murdoch <benm@google.com>    2010-05-14 10:23:05 +0100
commit     21939df44de1705786c545cd1bf519d47250322d
tree       ef56c310f5c0cdc379c2abb2e212308a3281ce20 /WebKitTools/Scripts
parent     4ff1d8891d520763f17675827154340c7c740f90
Merge WebKit at r58956: Initial merge by Git.
Change-Id: I1d9fb60ea2c3f2ddc04c17a871acdb39353be228
Diffstat (limited to 'WebKitTools/Scripts')
176 files changed, 23285 insertions, 2623 deletions
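The core of this merge is a rewrite of the patch-handling pipeline in VCSUtils.pm: the single parseDiffHeader() is split into parseSvnDiffHeader() and parseGitDiffHeader(), and a new prepareParsedPatch() groups parsed diffs for svn-apply and svn-unapply. As a minimal sketch of how a driver script consumes that API (it uses only names defined in the diff below; it is an illustration, not code from the commit):

    #!/usr/bin/perl
    use strict;
    use warnings;
    use VCSUtils;

    # Parse one hash reference per per-file diff from the patch file(s)
    # given on the command line. The typeglob passes the ARGV handle.
    my @diffHashRefs = parsePatch(*ARGV);

    # Split the diffs into copies and non-copies; 0 means no --force.
    my $prepared = prepareParsedPatch(0, @diffHashRefs);

    # Copies are handled first, since a move may delete its source
    # before the destination exists.
    for my $diff (@{$prepared->{copyDiffHashRefs}}) {
        print "copy: $diff->{copiedFromPath} => $diff->{indexPath}\n";
    }
    for my $diff (@{$prepared->{nonCopyDiffHashRefs}}) {
        print "apply: $diff->{indexPath}\n";
    }

As the changes below show, svn-apply additionally calls scmToggleExecutableBit() on each target whose diff carries an executableBitDelta.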
diff --git a/WebKitTools/Scripts/VCSUtils.pm b/WebKitTools/Scripts/VCSUtils.pm index 25a319b..777fe7f 100644 --- a/WebKitTools/Scripts/VCSUtils.pm +++ b/WebKitTools/Scripts/VCSUtils.pm @@ -65,6 +65,7 @@ BEGIN { &normalizePath &parsePatch &pathRelativeToSVNRepositoryRootForPath + &prepareParsedPatch &runPatchCommand &svnRevisionForDirectory &svnStatus @@ -96,6 +97,40 @@ sub exitStatus($) return WEXITSTATUS($returnvalue); } +# Note, this method will not error if the file corresponding to the path does not exist. +sub scmToggleExecutableBit +{ + my ($path, $executableBitDelta) = @_; + return if ! -e $path; + if ($executableBitDelta == 1) { + scmAddExecutableBit($path); + } elsif ($executableBitDelta == -1) { + scmRemoveExecutableBit($path); + } +} + +sub scmAddExecutableBit($) +{ + my ($path) = @_; + + if (isSVN()) { + system("svn", "propset", "svn:executable", "on", $path) == 0 or die "Failed to run 'svn propset svn:executable on $path'."; + } elsif (isGit()) { + chmod(0755, $path); + } +} + +sub scmRemoveExecutableBit($) +{ + my ($path) = @_; + + if (isSVN()) { + system("svn", "propdel", "svn:executable", $path) == 0 or die "Failed to run 'svn propdel svn:executable $path'."; + } elsif (isGit()) { + chmod(0664, $path); + } +} + sub isGitDirectory($) { my ($dir) = @_; @@ -362,126 +397,298 @@ sub svnStatus($) return $svnStatus; } -# Convert a line of a git-formatted patch to SVN format, while -# preserving any end-of-line characters. -sub gitdiff2svndiff($) +# Return whether the given file mode is executable in the source control +# sense. We make this determination based on whether the executable bit +# is set for "others" rather than the stronger condition that it be set +# for the user, group, and others. This is sufficient for distinguishing +# the default behavior in Git and SVN. +# +# Args: +# $fileMode: A number or string representing a file mode in octal notation. +sub isExecutable($) { - $_ = shift @_; + my $fileMode = shift; - if (m#^diff --git \w/(.+) \w/([^\r\n]+)#) { - return "Index: $1$POSTMATCH"; - } - if (m#^index [0-9a-f]{7}\.\.[0-9a-f]{7} [0-9]{6}#) { - # FIXME: No need to return dividing line once parseDiffHeader() is used. - return "===================================================================$POSTMATCH"; - } - if (m#^--- \w/([^\r\n]+)#) { - return "--- $1$POSTMATCH"; - } - if (m#^\+\+\+ \w/([^\r\n]+)#) { - return "+++ $1$POSTMATCH"; - } - return $_; + return $fileMode % 2; } -# Parse the next diff header from the given file handle, and advance -# the file handle so the last line read is the first line after the -# parsed header block. +# Parse the next Git diff header from the given file handle, and advance +# the handle so the last line read is the first line after the header. # -# This subroutine dies if given leading junk or if the end of the header -# block could not be detected. The last line of a header block is a -# line beginning with "+++". +# This subroutine dies if given leading junk. # # Args: -# $fileHandle: advanced so the last line read is the first line of the -# next diff header. For SVN-formatted diffs, this is the -# "Index:" line. +# $fileHandle: advanced so the last line read from the handle is the first +# line of the header to parse. This should be a line +# beginning with "diff --git". # $line: the line last read from $fileHandle # # Returns ($headerHashRef, $lastReadLine): -# $headerHashRef: a hash reference representing a diff header -# copiedFromPath: if a file copy, the path from which the file was -# copied. Otherwise, undefined. 
-# indexPath: the path in the "Index:" line. -# sourceRevision: the revision number of the source. This is the same -# as the revision number the file was copied from, in -# the case of a file copy. -# svnConvertedText: the header text converted to SVN format. -# Unrecognized lines are discarded. -# $lastReadLine: the line last read from $fileHandle. This is the first -# line after the header ending. -sub parseDiffHeader($$) +# $headerHashRef: a hash reference representing a diff header, as follows-- +# copiedFromPath: the path from which the file was copied if the diff +# is a copy. +# executableBitDelta: the value 1 or -1 if the executable bit was added or +# removed, respectively. New and deleted files have +# this value only if the file is executable, in which +# case the value is 1 and -1, respectively. +# indexPath: the path of the target file. +# isBinary: the value 1 if the diff is for a binary file. +# svnConvertedText: the header text with some lines converted to SVN +# format. Git-specific lines are preserved. +# $lastReadLine: the line last read from $fileHandle. +sub parseGitDiffHeader($$) { my ($fileHandle, $line) = @_; - my $filter; - if ($line =~ m#^diff --git #) { - $filter = \&gitdiff2svndiff; - } - $line = &$filter($line) if $filter; + $_ = $line; + my $headerStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#; my $indexPath; - if ($line =~ /^Index: ([^\r\n]+)/) { - $indexPath = $1; + if (/$headerStartRegEx/) { + # The first and second paths can differ in the case of copies + # and renames. We use the second file path because it is the + # destination path. + $indexPath = $4; + # Use $POSTMATCH to preserve the end-of-line character. + $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format. } else { - die("Could not parse first line of diff header: \"$line\"."); + die("Could not parse leading \"diff --git\" line: \"$line\"."); } + my $copiedFromPath; + my $foundHeaderEnding; + my $isBinary; + my $newExecutableBit = 0; + my $oldExecutableBit = 0; + my $similarityIndex; + my $svnConvertedText; + while (1) { + # Temporarily strip off any end-of-line characters to simplify + # regex matching below. + s/([\n\r]+)$//; + my $eol = $1; + + if (/^(deleted file|old) mode ([0-9]{6})/) { + $oldExecutableBit = (isExecutable($2) ? 1 : 0); + } elsif (/^new( file)? mode ([0-9]{6})/) { + $newExecutableBit = (isExecutable($2) ? 1 : 0); + } elsif (/^--- \S+/) { + $_ = "--- $indexPath"; # Convert to SVN format. + } elsif (/^\+\+\+ \S+/) { + $_ = "+++ $indexPath"; # Convert to SVN format. + $foundHeaderEnding = 1; + } elsif (/^similarity index (\d+)%/) { + $similarityIndex = $1; + } elsif (/^copy from (\S+)/) { + $copiedFromPath = $1; + # The "git diff" command includes a line of the form "Binary files + # <path1> and <path2> differ" if the --binary flag is not used. + } elsif (/^Binary files / ) { + die("Error: the Git diff contains a binary file without the binary data in ". + "line: \"$_\". Be sure to use the --binary flag when invoking \"git diff\" ". + "with diffs containing binary files."); + } elsif (/^GIT binary patch$/ ) { + $isBinary = 1; + $foundHeaderEnding = 1; + } + + $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. + + $_ = <$fileHandle>; # Not defined if end-of-file reached. 
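# The header can also end without an explicit "+++" or "GIT binary patch"
# line: the read above returns undef at end-of-file, and a line matching
# $headerStartRegEx means the next per-file header has begun (e.g. a
# mode-only change with no hunks). The "last" below covers all three cases.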
+ + last if (!defined($_) || /$headerStartRegEx/ || $foundHeaderEnding); + } + + my $executableBitDelta = $newExecutableBit - $oldExecutableBit; + my %header; + $header{copiedFromPath} = $copiedFromPath if ($copiedFromPath && $similarityIndex == 100); + $header{executableBitDelta} = $executableBitDelta if $executableBitDelta; + $header{indexPath} = $indexPath; + $header{isBinary} = $isBinary if $isBinary; + $header{svnConvertedText} = $svnConvertedText; + + return (\%header, $_); +} + +# Parse the next SVN diff header from the given file handle, and advance +# the handle so the last line read is the first line after the header. +# +# This subroutine dies if given leading junk or if it could not detect +# the end of the header block. +# +# Args: +# $fileHandle: advanced so the last line read from the handle is the first +# line of the header to parse. This should be a line +# beginning with "Index:". +# $line: the line last read from $fileHandle +# +# Returns ($headerHashRef, $lastReadLine): +# $headerHashRef: a hash reference representing a diff header, as follows-- +# copiedFromPath: the path from which the file was copied if the diff +# is a copy. +# indexPath: the path of the target file, which is the path found in +# the "Index:" line. +# isBinary: the value 1 if the diff is for a binary file. +# sourceRevision: the revision number of the source, if it exists. This +# is the same as the revision number the file was copied +# from, in the case of a file copy. +# svnConvertedText: the header text converted to a header with the paths +# in some lines corrected. +# $lastReadLine: the line last read from $fileHandle. +sub parseSvnDiffHeader($$) +{ + my ($fileHandle, $line) = @_; + + $_ = $line; + + my $headerStartRegEx = qr/^Index: /; + + if (!/$headerStartRegEx/) { + die("First line of SVN diff does not begin with \"Index \": \"$_\""); + } + + my $copiedFromPath; my $foundHeaderEnding; - my $lastReadLine; + my $indexPath; + my $isBinary; my $sourceRevision; - my $svnConvertedText = $line; - while (<$fileHandle>) { + my $svnConvertedText; + while (1) { # Temporarily strip off any end-of-line characters to simplify # regex matching below. s/([\n\r]+)$//; my $eol = $1; - $_ = &$filter($_) if $filter; - # Fix paths on ""---" and "+++" lines to match the leading # index line. - if (s/^--- \S+/--- $indexPath/) { + if (/^Index: ([^\r\n]+)/) { + $indexPath = $1; + } elsif (s/^--- \S+/--- $indexPath/) { # --- if (/^--- .+\(revision (\d+)\)/) { - $sourceRevision = $1 if ($1 != 0); + $sourceRevision = $1; if (/\(from (\S+):(\d+)\)$/) { # The "from" clause is created by svn-create-patch, in # which case there is always also a "revision" clause. - $header{copiedFromPath} = $1; + $copiedFromPath = $1; die("Revision number \"$2\" in \"from\" clause does not match " . "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision); } } - $_ = "=" x 67 . "$eol$_"; # Prepend dividing line ===.... } elsif (s/^\+\+\+ \S+/+++ $indexPath/) { - # +++ $foundHeaderEnding = 1; - } else { - # Skip unrecognized lines. - next; + } elsif (/^Cannot display: file marked as a binary type.$/) { + $isBinary = 1; + $foundHeaderEnding = 1; } $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. - if ($foundHeaderEnding) { - $lastReadLine = <$fileHandle>; - last; - } - } # $lastReadLine is undef if while loop ran out. + + $_ = <$fileHandle>; # Not defined if end-of-file reached. 
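# Every line is appended to $svnConvertedText: unlike the old
# parseDiffHeader(), lines matching none of the patterns above (such as
# the "===" divider) are preserved as-is rather than skipped.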
+ + last if (!defined($_) || /$headerStartRegEx/ || $foundHeaderEnding); + } if (!$foundHeaderEnding) { die("Did not find end of header block corresponding to index path \"$indexPath\"."); } + my %header; + + $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; $header{indexPath} = $indexPath; - $header{sourceRevision} = $sourceRevision; + $header{isBinary} = $isBinary if $isBinary; + $header{sourceRevision} = $sourceRevision if $sourceRevision; $header{svnConvertedText} = $svnConvertedText; - return (\%header, $lastReadLine); + return (\%header, $_); } +# Parse the next diff header from the given file handle, and advance +# the handle so the last line read is the first line after the header. +# +# This subroutine dies if given leading junk or if it could not detect +# the end of the header block. +# +# Args: +# $fileHandle: advanced so the last line read from the handle is the first +# line of the header to parse. For SVN-formatted diffs, this +# is a line beginning with "Index:". For Git, this is a line +# beginning with "diff --git". +# $line: the line last read from $fileHandle +# +# Returns ($headerHashRef, $lastReadLine): +# $headerHashRef: a hash reference representing a diff header +# copiedFromPath: the path from which the file was copied if the diff +# is a copy. +# executableBitDelta: the value 1 or -1 if the executable bit was added or +# removed, respectively. New and deleted files have +# this value only if the file is executable, in which +# case the value is 1 and -1, respectively. +# indexPath: the path of the target file. +# isBinary: the value 1 if the diff is for a binary file. +# isGit: the value 1 if the diff is Git-formatted. +# isSvn: the value 1 if the diff is SVN-formatted. +# sourceRevision: the revision number of the source, if it exists. This +# is the same as the revision number the file was copied +# from, in the case of a file copy. +# svnConvertedText: the header text with some lines converted to SVN +# format. Git-specific lines are preserved. +# $lastReadLine: the line last read from $fileHandle. +sub parseDiffHeader($$) +{ + my ($fileHandle, $line) = @_; + + my $header; # This is a hash ref. + my $isGit; + my $isSvn; + my $lastReadLine; + + if ($line =~ /^Index:/) { + $isSvn = 1; + ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line); + } elsif ($line =~ /^diff --git/) { + $isGit = 1; + ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line); + } else { + die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\""); + } + + $header->{isGit} = $isGit if $isGit; + $header->{isSvn} = $isSvn if $isSvn; + + return ($header, $lastReadLine); +} + +# FIXME: The %diffHash "object" should not have an svnConvertedText property. +# Instead, the hash object should store its information in a +# structured way as properties. This should be done in a way so +# that, if necessary, the text of an SVN or Git patch can be +# reconstructed from the information in those hash properties. +# +# A %diffHash is a hash representing a source control diff of a single +# file operation (e.g. a file modification, copy, or delete). +# +# These hashes appear, for example, in the parseDiff(), parsePatch(), +# and prepareParsedPatch() subroutines of this package. +# +# The corresponding values are-- +# +# copiedFromPath: the path from which the file was copied if the diff +# is a copy. +# indexPath: the path of the target file. For SVN-formatted diffs, +# this is the same as the path in the "Index:" line. 
+# isBinary: the value 1 if the diff is for a binary file. +# isGit: the value 1 if the diff is Git-formatted. +# isSvn: the value 1 if the diff is SVN-formatted. +# sourceRevision: the revision number of the source, if it exists. This +# is the same as the revision number the file was copied +# from, in the case of a file copy. +# svnConvertedText: the diff with some lines converted to SVN format. +# Git-specific lines are preserved. + # Parse one diff from a patch file created by svn-create-patch, and # advance the file handle so the last line read is the first line # of the next header block. @@ -494,14 +701,8 @@ sub parseDiffHeader($$) # $line: the line last read from $fileHandle. # # Returns ($diffHashRef, $lastReadLine): -# $diffHashRef: -# copiedFromPath: if a file copy, the path from which the file was -# copied. Otherwise, undefined. -# indexPath: the path in the "Index:" line. -# sourceRevision: the revision number of the source. This is the same -# as the revision number the file was copied from, in -# the case of a file copy. -# svnConvertedText: the diff converted to SVN format. +# $diffHashRef: A reference to a %diffHash. +# See the %diffHash documentation above. # $lastReadLine: the line last read from $fileHandle sub parseDiff($$) { @@ -538,9 +739,15 @@ sub parseDiff($$) } my %diffHashRef; - $diffHashRef{copiedFromPath} = $headerHashRef->{copiedFromPath}; + $diffHashRef{copiedFromPath} = $headerHashRef->{copiedFromPath} if $headerHashRef->{copiedFromPath}; + # FIXME: Add executableBitDelta as a key. $diffHashRef{indexPath} = $headerHashRef->{indexPath}; - $diffHashRef{sourceRevision} = $headerHashRef->{sourceRevision}; + $diffHashRef{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary}; + $diffHashRef{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit}; + $diffHashRef{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn}; + $diffHashRef{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; + # FIXME: Remove the need for svnConvertedText. See the %diffHash + # code comments above for more information. $diffHashRef{svnConvertedText} = $svnText; return (\%diffHashRef, $line); @@ -553,8 +760,8 @@ sub parseDiff($$) # read from. # # Returns: -# @diffHashRefs: an array of diff hash references. See parseDiff() for -# a description of each $diffHashRef. +# @diffHashRefs: an array of diff hash references. +# See the %diffHash documentation above. sub parsePatch($) { my ($fileHandle) = @_; @@ -574,6 +781,78 @@ sub parsePatch($) return @diffHashRefs; } +# Prepare the results of parsePatch() for use in svn-apply and svn-unapply. +# +# Args: +# $shouldForce: Whether to continue processing if an unexpected +# state occurs. +# @diffHashRefs: An array of references to %diffHashes. +# See the %diffHash documentation above. +# +# Returns $preparedPatchHashRef: +# copyDiffHashRefs: A reference to an array of the $diffHashRefs in +# @diffHashRefs that represent file copies. The original +# ordering is preserved. +# nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in +# @diffHashRefs that do not represent file copies. +# The original ordering is preserved. +# sourceRevisionHash: A reference to a hash of source path to source +# revision number. 
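#
# Example (illustrative, not part of the patch) -- for a patch holding
# one file copy and one plain modification:
#
#   my $prepared = prepareParsedPatch(0, @diffHashRefs);  # 0: no --force
#   @{$prepared->{copyDiffHashRefs}};     # just the copy diff
#   @{$prepared->{nonCopyDiffHashRefs}};  # just the modification diff
#   %{$prepared->{sourceRevisionHash}};   # e.g. ("Makefile" => 53131)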
+sub prepareParsedPatch($@) +{ + my ($shouldForce, @diffHashRefs) = @_; + + my %copiedFiles; + + # Return values + my @copyDiffHashRefs = (); + my @nonCopyDiffHashRefs = (); + my %sourceRevisionHash = (); + for my $diffHashRef (@diffHashRefs) { + my $copiedFromPath = $diffHashRef->{copiedFromPath}; + my $indexPath = $diffHashRef->{indexPath}; + my $sourceRevision = $diffHashRef->{sourceRevision}; + my $sourcePath; + + if (defined($copiedFromPath)) { + # Then the diff is a copy operation. + $sourcePath = $copiedFromPath; + + # FIXME: Consider printing a warning or exiting if + # exists($copiedFiles{$indexPath}) is true -- i.e. if + # $indexPath appears twice as a copy target. + $copiedFiles{$indexPath} = $sourcePath; + + push @copyDiffHashRefs, $diffHashRef; + } else { + # Then the diff is not a copy operation. + $sourcePath = $indexPath; + + push @nonCopyDiffHashRefs, $diffHashRef; + } + + if (defined($sourceRevision)) { + if (exists($sourceRevisionHash{$sourcePath}) && + ($sourceRevisionHash{$sourcePath} != $sourceRevision)) { + if (!$shouldForce) { + die "Two revisions of the same file required as a source:\n". + " $sourcePath:$sourceRevisionHash{$sourcePath}\n". + " $sourcePath:$sourceRevision"; + } + } + $sourceRevisionHash{$sourcePath} = $sourceRevision; + } + } + + my %preparedPatchHash; + + $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs; + $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs; + $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash; + + return \%preparedPatchHash; +} + # If possible, returns a ChangeLog patch equivalent to the given one, # but with the newest ChangeLog entry inserted at the top of the # file -- i.e. no leading context and all lines starting with "+". diff --git a/WebKitTools/Scripts/build-dumprendertree b/WebKitTools/Scripts/build-dumprendertree index 14690a8..6502916 100755 --- a/WebKitTools/Scripts/build-dumprendertree +++ b/WebKitTools/Scripts/build-dumprendertree @@ -75,7 +75,7 @@ if (isAppleMacWebKit()) { $result = 0; } elsif (isChromium()) { if (isDarwin()) { - $result = buildXCodeProject("DumpRenderTree.gyp/DumpRenderTree", $clean, @ARGV); + $result = buildXCodeProject("DumpRenderTree.gyp/DumpRenderTree", $clean, "-configuration", configuration(), @ARGV); } elsif (isCygwin() || isWindows()) { # Windows build - builds the root visual studio solution. $result = buildChromiumVisualStudioProject("DumpRenderTree.gyp/DumpRenderTree.sln", $clean); diff --git a/WebKitTools/Scripts/build-webkit b/WebKitTools/Scripts/build-webkit index 2d172c5..3a3edb9 100755 --- a/WebKitTools/Scripts/build-webkit +++ b/WebKitTools/Scripts/build-webkit @@ -48,7 +48,6 @@ chdirWebKit(); my $showHelp = 0; my $clean = 0; my $minimal = 0; -my $webkit2 = 0; my $makeArgs; my $startTime = time(); @@ -224,7 +223,6 @@ Usage: $programName [options] [options to pass to build system] --chromium Build the Chromium port on Mac/Win/Linux --gtk Build the GTK+ port --qt Build the Qt port - --webkit2 Build the WebKit2 framework --inspector-frontend Copy changes to the inspector front-end files to the build directory --makeargs=<arguments> Optional Makefile flags @@ -238,7 +236,6 @@ my %options = ( 'clean' => \$clean, 'makeargs=s' => \$makeArgs, 'minimal' => \$minimal, - 'webkit2' => \$webkit2, ); # Build usage text and options list from features @@ -261,16 +258,7 @@ setConfiguration(); my $productDir = productDir(); # Check that all the project directories are there. 
-my @projects = ("JavaScriptCore", "WebCore"); - -if (!$webkit2) { - push @projects, "WebKit"; -} else { - push @projects, ("WebKit2", "WebKitTools/MiniBrowser"); -} - -# Only Apple builds JavaScriptGlue, and only on the Mac -splice @projects, 1, 0, "JavaScriptGlue" if isAppleMacWebKit(); +my @projects = ("JavaScriptCore", "WebCore", "WebKit"); my @otherDirs = ("WebKitLibraries"); for my $dir (@projects, @otherDirs) { @@ -306,8 +294,13 @@ if (isGtk()) { } } - # Copy library and header from WebKitLibraries to a findable place in the product directory. + # Apple builds JavaScriptGlue, and only on the Mac. + splice @projects, 1, 0, "JavaScriptGlue"; + + # WebKit2 is only supported in SnowLeopard and later at present. + push @projects, ("WebKit2", "WebKitTools/MiniBrowser") if osXVersion()->{"minor"} >= 6; + # Copy library and header from WebKitLibraries to a findable place in the product directory. my @librariesToCopy = ( "libWebKitSystemInterfaceTiger.a", "libWebKitSystemInterfaceLeopard.a", @@ -350,6 +343,10 @@ if (isGtk()) { foreach (@features) { push @options, "DEFINES+=$_->{define}=${$_->{value}}" if ${$_->{value}} != $_->{default}; } + + if ($minimal) { + push @options, "CONFIG+=minimal"; + } } # Force re-link of existing libraries if different than expected @@ -367,6 +364,7 @@ if (isWx()) { } if (isChromium()) { + @options = @ARGV; # Chromium doesn't build by project directories. @projects = (); my $result = buildChromium($clean, @options); @@ -389,18 +387,8 @@ for my $dir (@projects) { } elsif (isQt()) { $result = buildQMakeQtProject($dir, $clean, @options); } elsif (isAppleMacWebKit()) { - my @completeOptions = @options; - if ($webkit2 && $dir eq "WebCore") { - my @webKit2SpecificOverrides = ( - 'UMBRELLA_LDFLAGS=', - 'GCC_PREPROCESSOR_DEFINITIONS=$(GCC_PREPROCESSOR_DEFINITIONS) ' . - 'ENABLE_EXPERIMENTAL_SINGLE_VIEW_MODE=1 ' . - 'WTF_USE_WEB_THREAD=1 ' - ); - push @completeOptions, @webKit2SpecificOverrides; - } - - $result = buildXCodeProject($dir, $clean, @completeOptions, @ARGV); + $dir = "MiniBrowser" if $dir eq "WebKitTools/MiniBrowser"; + $result = buildXCodeProject($dir, $clean, @options, @ARGV); } elsif (isAppleWinWebKit()) { if ($dir eq "WebKit") { $result = buildVisualStudioProject("win/WebKit.vcproj/WebKit.sln", $clean); @@ -461,16 +449,10 @@ sub writeCongrats() print "\n"; print "===========================================================\n"; - if ($webkit2) { - print " WebKit2 is now built ($buildTime). \n"; - print " To run MiniBrowser with this newly-built code, use the\n"; - print " \"run-minibrowser\" script.\n"; - } else { - print " WebKit is now built ($buildTime). \n"; - if (!isChromium()) { - print " To run $launcherName with this newly-built code, use the\n"; - print " \"$launcherPath\" script.\n"; - } + print " WebKit is now built ($buildTime). 
\n"; + if (!isChromium()) { + print " To run $launcherName with this newly-built code, use the\n"; + print " \"$launcherPath\" script.\n"; } print "===========================================================\n"; } diff --git a/WebKitTools/Scripts/check-webkit-style b/WebKitTools/Scripts/check-webkit-style index 9897fbd..f74c3bd 100755 --- a/WebKitTools/Scripts/check-webkit-style +++ b/WebKitTools/Scripts/check-webkit-style @@ -50,7 +50,9 @@ import sys from webkitpy.style_references import detect_checkout import webkitpy.style.checker as checker -from webkitpy.style.checker import PatchChecker +from webkitpy.style.checker import PatchReader +from webkitpy.style.checker import StyleProcessor +from webkitpy.style.filereader import TextFileReader from webkitpy.style.main import change_directory _log = logging.getLogger("check-webkit-style") @@ -84,41 +86,42 @@ def main(): checker.configure_logging(stream=stderr, is_verbose=is_verbose) _log.debug("Verbose logging enabled.") + parser = checker.check_webkit_style_parser() + (paths, options) = parser.parse(args) + checkout = detect_checkout() if checkout is None: + if not paths: + _log.error("WebKit checkout not found: You must run this script " + "from within a WebKit checkout if you are not passing " + "specific paths to check.") + sys.exit(1) + checkout_root = None _log.debug("WebKit checkout not found for current directory.") else: checkout_root = checkout.root_path() _log.debug("WebKit checkout found with root: %s" % checkout_root) - parser = checker.check_webkit_style_parser() - (paths, options) = parser.parse(args) - - if checkout is None and not paths: - _log.error("WebKit checkout not found: You must run this script " - "from within a WebKit checkout if you are not passing " - "specific paths to check.") - sys.exit(1) - configuration = checker.check_webkit_style_configuration(options) - style_checker = checker.StyleChecker(configuration) paths = change_directory(checkout_root=checkout_root, paths=paths) + style_processor = StyleProcessor(configuration) + + file_reader = TextFileReader(style_processor) + if paths: - style_checker.check_paths(paths) + file_reader.process_paths(paths) else: - if options.git_commit: - patch = checkout.create_patch_since_local_commit(options.git_commit) - else: - patch = checkout.create_patch() - patch_checker = PatchChecker(style_checker) + patch = checkout.create_patch(options.git_commit, options.squash) + patch_checker = PatchReader(file_reader) patch_checker.check(patch) - error_count = style_checker.error_count - file_count = style_checker.file_count + error_count = style_processor.error_count + file_count = file_reader.file_count + _log.info("Total errors found: %d in %d files" % (error_count, file_count)) # We fail when style errors are found or there are no checked files. diff --git a/WebKitTools/Scripts/extract-localizable-strings b/WebKitTools/Scripts/extract-localizable-strings index cf4f8f0..b31550a 100755 --- a/WebKitTools/Scripts/extract-localizable-strings +++ b/WebKitTools/Scripts/extract-localizable-strings @@ -1,6 +1,6 @@ #!/usr/bin/perl -w -# Copyright (C) 2006, 2007, 2009 Apple Inc. All rights reserved. +# Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -48,7 +48,7 @@ sub UnescapeHexSequence($); my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 ); -@ARGV >= 1 or die "Usage: extract-localizable-strings <exceptions file> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n"; +@ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n"; my $exceptionsFile = shift @ARGV; -f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n"; @@ -99,14 +99,13 @@ if (open EXCEPTIONS, $exceptionsFile) { my $quotedDirectoriesString = '"' . join('" "', @directories) . '"'; for my $dir (@directoriesToSkip) { - $quotedDirectoriesString .= ' -path "' . $dir . '" -prune'; + $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o'; } -my @files = ( split "\n", `find $quotedDirectoriesString -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp"` ); +my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` ); for my $file (sort @files) { - next if $file =~ /\/WebLocalizableStrings\.h$/; - next if $file =~ /\/icu\//; + next if $file =~ /\/\w+LocalizableStrings\.h$/; $file =~ s-^./--; diff --git a/WebKitTools/Scripts/new-run-webkit-tests b/WebKitTools/Scripts/new-run-webkit-tests index 2ebe1da..0728ee0 100755 --- a/WebKitTools/Scripts/new-run-webkit-tests +++ b/WebKitTools/Scripts/new-run-webkit-tests @@ -33,5 +33,4 @@ import sys import webkitpy.layout_tests.run_webkit_tests as run_webkit_tests if __name__ == '__main__': - options, args = run_webkit_tests.parse_args() - sys.exit(run_webkit_tests.main(options, args)) + sys.exit(run_webkit_tests.main()) diff --git a/WebKitTools/Scripts/old-run-webkit-tests b/WebKitTools/Scripts/old-run-webkit-tests index d5d7349..f6dbf5b 100755 --- a/WebKitTools/Scripts/old-run-webkit-tests +++ b/WebKitTools/Scripts/old-run-webkit-tests @@ -501,6 +501,10 @@ if (checkWebCoreFeatureSupport("WML", 0)) { $ignoredDirectories{'wml'} = 1; } +if (!checkWebCoreFeatureSupport("WCSS", 0)) { + $ignoredDirectories{'fast/wcss'} = 1; +} + if (!checkWebCoreFeatureSupport("XHTMLMP", 0)) { $ignoredDirectories{'fast/xhtmlmp'} = 1; } @@ -1149,6 +1153,7 @@ sub countAndPrintLeaks($$$) "QTKitMovieControllerView completeUISetup", # <rdar://problem/7155156> leak in QTKit "getVMInitArgs", # <rdar://problem/7714444> leak in Java "Java_java_lang_System_initProperties", # <rdar://problem/7714465> leak in Java + "glrCompExecuteKernel", # <rdar://problem/7815391> leak in graphics driver while using OpenGL ); } diff --git a/WebKitTools/Scripts/prepare-ChangeLog b/WebKitTools/Scripts/prepare-ChangeLog index b087f67..2ef1eb4 100755 --- a/WebKitTools/Scripts/prepare-ChangeLog +++ b/WebKitTools/Scripts/prepare-ChangeLog @@ -260,7 +260,9 @@ if ($bugNumber) { $bugURL = "https://bugs.webkit.org/show_bug.cgi?id=$bugNumber"; my $bugXMLURL = "$bugURL&ctype=xml"; # Perl has no built in XML processing, so we'll fetch and parse with curl and grep - my $descriptionLine = `curl --silent "$bugXMLURL" | grep short_desc`; + # 
Pass --insecure because some cygwin installs have no certs we don't + # care about validating that bugs.webkit.org is who it says it is here. + my $descriptionLine = `curl --insecure --silent "$bugXMLURL" | grep short_desc`; if ($descriptionLine !~ /<short_desc>(.*)<\/short_desc>/) { print STDERR " Bug $bugNumber has no bug description. Maybe you set wrong bug ID?\n"; print STDERR " The bug URL: $bugXMLURL\n"; diff --git a/WebKitTools/Scripts/run-bindings-tests b/WebKitTools/Scripts/run-bindings-tests new file mode 100755 index 0000000..56b2aff --- /dev/null +++ b/WebKitTools/Scripts/run-bindings-tests @@ -0,0 +1,136 @@ +#!/usr/bin/python +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# This script generates h and cpp file for TestObj.idl using the V8 code +# generator. Please execute the script whenever changes are made to +# CodeGeneratorV8.pm, and submit the changes in V8TestObj.h/cpp in the same +# patch. This makes it easier to track and review changes in generated code. +# To execute, invoke: 'python run_tests.py' + +import os +import os.path +import subprocess +import sys +import tempfile +from webkitpy.common.checkout import scm + + +def generate_from_idl(generator, idl_file, output_directory): + cmd = ['perl', '-w', + '-IWebCore/bindings/scripts', + 'WebCore/bindings/scripts/generate-bindings.pl', + # idl include directories (path relative to generate-bindings.pl) + '--include', '.', + '--defines', 'TESTING_%s' % generator, + '--generator', generator, + '--outputDir', output_directory, + idl_file] + return subprocess.call(cmd) == 0 + + +def detect_changes(work_directory, reference_directory): + changes_found = False + for output_file in os.listdir(work_directory): + print 'Detecting changes in %s...' % output_file + cmd = ['diff', + '-u', + os.path.join(reference_directory, output_file), + os.path.join(work_directory, output_file)] + if subprocess.call(cmd) != 0: + print 'Detected changes in %s (see above)' % output_file + changes_found = True + else: + print 'No changes found.' 
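    # changes_found ends up True if any generated file differed from its
    # reference; diff also exits non-zero on errors such as a missing
    # reference file, which is likewise reported as a change.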
+ + return changes_found + + +def run_tests(generator, input_directory, reference_directory, reset_results): + work_directory = reference_directory + + passed = True + for input_file in os.listdir(input_directory): + (name, extension) = os.path.splitext(input_file) + if extension != '.idl': + continue + print 'Testing the %s generator on %s' % (generator, input_file) + # Generate output into the work directory (either the given one or a + # temp one if not reset_results is performed) + if not reset_results: + work_directory = tempfile.mkdtemp() + if not generate_from_idl(generator, os.path.join(input_directory, + input_file), + work_directory): + passed = False + if reset_results: + print "Overwrote reference files" + continue + # Detect changes + if detect_changes(work_directory, reference_directory): + passed = False + + if not passed: + print '%s generator failed.' % generator + return passed + + +def main(argv): + """Runs WebCore bindings code generators on test IDL files and compares + the results with reference files. + + Options: + --reset-results: Overwrites the reference files with the generated results. + + """ + reset_results = "--reset-results" in argv + + current_scm = scm.detect_scm_system(os.curdir) + os.chdir(current_scm.checkout_root) + + all_tests_passed = True + + generators = [ + 'JS', + 'V8', + 'ObjC', + 'GObject', + ] + + for generator in generators: + input_directory = os.path.join('WebCore', 'bindings', 'scripts', 'test') + reference_directory = os.path.join('WebCore', 'bindings', 'scripts', 'test', generator) + if not run_tests(generator, input_directory, reference_directory, reset_results): + all_tests_passed = False + + if all_tests_passed: + print 'All tests passed!' + return 0 + else: + print '(To update the reference files, execute "run-bindings-test --reset-results")' + return -1 + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/WebKitTools/Scripts/run-webkit-tests b/WebKitTools/Scripts/run-webkit-tests index f28f20a..8fe8360 100755 --- a/WebKitTools/Scripts/run-webkit-tests +++ b/WebKitTools/Scripts/run-webkit-tests @@ -58,6 +58,8 @@ sub useNewRunWebKitTests() { # Change this check to control which platforms use # new-run-webkit-tests by default. + # Example: return runningOnBuildBot() && isLeopard(); + # would enable new-run-webkit-tests on only the leopard buildbots. return 0; } diff --git a/WebKitTools/Scripts/svn-apply b/WebKitTools/Scripts/svn-apply index 61d193d..b49ccec 100755 --- a/WebKitTools/Scripts/svn-apply +++ b/WebKitTools/Scripts/svn-apply @@ -123,72 +123,43 @@ my $repositoryRootPath = determineVCSRoot(); my %checkedDirectories; my %copiedFiles; -my @patches; -my %versions; - -my $copiedFromPath; -my $filter; -my $indexPath; -my $patch; -while (<>) { - s/([\n\r]+)$//mg; - my $eol = $1; - if (!defined($indexPath) && m#^diff --git \w/#) { - $filter = \&gitdiff2svndiff; - } - $_ = &$filter($_) if $filter; - if (/^Index: (.+)/) { - $indexPath = $1; - if ($patch) { - if (!$copiedFromPath) { - push @patches, $patch; - } - $copiedFromPath = ""; - $patch = ""; - } - } - if ($indexPath) { - # Fix paths on diff, ---, and +++ lines to match preceding Index: line. 
- s/\S+$/$indexPath/ if /^diff/; - s/^--- \S+/--- $indexPath/; - if (/^--- .+\(from (\S+):(\d+)\)$/) { - $copiedFromPath = $1; - $copiedFiles{$indexPath} = $copiedFromPath; - $versions{$copiedFromPath} = $2 if ($2 != 0); - } - elsif (/^--- .+\(revision (\d+)\)$/) { - $versions{$indexPath} = $1 if ($1 != 0); - } - if (s/^\+\+\+ \S+/+++ $indexPath/) { - $indexPath = ""; - } - } - $patch .= $_; - $patch .= $eol; -} -if ($patch && !$copiedFromPath) { - push @patches, $patch; -} +# Need to use a typeglob to pass the file handle as a parameter, +# otherwise get a bareword error. +my @diffHashRefs = parsePatch(*ARGV); + +print "Parsed " . @diffHashRefs . " diffs from patch file(s).\n"; + +my $preparedPatchHash = prepareParsedPatch($force, @diffHashRefs); + +my @copyDiffHashRefs = @{$preparedPatchHash->{copyDiffHashRefs}}; +my @nonCopyDiffHashRefs = @{$preparedPatchHash->{nonCopyDiffHashRefs}}; +my %sourceRevisions = %{$preparedPatchHash->{sourceRevisionHash}}; if ($merge) { die "--merge is currently only supported for SVN" unless isSVN(); # How do we handle Git patches applied to an SVN checkout here? - for my $file (sort keys %versions) { - my $version = $versions{$file}; + for my $file (sort keys %sourceRevisions) { + my $version = $sourceRevisions{$file}; print "Getting version $version of $file\n"; system("svn", "update", "-r", $version, $file) == 0 or die "Failed to run svn update -r $version $file."; } } -# Handle copied and moved files first since moved files may have their source deleted before the move. -for my $file (keys %copiedFiles) { - addDirectoriesIfNeeded(dirname($file)); - scmCopy($copiedFiles{$file}, $file); +# Handle copied and moved files first since moved files may have their +# source deleted before the move. +for my $copyDiffHashRef (@copyDiffHashRefs) { + my $indexPath = $copyDiffHashRef->{indexPath}; + my $copiedFromPath = $copyDiffHashRef->{copiedFromPath}; + + addDirectoriesIfNeeded(dirname($indexPath)); + scmCopy($copiedFromPath, $indexPath); + + $copiedFiles{$indexPath} = $copiedFromPath; } -for $patch (@patches) { - patch($patch); +for my $diffHashRef (@nonCopyDiffHashRefs) { + patch($diffHashRef); } removeDirectoriesIfNeeded(); @@ -326,30 +297,27 @@ sub isDirectoryEmptyForRemoval($) return $directoryIsEmpty; } +# Args: +# $diffHashRef: a diff hash reference of the type returned by parsePatch(). sub patch($) { - my ($patch) = @_; - return if !$patch; - - unless ($patch =~ m|^Index: ([^\r\n]+)|) { - my $separator = '-' x 67; - warn "Failed to find 'Index:' in:\n$separator\n$patch\n$separator\n"; - die unless $force; - return; - } - my $fullPath = $1; + my ($diffHashRef) = @_; + + my $patch = $diffHashRef->{svnConvertedText}; + + my $fullPath = $diffHashRef->{indexPath}; + my $isBinary = $diffHashRef->{isBinary}; + my $isGit = $diffHashRef->{isGit}; my $deletion = 0; my $addition = 0; - my $isBinary = 0; - my $isGitBinary = 0; + # FIXME: This information should be extracted from the diff file as + # part of the parsing stage, i.e. the call to parsePatch(). $addition = 1 if ($patch =~ /\n--- .+\(revision 0\)\r?\n/ || $patch =~ /\n@@ -0,0 .* @@/) && !exists($copiedFiles{$fullPath}); $deletion = 1 if $patch =~ /\n@@ .* \+0,0 @@/; - $isBinary = 1 if $patch =~ /\nCannot display: file marked as a binary type\./; - $isGitBinary = 1 if $patch =~ /\nGIT binary patch\n/; - if (!$addition && !$deletion && !$isBinary && !$isGitBinary) { + if (!$addition && !$deletion && !$isBinary) { # Standard patch, patch tool can handle this. 
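# (The text fed to the patch tool is svnConvertedText, so the paths it
# sees are SVN-style regardless of whether the diff was Git-formatted.)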
if (basename($fullPath) eq "ChangeLog") { my $changeLogDotOrigExisted = -f "${fullPath}.orig"; @@ -364,11 +332,11 @@ sub patch($) addDirectoriesIfNeeded(dirname($fullPath)); if ($isBinary) { - # Binary change - handleBinaryChange($fullPath, $patch); - } elsif ($isGitBinary) { - # Git binary change - handleGitBinaryChange($fullPath, $patch); + if ($isGit) { + handleGitBinaryChange($fullPath, $patch); + } else { + handleBinaryChange($fullPath, $patch); + } } elsif ($deletion) { # Deletion applyPatch($patch, $fullPath, ["--force"]); @@ -383,6 +351,8 @@ sub patch($) system("svn", "stat", "$fullPath.orig") if isSVN() && -e "$fullPath.orig"; } } + + scmToggleExecutableBit($fullPath, $diffHashRef->{executableBitDelta}) if defined($diffHashRef->{executableBitDelta}); } sub removeDirectoriesIfNeeded() diff --git a/WebKitTools/Scripts/svn-unapply b/WebKitTools/Scripts/svn-unapply index eb20ca0..2ef7249 100755 --- a/WebKitTools/Scripts/svn-unapply +++ b/WebKitTools/Scripts/svn-unapply @@ -97,54 +97,24 @@ my $repositoryRootPath = determineVCSRoot(); my @copiedFiles; my %directoriesToCheck; -my $copiedFromPath; -my $filter; -my $indexPath; -my $patch; -while (<>) { - s/([\n\r]+)$//mg; - my $eol = $1; - if (!defined($indexPath) && m#^diff --git \w/#) { - $filter = \&gitdiff2svndiff; - } - $_ = &$filter($_) if $filter; - if (/^Index: (.+)/) { - $indexPath = $1; - if ($patch) { - if ($copiedFromPath) { - push @copiedFiles, $patch; - } else { - patch($patch); - } - $copiedFromPath = ""; - $patch = ""; - } - } - if ($indexPath) { - # Fix paths on diff, ---, and +++ lines to match preceding Index: line. - s/^--- \S+/--- $indexPath/; - if (/^--- .+\(from (\S+):\d+\)$/) { - $copiedFromPath = $1; - } - if (s/^\+\+\+ \S+/+++ $indexPath/) { - $indexPath = ""; - } - } - $patch .= $_; - $patch .= $eol; -} +# Need to use a typeglob to pass the file handle as a parameter, +# otherwise get a bareword error. +my @diffHashRefs = parsePatch(*ARGV); -if ($patch) { - if ($copiedFromPath) { - push @copiedFiles, $patch; - } else { - patch($patch); - } +print "Parsed " . @diffHashRefs . " diffs from patch file(s).\n"; + +my $preparedPatchHash = prepareParsedPatch($force, @diffHashRefs); + +my @copyDiffHashRefs = @{$preparedPatchHash->{copyDiffHashRefs}}; +my @nonCopyDiffHashRefs = @{$preparedPatchHash->{nonCopyDiffHashRefs}}; + +for my $diffHashRef (@nonCopyDiffHashRefs) { + patch($diffHashRef); } # Handle copied and moved files last since they may have had post-copy changes that have now been unapplied -for $patch (@copiedFiles) { - patch($patch); +for my $diffHashRef (@copyDiffHashRefs) { + patch($diffHashRef); } if (isSVN()) { @@ -163,28 +133,28 @@ sub checksum($) return $checksum; } +# Args: +# $diffHashRef: a diff hash reference of the type returned by parsePatch(). sub patch($) { - my ($patch) = @_; - return if !$patch; + my ($diffHashRef) = @_; + + my $patch = $diffHashRef->{svnConvertedText}; + + my $fullPath = $diffHashRef->{indexPath}; + my $isSvnBinary = $diffHashRef->{isBinary} && $diffHashRef->{isSvn}; - unless ($patch =~ m|^Index: ([^\r\n]+)|) { - my $separator = '-' x 67; - warn "Failed to find 'Index:' in:\n$separator\n$patch\n$separator\n"; - return; - } - my $fullPath = $1; $directoriesToCheck{dirname($fullPath)} = 1; my $deletion = 0; my $addition = 0; - my $isBinary = 0; - $addition = 1 if ($patch =~ /\n--- .+\(revision 0\)\n/ || $patch =~ /\n@@ -0,0 .* @@/); + # FIXME: This information should be extracted from the diff file as + # part of the parsing stage, i.e. the call to parsePatch(). 
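# An addition is recognized by a copy source, a "--- ... (revision 0)"
# line (SVN marks brand-new files as revision 0), or a hunk whose old
# range is "-0,0"; a deletion by a hunk whose new range is "+0,0".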
+ $addition = 1 if ($diffHashRef->{copiedFromPath} || $patch =~ /\n--- .+\(revision 0\)\n/ || $patch =~ /\n@@ -0,0 .* @@/); $deletion = 1 if $patch =~ /\n@@ .* \+0,0 @@/; - $isBinary = 1 if $patch =~ /\nCannot display: file marked as a binary type\./; - if (!$addition && !$deletion && !$isBinary) { + if (!$addition && !$deletion && !$isSvnBinary) { # Standard patch, patch tool can handle this. if (basename($fullPath) eq "ChangeLog") { my $changeLogDotOrigExisted = -f "${fullPath}.orig"; @@ -196,7 +166,8 @@ sub patch($) } else { # Either a deletion, an addition or a binary change. - if ($isBinary) { + # FIXME: Add support for Git binary files. + if ($isSvnBinary) { # Reverse binary change unlink($fullPath) if (-e $fullPath); system "svn", "revert", $fullPath; @@ -228,11 +199,17 @@ sub patch($) system "svn", "stat", $fullPath; } else { # Reverse addition + # + # FIXME: This should use the same logic as svn-apply's deletion + # code. In particular, svn-apply's scmRemove() subroutine + # should be used here. unapplyPatch($patch, $fullPath, ["--force"]); unlink($fullPath) if -z $fullPath; system "svn", "revert", $fullPath; } } + + scmToggleExecutableBit($fullPath, -1 * $diffHashRef->{executableBitDelta}) if defined($diffHashRef->{executableBitDelta}); } sub revertDirectories() diff --git a/WebKitTools/Scripts/test-webkitperl b/WebKitTools/Scripts/test-webkitperl index 2e31593..4e63b8a 100755 --- a/WebKitTools/Scripts/test-webkitperl +++ b/WebKitTools/Scripts/test-webkitperl @@ -39,10 +39,20 @@ use Test::Harness; use lib $FindBin::Bin; # so this script can be run from any directory. use VCSUtils; -# Use an absolute path so this script can be run from any directory. -my $scriptsDir = $FindBin::Bin; +# Change the working directory so that we can pass shorter, relative +# paths to runtests(), rather than longer, absolute paths. +# +# We change to the source root so the paths can be relative to the +# source root. These paths display on the screen, and their meaning +# will be clearer to the user if relative to the root, rather than to +# the Scripts directory, say. +# +# Source root is two levels up from the Scripts directory. +my $sourceRootDir = File::Spec->catfile($FindBin::Bin, "../.."); +chdir($sourceRootDir); -my $pattern = File::Spec->catfile($scriptsDir, "webkitperl/*_unittest/*.pl"); +# Relative to root +my $pattern = "WebKitTools/Scripts/webkitperl/*_unittest/*.pl"; my @files = <${pattern}>; # lists files alphabetically diff --git a/WebKitTools/Scripts/update-webkit-localizable-strings b/WebKitTools/Scripts/update-webkit-localizable-strings index 1d1f413..493a777 100755 --- a/WebKitTools/Scripts/update-webkit-localizable-strings +++ b/WebKitTools/Scripts/update-webkit-localizable-strings @@ -34,7 +34,7 @@ use FindBin; use lib $FindBin::Bin; use webkitdirs; -my @directoriesToScan = ("WebKit/mac", "WebKit/win"); +my @directoriesToScan = ("WebKit/mac", "WebKit/win", "-WebCore/icu", "-WebKit/mac/icu"); my $fileToUpdate = "WebKit/English.lproj/Localizable.strings"; my $exceptionsFile = "WebKit/StringsNotToBeLocalized.txt"; diff --git a/WebKitTools/Scripts/webkitdirs.pm b/WebKitTools/Scripts/webkitdirs.pm index 0b18373..4fecf6b 100644 --- a/WebKitTools/Scripts/webkitdirs.pm +++ b/WebKitTools/Scripts/webkitdirs.pm @@ -1098,7 +1098,7 @@ sub copyInspectorFrontendFiles print "*************************************************************\n"; die; } - return system "rsync", "-aut", "--exclude=/.DS_Store", "--exclude=.svn/", !isQt() ? 
"--exclude=/WebKit.qrc" : "", $sourceInspectorPath, $inspectorResourcesDirPath; + return system "rsync", "-aut", "--exclude=/.DS_Store", "--exclude=*.re2js", "--exclude=.svn/", !isQt() ? "--exclude=/WebKit.qrc" : "", $sourceInspectorPath, $inspectorResourcesDirPath; } sub buildXCodeProject($$@) @@ -1479,7 +1479,7 @@ sub buildChromium($@) my $result = 1; if (isDarwin()) { # Mac build - builds the root xcode project. - $result = buildXCodeProject("WebKit/chromium/WebKit", $clean, (@options)); + $result = buildXCodeProject("WebKit/chromium/WebKit", $clean, "-configuration", configuration(), @options); } elsif (isCygwin() || isWindows()) { # Windows build - builds the root visual studio solution. $result = buildChromiumVisualStudioProject("WebKit/chromium/WebKit.sln", $clean); diff --git a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/gitdiff2svndiff.pl b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/gitdiff2svndiff.pl deleted file mode 100644 index 93708d6..0000000 --- a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/gitdiff2svndiff.pl +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/perl -w -# -# Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek@gmail.com) -# Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies) -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Unit tests of VCSUtils::gitdiff2svndiff() - -use strict; -use warnings; - -use Test::Simple tests => 20; -use VCSUtils; - -# We use this for display purposes, to keep each test title on one line. -sub excerptString($) -{ - my ($text) = @_; - - my $length = 25; - - my $shortened = substr($text, 0, $length); - $shortened .= "..." 
if (length($text) > $length); - - return $shortened; -} - -my $git_patch = <<END; -diff --git a/WebCore/rendering/style/StyleFlexibleBoxData.h b/WebCore/rendering/style/StyleFlexibleBoxData.h -index f5d5e74..3b6aa92 100644 ---- a/WebCore/rendering/style/StyleFlexibleBoxData.h -+++ b/WebCore/rendering/style/StyleFlexibleBoxData.h -@@ -47,7 +47,6 @@ public: -END - -my $svn_patch = <<END; -Index: WebCore/rendering/style/StyleFlexibleBoxData.h -=================================================================== ---- WebCore/rendering/style/StyleFlexibleBoxData.h -+++ WebCore/rendering/style/StyleFlexibleBoxData.h -@@ -47,7 +47,6 @@ public: -END - -my @gitLines = split("\n", $git_patch); -my @svnLines = split("\n", $svn_patch); - -# New test: check each git header line with different line endings -my $titleHeader = "gitdiff2svndiff: "; - -my @lineEndingPairs = ( # display name, value - ["", ""], - ["\\n", "\n"], - ["\\r\\n", "\r\n"], -); - -for (my $i = 0; $i < @gitLines; $i++) { - foreach my $pair (@lineEndingPairs) { - my $gitLine = $gitLines[$i] . $pair->[1]; - my $expected = $svnLines[$i] . $pair->[1]; - my $title = $titleHeader . excerptString($gitLine); - $title .= " [line-end: \"$pair->[0]\"]"; - - ok($expected eq gitdiff2svndiff($gitLine), $title); - } -} - -# New test -my $title = "gitdiff2svndiff: Convert mnemonic git diff to svn diff"; - -my @prefixes = ( - { 'a' => 'i', 'b' => 'w' }, # git-diff (compares the (i)ndex and the (w)ork tree) - { 'a' => 'c', 'b' => 'w' }, # git-diff HEAD (compares a (c)ommit and the (w)ork tree) - { 'a' => 'c', 'b' => 'i' }, # git diff --cached (compares a (c)ommit and the (i)ndex) - { 'a' => 'o', 'b' => 'w' }, # git-diff HEAD:file1 file2 (compares an (o)bject and a (w)ork tree entity) - { 'a' => '1', 'b' => '2' }, # git diff --no-index a b (compares two non-git things (1) and (2)) -); - -my $out = ""; - -foreach my $prefix (@prefixes) { - my $mnemonic_patch = $git_patch; - $mnemonic_patch =~ s/ a\// $prefix->{'a'}\//g; - $mnemonic_patch =~ s/ b\// $prefix->{'b'}\//g; - - $out = ""; - foreach my $line (split('\n', $mnemonic_patch)) { - $out .= gitdiff2svndiff($line) . "\n"; - } - - ok($svn_patch eq $out, $title . " (" . $prefix->{'a'} . "," . $prefix->{'b'} . ")"); -} - diff --git a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiff.pl b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiff.pl index 2507d2d..9f112b2 100644 --- a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiff.pl +++ b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiff.pl @@ -30,13 +30,6 @@ use warnings; use Test::More; use VCSUtils; -my @diffHashRefKeys = ( # The $diffHashRef keys to check. - "copiedFromPath", - "indexPath", - "sourceRevision", - "svnConvertedText", -); - # The array of test cases. my @testCaseHashRefs = ( { @@ -53,7 +46,8 @@ Index: Makefile all: END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', # Same as input text Index: Makefile =================================================================== @@ -65,12 +59,52 @@ Index: Makefile all: END - copiedFromPath => undef, indexPath => "Makefile", + isSvn => 1, sourceRevision => "53052", - # Other values to check - lastReadLine => undef, - nextLine => undef, +}, +undef], + expectedNextLine => undef, +}, +{ + # New test + diffName => "SVN: binary file (isBinary true)", + inputText => <<'END', +Index: test_file.swf +=================================================================== +Cannot display: file marked as a binary type. 
+svn:mime-type = application/octet-stream + +Property changes on: test_file.swf +___________________________________________________________________ +Name: svn:mime-type + + application/octet-stream + + +Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA== +END + expectedReturn => [ +{ + svnConvertedText => <<'END', # Same as input text +Index: test_file.swf +=================================================================== +Cannot display: file marked as a binary type. +svn:mime-type = application/octet-stream + +Property changes on: test_file.swf +___________________________________________________________________ +Name: svn:mime-type + + application/octet-stream + + +Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA== +END + indexPath => "test_file.swf", + isBinary => 1, + isSvn => 1, +}, +undef], + expectedNextLine => undef, }, { # New test @@ -89,7 +123,8 @@ Index: Makefile all: END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', # Same as input text LEADING JUNK @@ -104,12 +139,12 @@ Index: Makefile all: END - copiedFromPath => undef, indexPath => "Makefile", + isSvn => 1, sourceRevision => "53052", - # Other values to check - lastReadLine => undef, - nextLine => undef, +}, +undef], + expectedNextLine => undef, }, { # New test @@ -122,7 +157,8 @@ Index: Makefile_new @@ -0,0 +1,1 @@ +MODULES = JavaScriptCore JavaScriptGlue WebCore WebKit WebKitTools END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', # Same as input text Index: Makefile_new =================================================================== @@ -133,10 +169,11 @@ Index: Makefile_new END copiedFromPath => "Makefile", indexPath => "Makefile_new", + isSvn => 1, sourceRevision => "53131", - # Other values to check - lastReadLine => undef, - nextLine => undef, +}, +undef], + expectedNextLine => undef, }, { # New test @@ -152,7 +189,8 @@ Index: Makefile_new =================================================================== --- Makefile_new (revision 53131) (from Makefile:53131) END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', Index: Makefile =================================================================== @@ -161,12 +199,12 @@ Index: Makefile @@ -1,1 +0,0 @@ -MODULES = JavaScriptCore JavaScriptGlue WebCore WebKit WebKitTools END - copiedFromPath => undef, indexPath => "Makefile", + isSvn => 1, sourceRevision => "53131", - # Other values to check - lastReadLine => "Index: Makefile_new\n", - nextLine => "===================================================================\n", +}, +"Index: Makefile_new\n"], + expectedNextLine => "===================================================================\n", }, { # New test @@ -184,7 +222,8 @@ index f5d5e74..3b6aa92 100644 +++ b/Makefile @@ -1,1 1,1 @@ public: END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', # Same as input text Index: Makefile =================================================================== @@ -198,13 +237,16 @@ index f5d5e74..3b6aa92 100644 +++ b/Makefile @@ -1,1 1,1 @@ public: END - copiedFromPath => undef, indexPath => "Makefile", + isSvn => 1, sourceRevision => "53131", - # Other values to check - lastReadLine => undef, - nextLine => undef, }, +undef], + expectedNextLine => undef, +}, +#### +# Git test cases +## { # New test diffName => "Git: simple", @@ -215,20 +257,20 @@ index f5d5e74..3b6aa92 100644 +++ b/Makefile @@ -1,1 1,1 @@ public: END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', Index: 
Makefile -=================================================================== +index f5d5e74..3b6aa92 100644 --- Makefile +++ Makefile @@ -1,1 1,1 @@ public: END - copiedFromPath => undef, indexPath => "Makefile", - sourceRevision => undef, - # Other values to check - lastReadLine => undef, - nextLine => undef, + isGit => 1, +}, +undef], + expectedNextLine => undef, }, { # New test @@ -243,10 +285,11 @@ Index: Makefile_new =================================================================== --- Makefile_new (revision 53131) (from Makefile:53131) END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', Index: Makefile -=================================================================== +index f5d5e74..3b6aa92 100644 --- Makefile +++ Makefile @@ -1,1 1,1 @@ public: @@ -254,75 +297,29 @@ Index: Makefile_new =================================================================== --- Makefile_new (revision 53131) (from Makefile:53131) END - copiedFromPath => undef, indexPath => "Makefile", - sourceRevision => undef, - # Other values to check - lastReadLine => undef, - nextLine => undef, + isGit => 1, +}, +undef], + expectedNextLine => undef, }, ); -# Return the arguments for each assertion per test case. -# -# In particular, the number of assertions per test case is the length -# of the return value of this subroutine on a sample input. -# -# Returns @assertionArgsArrayRefs: -# $assertionArgsArrayRef: A reference to an array of parameters to pass -# to each call to is(). The parameters are-- -# $got: The value obtained -# $expected: The expected value -# $testName: The name of the test -sub testParseDiffAssertionArgs($) -{ - my ($testCaseHashRef) = @_; +my $testCasesCount = @testCaseHashRefs; +plan(tests => 2 * $testCasesCount); # Total number of assertions. - my $fileHandle; - open($fileHandle, "<", \$testCaseHashRef->{inputText}); +foreach my $testCase (@testCaseHashRefs) { + my $testNameStart = "parseDiff(): $testCase->{diffName}: comparing"; + my $fileHandle; + open($fileHandle, "<", \$testCase->{inputText}); my $line = <$fileHandle>; - my ($diffHashRef, $lastReadLine) = VCSUtils::parseDiff($fileHandle, $line); - - my $testNameStart = "parseDiff(): [$testCaseHashRef->{diffName}] "; - - my @assertionArgsArrayRefs; # Return value - my @assertionArgs; - foreach my $diffHashRefKey (@diffHashRefKeys) { - my $testName = "${testNameStart}key=\"$diffHashRefKey\""; - @assertionArgs = ($diffHashRef->{$diffHashRefKey}, $testCaseHashRef->{$diffHashRefKey}, $testName); - push(@assertionArgsArrayRefs, \@assertionArgs); - } - - @assertionArgs = ($lastReadLine, $testCaseHashRef->{lastReadLine}, "${testNameStart}lastReadLine"); - push(@assertionArgsArrayRefs, \@assertionArgs); - - my $nextLine = <$fileHandle>; - @assertionArgs = ($nextLine, $testCaseHashRef->{nextLine}, "${testNameStart}nextLine"); - push(@assertionArgsArrayRefs, \@assertionArgs); - - return @assertionArgsArrayRefs; -} - -# Test parseDiff() for the given test case. -sub testParseDiff($) -{ - my ($testCaseHashRef) = @_; - - my @assertionArgsArrayRefs = testParseDiffAssertionArgs($testCaseHashRef); - - foreach my $arrayRef (@assertionArgsArrayRefs) { - # The parameters are -- is($got, $expected, $testName). - is($arrayRef->[0], $arrayRef->[1], $arrayRef->[2]); - } -} - -# Count the number of assertions per test case, using a sample test case. 
-my $assertionCount = testParseDiffAssertionArgs($testCaseHashRefs[0]); + my @got = VCSUtils::parseDiff($fileHandle, $line); + my $expectedReturn = $testCase->{expectedReturn}; -plan(tests => @testCaseHashRefs * $assertionCount); # Total number of tests + is_deeply(\@got, $expectedReturn, "$testNameStart return value."); -foreach my $testCaseHashRef (@testCaseHashRefs) { - testParseDiff($testCaseHashRef); + my $gotNextLine = <$fileHandle>; + is($gotNextLine, $testCase->{expectedNextLine}, "$testNameStart next read line."); } diff --git a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl index a7a3c26..8c20f65 100644 --- a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl +++ b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl @@ -36,68 +36,21 @@ use warnings; use Test::More; use VCSUtils; -my @diffHeaderHashRefKeys = ( # The $diffHeaderHashRef keys to check. - "copiedFromPath", - "indexPath", - "sourceRevision", - "svnConvertedText", -); - -# The array of test cases. +# The unit tests for parseGitDiffHeader() and parseSvnDiffHeader() +# already thoroughly test parsing each format. +# +# For parseDiffHeader(), it should suffice to verify that -- (1) for each +# format, the method can return non-trivial values back for each key +# supported by that format (e.g. "sourceRevision" for SVN), (2) the method +# correctly sets default values when specific key-values are not set +# (e.g. undef for "sourceRevision" for Git), and (3) key-values unique to +# this method are set correctly (e.g. "scmFormat"). my @testCaseHashRefs = ( -{ - # New test - diffName => "SVN: simple", - inputText => <<'END', -Index: WebKitTools/Scripts/VCSUtils.pm -=================================================================== ---- WebKitTools/Scripts/VCSUtils.pm (revision 53004) -+++ WebKitTools/Scripts/VCSUtils.pm (working copy) -@@ -32,6 +32,7 @@ use strict; - use warnings; -END - # Header keys to check - svnConvertedText => <<'END', -Index: WebKitTools/Scripts/VCSUtils.pm -=================================================================== ---- WebKitTools/Scripts/VCSUtils.pm (revision 53004) -+++ WebKitTools/Scripts/VCSUtils.pm (working copy) -END - copiedFromPath => undef, - indexPath => "WebKitTools/Scripts/VCSUtils.pm", - sourceRevision => "53004", - # Other values to check - lastReadLine => "@@ -32,6 +32,7 @@ use strict;\n", - nextLine => " use warnings;\n", -}, -{ - # New test - diffName => "SVN: new file", - inputText => <<'END', -Index: WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl -=================================================================== ---- WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) -+++ WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) -@@ -0,0 +1,262 @@ -+#!/usr/bin/perl -w -END - # Header keys to check - svnConvertedText => <<'END', -Index: WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl -=================================================================== ---- WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) -+++ WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) -END - copiedFromPath => undef, - indexPath => "WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl", - sourceRevision => undef, - # Other values to check - lastReadLine => "@@ -0,0 +1,262 @@\n", - nextLine => "+#!/usr/bin/perl 
-w\n", -}, -{ - # New test - diffName => "SVN: copy", +#### +# SVN test cases +## +{ # New test + diffName => "SVN: non-trivial copiedFromPath and sourceRevision values", inputText => <<'END', Index: index_path.py =================================================================== @@ -106,7 +59,8 @@ Index: index_path.py @@ -0,0 +1,7 @@ +# Python file... END - # Header keys to check + expectedReturn => [ +{ svnConvertedText => <<'END', Index: index_path.py =================================================================== @@ -115,174 +69,53 @@ Index: index_path.py END copiedFromPath => "copied_from_path.py", indexPath => "index_path.py", + isSvn => 1, sourceRevision => 53048, - # Other values to check - lastReadLine => "@@ -0,0 +1,7 @@\n", - nextLine => "+# Python file...\n", }, -{ - # New test - diffName => "SVN: \\r\\n lines", - inputText => <<END, # No single quotes to allow interpolation of "\r" -Index: index_path.py\r -===================================================================\r ---- index_path.py (revision 53048) (from copied_from_path.py:53048)\r -+++ index_path.py (working copy)\r -@@ -0,0 +1,7 @@\r -+# Python file...\r -END - # Header keys to check - svnConvertedText => <<END, # No single quotes to allow interpolation of "\r" -Index: index_path.py\r -===================================================================\r ---- index_path.py (revision 53048) (from copied_from_path.py:53048)\r -+++ index_path.py (working copy)\r -END - copiedFromPath => "copied_from_path.py", - indexPath => "index_path.py", - sourceRevision => 53048, - # Other values to check - lastReadLine => "@@ -0,0 +1,7 @@\r\n", - nextLine => "+# Python file...\r\n", +"@@ -0,0 +1,7 @@\n"], + expectedNextLine => "+# Python file...\n", }, -{ - # New test - diffName => "SVN: path corrections", +#### +# Git test cases +## +{ # New test case + diffName => "Git: Non-zero executable bit", inputText => <<'END', -Index: index_path.py -=================================================================== ---- bad_path (revision 53048) (from copied_from_path.py:53048) -+++ bad_path (working copy) -@@ -0,0 +1,7 @@ -+# Python file... 
-END - # Header keys to check - svnConvertedText => <<'END', -Index: index_path.py -=================================================================== ---- index_path.py (revision 53048) (from copied_from_path.py:53048) -+++ index_path.py (working copy) +diff --git a/foo.exe b/foo.exe +old mode 100644 +new mode 100755 END - copiedFromPath => "copied_from_path.py", - indexPath => "index_path.py", - sourceRevision => 53048, - # Other values to check - lastReadLine => "@@ -0,0 +1,7 @@\n", - nextLine => "+# Python file...\n", -}, + expectedReturn => [ { - # New test - diffName => "Git: simple", - inputText => <<'END', -diff --git a/WebCore/rendering/style/StyleFlexibleBoxData.h b/WebCore/rendering/style/StyleFlexibleBoxData.h -index f5d5e74..3b6aa92 100644 ---- a/WebCore/rendering/style/StyleFlexibleBoxData.h -+++ b/WebCore/rendering/style/StyleFlexibleBoxData.h -@@ -47,7 +47,6 @@ public: -END - # Header keys to check svnConvertedText => <<'END', -Index: WebCore/rendering/style/StyleFlexibleBoxData.h -=================================================================== ---- WebCore/rendering/style/StyleFlexibleBoxData.h -+++ WebCore/rendering/style/StyleFlexibleBoxData.h +Index: foo.exe +old mode 100644 +new mode 100755 END - copiedFromPath => undef, - indexPath => "WebCore/rendering/style/StyleFlexibleBoxData.h", - sourceRevision => undef, - # Other values to check - lastReadLine => "@@ -47,7 +47,6 @@ public:\n", - nextLine => undef, + executableBitDelta => 1, + indexPath => "foo.exe", + isGit => 1, }, -{ - # New test - diffName => "Git: unrecognized lines", - inputText => <<'END', -diff --git a/LayoutTests/http/tests/security/listener/xss-inactive-closure.html b/LayoutTests/http/tests/security/listener/xss-inactive-closure.html -new file mode 100644 -index 0000000..3c9f114 ---- /dev/null -+++ b/LayoutTests/http/tests/security/listener/xss-inactive-closure.html -@@ -0,0 +1,34 @@ -+<html> -END - # Header keys to check - svnConvertedText => <<'END', -Index: LayoutTests/http/tests/security/listener/xss-inactive-closure.html -=================================================================== ---- LayoutTests/http/tests/security/listener/xss-inactive-closure.html -+++ LayoutTests/http/tests/security/listener/xss-inactive-closure.html -END - copiedFromPath => undef, - indexPath => "LayoutTests/http/tests/security/listener/xss-inactive-closure.html", - sourceRevision => undef, - # Other values to check - lastReadLine => "@@ -0,0 +1,34 @@\n", - nextLine => "+<html>\n", +undef], + expectedNextLine => undef, }, ); -# Return the arguments for each assertion per test case. -# -# In particular, the number of assertions per test case is the length -# of the return value of this subroutine on a sample input. -# -# Returns @assertionArgsArrayRefs: -# $assertionArgsArrayRef: A reference to an array of parameters to pass -# to each call to is(). The parameters are-- -# $got: The value obtained -# $expected: The expected value -# $testName: The name of the test -sub testParseDiffHeaderAssertionArgs($) -{ - my ($testCaseHashRef) = @_; +my $testCasesCount = @testCaseHashRefs; +plan(tests => 2 * $testCasesCount); # Total number of assertions. 
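Per the comment above, parseDiffHeader() mostly dispatches on the diff format and tags the result with a format key (the test cases here assert isSvn/isGit). A minimal Python paraphrase of that dispatch, with hypothetical parse_git_diff_header()/parse_svn_diff_header() helpers standing in for the Perl subroutines of the same names:

def parse_diff_header(lines):
    # Dispatch on the first header line, then tag the hash with its
    # format, mirroring the isGit/isSvn keys asserted in these tests.
    first = lines[0]
    if first.startswith("diff --git "):
        header = parse_git_diff_header(lines)  # hypothetical stand-in
        header["isGit"] = 1
    elif first.startswith("Index: "):
        header = parse_svn_diff_header(lines)  # hypothetical stand-in
        header["isSvn"] = 1
    else:
        raise ValueError("leading junk before diff header: %r" % first)
    return header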
- my $fileHandle; - open($fileHandle, "<", \$testCaseHashRef->{inputText}); +foreach my $testCase (@testCaseHashRefs) { + my $testNameStart = "parseDiffHeader(): $testCase->{diffName}: comparing"; + my $fileHandle; + open($fileHandle, "<", \$testCase->{inputText}); my $line = <$fileHandle>; - my ($headerHashRef, $lastReadLine) = VCSUtils::parseDiffHeader($fileHandle, $line); - - my $testNameStart = "parseDiffHeader(): [$testCaseHashRef->{diffName}] "; - - my @assertionArgsArrayRefs; # Return value - my @assertionArgs; - foreach my $diffHeaderHashRefKey (@diffHeaderHashRefKeys) { - my $testName = "${testNameStart}key=\"$diffHeaderHashRefKey\""; - @assertionArgs = ($headerHashRef->{$diffHeaderHashRefKey}, $testCaseHashRef->{$diffHeaderHashRefKey}, $testName); - push(@assertionArgsArrayRefs, \@assertionArgs); - } - - @assertionArgs = ($lastReadLine, $testCaseHashRef->{lastReadLine}, "${testNameStart}lastReadLine"); - push(@assertionArgsArrayRefs, \@assertionArgs); - - my $nextLine = <$fileHandle>; - @assertionArgs = ($nextLine, $testCaseHashRef->{nextLine}, "${testNameStart}nextLine"); - push(@assertionArgsArrayRefs, \@assertionArgs); - - return @assertionArgsArrayRefs; -} - -# Test parseDiffHeader() for the given test case. -sub testParseDiffHeader($) -{ - my ($testCaseHashRef) = @_; - - my @assertionArgsArrayRefs = testParseDiffHeaderAssertionArgs($testCaseHashRef); - - foreach my $arrayRef (@assertionArgsArrayRefs) { - # The parameters are -- is($got, $expected, $testName). - is($arrayRef->[0], $arrayRef->[1], $arrayRef->[2]); - } -} - -# Count the number of assertions per test case to calculate the total number -# of Test::More tests. We could have used any test case for the count. -my $assertionCount = testParseDiffHeaderAssertionArgs($testCaseHashRefs[0]); + my @got = VCSUtils::parseDiffHeader($fileHandle, $line); + my $expectedReturn = $testCase->{expectedReturn}; -plan(tests => @testCaseHashRefs * $assertionCount); # Total number of tests + is_deeply(\@got, $expectedReturn, "$testNameStart return value."); -foreach my $testCaseHashRef (@testCaseHashRefs) { - testParseDiffHeader($testCaseHashRef); + my $gotNextLine = <$fileHandle>; + is($gotNextLine, $testCase->{expectedNextLine}, "$testNameStart next read line."); } diff --git a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseGitDiffHeader.pl b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseGitDiffHeader.pl new file mode 100644 index 0000000..9e2a88d --- /dev/null +++ b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseGitDiffHeader.pl @@ -0,0 +1,366 @@ +#!/usr/bin/perl -w +# +# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org) +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR ITS CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Unit tests of parseGitDiffHeader(). + +use strict; +use warnings; + +use Test::More; +use VCSUtils; + +# The array of test cases. +my @testCaseHashRefs = ( +{ # New test + diffName => "Modified file", + inputText => <<'END', +diff --git a/foo.h b/foo.h +index f5d5e74..3b6aa92 100644 +--- a/foo.h ++++ b/foo.h +@@ -1 +1 @@ +-file contents ++new file contents +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.h +index f5d5e74..3b6aa92 100644 +--- foo.h ++++ foo.h +END + indexPath => "foo.h", +}, +"@@ -1 +1 @@\n"], + expectedNextLine => "-file contents\n", +}, +{ # New test + diffName => "New file", + inputText => <<'END', +diff --git a/foo.h b/foo.h +new file mode 100644 +index 0000000..3c9f114 +--- /dev/null ++++ b/foo.h +@@ -0,0 +1,34 @@ ++<html> +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.h +new file mode 100644 +index 0000000..3c9f114 +--- foo.h ++++ foo.h +END + indexPath => "foo.h", +}, +"@@ -0,0 +1,34 @@\n"], + expectedNextLine => "+<html>\n", +}, +{ # New test + diffName => "using --no-prefix", + inputText => <<'END', +diff --git foo.h foo.h +index c925780..9e65c43 100644 +--- foo.h ++++ foo.h +@@ -1,3 +1,17 @@ ++contents +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.h +index c925780..9e65c43 100644 +--- foo.h ++++ foo.h +END + indexPath => "foo.h", +}, +"@@ -1,3 +1,17 @@\n"], + expectedNextLine => "+contents\n", +}, +#### +# Copy operations +## +{ # New test + diffName => "copy (with similarity index 100%)", + inputText => <<'END', +diff --git a/foo b/foo_new +similarity index 100% +copy from foo +copy to foo_new +diff --git a/bar b/bar +index d45dd40..3494526 100644 +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo_new +similarity index 100% +copy from foo +copy to foo_new +END + copiedFromPath => "foo", + indexPath => "foo_new", +}, +"diff --git a/bar b/bar\n"], + expectedNextLine => "index d45dd40..3494526 100644\n", +}, +{ # New test + diffName => "copy (with similarity index < 100%)", + inputText => <<'END', +diff --git a/foo b/foo_new +similarity index 99% +copy from foo +copy to foo_new +diff --git a/bar b/bar +index d45dd40..3494526 100644 +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo_new +similarity index 99% +copy from foo +copy to foo_new +END + indexPath => "foo_new", +}, +"diff --git a/bar b/bar\n"], + expectedNextLine => "index d45dd40..3494526 100644\n", +}, +#### +# Binary file test cases +## +{ + # New test case + diffName => "New binary file", + inputText => <<'END', +diff --git a/foo.gif b/foo.gif +new file mode 100644 +index 0000000000000000000000000000000000000000..64a9532e7794fcd791f6f12157406d9060151690 +GIT binary patch +literal 7 +OcmYex&reDa;sO8*F9L)B + +literal 0 +HcmV?d00001 + +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.gif +new file mode 100644 +index 0000000000000000000000000000000000000000..64a9532e7794fcd791f6f12157406d9060151690 +GIT binary patch +END + indexPath => "foo.gif", + isBinary => 1, +}, 
+"literal 7\n"], + expectedNextLine => "OcmYex&reDa;sO8*F9L)B\n", +}, +{ + # New test case + diffName => "Deleted binary file", + inputText => <<'END', +diff --git a/foo.gif b/foo.gif +deleted file mode 100644 +index 323fae0..0000000 +GIT binary patch +literal 0 +HcmV?d00001 + +literal 7 +OcmYex&reDa;sO8*F9L)B + +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.gif +deleted file mode 100644 +index 323fae0..0000000 +GIT binary patch +END + indexPath => "foo.gif", + isBinary => 1, +}, +"literal 0\n"], + expectedNextLine => "HcmV?d00001\n", +}, +#### +# Executable bit test cases +## +{ + # New test case + diffName => "Modified executable file", + inputText => <<'END', +diff --git a/foo b/foo +index d03e242..435ad3a 100755 +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-file contents ++new file contents + +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo +index d03e242..435ad3a 100755 +--- foo ++++ foo +END + indexPath => "foo", +}, +"@@ -1 +1 @@\n"], + expectedNextLine => "-file contents\n", +}, +{ + # New test case + diffName => "Making file executable (last diff)", + inputText => <<'END', +diff --git a/foo.exe b/foo.exe +old mode 100644 +new mode 100755 +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.exe +old mode 100644 +new mode 100755 +END + executableBitDelta => 1, + indexPath => "foo.exe", +}, +undef], + expectedNextLine => undef, +}, +{ + # New test case + diffName => "Making file executable (not last diff)", + inputText => <<'END', +diff --git a/foo.exe b/foo.exe +old mode 100644 +new mode 100755 +diff --git a/another_file.txt b/another_file.txt +index d03e242..435ad3a 100755 +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo.exe +old mode 100644 +new mode 100755 +END + executableBitDelta => 1, + indexPath => "foo.exe", +}, +"diff --git a/another_file.txt b/another_file.txt\n"], + expectedNextLine => "index d03e242..435ad3a 100755\n", +}, +{ + # New test case + diffName => "New executable file", + inputText => <<'END', +diff --git a/foo b/foo +new file mode 100755 +index 0000000..d03e242 +--- /dev/null ++++ b/foo +@@ -0,0 +1 @@ ++file contents + +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo +new file mode 100755 +index 0000000..d03e242 +--- foo ++++ foo +END + executableBitDelta => 1, + indexPath => "foo", +}, +"@@ -0,0 +1 @@\n"], + expectedNextLine => "+file contents\n", +}, +{ + # New test case + diffName => "Deleted executable file", + inputText => <<'END', +diff --git a/foo b/foo +deleted file mode 100755 +index d03e242..0000000 +--- a/foo ++++ /dev/null +@@ -1 +0,0 @@ +-file contents + +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: foo +deleted file mode 100755 +index d03e242..0000000 +--- foo ++++ foo +END + executableBitDelta => -1, + indexPath => "foo", +}, +"@@ -1 +0,0 @@\n"], + expectedNextLine => "-file contents\n", +}, +); + +my $testCasesCount = @testCaseHashRefs; +plan(tests => 2 * $testCasesCount); # Total number of assertions. 
+ +foreach my $testCase (@testCaseHashRefs) { + my $testNameStart = "parseGitDiffHeader(): $testCase->{diffName}: comparing"; + + my $fileHandle; + open($fileHandle, "<", \$testCase->{inputText}); + my $line = <$fileHandle>; + + my @got = VCSUtils::parseGitDiffHeader($fileHandle, $line); + my $expectedReturn = $testCase->{expectedReturn}; + + is_deeply(\@got, $expectedReturn, "$testNameStart return value."); + + my $gotNextLine = <$fileHandle>; + is($gotNextLine, $testCase->{expectedNextLine}, "$testNameStart next read line."); +} diff --git a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseSvnDiffHeader.pl b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseSvnDiffHeader.pl new file mode 100644 index 0000000..b732889 --- /dev/null +++ b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseSvnDiffHeader.pl @@ -0,0 +1,220 @@ +#!/usr/bin/perl -w +# +# Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com) +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Apple Computer, Inc. ("Apple") nor the names of +# its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Unit tests of parseSvnDiffHeader(). + +use strict; +use warnings; + +use Test::More; +use VCSUtils; + +# The array of test cases. 
+my @testCaseHashRefs = ( +{ + # New test + diffName => "simple diff", + inputText => <<'END', +Index: WebKitTools/Scripts/VCSUtils.pm +=================================================================== +--- WebKitTools/Scripts/VCSUtils.pm (revision 53004) ++++ WebKitTools/Scripts/VCSUtils.pm (working copy) +@@ -32,6 +32,7 @@ use strict; + use warnings; +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: WebKitTools/Scripts/VCSUtils.pm +=================================================================== +--- WebKitTools/Scripts/VCSUtils.pm (revision 53004) ++++ WebKitTools/Scripts/VCSUtils.pm (working copy) +END + indexPath => "WebKitTools/Scripts/VCSUtils.pm", + sourceRevision => "53004", +}, +"@@ -32,6 +32,7 @@ use strict;\n"], + expectedNextLine => " use warnings;\n", +}, +{ + # New test + diffName => "new file", + inputText => <<'END', +Index: WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl +=================================================================== +--- WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) ++++ WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) +@@ -0,0 +1,262 @@ ++#!/usr/bin/perl -w +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl +=================================================================== +--- WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) ++++ WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl (revision 0) +END + indexPath => "WebKitTools/Scripts/webkitperl/VCSUtils_unittest/parseDiffHeader.pl", +}, +"@@ -0,0 +1,262 @@\n"], + expectedNextLine => "+#!/usr/bin/perl -w\n", +}, +{ + # New test + diffName => "copied file", + inputText => <<'END', +Index: index_path.py +=================================================================== +--- index_path.py (revision 53048) (from copied_from_path.py:53048) ++++ index_path.py (working copy) +@@ -0,0 +1,7 @@ ++# Python file... 
+END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: index_path.py +=================================================================== +--- index_path.py (revision 53048) (from copied_from_path.py:53048) ++++ index_path.py (working copy) +END + copiedFromPath => "copied_from_path.py", + indexPath => "index_path.py", + sourceRevision => 53048, +}, +"@@ -0,0 +1,7 @@\n"], + expectedNextLine => "+# Python file...\n", +}, +{ + # New test + diffName => "contains \\r\\n lines", + inputText => <<END, # No single quotes to allow interpolation of "\r" +Index: index_path.py\r +===================================================================\r +--- index_path.py (revision 53048) (from copied_from_path.py:53048)\r ++++ index_path.py (working copy)\r +@@ -0,0 +1,7 @@\r ++# Python file...\r +END + expectedReturn => [ +{ + svnConvertedText => <<END, # No single quotes to allow interpolation of "\r" +Index: index_path.py\r +===================================================================\r +--- index_path.py (revision 53048) (from copied_from_path.py:53048)\r ++++ index_path.py (working copy)\r +END + copiedFromPath => "copied_from_path.py", + indexPath => "index_path.py", + sourceRevision => 53048, +}, +"@@ -0,0 +1,7 @@\r\n"], + expectedNextLine => "+# Python file...\r\n", +}, +{ + # New test + diffName => "contains path corrections", + inputText => <<'END', +Index: index_path.py +=================================================================== +--- bad_path (revision 53048) (from copied_from_path.py:53048) ++++ bad_path (working copy) +@@ -0,0 +1,7 @@ ++# Python file... +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: index_path.py +=================================================================== +--- index_path.py (revision 53048) (from copied_from_path.py:53048) ++++ index_path.py (working copy) +END + copiedFromPath => "copied_from_path.py", + indexPath => "index_path.py", + sourceRevision => 53048, +}, +"@@ -0,0 +1,7 @@\n"], + expectedNextLine => "+# Python file...\n", +}, +#### +# Binary test cases +## +{ + # New test + diffName => "binary file", + inputText => <<'END', +Index: test_file.swf +=================================================================== +Cannot display: file marked as a binary type. +svn:mime-type = application/octet-stream + +Property changes on: test_file.swf +___________________________________________________________________ +Name: svn:mime-type + + application/octet-stream + + +Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA== +END + expectedReturn => [ +{ + svnConvertedText => <<'END', +Index: test_file.swf +=================================================================== +Cannot display: file marked as a binary type. +END + indexPath => "test_file.swf", + isBinary => 1, +}, +"svn:mime-type = application/octet-stream\n"], + expectedNextLine => "\n", +}, +); + +my $testCasesCount = @testCaseHashRefs; +plan(tests => 2 * $testCasesCount); # Total number of assertions. 
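The copiedFromPath and sourceRevision values asserted above come from the SVN "---" header line. An approximate Python regex for that line (the real parsing lives in VCSUtils.pm, which also corrects bad paths and, as the "new file" case shows, treats revision 0 as having no source revision):

import re

svn_source_line = re.compile(
    r"^--- (.+?)\s+\(revision (\d+)\)(?:\s+\(from (.+?):(\d+)\))?")

m = svn_source_line.match(
    "--- index_path.py (revision 53048) (from copied_from_path.py:53048)")
assert m.group(1) == "index_path.py"          # indexPath candidate
assert m.group(2) == "53048"                  # sourceRevision
assert m.group(3) == "copied_from_path.py"    # copiedFromPath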
+ +foreach my $testCase (@testCaseHashRefs) { + my $testNameStart = "parseSvnDiffHeader(): $testCase->{diffName}: comparing"; + + my $fileHandle; + open($fileHandle, "<", \$testCase->{inputText}); + my $line = <$fileHandle>; + + my @got = VCSUtils::parseSvnDiffHeader($fileHandle, $line); + my $expectedReturn = $testCase->{expectedReturn}; + + is_deeply(\@got, $expectedReturn, "$testNameStart return value."); + + my $gotNextLine = <$fileHandle>; + is($gotNextLine, $testCase->{expectedNextLine}, "$testNameStart next read line."); +} diff --git a/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/prepareParsedPatch.pl b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/prepareParsedPatch.pl new file mode 100644 index 0000000..a7ae807 --- /dev/null +++ b/WebKitTools/Scripts/webkitperl/VCSUtils_unittest/prepareParsedPatch.pl @@ -0,0 +1,136 @@ +#!/usr/bin/perl -w +# +# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org) +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Unit tests of prepareParsedPatch(). 
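The cases below pin down the shape of prepareParsedPatch()'s return value: copies are separated from non-copies, and sourceRevisionHash records, per path, the revision the diff was made against. A minimal Python sketch of that partitioning (the leading force flag the tests pass as 0 is omitted here):

def prepare_parsed_patch(diffs):
    result = {"copyDiffHashRefs": [], "nonCopyDiffHashRefs": [],
              "sourceRevisionHash": {}}
    for diff in diffs:
        if diff.get("copiedFromPath"):
            result["copyDiffHashRefs"].append(diff)
            path = diff["copiedFromPath"]   # copies key off their source
        else:
            result["nonCopyDiffHashRefs"].append(diff)
            path = diff["indexPath"]
        if diff.get("sourceRevision") is not None:
            result["sourceRevisionHash"][path] = diff["sourceRevision"]
    return result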
+ +use strict; +use warnings; + +use Test::More; +use VCSUtils; + +my $diffHashRef1 = { # not a copy, no source revision + copiedFromPath => undef, + indexPath => "indexPath1", + sourceRevision => undef, + svnConvertedText => "diff1", +}; +my $diffHashRef2 = { # not a copy, has source revision + copiedFromPath => undef, + indexPath => "indexPath2", + sourceRevision => 20, + svnConvertedText => "diff2", +}; +my $diffHashRef3 = { # a copy (copies always have source revision) + copiedFromPath => "sourcePath3", + indexPath => "indexPath2", # Deliberately choosing same as $diffHashRef2 + sourceRevision => 3, + svnConvertedText => "diff3", +}; + +my @testCases = ( +{ + # New test + testName => "zero diffs: empty array", + diffHashRefsInput => [], + expected => { + copyDiffHashRefs => [], + nonCopyDiffHashRefs => [], + sourceRevisionHash => {}, + }, +}, +{ + # New test + testName => "one diff: non-copy, no revision", + diffHashRefsInput => [$diffHashRef1], + expected => { + copyDiffHashRefs => [], + nonCopyDiffHashRefs => [$diffHashRef1], + sourceRevisionHash => {}, + }, +}, +{ + # New test + testName => "one diff: non-copy, has revision", + diffHashRefsInput => [$diffHashRef2], + expected => { + copyDiffHashRefs => [], + nonCopyDiffHashRefs => [$diffHashRef2], + sourceRevisionHash => { + "indexPath2" => 20, + } + }, +}, +{ + # New test + testName => "one diff: copy (has revision)", + diffHashRefsInput => [$diffHashRef3], + expected => { + copyDiffHashRefs => [$diffHashRef3], + nonCopyDiffHashRefs => [], + sourceRevisionHash => { + "sourcePath3" => 3, + } + }, +}, +{ + # New test + testName => "two diffs: two non-copies", + diffHashRefsInput => [$diffHashRef1, $diffHashRef2], + expected => { + copyDiffHashRefs => [], + nonCopyDiffHashRefs => [$diffHashRef1, $diffHashRef2], + sourceRevisionHash => { + "indexPath2" => 20, + } + }, +}, +{ + # New test + testName => "two diffs: non-copy and copy", + diffHashRefsInput => [$diffHashRef2, $diffHashRef3], + expected => { + copyDiffHashRefs => [$diffHashRef3], + nonCopyDiffHashRefs => [$diffHashRef2], + sourceRevisionHash => { + "sourcePath3" => 3, + "indexPath2" => 20, + } + }, +}, +); + +my $testCasesCount = @testCases; +plan(tests => $testCasesCount); + +foreach my $testCase (@testCases) { + my $testName = $testCase->{testName}; + my @diffHashRefs = @{$testCase->{diffHashRefsInput}}; + my $expected = $testCase->{expected}; + + my $got = prepareParsedPatch(0, @diffHashRefs); + + is_deeply($got, $expected, $testName); +} + diff --git a/WebKitTools/Scripts/webkitperl/features.pm b/WebKitTools/Scripts/webkitperl/features.pm index 1f88022..7ca924b 100644 --- a/WebKitTools/Scripts/webkitperl/features.pm +++ b/WebKitTools/Scripts/webkitperl/features.pm @@ -72,6 +72,7 @@ sub hasFeature($$) "3D Rendering" => "WebCoreHas3DRendering", "3D Canvas" => "WebGLShader", "WML" => "WMLElement", + "WCSS" => "parseWCSSInputProperty", "XHTMLMP" => "isXHTMLMPDocument", ); my $symbolName = $symbolForFeature{$featureName}; diff --git a/WebKitTools/Scripts/webkitpy/common/array_stream.py b/WebKitTools/Scripts/webkitpy/common/array_stream.py new file mode 100644 index 0000000..e425d02 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/common/array_stream.py @@ -0,0 +1,66 @@ +#!/usr/bin/python +# Copyright (C) 2010 Google Inc. All rights reserved. 
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Package that provides an array-based implementation of a stream."""
+
+
+class ArrayStream(object):
+    """Simple class that implements a stream interface on top of an array.
+
+    This is used primarily by unit test classes to mock output streams. It
+    performs a similar function to StringIO, but (a) it is write-only, and
+    (b) it can be used to retrieve each individual write(); StringIO
+    concatenates all of the writes together.
+    """
+
+    def __init__(self):
+        self._contents = []
+
+    def write(self, msg):
+        """Implement stream.write() by appending to the stream's contents."""
+        self._contents.append(msg)
+
+    def get(self):
+        """Return the contents of a stream (as an array)."""
+        return self._contents
+
+    def reset(self):
+        """Empty the stream."""
+        self._contents = []
+
+    def empty(self):
+        """Return whether the stream is empty."""
+        return (len(self._contents) == 0)
+
+    def flush(self):
+        """Flush the stream (a no-op implemented for compatibility)."""
+        pass
+
+    def __repr__(self):
+        return '<ArrayStream: ' + str(self._contents) + '>'
diff --git a/WebKitTools/Scripts/webkitpy/common/array_stream_unittest.py b/WebKitTools/Scripts/webkitpy/common/array_stream_unittest.py
new file mode 100644
index 0000000..1a9b34a
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/common/array_stream_unittest.py
@@ -0,0 +1,78 @@
+#!/usr/bin/python
+# Copyright (C) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc.

nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Unit tests for array_stream.py.""" + +import pdb +import unittest + +from webkitpy.common.array_stream import ArrayStream + + +class ArrayStreamTest(unittest.TestCase): + def assertEmpty(self, a_stream): + self.assertTrue(a_stream.empty()) + + def assertNotEmpty(self, a_stream): + self.assertFalse(a_stream.empty()) + + def assertContentsMatch(self, a_stream, contents): + self.assertEquals(a_stream.get(), contents) + + def test_basics(self): + a = ArrayStream() + self.assertEmpty(a) + self.assertContentsMatch(a, []) + + a.flush() + self.assertEmpty(a) + self.assertContentsMatch(a, []) + + a.write("foo") + a.write("bar") + self.assertNotEmpty(a) + self.assertContentsMatch(a, ["foo", "bar"]) + + a.flush() + self.assertNotEmpty(a) + self.assertContentsMatch(a, ["foo", "bar"]) + + a.reset() + self.assertEmpty(a) + self.assertContentsMatch(a, []) + + self.assertEquals(str(a), "<ArrayStream: []>") + + a.write("foo") + self.assertNotEmpty(a) + self.assertContentsMatch(a, ["foo"]) + self.assertEquals(str(a), "<ArrayStream: ['foo']>") + +if __name__ == '__main__': + unittest.main() diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/api.py b/WebKitTools/Scripts/webkitpy/common/checkout/api.py index c4e2b69..a5ac939 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/api.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/api.py @@ -27,7 +27,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os -import subprocess import StringIO from webkitpy.common.checkout.changelog import ChangeLog @@ -50,7 +49,11 @@ class Checkout(object): def _latest_entry_for_changelog_at_revision(self, changelog_path, revision): changelog_contents = self._scm.contents_at_revision(changelog_path, revision) - return ChangeLog.parse_latest_entry_from_file(StringIO.StringIO(changelog_contents)) + # contents_at_revision returns a byte array (str()), but we know + # that ChangeLog files are utf-8. parse_latest_entry_from_file + # expects a file-like object which vends unicode(), so we decode here. 
+ changelog_file = StringIO.StringIO(changelog_contents.decode("utf-8")) + return ChangeLog.parse_latest_entry_from_file(changelog_file) def changelog_entries_for_revision(self, revision): changed_files = self._scm.changed_files_for_revision(revision) @@ -80,16 +83,16 @@ class Checkout(object): def bug_id_for_revision(self, revision): return self.commit_info_for_revision(revision).bug_id() - def modified_changelogs(self): + def modified_changelogs(self, git_commit, squash): # SCM returns paths relative to scm.checkout_root # Callers (especially those using the ChangeLog class) may # expect absolute paths, so this method returns absolute paths. - changed_files = self._scm.changed_files() + changed_files = self._scm.changed_files(git_commit, squash) absolute_paths = [os.path.join(self._scm.checkout_root, path) for path in changed_files] return [path for path in absolute_paths if self._is_path_to_changelog(path)] - def commit_message_for_this_commit(self): - changelog_paths = self.modified_changelogs() + def commit_message_for_this_commit(self, git_commit, squash): + changelog_paths = self.modified_changelogs(git_commit, squash) if not len(changelog_paths): raise ScriptError(message="Found no modified ChangeLogs, cannot create a commit message.\n" "All changes require a ChangeLog. See:\n" @@ -106,32 +109,29 @@ class Checkout(object): # FIXME: We should sort and label the ChangeLog messages like commit-log-editor does. return CommitMessage("".join(changelog_messages).splitlines()) - def bug_id_for_this_commit(self): + def bug_id_for_this_commit(self, git_commit, squash): try: - return parse_bug_id(self.commit_message_for_this_commit().message()) + return parse_bug_id(self.commit_message_for_this_commit(git_commit, squash).message()) except ScriptError, e: pass # We might not have ChangeLogs. def apply_patch(self, patch, force=False): # It's possible that the patch was not made from the root directory. # We should detect and handle that case. - # FIXME: Use Executive instead of subprocess here. - curl_process = subprocess.Popen(['curl', '--location', '--silent', '--show-error', patch.url()], stdout=subprocess.PIPE) # FIXME: Move _scm.script_path here once we get rid of all the dependencies. args = [self._scm.script_path('svn-apply')] if patch.reviewer(): args += ['--reviewer', patch.reviewer().full_name] if force: args.append('--force') - - run_command(args, input=curl_process.stdout) + run_command(args, input=patch.contents()) def apply_reverse_diff(self, revision): self._scm.apply_reverse_diff(revision) # We revert the ChangeLogs because removing lines from a ChangeLog # doesn't make sense. ChangeLogs are append only. - changelog_paths = self.modified_changelogs() + changelog_paths = self.modified_changelogs(git_commit=None, squash=False) if len(changelog_paths): self._scm.revert_files(changelog_paths) diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/api_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/api_unittest.py index e99caee..1436379 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/api_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/api_unittest.py @@ -26,6 +26,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
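The decode in _latest_entry_for_changelog_at_revision() above reflects a convention used throughout this patch: SCM output crosses the API boundary as raw bytes (str in Python 2), and text-aware callers decode it exactly once. A minimal sketch of the pattern, mirroring api.py (the helper name is ours):

import StringIO  # Python 2, as in api.py above

from webkitpy.common.checkout.changelog import ChangeLog

def latest_entry_from_scm_bytes(changelog_bytes):
    # contents_at_revision() hands back raw bytes; ChangeLog parsing
    # wants unicode, so decode here, at the boundary.
    assert isinstance(changelog_bytes, str)
    changelog_file = StringIO.StringIO(changelog_bytes.decode("utf-8"))
    return ChangeLog.parse_latest_entry_from_file(changelog_file)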
+from __future__ import with_statement + +import codecs import os import shutil import tempfile @@ -37,14 +40,14 @@ from webkitpy.common.checkout.scm import detect_scm_system, CommitMessage from webkitpy.common.system.outputcapture import OutputCapture from webkitpy.thirdparty.mock import Mock + # FIXME: Copied from scm_unittest.py -def write_into_file_at_path(file_path, contents): - new_file = open(file_path, 'w') - new_file.write(contents) - new_file.close() +def write_into_file_at_path(file_path, contents, encoding="utf-8"): + with codecs.open(file_path, "w", encoding) as file: + file.write(contents) -_changelog1entry1 = """2010-03-25 Eric Seidel <eric@webkit.org> +_changelog1entry1 = u"""2010-03-25 Tor Arne Vestb\u00f8 <vestbo@webkit.org> Unreviewed build fix to un-break webkit-patch land. @@ -53,7 +56,7 @@ _changelog1entry1 = """2010-03-25 Eric Seidel <eric@webkit.org> * Scripts/webkitpy/common/checkout/api.py: import scm.CommitMessage """ -_changelog1entry2 = """2010-03-25 Adam Barth <abarth@webkit.org> +_changelog1entry2 = u"""2010-03-25 Adam Barth <abarth@webkit.org> Reviewed by Eric Seidel. @@ -62,8 +65,8 @@ _changelog1entry2 = """2010-03-25 Adam Barth <abarth@webkit.org> * Scripts/webkitpy/common/checkout/api.py: """ -_changelog1 = "\n".join([_changelog1entry1, _changelog1entry2]) -_changelog2 = """2010-03-25 Eric Seidel <eric@webkit.org> +_changelog1 = u"\n".join([_changelog1entry1, _changelog1entry2]) +_changelog2 = u"""2010-03-25 Tor Arne Vestb\u00f8 <vestbo@webkit.org> Unreviewed build fix to un-break webkit-patch land. @@ -79,7 +82,7 @@ _changelog2 = """2010-03-25 Eric Seidel <eric@webkit.org> """ class CommitMessageForThisCommitTest(unittest.TestCase): - expected_commit_message = """2010-03-25 Eric Seidel <eric@webkit.org> + expected_commit_message = u"""2010-03-25 Tor Arne Vestb\u00f8 <vestbo@webkit.org> Unreviewed build fix to un-break webkit-patch land. @@ -87,7 +90,7 @@ class CommitMessageForThisCommitTest(unittest.TestCase): https://bugs.webkit.org/show_bug.cgi?id=36629 * Scripts/webkitpy/common/checkout/api.py: import scm.CommitMessage -2010-03-25 Eric Seidel <eric@webkit.org> +2010-03-25 Tor Arne Vestb\u00f8 <vestbo@webkit.org> Unreviewed build fix to un-break webkit-patch land. @@ -111,10 +114,11 @@ class CommitMessageForThisCommitTest(unittest.TestCase): # ChangeLog is difficult to mock at current. def test_commit_message_for_this_commit(self): checkout = Checkout(None) - checkout.modified_changelogs = lambda: ["ChangeLog1", "ChangeLog2"] + checkout.modified_changelogs = lambda git_commit, squash: ["ChangeLog1", "ChangeLog2"] output = OutputCapture() expected_stderr = "Parsing ChangeLog: ChangeLog1\nParsing ChangeLog: ChangeLog2\n" - commit_message = output.assert_outputs(self, checkout.commit_message_for_this_commit, expected_stderr=expected_stderr) + commit_message = output.assert_outputs(self, checkout.commit_message_for_this_commit, + kwargs={"git_commit": None, "squash": False}, expected_stderr=expected_stderr) self.assertEqual(commit_message.message(), self.expected_commit_message) @@ -124,7 +128,9 @@ class CheckoutTest(unittest.TestCase): def mock_contents_at_revision(changelog_path, revision): self.assertEqual(changelog_path, "foo") self.assertEqual(revision, "bar") - return _changelog1 + # contents_at_revision is expected to return a byte array (str) + # so we encode our unicode ChangeLog down to a utf-8 stream. 
+ return _changelog1.encode("utf-8") scm.contents_at_revision = mock_contents_at_revision checkout = Checkout(scm) entry = checkout._latest_entry_for_changelog_at_revision("foo", "bar") @@ -137,8 +143,8 @@ class CheckoutTest(unittest.TestCase): checkout.changelog_entries_for_revision = lambda revision: [ChangeLogEntry(_changelog1entry1)] commitinfo = checkout.commit_info_for_revision(4) self.assertEqual(commitinfo.bug_id(), 36629) - self.assertEqual(commitinfo.author_name(), "Eric Seidel") - self.assertEqual(commitinfo.author_email(), "eric@webkit.org") + self.assertEqual(commitinfo.author_name(), u"Tor Arne Vestb\u00f8") + self.assertEqual(commitinfo.author_email(), "vestbo@webkit.org") self.assertEqual(commitinfo.reviewer_text(), None) self.assertEqual(commitinfo.reviewer(), None) self.assertEqual(commitinfo.committer_email(), "committer@example.com") @@ -157,13 +163,13 @@ class CheckoutTest(unittest.TestCase): def test_bug_id_for_this_commit(self): scm = Mock() checkout = Checkout(scm) - checkout.commit_message_for_this_commit = lambda: CommitMessage(ChangeLogEntry(_changelog1entry1).contents().splitlines()) - self.assertEqual(checkout.bug_id_for_this_commit(), 36629) + checkout.commit_message_for_this_commit = lambda git_commit, squash: CommitMessage(ChangeLogEntry(_changelog1entry1).contents().splitlines()) + self.assertEqual(checkout.bug_id_for_this_commit(git_commit=None, squash=False), 36629) def test_modified_changelogs(self): scm = Mock() scm.checkout_root = "/foo/bar" - scm.changed_files = lambda:["file1", "ChangeLog", "relative/path/ChangeLog"] + scm.changed_files = lambda git_commit, squash: ["file1", "ChangeLog", "relative/path/ChangeLog"] checkout = Checkout(scm) expected_changlogs = ["/foo/bar/ChangeLog", "/foo/bar/relative/path/ChangeLog"] - self.assertEqual(checkout.modified_changelogs(), expected_changlogs) + self.assertEqual(checkout.modified_changelogs(git_commit=None, squash=False), expected_changlogs) diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py b/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py index e93896f..6220fbd 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/changelog.py @@ -99,10 +99,14 @@ class ChangeLog(object): @staticmethod def parse_latest_entry_from_file(changelog_file): + """changelog_file must be a file-like object which returns + unicode strings. Use codecs.open or StringIO(unicode()) + to pass file objects to this class.""" date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp) entry_lines = [] # The first line should be a date line. first_line = changelog_file.readline() + assert(isinstance(first_line, unicode)) if not date_line_regexp.match(first_line): return None entry_lines.append(first_line) diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py index 9210c9c..864428a 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/changelog_unittest.py @@ -26,9 +26,12 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import unittest +from __future__ import with_statement + +import codecs import os import tempfile +import unittest from StringIO import StringIO @@ -52,7 +55,7 @@ class ChangeLogsTest(unittest.TestCase): ''' # More example text than we need. 
Eventually we need to support parsing this all and write tests for the parsing. - _example_changelog = '''2009-08-17 David Kilzer <ddkilzer@apple.com> + _example_changelog = u"""2009-08-17 Tor Arne Vestb\xf8 <vestbo@webkit.org> <http://webkit.org/b/28393> check-webkit-style: add check for use of std::max()/std::min() instead of MAX()/MIN() @@ -84,10 +87,10 @@ class ChangeLogsTest(unittest.TestCase): so we can't assert here. == Rolled over to ChangeLog-2009-06-16 == -''' +""" def test_latest_entry_parse(self): - changelog_contents = "%s\n%s" % (self._example_entry, self._example_changelog) + changelog_contents = u"%s\n%s" % (self._example_entry, self._example_changelog) changelog_file = StringIO(changelog_contents) latest_entry = ChangeLog.parse_latest_entry_from_file(changelog_file) self.assertEquals(latest_entry.contents(), self._example_entry) @@ -97,19 +100,17 @@ class ChangeLogsTest(unittest.TestCase): self.assertTrue(latest_entry.reviewer()) # Make sure that our UTF8-based lookup of Tor works. @staticmethod - def _write_tmp_file_with_contents(contents): + def _write_tmp_file_with_contents(byte_array): + assert(isinstance(byte_array, str)) (file_descriptor, file_path) = tempfile.mkstemp() # NamedTemporaryFile always deletes the file on close in python < 2.6 - file = os.fdopen(file_descriptor, 'w') - file.write(contents) - file.close() + with os.fdopen(file_descriptor, "w") as file: + file.write(byte_array) return file_path @staticmethod - def _read_file_contents(file_path): - file = open(file_path) - contents = file.read() - file.close() - return contents + def _read_file_contents(file_path, encoding): + with codecs.open(file_path, "r", encoding) as file: + return file.read() _new_entry_boilerplate = '''2009-08-19 Eric Seidel <eric@webkit.org> @@ -121,11 +122,11 @@ class ChangeLogsTest(unittest.TestCase): ''' def test_set_reviewer(self): - changelog_contents = "%s\n%s" % (self._new_entry_boilerplate, self._example_changelog) - changelog_path = self._write_tmp_file_with_contents(changelog_contents) + changelog_contents = u"%s\n%s" % (self._new_entry_boilerplate, self._example_changelog) + changelog_path = self._write_tmp_file_with_contents(changelog_contents.encode("utf-8")) reviewer_name = 'Test Reviewer' ChangeLog(changelog_path).set_reviewer(reviewer_name) - actual_contents = self._read_file_contents(changelog_path) + actual_contents = self._read_file_contents(changelog_path, "utf-8") expected_contents = changelog_contents.replace('NOBODY (OOPS!)', reviewer_name) os.remove(changelog_path) self.assertEquals(actual_contents, expected_contents) @@ -169,8 +170,8 @@ class ChangeLogsTest(unittest.TestCase): ''' def _assert_update_for_revert_output(self, args, expected_entry): - changelog_contents = "%s\n%s" % (self._new_entry_boilerplate, self._example_changelog) - changelog_path = self._write_tmp_file_with_contents(changelog_contents) + changelog_contents = u"%s\n%s" % (self._new_entry_boilerplate, self._example_changelog) + changelog_path = self._write_tmp_file_with_contents(changelog_contents.encode("utf-8")) changelog = ChangeLog(changelog_path) changelog.update_for_revert(*args) actual_entry = changelog.latest_entry() diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/commitinfo.py b/WebKitTools/Scripts/webkitpy/common/checkout/commitinfo.py index 7c3315f..448d530 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/commitinfo.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/commitinfo.py @@ -28,8 +28,6 @@ # # WebKit's python module for holding information on a commit 
-import StringIO - from webkitpy.common.checkout.changelog import view_source_url from webkitpy.common.config.committers import CommitterList diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/scm.py b/WebKitTools/Scripts/webkitpy/common/checkout/scm.py index 2704f07..02e114a 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/scm.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/scm.py @@ -41,11 +41,13 @@ from webkitpy.common.system.deprecated_logging import error, log def detect_scm_system(path): - if SVN.in_working_directory(path): - return SVN(cwd=path) + absolute_path = os.path.abspath(path) + + if SVN.in_working_directory(absolute_path): + return SVN(cwd=absolute_path) - if Git.in_working_directory(path): - return Git(cwd=path) + if Git.in_working_directory(absolute_path): + return Git(cwd=absolute_path) return None @@ -145,7 +147,7 @@ class SCM: return filenames def strip_r_from_svn_revision(self, svn_revision): - match = re.match("^r(?P<svn_revision>\d+)", svn_revision) + match = re.match("^r(?P<svn_revision>\d+)", unicode(svn_revision)) if (match): return match.group('svn_revision') return svn_revision @@ -178,7 +180,7 @@ class SCM: def add(self, path): raise NotImplementedError, "subclasses must implement" - def changed_files(self): + def changed_files(self, git_commit=None, squash=None): raise NotImplementedError, "subclasses must implement" def changed_files_for_revision(self): @@ -193,7 +195,7 @@ class SCM: def display_name(self): raise NotImplementedError, "subclasses must implement" - def create_patch(self): + def create_patch(self, git_commit=None, squash=None): raise NotImplementedError, "subclasses must implement" def committer_email_for_revision(self, revision): @@ -211,7 +213,10 @@ class SCM: def revert_files(self, file_paths): raise NotImplementedError, "subclasses must implement" - def commit_with_message(self, message, username=None): + def should_squash(self, squash): + raise NotImplementedError, "subclasses must implement" + + def commit_with_message(self, message, username=None, git_commit=None, squash=None): raise NotImplementedError, "subclasses must implement" def svn_commit_log(self, svn_revision): @@ -229,12 +234,6 @@ class SCM: def svn_merge_base(): raise NotImplementedError, "subclasses must implement" - def create_patch_from_local_commit(self, commit_id): - error("Your source control manager does not support creating a patch from a local commit.") - - def create_patch_since_local_commit(self, commit_id): - error("Your source control manager does not support creating a patch from a local commit.") - def commit_locally_with_message(self, message): error("Your source control manager does not support local commits.") @@ -308,7 +307,7 @@ class SVN(SCM): return self.cached_version def working_directory_is_clean(self): - return run_command(["svn", "diff"], cwd=self.checkout_root) == "" + return run_command(["svn", "diff"], cwd=self.checkout_root, decode_output=False) == "" def clean_working_directory(self): # svn revert -R is not as awesome as git reset --hard. @@ -339,12 +338,13 @@ class SVN(SCM): # path is assumed to be cwd relative? run_command(["svn", "add", path]) - def changed_files(self): + def changed_files(self, git_commit=None, squash=None): return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("ACDMR")) def changed_files_for_revision(self, revision): # As far as I can tell svn diff --summarize output looks just like svn status output. 
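A related pattern in the scm.py changes just below: call sites stop pre-stringifying revisions with str(), and strip_r_from_svn_revision() above coerces with unicode() instead, since re.match() requires a string while callers pass both "r12345" strings and bare integers. A minimal sketch of that behavior (using str() for brevity; the helper name is ours):

import re

def strip_r(svn_revision):
    match = re.match(r"^r(?P<svn_revision>\d+)", str(svn_revision))
    # Return the bare number for "r12345"; pass anything else through.
    return match.group("svn_revision") if match else svn_revision

assert strip_r("r53048") == "53048"
assert strip_r(53048) == 53048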
- status_command = ["svn", "diff", "--summarize", "-c", str(revision)] + # No file contents printed, thus utf-8 auto-decoding in run_command is fine. + status_command = ["svn", "diff", "--summarize", "-c", revision] return self.run_status_and_extract_filenames(status_command, self._status_regexp("ACDMR")) def conflicted_files(self): @@ -360,19 +360,26 @@ class SVN(SCM): def display_name(self): return "svn" - def create_patch(self): - return run_command(self.script_path("svn-create-patch"), cwd=self.checkout_root, return_stderr=False) + def create_patch(self, git_commit=None, squash=None): + """Returns a byte array (str()) representing the patch file. + Patch files are effectively binary since they may contain + files of multiple different encodings.""" + return run_command([self.script_path("svn-create-patch")], + cwd=self.checkout_root, return_stderr=False, + decode_output=False) def committer_email_for_revision(self, revision): - return run_command(["svn", "propget", "svn:author", "--revprop", "-r", str(revision)]).rstrip() + return run_command(["svn", "propget", "svn:author", "--revprop", "-r", revision]).rstrip() def contents_at_revision(self, path, revision): + """Returns a byte array (str()) containing the contents + of path @ revision in the repository.""" remote_path = "%s/%s" % (self._repository_url(), path) - return run_command(["svn", "cat", "-r", str(revision), remote_path]) + return run_command(["svn", "cat", "-r", revision, remote_path], decode_output=False) def diff_for_revision(self, revision): # FIXME: This should probably use cwd=self.checkout_root - return run_command(['svn', 'diff', '-c', str(revision)]) + return run_command(['svn', 'diff', '-c', revision]) def _repository_url(self): return self.value_from_svn_info(self.checkout_root, 'URL') @@ -389,7 +396,12 @@ class SVN(SCM): # FIXME: This should probably use cwd=self.checkout_root. run_command(['svn', 'revert'] + file_paths) - def commit_with_message(self, message, username=None): + def should_squash(self, squash): + # SVN doesn't support the concept of squashing. + return False + + def commit_with_message(self, message, username=None, git_commit=None, squash=None): + # squash and git-commit are not used by SVN. if self.dryrun: # Return a string which looks like a commit so that things which parse this output will succeed. return "Dry run, no commit.\nCommitted revision 0." @@ -405,7 +417,7 @@ class SVN(SCM): return run_command(svn_commit_args, error_handler=commit_error_handler) def svn_commit_log(self, svn_revision): - svn_revision = self.strip_r_from_svn_revision(str(svn_revision)) + svn_revision = self.strip_r_from_svn_revision(svn_revision) return run_command(['svn', 'log', '--non-interactive', '--revision', svn_revision]); def last_svn_commit_log(self): @@ -466,6 +478,7 @@ class Git(SCM): def status_command(self): # git status returns non-zero when there are changes, so we use git diff name --name-status HEAD instead. + # No file contents printed, thus utf-8 autodecoding in run_command is fine. return ["git", "diff", "--name-status", "HEAD"] def _status_regexp(self, expected_types): @@ -475,8 +488,24 @@ class Git(SCM): # path is assumed to be cwd relative? run_command(["git", "add", path]) - def changed_files(self): - status_command = ['git', 'diff', '-r', '--name-status', '-C', '-M', 'HEAD'] + def _merge_base(self, git_commit, squash): + if git_commit: + # FIXME: Calling code should turn commit ranges into a list of commit IDs + # and then treat each commit separately. + if '..' 
not in git_commit: + git_commit = git_commit + "^.." + git_commit + return git_commit + + if self.should_squash(squash): + return self.svn_merge_base() + + # FIXME: Non-squash behavior should match commit_with_message. It raises an error + # if there are working copy changes and --squash or --no-squash wasn't passed in. + # If --no-squash, then it should proceed with each local commit as a separate patch. + return 'HEAD' + + def changed_files(self, git_commit=None, squash=None): + status_command = ['git', 'diff', '-r', '--name-status', '-C', '-M', "--no-ext-diff", "--full-index", self._merge_base(git_commit, squash)] return self.run_status_and_extract_filenames(status_command, self._status_regexp("ADM")) def _changes_files_for_commit(self, git_commit): @@ -490,6 +519,8 @@ return self._changes_files_for_commit(commit_id) def conflicted_files(self): + # We do not need to pass decode_output for this diff command + # as we're passing --name-status which does not output any file contents. status_command = ['git', 'diff', '--name-status', '-C', '-M', '--diff-filter=U'] return self.run_status_and_extract_filenames(status_command, self._status_regexp("U")) @@ -503,9 +534,12 @@ def display_name(self): return "git" - def create_patch(self): + def create_patch(self, git_commit=None, squash=None): + """Returns a byte array (str()) representing the patch file. + Patch files are effectively binary since they may contain + files of multiple different encodings.""" # FIXME: This should probably use cwd=self.checkout_root - return run_command(['git', 'diff', '--binary', 'HEAD']) + return run_command(['git', 'diff', '--binary', "--no-ext-diff", "--full-index", "-M", self._merge_base(git_commit, squash)], decode_output=False) @classmethod def git_commit_from_svn_revision(cls, revision): @@ -517,11 +551,13 @@ return git_commit def contents_at_revision(self, path, revision): - return run_command(["git", "show", "%s:%s" % (self.git_commit_from_svn_revision(revision), path)]) + """Returns a byte array (str()) containing the contents + of path @ revision in the repository.""" + return run_command(["git", "show", "%s:%s" % (self.git_commit_from_svn_revision(revision), path)], decode_output=False) def diff_for_revision(self, revision): git_commit = self.git_commit_from_svn_revision(revision) - return self.create_patch_from_local_commit(git_commit) + return self.create_patch(git_commit) def committer_email_for_revision(self, revision): git_commit = self.git_commit_from_svn_revision(revision) @@ -538,11 +574,100 @@ def revert_files(self, file_paths): run_command(['git', 'checkout', 'HEAD'] + file_paths) - def commit_with_message(self, message, username=None): + def should_squash(self, squash): + if squash is not None: + # Squash is specified on the command-line. + return squash + + config_squash = Git.read_git_config('webkit-patch.squash') + if (config_squash and config_squash != ""): + return config_squash.lower() == "true" + + # Only raise an error if there are actually multiple commits to squash. + num_local_commits = len(self.local_commits()) + if num_local_commits > 1 or num_local_commits > 0 and not self.working_directory_is_clean(): + working_directory_message = "" if self.working_directory_is_clean() else " and working copy changes" + raise ScriptError(message="""There are %s local commits%s. 
Do one of the following: +1) Use --squash or --no-squash +2) git config webkit-patch.squash true/false +""" % (num_local_commits, working_directory_message)) + + return None + + def commit_with_message(self, message, username=None, git_commit=None, squash=None): # Username is ignored during Git commits. - self.commit_locally_with_message(message) + if git_commit: + # Need working directory changes to be committed so we can check out the merge branch. + if not self.working_directory_is_clean(): + # FIXME: webkit-patch land will modify the ChangeLogs to correct the reviewer. + # That will modify the working-copy and cause us to hit this error. + # The ChangeLog modification could be made to modify the existing local commit? + raise ScriptError(message="Working copy is modified. Cannot commit individual git_commits.") + return self._commit_on_branch(message, git_commit) + + squash = self.should_squash(squash) + if squash: + run_command(['git', 'reset', '--soft', self.svn_branch_name()]) + self.commit_locally_with_message(message) + elif not self.working_directory_is_clean(): + if not len(self.local_commits()): + # There are only working copy changes. Assume they should be committed. + self.commit_locally_with_message(message) + elif squash is None: + # The user didn't explicitly say to squash or not squash. There are local commits + # and working copy changes. Not clear what the user wants. + raise ScriptError(message="""There are local commits and working copy changes. Do one of the following: +1) Commit/revert working copy changes. +2) Use --squash or --no-squash +3) git config webkit-patch.squash true/false +""") + + # FIXME: This will commit all local commits, each with its own message. We should restructure + # so that each local commit has the appropriate commit message based off its ChangeLogs. return self.push_local_commits_to_server() + def _commit_on_branch(self, message, git_commit): + branch_ref = run_command(['git', 'symbolic-ref', 'HEAD']).strip() + branch_name = branch_ref.replace('refs/heads/', '') + commit_ids = self.commit_ids_from_commitish_arguments([git_commit]) + + # We want to squash all this branch's commits into one commit with the proper description. + # We do this by doing a "merge --squash" into a new commit branch, then dcommitting that. + MERGE_BRANCH = 'webkit-patch-land' + self.delete_branch(MERGE_BRANCH) + + # We might be in a directory that's present in this branch but not in the + # trunk. Move up to the top of the tree so that git commands that expect a + # valid CWD won't fail after we check out the merge branch. + os.chdir(self.checkout_root) + + # Stuff our change into the merge branch. + # We wrap in a try...finally block so if anything goes wrong, we clean up the branches. + commit_succeeded = True + try: + run_command(['git', 'checkout', '-q', '-b', MERGE_BRANCH, self.svn_branch_name()]) + + for commit in commit_ids: + # We're on a different branch now, so convert "head" to the branch name. + commit = re.sub(r'(?i)head', branch_name, commit) + # FIXME: Once changed_files and create_patch are modified to separately handle each + # commit in a commit range, commit each cherry pick so they'll get dcommitted separately. + run_command(['git', 'cherry-pick', '--no-commit', commit]) + + run_command(['git', 'commit', '-m', message]) + output = self.push_local_commits_to_server() + except Exception, e: + log("COMMIT FAILED: " + str(e)) + output = "Commit failed." + commit_succeeded = False + finally: + # And then swap back to the original branch and clean up. 
+ self.clean_working_directory() + run_command(['git', 'checkout', '-q', branch_name]) + self.delete_branch(MERGE_BRANCH) + + return output + def svn_commit_log(self, svn_revision): svn_revision = self.strip_r_from_svn_revision(svn_revision) return run_command(['git', 'svn', 'log', '-r', svn_revision]) @@ -560,13 +685,9 @@ return run_command(['git', 'merge-base', self.svn_branch_name(), 'HEAD']).strip() def svn_branch_name(self): - return Git.read_git_config('svn-remote.svn.fetch').split(':')[1] - - def create_patch_from_local_commit(self, commit_id): - return run_command(['git', 'diff', '--binary', commit_id + "^.." + commit_id]) - - def create_patch_since_local_commit(self, commit_id): - return run_command(['git', 'diff', '--binary', commit_id]) + # FIXME: This should do something like: Git.read_git_config('svn-remote.svn.fetch').split(':')[1] + # but that doesn't work if the git repo is tracking multiple svn branches. + return 'trunk' def commit_locally_with_message(self, message): run_command(['git', 'commit', '--all', '-F', '-'], input=message) diff --git a/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py b/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py index c0a64d4..5a2c094 100644 --- a/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/checkout/scm_unittest.py @@ -27,7 +27,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import with_statement + import base64 +import codecs import getpass import os import os.path @@ -49,23 +52,39 @@ from webkitpy.common.system.executive import Executive, run_command, ScriptError # Perhaps through some SCMTest base-class which both SVNTest and GitTest inherit from. # FIXME: This should be unified into one of the executive.py commands! +# Callers could use run_and_throw_if_fail(args, cwd=cwd, quiet=True) def run_silent(args, cwd=None): + # Note: Not thread safe: http://bugs.python.org/issue2320 process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd) process.communicate() # ignore output exit_code = process.wait() if exit_code: raise ScriptError('Failed to run "%s" exit_code: %d cwd: %s' % (args, exit_code, cwd)) -def write_into_file_at_path(file_path, contents): - file = open(file_path, 'w') - file.write(contents) - file.close() -def read_from_path(file_path): - file = open(file_path, 'r') - contents = file.read() - file.close() - return contents +def write_into_file_at_path(file_path, contents, encoding="utf-8"): + with codecs.open(file_path, "w", encoding) as file: + file.write(contents) + + +def read_from_path(file_path, encoding="utf-8"): + with codecs.open(file_path, "r", encoding) as file: + return file.read() + + +def _make_diff(command, *args): + # We use this wrapper to disable output decoding. Diffs should be treated as + # binary files since they may include text files of multiple different encodings. 
+ return run_command([command, "diff"] + list(args), decode_output=False) + + +def _svn_diff(*args): + return _make_diff("svn", *args) + + +def _git_diff(*args): + return _make_diff("git", *args) + # Exists to share svn repository creation code between the git and svn tests class SVNTestRepository: @@ -103,7 +122,11 @@ class SVNTestRepository: cls._svn_add("test_file2") cls._svn_commit("third commit") - write_into_file_at_path("test_file", "test1test2test3\ntest4\n") + # This 4th commit is used to make sure that our patch file handling + # code correctly treats patches as binary and does not attempt to + # decode them assuming they're utf-8. + write_into_file_at_path("test_file", u"latin1 test: \u00A0\n", "latin1") + write_into_file_at_path("test_file2", u"utf-8 test: \u00A0\n", "utf-8") cls._svn_commit("fourth commit") # svn does not seem to update after commit as I would expect. @@ -122,6 +145,19 @@ class SVNTestRepository: test_object.svn_checkout_path = tempfile.mkdtemp(suffix="svn_test_checkout") run_command(['svn', 'checkout', '--quiet', test_object.svn_repo_url, test_object.svn_checkout_path]) + # Create and checkout a trunk dir to match the standard svn configuration to match git-svn's expectations + os.chdir(test_object.svn_checkout_path) + os.mkdir('trunk') + cls._svn_add('trunk') + # We can add tags and branches as well if we ever need to test those. + cls._svn_commit('add trunk') + + # Change directory out of the svn checkout so we can delete the checkout directory. + # _setup_test_commits will CD back to the svn checkout directory. + os.chdir('/') + run_command(['rm', '-rf', test_object.svn_checkout_path]) + run_command(['svn', 'checkout', '--quiet', test_object.svn_repo_url + '/trunk', test_object.svn_checkout_path]) + cls._setup_test_commits(test_object) @classmethod @@ -181,15 +217,12 @@ svn: resource out of date; try updating # GitTest and SVNTest inherit from this so any test_ methods here will be run once for this class and then once for each subclass. class SCMTest(unittest.TestCase): def _create_patch(self, patch_contents): - patch_path = os.path.join(self.svn_checkout_path, 'patch.diff') - write_into_file_at_path(patch_path, patch_contents) - patch = {} - patch['bug_id'] = '12345' - patch['url'] = 'file://%s' % urllib.pathname2url(patch_path) + # FIXME: This code is brittle if the Attachment API changes. + attachment = Attachment({"bug_id": 12345}, None) + attachment.contents = lambda: patch_contents - attachment = Attachment(patch, None) # FIXME: This is a hack, scm.py shouldn't be fetching attachment data. joe_cool = Committer(name="Joe Cool", email_or_emails=None) - attachment._reviewer = joe_cool + attachment.reviewer = lambda: joe_cool return attachment @@ -202,15 +235,6 @@ class SCMTest(unittest.TestCase): # Tests which both GitTest and SVNTest should run. 
# FIXME: There must be a simpler way to add these w/o adding a wrapper method to both subclasses - def _shared_test_commit_with_message(self, username="dbates@webkit.org"): - write_into_file_at_path('test_file', 'more test content') - commit_text = self.scm.commit_with_message("another test commit", username) - self.assertEqual(self.scm.svn_revision_from_commit_text(commit_text), '5') - - self.scm.dryrun = True - write_into_file_at_path('test_file', 'still more test content') - commit_text = self.scm.commit_with_message("yet another test commit", username) - self.assertEqual(self.scm.svn_revision_from_commit_text(commit_text), '0') def _shared_test_changed_files(self): write_into_file_at_path("test_file", "changed content") @@ -248,19 +272,22 @@ class SCMTest(unittest.TestCase): def _shared_test_changed_files_for_revision(self): # SVN reports directory changes, Git does not. - changed_files = self.scm.changed_files_for_revision(2) + changed_files = self.scm.changed_files_for_revision(3) if "test_dir" in changed_files: changed_files.remove("test_dir") self.assertEqual(changed_files, ["test_dir/test_file3", "test_file"]) - self.assertEqual(sorted(self.scm.changed_files_for_revision(3)), sorted(["test_file", "test_file2"])) # Git and SVN return different orders. - self.assertEqual(self.scm.changed_files_for_revision(4), ["test_file"]) + self.assertEqual(sorted(self.scm.changed_files_for_revision(4)), sorted(["test_file", "test_file2"])) # Git and SVN return different orders. + self.assertEqual(self.scm.changed_files_for_revision(2), ["test_file"]) def _shared_test_contents_at_revision(self): - self.assertEqual(self.scm.contents_at_revision("test_file", 2), "test1test2") - self.assertEqual(self.scm.contents_at_revision("test_file", 3), "test1test2test3\n") - self.assertEqual(self.scm.contents_at_revision("test_file", 4), "test1test2test3\ntest4\n") + self.assertEqual(self.scm.contents_at_revision("test_file", 3), "test1test2") + self.assertEqual(self.scm.contents_at_revision("test_file", 4), "test1test2test3\n") + + # Verify that contents_at_revision returns a byte array, aka str(): + self.assertEqual(self.scm.contents_at_revision("test_file", 5), u"latin1 test: \u00A0\n".encode("latin1")) + self.assertEqual(self.scm.contents_at_revision("test_file2", 5), u"utf-8 test: \u00A0\n".encode("utf-8")) - self.assertEqual(self.scm.contents_at_revision("test_file2", 3), "second file") + self.assertEqual(self.scm.contents_at_revision("test_file2", 4), "second file") # Files which don't exist: # Currently we raise instead of returning None because detecting the difference between # "file not found" and any other error seems impossible with svn (git seems to expose such through the return code). @@ -268,21 +295,21 @@ class SCMTest(unittest.TestCase): self.assertRaises(ScriptError, self.scm.contents_at_revision, "does_not_exist", 2) def _shared_test_committer_email_for_revision(self): - self.assertEqual(self.scm.committer_email_for_revision(2), getpass.getuser()) # Committer "email" will be the current user + self.assertEqual(self.scm.committer_email_for_revision(3), getpass.getuser()) # Committer "email" will be the current user def _shared_test_reverse_diff(self): self._setup_webkittools_scripts_symlink(self.scm) # Git's apply_reverse_diff uses resolve-ChangeLogs # Only test the simple case, as any other will end up with conflict markers. 
- self.scm.apply_reverse_diff('4') + self.scm.apply_reverse_diff('5') self.assertEqual(read_from_path('test_file'), "test1test2test3\n") def _shared_test_diff_for_revision(self): # Patch formats are slightly different between svn and git, so just regexp for things we know should be there. - r3_patch = self.scm.diff_for_revision(3) + r3_patch = self.scm.diff_for_revision(4) self.assertTrue(re.search('test3', r3_patch)) self.assertFalse(re.search('test4', r3_patch)) self.assertTrue(re.search('test2', r3_patch)) - self.assertTrue(re.search('test2', self.scm.diff_for_revision(2))) + self.assertTrue(re.search('test2', self.scm.diff_for_revision(3))) def _shared_test_svn_apply_git_patch(self): self._setup_webkittools_scripts_symlink(self.scm) @@ -308,7 +335,7 @@ HcmV?d00001 """ self.checkout.apply_patch(self._create_patch(git_binary_addition)) - added = read_from_path('fizzbuzz7.gif') + added = read_from_path('fizzbuzz7.gif', encoding=None) self.assertEqual(512, len(added)) self.assertTrue(added.startswith('GIF89a')) self.assertTrue('fizzbuzz7.gif' in self.scm.changed_files()) @@ -336,7 +363,7 @@ ptUl-ZG<%a~#LwkIWv&q!KSCH7tQ8cJDiw+|GV?MN)RjY50RTb-xvT&H """ self.checkout.apply_patch(self._create_patch(git_binary_modification)) - modified = read_from_path('fizzbuzz7.gif') + modified = read_from_path('fizzbuzz7.gif', encoding=None) self.assertEqual('foobar\n', modified) self.assertTrue('fizzbuzz7.gif' in self.scm.changed_files()) @@ -473,6 +500,12 @@ class SVNTest(SCMTest): def tearDown(self): SVNTestRepository.tear_down(self) + def test_detect_scm_system_relative_url(self): + scm = detect_scm_system(".") + # I wanted to assert that we got the right path, but there was some + # crazy magic with temp folder names that I couldn't figure out. + self.assertTrue(scm.checkout_root) + def test_create_patch_is_full_patch(self): test_dir_path = os.path.join(self.svn_checkout_path, "test_dir2") os.mkdir(test_dir_path) @@ -518,25 +551,35 @@ Q1dTBx0AAAB42itg4GlgYJjGwMDDyODMxMDw34GBgQEAJPQDJA== self._setup_webkittools_scripts_symlink(self.scm) patch_file = self._create_patch(patch_contents) self.checkout.apply_patch(patch_file) - actual_contents = read_from_path("test_file.swf") + actual_contents = read_from_path("test_file.swf", encoding=None) self.assertEqual(actual_contents, expected_contents) def test_apply_svn_patch(self): scm = detect_scm_system(self.svn_checkout_path) - patch = self._create_patch(run_command(['svn', 'diff', '-r4:3'])) + patch = self._create_patch(_svn_diff("-r5:4")) self._setup_webkittools_scripts_symlink(scm) Checkout(scm).apply_patch(patch) def test_apply_svn_patch_force(self): scm = detect_scm_system(self.svn_checkout_path) - patch = self._create_patch(run_command(['svn', 'diff', '-r2:4'])) + patch = self._create_patch(_svn_diff("-r3:5")) self._setup_webkittools_scripts_symlink(scm) self.assertRaises(ScriptError, Checkout(scm).apply_patch, patch, force=True) def test_commit_logs(self): # Commits have dates and usernames in them, so we can't just direct compare. 
self.assertTrue(re.search('fourth commit', self.scm.last_svn_commit_log())) - self.assertTrue(re.search('second commit', self.scm.svn_commit_log(2))) + self.assertTrue(re.search('second commit', self.scm.svn_commit_log(3))) + + def _shared_test_commit_with_message(self, username=None): + write_into_file_at_path('test_file', 'more test content') + commit_text = self.scm.commit_with_message("another test commit", username) + self.assertEqual(self.scm.svn_revision_from_commit_text(commit_text), '6') + + self.scm.dryrun = True + write_into_file_at_path('test_file', 'still more test content') + commit_text = self.scm.commit_with_message("yet another test commit", username) + self.assertEqual(self.scm.svn_revision_from_commit_text(commit_text), '0') def test_commit_text_parsing(self): self._shared_test_commit_with_message() @@ -595,7 +638,7 @@ class GitTest(SCMTest): def _setup_git_clone_of_svn_repository(self): self.git_checkout_path = tempfile.mkdtemp(suffix="git_test_checkout") # --quiet doesn't make git svn silent, so we use run_silent to redirect output - run_silent(['git', 'svn', '--quiet', 'clone', self.svn_repo_url, self.git_checkout_path]) + run_silent(['git', 'svn', 'clone', '-T', 'trunk', self.svn_repo_url, self.git_checkout_path]) def _tear_down_git_clone_of_svn_repository(self): run_command(['rm', '-rf', self.git_checkout_path]) @@ -657,8 +700,8 @@ class GitTest(SCMTest): test_file = os.path.join(self.git_checkout_path, 'test_file') write_into_file_at_path(test_file, 'foo') - diff_to_common_base = run_command(['git', 'diff', self.scm.svn_branch_name() + '..']) - diff_to_merge_base = run_command(['git', 'diff', self.scm.svn_merge_base()]) + diff_to_common_base = _git_diff(self.scm.svn_branch_name() + '..') + diff_to_merge_base = _git_diff(self.scm.svn_merge_base()) self.assertFalse(re.search(r'foo', diff_to_common_base)) self.assertTrue(re.search(r'foo', diff_to_merge_base)) @@ -711,18 +754,126 @@ class GitTest(SCMTest): # We carefully pick a diff which does not have a directory addition # as currently svn-apply will error out when trying to remove directories # in Git: https://bugs.webkit.org/show_bug.cgi?id=34871 - patch = self._create_patch(run_command(['git', 'diff', 'HEAD..HEAD^'])) + patch = self._create_patch(_git_diff('HEAD..HEAD^')) self._setup_webkittools_scripts_symlink(scm) Checkout(scm).apply_patch(patch) def test_apply_git_patch_force(self): scm = detect_scm_system(self.git_checkout_path) - patch = self._create_patch(run_command(['git', 'diff', 'HEAD~2..HEAD'])) + patch = self._create_patch(_git_diff('HEAD~2..HEAD')) self._setup_webkittools_scripts_symlink(scm) self.assertRaises(ScriptError, Checkout(scm).apply_patch, patch, force=True) def test_commit_text_parsing(self): - self._shared_test_commit_with_message() + write_into_file_at_path('test_file', 'more test content') + self.scm.commit_locally_with_message("another test commit") + commit_text = self.scm.commit_with_message("another test commit") + self.assertEqual(self.scm.svn_revision_from_commit_text(commit_text), '6') + + self.scm.dryrun = True + write_into_file_at_path('test_file', 'still more test content') + self.scm.commit_locally_with_message("yet another test commit") + commit_text = self.scm.commit_with_message("yet another test commit") + self.assertEqual(self.scm.svn_revision_from_commit_text(commit_text), '0') + + def _one_local_commit_plus_working_copy_changes(self): + write_into_file_at_path('test_file_commit1', 'more test content') + run_command(['git', 'add', 'test_file_commit1']) + 
self.scm.commit_locally_with_message("another test commit") + + write_into_file_at_path('test_file_commit2', 'still more test content') + run_command(['git', 'add', 'test_file_commit2']) + + def test_commit_with_message_working_copy_only(self): + write_into_file_at_path('test_file_commit1', 'more test content') + run_command(['git', 'add', 'test_file_commit1']) + scm = detect_scm_system(self.git_checkout_path) + commit_text = scm.commit_with_message("yet another test commit") + + self.assertEqual(scm.svn_revision_from_commit_text(commit_text), '6') + svn_log = run_command(['git', 'svn', 'log', '--limit=1', '--verbose']) + self.assertTrue(re.search(r'test_file_commit1', svn_log)) + + def test_commit_with_message_squashed(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + commit_text = scm.commit_with_message("yet another test commit", squash=True) + + self.assertEqual(scm.svn_revision_from_commit_text(commit_text), '6') + svn_log = run_command(['git', 'svn', 'log', '--limit=1', '--verbose']) + self.assertTrue(re.search(r'test_file_commit2', svn_log)) + self.assertTrue(re.search(r'test_file_commit1', svn_log)) + + def _two_local_commits(self): + write_into_file_at_path('test_file_commit1', 'more test content') + run_command(['git', 'add', 'test_file_commit1']) + self.scm.commit_locally_with_message("another test commit") + + write_into_file_at_path('test_file_commit2', 'still more test content') + run_command(['git', 'add', 'test_file_commit2']) + self.scm.commit_locally_with_message("yet another test commit") + + def test_commit_with_message_git_commit(self): + self._two_local_commits() + + scm = detect_scm_system(self.git_checkout_path) + commit_text = scm.commit_with_message("another test commit", git_commit="HEAD^") + self.assertEqual(scm.svn_revision_from_commit_text(commit_text), '6') + + svn_log = run_command(['git', 'svn', 'log', '--limit=1', '--verbose']) + self.assertTrue(re.search(r'test_file_commit1', svn_log)) + self.assertFalse(re.search(r'test_file_commit2', svn_log)) + + def test_commit_with_message_git_commit_range(self): + self._two_local_commits() + + scm = detect_scm_system(self.git_checkout_path) + commit_text = scm.commit_with_message("another test commit", git_commit="HEAD~2..HEAD") + self.assertEqual(scm.svn_revision_from_commit_text(commit_text), '6') + + svn_log = run_command(['git', 'svn', 'log', '--limit=1', '--verbose']) + self.assertTrue(re.search(r'test_file_commit1', svn_log)) + self.assertTrue(re.search(r'test_file_commit2', svn_log)) + + def test_commit_with_message_multiple_local_commits(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.commit_with_message, ["another test commit"]) + + def test_commit_with_message_multiple_local_commits_and_working_copy(self): + self._two_local_commits() + write_into_file_at_path('test_file_commit1', 'working copy change') + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.commit_with_message, ["another test commit"]) + + def test_commit_with_message_git_commit_and_working_copy(self): + self._two_local_commits() + write_into_file_at_path('test_file_commit1', 'working copy change') + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.commit_with_message, ["another test commit", 'git_commit="HEAD^"']) + + def test_commit_with_message_multiple_local_commits_no_squash(self): + self._two_local_commits() + scm = 
detect_scm_system(self.git_checkout_path) + commit_text = scm.commit_with_message("yet another test commit", squash=False) + self.assertEqual(scm.svn_revision_from_commit_text(commit_text), '6') + + svn_log = run_command(['git', 'svn', 'log', '--limit=1', '--verbose']) + self.assertTrue(re.search(r'test_file_commit2', svn_log)) + self.assertFalse(re.search(r'test_file_commit1', svn_log)) + + svn_log = run_command(['git', 'svn', 'log', '--limit=2', '--verbose']) + self.assertTrue(re.search(r'test_file_commit1', svn_log)) + + def test_commit_with_message_multiple_local_commits_squash(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + commit_text = scm.commit_with_message("yet another test commit", squash=True) + self.assertEqual(scm.svn_revision_from_commit_text(commit_text), '6') + + svn_log = run_command(['git', 'svn', 'log', '--limit=1', '--verbose']) + self.assertTrue(re.search(r'test_file_commit2', svn_log)) + self.assertTrue(re.search(r'test_file_commit1', svn_log)) def test_reverse_diff(self): self._shared_test_reverse_diff() @@ -733,13 +884,66 @@ class GitTest(SCMTest): def test_svn_apply_git_patch(self): self._shared_test_svn_apply_git_patch() + def test_create_patch_local_plus_working_copy(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.create_patch) + + def test_create_patch_multiple_local_commits(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.create_patch) + + def test_create_patch_squashed(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + patch = scm.create_patch(squash=True) + self.assertTrue(re.search(r'test_file_commit2', patch)) + self.assertTrue(re.search(r'test_file_commit1', patch)) + + def test_create_patch_not_squashed(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + patch = scm.create_patch(squash=False) + self.assertTrue(re.search(r'test_file_commit2', patch)) + self.assertFalse(re.search(r'test_file_commit1', patch)) + + def test_create_patch_git_commit(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + patch = scm.create_patch(git_commit="HEAD^") + self.assertTrue(re.search(r'test_file_commit1', patch)) + self.assertFalse(re.search(r'test_file_commit2', patch)) + + def test_create_patch_git_commit_range(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + patch = scm.create_patch(git_commit="HEAD~2..HEAD") + self.assertTrue(re.search(r'test_file_commit2', patch)) + self.assertTrue(re.search(r'test_file_commit1', patch)) + + def test_create_patch_multiple_local_commits_no_squash(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + patch = scm.create_patch(squash=False) + # FIXME: It's weird that with squash=False, create_patch/changed_files ignores local commits, + # but commit_with_message commits them. + self.assertTrue(patch == "") + + def test_create_patch_multiple_local_commits_squash(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + patch = scm.create_patch(squash=True) + self.assertTrue(re.search(r'test_file_commit2', patch)) + self.assertTrue(re.search(r'test_file_commit1', patch)) + def test_create_binary_patch(self): # Create a git binary patch and check the contents. 
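The create_patch tests above pin down three diff bases: squash=True takes everything since the SVN merge base (local commits plus working copy), squash=False takes HEAD (working copy only), and git_commit selects one commit or an explicit range. Reduced to the underlying git invocation chosen by _merge_base (a sketch of the behavior, not the webkitpy code itself):

    # Base fed to 'git diff' by create_patch/changed_files:
    #   squash=True        -> merge base of trunk and HEAD (all local work)
    #   squash=False       -> 'HEAD'                       (working copy only)
    #   git_commit='C'     -> 'C^..C'                      (that one commit)
    #   git_commit='A..B'  -> 'A..B'                       (the given range)
    def create_patch_command(base):
        return ['git', 'diff', '--binary', '--no-ext-diff',
                '--full-index', '-M', base]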
scm = detect_scm_system(self.git_checkout_path) test_file_name = 'binary_file' test_file_path = os.path.join(self.git_checkout_path, test_file_name) file_contents = ''.join(map(chr, range(256))) - write_into_file_at_path(test_file_path, file_contents) + write_into_file_at_path(test_file_path, file_contents, encoding=None) run_command(['git', 'add', test_file_name]) patch = scm.create_patch() self.assertTrue(re.search(r'\nliteral 0\n', patch)) @@ -749,19 +953,68 @@ class GitTest(SCMTest): run_command(['git', 'rm', '-f', test_file_name]) self._setup_webkittools_scripts_symlink(scm) self.checkout.apply_patch(self._create_patch(patch)) - self.assertEqual(file_contents, read_from_path(test_file_path)) + self.assertEqual(file_contents, read_from_path(test_file_path, encoding=None)) # Check if we can create a patch from a local commit. - write_into_file_at_path(test_file_path, file_contents) + write_into_file_at_path(test_file_path, file_contents, encoding=None) run_command(['git', 'add', test_file_name]) run_command(['git', 'commit', '-m', 'binary diff']) - patch_from_local_commit = scm.create_patch_from_local_commit('HEAD') + patch_from_local_commit = scm.create_patch('HEAD') self.assertTrue(re.search(r'\nliteral 0\n', patch_from_local_commit)) self.assertTrue(re.search(r'\nliteral 256\n', patch_from_local_commit)) - patch_since_local_commit = scm.create_patch_since_local_commit('HEAD^1') - self.assertTrue(re.search(r'\nliteral 0\n', patch_since_local_commit)) - self.assertTrue(re.search(r'\nliteral 256\n', patch_since_local_commit)) - self.assertEqual(patch_from_local_commit, patch_since_local_commit) + + def test_changed_files_local_plus_working_copy(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.changed_files) + + def test_changed_files_multiple_local_commits(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + self.assertRaises(ScriptError, scm.changed_files) + + def test_changed_files_squashed(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + files = scm.changed_files(squash=True) + self.assertTrue('test_file_commit2' in files) + self.assertTrue('test_file_commit1' in files) + + def test_changed_files_not_squashed(self): + self._one_local_commit_plus_working_copy_changes() + scm = detect_scm_system(self.git_checkout_path) + files = scm.changed_files(squash=False) + self.assertTrue('test_file_commit2' in files) + self.assertFalse('test_file_commit1' in files) + + def test_changed_files_git_commit(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + files = scm.changed_files(git_commit="HEAD^") + self.assertTrue('test_file_commit1' in files) + self.assertFalse('test_file_commit2' in files) + + def test_changed_files_git_commit_range(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + files = scm.changed_files(git_commit="HEAD~2..HEAD") + self.assertTrue('test_file_commit1' in files) + self.assertTrue('test_file_commit2' in files) + + def test_changed_files_multiple_local_commits_no_squash(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + files = scm.changed_files(squash=False) + # FIXME: It's weird that with squash=False, create_patch/changed_files ignores local commits, + # but commit_with_message commits them. 
+ self.assertTrue(len(files) == 0) + + def test_changed_files_multiple_local_commits_squash(self): + self._two_local_commits() + scm = detect_scm_system(self.git_checkout_path) + files = scm.changed_files(squash=True) + self.assertTrue('test_file_commit2' in files) + self.assertTrue('test_file_commit1' in files) def test_changed_files(self): self._shared_test_changed_files() diff --git a/WebKitTools/Scripts/webkitpy/common/config/committers.py b/WebKitTools/Scripts/webkitpy/common/config/committers.py index a92dbd3..56887ab 100644 --- a/WebKitTools/Scripts/webkitpy/common/config/committers.py +++ b/WebKitTools/Scripts/webkitpy/common/config/committers.py @@ -86,7 +86,6 @@ committers_unable_to_review = [ Committer("Carol Szabo", "carol.szabo@nokia.com"), Committer("Chang Shu", "Chang.Shu@nokia.com"), Committer("Chris Fleizach", "cfleizach@apple.com"), - Committer("Chris Jerdonek", "cjerdonek@webkit.org", "cjerdonek"), Committer("Chris Marrin", "cmarrin@apple.com", "cmarrin"), Committer("Chris Petersen", "cpetersen@apple.com", "cpetersen"), Committer("Christian Dywan", ["christian@twotoasts.de", "christian@webkit.org"]), @@ -94,6 +93,7 @@ committers_unable_to_review = [ Committer("Csaba Osztrogonac", "ossy@webkit.org", "ossy"), Committer("David Smith", ["catfish.man@gmail.com", "dsmith@webkit.org"], "catfishman"), Committer("Dean Jackson", "dino@apple.com", "dino"), + Committer("Diego Gonzalez", ["diegohcg@webkit.org", "diego.gonzalez@openbossa.org"], "diegohcg"), Committer("Dirk Pranke", "dpranke@chromium.org"), Committer("Drew Wilson", "atwilson@chromium.org", "atwilson"), Committer("Dumitru Daniliuc", "dumi@chromium.org", "dumi"), @@ -101,6 +101,8 @@ committers_unable_to_review = [ Committer("Enrica Casucci", "enrica@apple.com"), Committer("Erik Arvidsson", "arv@chromium.org", "arv"), Committer("Eric Roman", "eroman@chromium.org", "eroman"), + Committer("Evan Martin", "evan@chromium.org", "evmar"), + Committer("Evan Stade", "estade@chromium.org", "estade"), Committer("Feng Qian", "feng@chromium.org"), Committer("Fumitoshi Ukai", "ukai@chromium.org", "ukai"), Committer("Gabor Loki", "loki@webkit.org", "loki04"), @@ -153,13 +155,13 @@ committers_unable_to_review = [ Committer("Ryosuke Niwa", "rniwa@webkit.org", "rniwa"), Committer("Scott Violet", "sky@chromium.org", "sky"), Committer("Stephen White", "senorblanco@chromium.org", "senorblanco"), - Committer("Steve Block", "steveblock@google.com"), Committer("Tony Chang", "tony@chromium.org", "tony^work"), Committer("Trey Matteson", "trey@usa.net", "trey"), Committer("Tristan O'Tierney", ["tristan@otierney.net", "tristan@apple.com"]), Committer("Victor Wang", "victorw@chromium.org"), Committer("Vitaly Repeshko", "vitalyr@chromium.org"), Committer("William Siegrist", "wsiegrist@apple.com", "wms"), + Committer("Xiaomei Ji", "xji@chromium.org", "xji"), Committer("Yael Aharon", "yael.aharon@nokia.com"), Committer("Yaar Schnitman", ["yaar@chromium.org", "yaar@google.com"]), Committer("Yong Li", ["yong.li@torchmobile.com", "yong.li.webkit@gmail.com"], "yong"), @@ -191,6 +193,7 @@ reviewers_list = [ Reviewer("Brady Eidson", "beidson@apple.com", "bradee-oh"), Reviewer("Cameron Zwarich", ["zwarich@apple.com", "cwzwarich@apple.com", "cwzwarich@webkit.org"]), Reviewer("Chris Blumenberg", "cblu@apple.com", "cblu"), + Reviewer("Chris Jerdonek", "cjerdonek@webkit.org", "cjerdonek"), Reviewer("Dan Bernstein", ["mitz@webkit.org", "mitz@apple.com"], "mitzpettel"), Reviewer("Daniel Bates", "dbates@webkit.org", "dydz"), Reviewer("Darin Adler", "darin@apple.com", 
"darin"), @@ -237,10 +240,11 @@ reviewers_list = [ Reviewer("Simon Fraser", "simon.fraser@apple.com", "smfr"), Reviewer("Simon Hausmann", ["hausmann@webkit.org", "hausmann@kde.org", "simon.hausmann@nokia.com"], "tronical"), Reviewer("Stephanie Lewis", "slewis@apple.com", "sundiamonde"), + Reviewer("Steve Block", "steveblock@google.com", "steveblock"), Reviewer("Steve Falkenburg", "sfalken@apple.com", "sfalken"), Reviewer("Tim Omernick", "timo@apple.com"), Reviewer("Timothy Hatcher", ["timothy@hatcher.name", "timothy@apple.com"], "xenon"), - Reviewer(u'Tor Arne Vestb\xf8', "vestbo@webkit.org", "torarne"), + Reviewer(u"Tor Arne Vestb\u00f8", "vestbo@webkit.org", "torarne"), Reviewer("Vicki Murley", "vicki@apple.com"), Reviewer("Xan Lopez", ["xan.lopez@gmail.com", "xan@gnome.org", "xan@webkit.org"], "xan"), Reviewer("Yury Semikhatsky", "yurys@chromium.org", "yurys"), diff --git a/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py b/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py index 6920d67..4311a00 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py +++ b/WebKitTools/Scripts/webkitpy/common/net/bugzilla.py @@ -32,7 +32,7 @@ import os.path import re -import subprocess +import StringIO from datetime import datetime # used in timestamp() @@ -116,6 +116,10 @@ class Attachment(object): # depends on the current behavior. return self._attachment_dictionary.get("url") + def contents(self): + # FIXME: We shouldn't be grabbing at _bugzilla. + return self._bug._bugzilla.fetch_attachment_contents(self.id()) + def _validate_flag_value(self, flag): email = self._attachment_dictionary.get("%s_email" % flag) if not email: @@ -427,7 +431,16 @@ class Bugzilla(object): if flag['status'] == '+': attachment[result_key] = flag['setter'] + def _string_contents(self, soup): + # WebKit's bugzilla instance uses UTF-8. + # BeautifulSoup always returns Unicode strings, however + # the .string method returns a (unicode) NavigableString. + # NavigableString can confuse other parts of the code, so we + # convert from NavigableString to a real unicode() object using unicode(). + return unicode(soup.string) + def _parse_attachment_element(self, element, bug_id): + attachment = {} attachment['bug_id'] = bug_id attachment['is_obsolete'] = (element.has_key('isobsolete') and element['isobsolete'] == "1") @@ -435,9 +448,9 @@ class Bugzilla(object): attachment['id'] = int(element.find('attachid').string) # FIXME: No need to parse out the url here. 
attachment['url'] = self.attachment_url_for_id(attachment['id']) - attachment['name'] = unicode(element.find('desc').string) - attachment['attacher_email'] = str(element.find('attacher').string) - attachment['type'] = str(element.find('type').string) + attachment['name'] = self._string_contents(element.find('desc')) + attachment['attacher_email'] = self._string_contents(element.find('attacher')) + attachment['type'] = self._string_contents(element.find('type')) self._parse_attachment_flag( element, 'review', attachment, 'reviewer_email') self._parse_attachment_flag( @@ -448,10 +461,10 @@ class Bugzilla(object): soup = BeautifulSoup(page) bug = {} bug["id"] = int(soup.find("bug_id").string) - bug["title"] = unicode(soup.find("short_desc").string) - bug["reporter_email"] = str(soup.find("reporter").string) - bug["assigned_to_email"] = str(soup.find("assigned_to").string) - bug["cc_emails"] = [str(element.string) + bug["title"] = self._string_contents(soup.find("short_desc")) + bug["reporter_email"] = self._string_contents(soup.find("reporter")) + bug["assigned_to_email"] = self._string_contents(soup.find("assigned_to")) + bug["cc_emails"] = [self._string_contents(element) for element in soup.findAll('cc')] bug["attachments"] = [self._parse_attachment_element(element, bug["id"]) for element in soup.findAll('attachment')] return bug @@ -476,6 +489,12 @@ class Bugzilla(object): def fetch_bug(self, bug_id): return Bug(self.fetch_bug_dictionary(bug_id), self) + def fetch_attachment_contents(self, attachment_id): + attachment_url = self.attachment_url_for_id(attachment_id) + # We need to authenticate to download patches from security bugs. + self.authenticate() + return self.browser.open(attachment_url).read() + def _parse_bug_id_from_attachment_page(self, page): # The "Up" relation happens to point to the bug. up_link = BeautifulSoup(page).find('link', rel='Up') @@ -568,6 +587,7 @@ class Bugzilla(object): patch_name = "bug-%s-%s.patch" % (bug_id, timestamp()) else: patch_name ="%s.patch" % timestamp() + self.browser.add_file(patch_file_object, "text/plain", patch_name, @@ -575,7 +595,7 @@ class Bugzilla(object): def add_patch_to_bug(self, bug_id, - patch_file_object, + diff, description, comment_text=None, mark_for_review=False, @@ -594,6 +614,11 @@ class Bugzilla(object): self.browser.open("%sattachment.cgi?action=enter&bugid=%s" % ( self.bug_server_url, bug_id)) self.browser.select_form(name="entryform") + + # _fill_attachment_form expects a file-like object + # Patch files are already binary, so no encoding needed. + assert(isinstance(diff, str)) + patch_file_object = StringIO.StringIO(diff) self._fill_attachment_form(description, patch_file_object, mark_for_review=mark_for_review, @@ -628,7 +653,7 @@ class Bugzilla(object): bug_title, bug_description, component=None, - patch_file_object=None, + diff=None, patch_description=None, cc=None, blocked=None, @@ -653,11 +678,15 @@ class Bugzilla(object): if cc: self.browser["cc"] = cc if blocked: - self.browser["blocked"] = str(blocked) + self.browser["blocked"] = unicode(blocked) self.browser["short_desc"] = bug_title self.browser["comment"] = bug_description - if patch_file_object: + if diff: + # _fill_attachment_form expects a file-like object + # Patch files are already binary, so no encoding needed. 
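Elaborating the two comment lines above (a sketch of the calling convention, not bugzilla.py itself): callers now hand over the raw diff bytes, and the file-like object mechanize needs is built at the last possible moment.

    import StringIO

    diff = "Index: test_file\n..."                # raw bytes from scm.create_patch()
    assert isinstance(diff, str)                  # never unicode: one patch can mix encodings
    patch_file_object = StringIO.StringIO(diff)   # file-like wrapper for the upload form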
+ assert(isinstance(diff, str)) + patch_file_object = StringIO.StringIO(diff) self._fill_attachment_form( patch_description, patch_file_object, diff --git a/WebKitTools/Scripts/webkitpy/common/net/buildbot.py b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py index 753e909..6c6ed43 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/buildbot.py +++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py @@ -44,7 +44,7 @@ _log = get_logger(__file__) class Builder(object): def __init__(self, name, buildbot): - self._name = unicode(name) + self._name = name self._buildbot = buildbot self._builds_cache = {} self._revision_to_build_number = None @@ -223,12 +223,12 @@ class LayoutTestResults(object): parsed_results = {} tables = BeautifulSoup(page).findAll("table") for table in tables: - table_title = table.findPreviousSibling("p").string + table_title = unicode(table.findPreviousSibling("p").string) if table_title not in cls.expected_keys: # This Exception should only ever be hit if run-webkit-tests changes its results.html format. - raise Exception("Unhandled title: %s" % str(table_title)) + raise Exception("Unhandled title: %s" % table_title) # We might want to translate table titles into identifiers before storing. - parsed_results[table_title] = [row.find("a").string for row in table.findAll("tr")] + parsed_results[table_title] = [unicode(row.find("a").string) for row in table.findAll("tr")] return parsed_results @@ -319,7 +319,6 @@ class BuildBot(object): "Leopard", "Tiger", "Windows.*Build", - "Windows.*Debug.*Test", "GTK", "Qt", "Chromium", @@ -361,7 +360,7 @@ class BuildBot(object): # First cell is the name name_link = status_cells[0].find('a') - builder["name"] = name_link.string + builder["name"] = unicode(name_link.string) self._parse_last_build_cell(builder, status_cells[1]) self._parse_current_build_cell(builder, status_cells[2]) @@ -410,13 +409,13 @@ class BuildBot(object): return urllib2.urlopen(build_status_url) def _parse_twisted_file_row(self, file_row): - string_or_empty = lambda string: str(string) if string else "" + string_or_empty = lambda soup: unicode(soup.string) if soup.string else u"" file_cells = file_row.findAll('td') return { - "filename" : string_or_empty(file_cells[0].find("a").string), - "size" : string_or_empty(file_cells[1].string), - "type" : string_or_empty(file_cells[2].string), - "encoding" : string_or_empty(file_cells[3].string), + "filename": string_or_empty(file_cells[0].find("a")), + "size": string_or_empty(file_cells[1]), + "type": string_or_empty(file_cells[2]), + "encoding": string_or_empty(file_cells[3]), } def _parse_twisted_directory_listing(self, page): diff --git a/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py b/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py index f765f6e..5e04745 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot_unittest.py @@ -51,7 +51,7 @@ class BuilderTest(unittest.TestCase): def setUp(self): self.buildbot = BuildBot() - self.builder = Builder("Test Builder", self.buildbot) + self.builder = Builder(u"Test Builder \u2661", self.buildbot) self._install_fetch_build(lambda build_number: ["test1", "test2"]) def test_find_failure_transition(self): @@ -271,7 +271,6 @@ class BuildBotTest(unittest.TestCase): "Leopard", "Tiger", "Windows.*Build", - "Windows.*Debug.*Test", "GTK", "Qt", "Chromium", @@ -286,7 +285,6 @@ class BuildBotTest(unittest.TestCase): {'name': u'SnowLeopard Intel Release (Tests)', }, {'name': u'Windows 
Release (Build)', }, {'name': u'Windows Debug (Build)', }, - {'name': u'Windows Debug (Tests)', }, {'name': u'GTK Linux 32-bit Release', }, {'name': u'GTK Linux 32-bit Debug', }, {'name': u'GTK Linux 64-bit Debug', }, diff --git a/WebKitTools/Scripts/webkitpy/common/net/rietveld.py b/WebKitTools/Scripts/webkitpy/common/net/rietveld.py index a9e5b1a..9cc97f2 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/rietveld.py +++ b/WebKitTools/Scripts/webkitpy/common/net/rietveld.py @@ -26,6 +26,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import logging import os import re import stat @@ -50,23 +51,19 @@ class Rietveld(object): def __init__(self, executive, dryrun=False): self.dryrun = dryrun self._executive = executive - self._upload_py = upload.__file__ - # Chop off the last character so we modify permissions on the py file instead of the pyc. - if os.path.splitext(self._upload_py)[1] == ".pyc": - self._upload_py = self._upload_py[:-1] - os.chmod(self._upload_py, os.stat(self._upload_py).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) def url_for_issue(self, codereview_issue): if not codereview_issue: return None return "%s%s" % (config.codereview_server_url, codereview_issue) - def post(self, message=None, codereview_issue=None, cc=None): + def post(self, diff, message=None, codereview_issue=None, cc=None): if not message: raise ScriptError("Rietveld requires a message.") args = [ - self._upload_py, + # First argument is empty string to mimic sys.argv. + "", "--assume_yes", "--server=%s" % config.codereview_server_host, "--message=%s" % message, @@ -80,10 +77,15 @@ class Rietveld(object): log("Would have run %s" % args) return - output = self._executive.run_and_throw_if_fail(args) - match = re.search("Issue created\. URL: " + - config.codereview_server_regex + - "(?P<codereview_issue>\d+)", - output) - if match: - return int(match.group('codereview_issue')) + # Set logging level to avoid rietveld's logging spew. + old_level_name = logging.getLogger().getEffectiveLevel() + logging.getLogger().setLevel(logging.ERROR) + + # Use RealMain instead of calling upload from the commandline so that + # we can pass in the diff ourselves. Otherwise, upload will just use + # git diff for git checkouts, which doesn't respect --squash and --git-commit. + issue, patchset = upload.RealMain(args[1:], data=diff) + + # Reset logging level to the original value. 
+ logging.getLogger().setLevel(old_level_name) + return issue diff --git a/WebKitTools/Scripts/webkitpy/common/net/statusserver.py b/WebKitTools/Scripts/webkitpy/common/net/statusserver.py index e8987a9..d9b52a2 100644 --- a/WebKitTools/Scripts/webkitpy/common/net/statusserver.py +++ b/WebKitTools/Scripts/webkitpy/common/net/statusserver.py @@ -52,9 +52,9 @@ class StatusServer: if not patch: return if patch.bug_id(): - self.browser["bug_id"] = str(patch.bug_id()) + self.browser["bug_id"] = unicode(patch.bug_id()) if patch.id(): - self.browser["patch_id"] = str(patch.id()) + self.browser["patch_id"] = unicode(patch.id()) def _add_results_file(self, results_file): if not results_file: @@ -79,7 +79,7 @@ update_svn_revision_url = "%s/update-svn-revision" % self.url self.browser.open(update_svn_revision_url) self.browser.select_form(name="update_svn_revision") - self.browser["number"] = str(svn_revision_number) + self.browser["number"] = unicode(svn_revision_number) self.browser["broken_bot"] = broken_bot return self.browser.submit().read() diff --git a/WebKitTools/Scripts/webkitpy/common/prettypatch.py b/WebKitTools/Scripts/webkitpy/common/prettypatch.py index 8157f9c..4e92a53 100644 --- a/WebKitTools/Scripts/webkitpy/common/prettypatch.py +++ b/WebKitTools/Scripts/webkitpy/common/prettypatch.py @@ -31,11 +31,15 @@ import tempfile class PrettyPatch(object): + # FIXME: PrettyPatch should not require checkout_root. def __init__(self, executive, checkout_root): self._executive = executive self._checkout_root = checkout_root def pretty_diff_file(self, diff): + # Diffs can contain multiple text files of different encodings + # so we always deal with them as byte arrays, not unicode strings. + assert(isinstance(diff, str)) pretty_diff = self.pretty_diff(diff) diff_file = tempfile.NamedTemporaryFile(suffix=".html") diff_file.write(pretty_diff) @@ -43,6 +47,11 @@ return diff_file def pretty_diff(self, diff): + # prettify.rb will hang forever if given no input. + # Avoid the hang by returning an empty string. + if not diff: + return "" + pretty_patch_path = os.path.join(self._checkout_root, "BugsSite", "PrettyPatch") prettify_path = os.path.join(pretty_patch_path, "prettify.rb") @@ -52,4 +61,6 @@ pretty_patch_path, prettify_path, ] - return self._executive.run_command(args, input=diff) + # PrettyPatch does not modify the encoding of the diff output + # so we can't expect it to be utf-8. + return self._executive.run_command(args, input=diff, decode_output=False) diff --git a/WebKitTools/Scripts/webkitpy/common/prettypatch_unittest.py b/WebKitTools/Scripts/webkitpy/common/prettypatch_unittest.py new file mode 100644 index 0000000..1307856 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/common/prettypatch_unittest.py @@ -0,0 +1,70 @@ +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. 
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os.path +import unittest + +from webkitpy.common.system.executive import Executive +from webkitpy.common.prettypatch import PrettyPatch + + +class PrettyPatchTest(unittest.TestCase): + + _diff_with_multiple_encodings = """ +Index: utf8_test +=================================================================== +--- utf8_test\t(revision 0) ++++ utf8_test\t(revision 0) +@@ -0,0 +1 @@ ++utf-8 test: \xc2\xa0 +Index: latin1_test +=================================================================== +--- latin1_test\t(revision 0) ++++ latin1_test\t(revision 0) +@@ -0,0 +1 @@ ++latin1 test: \xa0 +""" + + def _webkit_root(self): + webkitpy_common = os.path.dirname(__file__) + webkitpy = os.path.dirname(webkitpy_common) + scripts = os.path.dirname(webkitpy) + webkit_tools = os.path.dirname(scripts) + webkit_root = os.path.dirname(webkit_tools) + return webkit_root + + def test_pretty_diff_encodings(self): + pretty_patch = PrettyPatch(Executive(), self._webkit_root()) + pretty = pretty_patch.pretty_diff(self._diff_with_multiple_encodings) + self.assertTrue(pretty) # We got some output + self.assertTrue(isinstance(pretty, str)) # It's a byte array, not unicode + + def test_pretty_print_empty_string(self): + # Make sure that an empty diff does not hang the process. + pretty_patch = PrettyPatch(Executive(), self._webkit_root()) + self.assertEqual(pretty_patch.pretty_diff(""), "") diff --git a/WebKitTools/Scripts/webkitpy/common/system/autoinstall.py b/WebKitTools/Scripts/webkitpy/common/system/autoinstall.py index 32fd2cf..9adab29 100755 --- a/WebKitTools/Scripts/webkitpy/common/system/autoinstall.py +++ b/WebKitTools/Scripts/webkitpy/common/system/autoinstall.py @@ -30,6 +30,10 @@ """Support for automatically downloading Python packages from an URL.""" + +from __future__ import with_statement + +import codecs import logging import new import os @@ -114,7 +118,7 @@ class AutoInstaller(object): os.makedirs(path) - def _write_file(self, path, text): + def _write_file(self, path, text, encoding): """Create a file at the given path with given text. This method overwrites any existing file. 
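The autoinstaller hunks around this point move its file I/O onto the same codecs-plus-with-statement idiom used throughout this patch, with every call site naming its encoding explicitly. A condensed sketch of the pattern (mirroring, not quoting, the patched code):

    from __future__ import with_statement
    import codecs

    def _write_file(path, text, encoding):
        # The with-statement closes the handle even if write() raises.
        with codecs.open(path, "w", encoding) as file:
            file.write(text)

    # The package marker is plain ASCII; the cached URL stamp is UTF-8.
    _write_file("__init__.py", u"# This file is required for Python to search "
                u"this directory for modules.\n", "ascii")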
@@ -122,11 +126,8 @@ class AutoInstaller(object): """ _log.debug("Creating file...") _log.debug(' "%s"' % path) - file = open(path, "w") - try: + with codecs.open(path, "w", encoding) as file: file.write(text) - finally: - file.close() def _set_up_target_dir(self, target_dir, append_to_search_path, make_package): @@ -154,7 +155,7 @@ class AutoInstaller(object): if not os.path.exists(init_path): text = ("# This file is required for Python to search this " "directory for modules.\n") - self._write_file(init_path, text) + self._write_file(init_path, text, "ascii") def _create_scratch_directory_inner(self, prefix): """Create a scratch directory without exception handling. @@ -216,11 +217,8 @@ class AutoInstaller(object): _log.debug("No URL file found.") return False - file = open(version_path, "r") - try: + with codecs.open(version_path, "r", "utf-8") as file: version = file.read() - finally: - file.close() return version.strip() == url.strip() @@ -231,7 +229,7 @@ class AutoInstaller(object): _log.debug(' URL: "%s"' % url) _log.debug(' To: "%s"' % version_path) - self._write_file(version_path, url) + self._write_file(version_path, url, "utf-8") def _extract_targz(self, path, scratch_dir): # tarfile.extractall() extracts to a path without the @@ -284,6 +282,8 @@ class AutoInstaller(object): # Otherwise, it is a file. try: + # We open this file w/o encoding, as we're reading/writing + # the raw byte-stream from the zip file. outfile = open(path, 'wb') except IOError, err: # Not all zip files seem to list the directories explicitly, @@ -384,9 +384,8 @@ class AutoInstaller(object): self._log_transfer("Starting download...", url, target_path) - stream = file(target_path, "wb") - bytes = self._download_to_stream(url, stream) - stream.close() + with open(target_path, "wb") as stream: + bytes = self._download_to_stream(url, stream) _log.debug("Downloaded %s bytes." % bytes) diff --git a/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging.py b/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging.py index ba1c5eb..9e6b529 100644 --- a/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging.py +++ b/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging.py @@ -30,24 +30,30 @@ # WebKit's Python module for logging # This module is now deprecated in favor of python's built-in logging.py. +import codecs import os import sys + def log(string): print >> sys.stderr, string + def error(string): log("ERROR: %s" % string) exit(1) + # Simple class to split output between multiple destinations class tee: def __init__(self, *files): self.files = files - def write(self, string): + # Callers should pass an already encoded string for writing. 
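+ # A usage sketch (illustrative only, not part of this module's API):
+ # tee(sys.stdout, sys.stderr).write(u"WebKit \u2661".encode("utf-8"))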
+ def write(self, bytes):
 for file in self.files:
- file.write(string)
+ file.write(bytes)
+
 class OutputTee:
 def __init__(self):
@@ -71,7 +77,7 @@ class OutputTee:
 (log_directory, log_name) = os.path.split(log_path)
 if log_directory and not os.path.exists(log_directory):
 os.makedirs(log_directory)
- return open(log_path, 'a+')
+ return codecs.open(log_path, "a+", "utf-8")
 def _tee_outputs_to_files(self, files):
 if not self._original_stdout:
diff --git a/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging_unittest.py b/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging_unittest.py
index 2b71803..3778162 100644
--- a/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/common/system/deprecated_logging_unittest.py
@@ -27,7 +27,6 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import os
-import subprocess
 import StringIO
 import tempfile
 import unittest
diff --git a/WebKitTools/Scripts/webkitpy/common/system/executive.py b/WebKitTools/Scripts/webkitpy/common/system/executive.py
index b6126e4..11eb051 100644
--- a/WebKitTools/Scripts/webkitpy/common/system/executive.py
+++ b/WebKitTools/Scripts/webkitpy/common/system/executive.py
@@ -87,10 +87,20 @@ def run_command(*args, **kwargs):
 class Executive(object):
+ def _should_close_fds(self):
+ # We need to pass close_fds=True to work around Python bug #2320
+ # (otherwise we can hang when we kill DumpRenderTree when we are running
+ # multiple threads). See http://bugs.python.org/issue2320 .
+ # Note that close_fds isn't supported on Windows, but this bug only
+ # shows up on Mac and Linux.
+ return sys.platform not in ('win32', 'cygwin')
+
 def _run_command_with_teed_output(self, args, teed_output):
+ args = map(unicode, args) # Popen will throw an exception if args are non-strings (like int())
 child_process = subprocess.Popen(args,
 stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
+ stderr=subprocess.STDOUT,
+ close_fds=self._should_close_fds())
 # Use our own custom wait loop because Popen ignores a tee'd
 # stderr/stdout.
@@ -98,15 +108,24 @@ class Executive(object):
 while True:
 output_line = child_process.stdout.readline()
 if output_line == "" and child_process.poll() != None:
+ # poll() is not threadsafe and can throw OSError due to:
+ # http://bugs.python.org/issue1731717
 return child_process.poll()
+ # We assume that the child process wrote to us in utf-8,
+ # so no re-encoding is necessary before writing here.
 teed_output.write(output_line)
- def run_and_throw_if_fail(self, args, quiet=False):
+ # FIXME: Remove this deprecated method and move callers to run_command.
+ # FIXME: This method is a hack to allow running commands which both
+ # capture their output and print out to stdout. Useful for things
+ # like "build-webkit" where we want to display to the user that we're building
+ # but still have the output to stuff into a log file.
+ def run_and_throw_if_fail(self, args, quiet=False, decode_output=True):
 # Cache the child's output locally so it can be used for error reports.
 child_out_file = StringIO.StringIO()
 tee_stdout = sys.stdout
 if quiet:
- dev_null = open(os.devnull, "w")
+ dev_null = open(os.devnull, "w") # FIXME: Does this need an encoding?
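+ # In quiet mode the live output is discarded, but child_out_file
+ # above still captures everything for the error report below.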
 tee_stdout = dev_null
 child_stdout = tee(child_out_file, tee_stdout)
 exit_code = self._run_command_with_teed_output(args, child_stdout)
@@ -116,6 +135,10 @@ class Executive(object):
 child_output = child_out_file.getvalue()
 child_out_file.close()
+
+ # We assume the child process output utf-8
+ if decode_output:
+ child_output = child_output.decode("utf-8")
+
 if exit_code:
 raise ScriptError(script_args=args,
 exit_code=exit_code,
@@ -140,17 +163,39 @@ class Executive(object):
 return 2
 def kill_process(self, pid):
+ """Attempts to kill the given pid.
+ Will fail silently if pid does not exist or permissions are insufficient."""
 if platform.system() == "Windows":
 # According to http://docs.python.org/library/os.html
 # os.kill isn't available on Windows. However, when I tried it
 # using Cygwin, it worked fine. We should investigate whether
 # we need this platform specific code here.
- subprocess.call(('taskkill.exe', '/f', '/pid', str(pid)),
- stdin=open(os.devnull, 'r'),
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ command = ["taskkill.exe", "/f", "/pid", str(pid)]
+ # taskkill will exit 128 if the process is not found.
+ self.run_command(command, error_handler=self.ignore_error)
 return
- os.kill(pid, signal.SIGKILL)
+ try:
+ os.kill(pid, signal.SIGKILL)
+ except OSError, e:
+ # FIXME: We should make non-silent failure an option.
+ pass
+
+ def kill_all(self, process_name):
+ """Attempts to kill processes matching process_name.
+ Will fail silently if no processes are found."""
+ if platform.system() == "Windows":
+ # We might want to automatically append .exe?
+ command = ["taskkill.exe", "/f", "/im", process_name]
+ # taskkill will exit 128 if the process is not found.
+ self.run_command(command, error_handler=self.ignore_error)
+ return
+
+ # FIXME: It is inconsistent that kill_all uses TERM and kill_process
+ # uses KILL. Windows is always using /f (which seems like -KILL).
+ # We should pick one mode, or add support for switching between them.
+ # Note: Mac OS X 10.6 requires -SIGNALNAME before -u USER
+ command = ["killall", "-TERM", "-u", os.getenv("USER"), process_name]
+ self.run_command(command, error_handler=self.ignore_error)
 # Error handlers do not need to be static methods once all callers are
 # updated to use an Executive object.
@@ -163,38 +208,51 @@ class Executive(object):
 def ignore_error(error):
 pass
- # FIXME: This should be merged with run_and_throw_if_fail
+ def _compute_stdin(self, input):
+ """Returns (stdin, string_to_communicate)"""
+ # FIXME: We should be returning /dev/null for stdin
+ # or closing stdin after process creation to prevent
+ # child processes from getting input from the user.
+ if not input:
+ return (None, None)
+ if hasattr(input, "read"): # Check if the input is a file.
+ return (input, None) # Assume the file is in the right encoding.
+
+ # Popen in Python 2.5 and before does not automatically encode unicode objects.
+ # http://bugs.python.org/issue5290
+ # See https://bugs.webkit.org/show_bug.cgi?id=37528
+ # for an example of a regression caused by passing a unicode string directly.
+ # FIXME: We may need to encode differently on different platforms.
+ if isinstance(input, unicode):
+ input = input.encode("utf-8")
+ return (subprocess.PIPE, input)
+ # FIXME: run_and_throw_if_fail should be merged into this method.
 def run_command(self,
 args,
 cwd=None,
 input=None,
 error_handler=None,
 return_exit_code=False,
- return_stderr=True):
- if hasattr(input, 'read'): # Check if the input is a file.
- stdin = input - string_to_communicate = None - else: - stdin = None - if input: - stdin = subprocess.PIPE - # string_to_communicate seems to need to be a str for proper - # communication with shell commands. - # See https://bugs.webkit.org/show_bug.cgi?id=37528 - # For an example of a regresion caused by passing a unicode string through. - string_to_communicate = str(input) - if return_stderr: - stderr = subprocess.STDOUT - else: - stderr = None + return_stderr=True, + decode_output=True): + """Popen wrapper for convenience and to work around python bugs.""" + args = map(unicode, args) # Popen will throw an exception if args are non-strings (like int()) + stdin, string_to_communicate = self._compute_stdin(input) + stderr = subprocess.STDOUT if return_stderr else None process = subprocess.Popen(args, stdin=stdin, stdout=subprocess.PIPE, stderr=stderr, - cwd=cwd) + cwd=cwd, + close_fds=self._should_close_fds()) output = process.communicate(string_to_communicate)[0] + # run_command automatically decodes to unicode() unless explicitly told not to. + if decode_output: + output = output.decode("utf-8") + # wait() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 exit_code = process.wait() if return_exit_code: diff --git a/WebKitTools/Scripts/webkitpy/common/system/executive_unittest.py b/WebKitTools/Scripts/webkitpy/common/system/executive_unittest.py index ac380f8..ce91269 100644 --- a/WebKitTools/Scripts/webkitpy/common/system/executive_unittest.py +++ b/WebKitTools/Scripts/webkitpy/common/system/executive_unittest.py @@ -1,4 +1,4 @@ -# Copyright (C) 2009 Google Inc. All rights reserved. +# Copyright (C) 2010 Google Inc. All rights reserved. # Copyright (C) 2009 Daniel Bates (dbates@intudata.com). All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,10 +27,14 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import signal +import subprocess +import sys import unittest from webkitpy.common.system.executive import Executive, run_command + class ExecutiveTest(unittest.TestCase): def test_run_command_with_bad_command(self): @@ -38,5 +42,52 @@ class ExecutiveTest(unittest.TestCase): run_command(["foo_bar_command_blah"], error_handler=Executive.ignore_error, return_exit_code=True) self.failUnlessRaises(OSError, run_bad_command) -if __name__ == '__main__': - unittest.main() + def test_run_command_with_unicode(self): + """Validate that it is safe to pass unicode() objects + to Executive.run* methods, and they will return unicode() + objects by default unless decode_output=False""" + executive = Executive() + unicode_tor = u"WebKit \u2661 Tor Arne Vestb\u00F8!" + utf8_tor = unicode_tor.encode("utf-8") + + output = executive.run_command(["cat"], input=unicode_tor) + self.assertEquals(output, unicode_tor) + + output = executive.run_command(["echo", "-n", unicode_tor]) + self.assertEquals(output, unicode_tor) + + output = executive.run_command(["echo", "-n", unicode_tor], decode_output=False) + self.assertEquals(output, utf8_tor) + + # Make sure that str() input also works. 
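+ # (a pre-encoded byte string is passed to the child unchanged, and
+ # decode_output=False returns the raw bytes back)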
+ output = executive.run_command(["cat"], input=utf8_tor, decode_output=False) + self.assertEquals(output, utf8_tor) + + # FIXME: We should only have one run* method to test + output = executive.run_and_throw_if_fail(["echo", "-n", unicode_tor], quiet=True) + self.assertEquals(output, unicode_tor) + + output = executive.run_and_throw_if_fail(["echo", "-n", unicode_tor], quiet=True, decode_output=False) + self.assertEquals(output, utf8_tor) + + def test_kill_process(self): + executive = Executive() + # FIXME: This may need edits to work right on windows. + # We use "yes" because it loops forever. + process = subprocess.Popen(["yes"], stdout=subprocess.PIPE) + self.assertEqual(process.poll(), None) # Process is running + executive.kill_process(process.pid) + self.assertEqual(process.wait(), -signal.SIGKILL) + # Killing again should fail silently. + executive.kill_process(process.pid) + + def test_kill_all(self): + executive = Executive() + # FIXME: This may need edits to work right on windows. + # We use "yes" because it loops forever. + process = subprocess.Popen(["yes"], stdout=subprocess.PIPE) + self.assertEqual(process.poll(), None) # Process is running + executive.kill_all("yes") + self.assertEqual(process.wait(), -signal.SIGTERM) + # Killing again should fail silently. + executive.kill_all("yes") diff --git a/WebKitTools/Scripts/webkitpy/common/system/user.py b/WebKitTools/Scripts/webkitpy/common/system/user.py index 076f965..64995bb 100644 --- a/WebKitTools/Scripts/webkitpy/common/system/user.py +++ b/WebKitTools/Scripts/webkitpy/common/system/user.py @@ -62,11 +62,13 @@ class User(object): def edit(self, files): editor = os.environ.get("EDITOR") or "vi" args = shlex.split(editor) + # Note: Not thread safe: http://bugs.python.org/issue2320 subprocess.call(args + files) def page(self, message): pager = os.environ.get("PAGER") or "less" try: + # Note: Not thread safe: http://bugs.python.org/issue2320 child_process = subprocess.Popen([pager], stdin=subprocess.PIPE) child_process.communicate(input=message) except IOError, e: diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/driver_test.py b/WebKitTools/Scripts/webkitpy/layout_tests/driver_test.py index 231ed70..633dfe8 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/driver_test.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/driver_test.py @@ -42,7 +42,8 @@ import port def run_tests(port, options, tests): # |image_path| is a path to the image capture from the driver. image_path = 'image_result.png' - driver = port.start_driver(image_path, None) + driver = port.create_driver(image_path, None) + driver.start() for t in tests: uri = port.filename_to_uri(os.path.join(port.layout_tests_dir(), t)) print "uri: " + uri @@ -58,6 +59,7 @@ def run_tests(port, options, tests): print ''.join(err) print '"""' print + driver.stop() if __name__ == '__main__': diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/dump_render_tree_thread.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/dump_render_tree_thread.py index e61d11f..6957fcd 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/dump_render_tree_thread.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/dump_render_tree_thread.py @@ -35,6 +35,9 @@ the output. When there are no more URLs to process in the shared queue, the thread exits. 
""" +from __future__ import with_statement + +import codecs import copy import logging import os @@ -89,10 +92,10 @@ def process_output(port, test_info, test_types, test_args, configuration, test_info.filename)) filename = os.path.splitext(filename)[0] + "-stack.txt" port.maybe_make_directory(os.path.split(filename)[0]) - open(filename, "wb").write(error) # FIXME: This leaks a file handle. + with codecs.open(filename, "wb", "utf-8") as file: + file.write(error) elif error: - _log.debug("Previous test output extra lines after dump:\n%s" % - error) + _log.debug("Previous test output stderr lines:\n%s" % error) # Check the output and save the results. start_time = time.time() @@ -152,7 +155,8 @@ class SingleTestThread(threading.Thread): def run(self): test_info = self._test_info - driver = self._port.start_driver(self._image_path, self._shell_args) + driver = self._port.create_driver(self._image_path, self._shell_args) + driver.start() start = time.time() crash, timeout, actual_checksum, output, error = \ driver.run_test(test_info.uri.strip(), test_info.timeout, @@ -290,7 +294,7 @@ class TestShellThread(threading.Thread): # This is created in run_webkit_tests.py:_PrepareListsAndPrintOutput. tests_run_filename = os.path.join(self._options.results_directory, "tests_run.txt") - tests_run_file = open(tests_run_filename, "a") + tests_run_file = codecs.open(tests_run_filename, "a", "utf-8") while True: if self._canceled: @@ -443,9 +447,11 @@ class TestShellThread(threading.Thread): a separate DumpRenderTree in their own thread. """ + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 if (not self._driver or self._driver.poll() is not None): - self._driver = self._port.start_driver( - self._image_path, self._shell_args) + self._driver = self._port.create_driver(self._image_path, self._shell_args) + self._driver.start() def _kill_dump_render_tree(self): """Kill the DumpRenderTree process if it's running.""" diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py index 6263540..0993cbd 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/json_results_generator.py @@ -27,6 +27,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import with_statement + +import codecs import logging import os import subprocess @@ -118,10 +121,11 @@ class JSONResultsGenerator(object): """Generates the JSON output file.""" json = self._get_json() if json: - results_file = open(self._results_file_path, "w") + results_file = codecs.open(self._results_file_path, "w", "utf-8") results_file.write(json) results_file.close() + # FIXME: Callers should use scm.py instead. def _get_svn_revision(self, in_directory): """Returns the svn revision for the given directory. @@ -129,6 +133,7 @@ class JSONResultsGenerator(object): in_directory: The directory where svn is to be run. 
""" if os.path.exists(os.path.join(in_directory, '.svn')): + # Note: Not thread safe: http://bugs.python.org/issue2320 output = subprocess.Popen(["svn", "info", "--xml"], cwd=in_directory, shell=(sys.platform == 'win32'), @@ -151,8 +156,8 @@ class JSONResultsGenerator(object): error = None if os.path.exists(self._results_file_path): - old_results_file = open(self._results_file_path, "r") - old_results = old_results_file.read() + with codecs.open(self._results_file_path, "r", "utf-8") as file: + old_results = file.read() elif self._builder_base_url: # Check if we have the archived JSON file on the buildbot server. results_file_url = (self._builder_base_url + diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream.py index 930b9e4..9c42d73 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream.py @@ -32,6 +32,9 @@ Package that implements a stream wrapper that has 'meters' as well as regular output. A 'meter' is a single line of text that can be erased and rewritten repeatedly, without producing multiple lines of output. It can be used to produce effects like progress bars. + +This package should only be called by the printing module in the layout_tests +package. """ import logging @@ -41,18 +44,38 @@ _log = logging.getLogger("webkitpy.layout_tests.metered_stream") class MeteredStream: """This class is a wrapper around a stream that allows you to implement - meters. - - It can be used like a stream, but calling update() will print - the string followed by only a carriage return (instead of a carriage - return and a line feed). This can be used to implement progress bars and - other sorts of meters. Note that anything written by update() will be - erased by a subsequent update(), write(), or flush().""" + meters (progress bars, etc.). + + It can be used directly as a stream, by calling write(), but provides + two other methods for output, update(), and progress(). + + In normal usage, update() will overwrite the output of the immediately + preceding update() (write() also will overwrite update()). So, calling + multiple update()s in a row can provide an updating status bar (note that + if an update string contains newlines, only the text following the last + newline will be overwritten/erased). + + If the MeteredStream is constructed in "verbose" mode (i.e., by passing + verbose=true), then update() no longer overwrite a previous update(), and + instead the call is equivalent to write(), although the text is + actually sent to the logger rather than to the stream passed + to the constructor. + + progress() is just like update(), except that if you are in verbose mode, + progress messages are not output at all (they are dropped). This is + used for things like progress bars which are presumed to be unwanted in + verbose mode. + + Note that the usual usage for this class is as a destination for + a logger that can also be written to directly (i.e., some messages go + through the logger, some don't). We thus have to dance around a + layering inversion in update() for things to work correctly. 
+ """ def __init__(self, verbose, stream): """ Args: - verbose: whether update is a no-op + verbose: whether progress is a no-op and updates() aren't overwritten stream: output stream to write to """ self._dirty = False @@ -63,9 +86,11 @@ class MeteredStream: def write(self, txt): """Write to the stream, overwriting and resetting the meter.""" if self._dirty: - self.update("") + self._write(txt) self._dirty = False - self._stream.write(txt) + self._last_update = '' + else: + self._stream.write(txt) def flush(self): """Flush any buffered output.""" @@ -111,10 +136,13 @@ class MeteredStream: # Print the necessary number of backspaces to erase the previous # message. - self._stream.write("\b" * len(self._last_update)) - self._stream.write(str) + if len(self._last_update): + self._stream.write("\b" * len(self._last_update)) + if len(str): + self._stream.write(str) num_remaining = len(self._last_update) - len(str) if num_remaining > 0: self._stream.write(" " * num_remaining + "\b" * num_remaining) - self._last_update = str + last_newline = str.rfind("\n") + self._last_update = str[(last_newline + 1):] self._dirty = True diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream_unittest.py new file mode 100644 index 0000000..926f9b3 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/metered_stream_unittest.py @@ -0,0 +1,106 @@ +#!/usr/bin/python +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+"""Unit tests for metered_stream.py."""
+
+import os
+import optparse
+import pdb
+import sys
+import unittest
+import logging
+
+from webkitpy.common.array_stream import ArrayStream
+from webkitpy.layout_tests.layout_package import metered_stream
+
+
+class TestMeteredStream(unittest.TestCase):
+ def test_regular(self):
+ a = ArrayStream()
+ m = metered_stream.MeteredStream(verbose=False, stream=a)
+ self.assertTrue(a.empty())
+
+ # basic test - note that the flush() is a no-op, but we include it
+ # for coverage.
+ m.write("foo")
+ m.flush()
+ self.assertEquals(a.get(), ['foo'])
+
+ # now check that a second write() does not overwrite the first.
+ m.write("bar")
+ self.assertEquals(a.get(), ['foo', 'bar'])
+
+ m.update("batter")
+ self.assertEquals(a.get(), ['foo', 'bar', 'batter'])
+
+ # The next update() should overwrite the last update() but not the
+ # other text. Note that the cursor is effectively positioned at the
+ # end of 'foo', even though we had to erase three more characters.
+ m.update("foo")
+ self.assertEquals(a.get(), ['foo', 'bar', 'batter', '\b\b\b\b\b\b',
+ 'foo', ' \b\b\b'])
+
+ m.progress("progress")
+ self.assertEquals(a.get(), ['foo', 'bar', 'batter', '\b\b\b\b\b\b',
+ 'foo', ' \b\b\b', '\b\b\b', 'progress'])
+
+ # now check that a write() does overwrite the progress bar
+ m.write("foo")
+ self.assertEquals(a.get(), ['foo', 'bar', 'batter', '\b\b\b\b\b\b',
+ 'foo', ' \b\b\b', '\b\b\b', 'progress',
+ '\b\b\b\b\b\b\b\b',
+ 'foo', ' \b\b\b\b\b'])
+
+ # Now test that we only back up to the most recent newline.
+
+ # Note also that we do not back up to erase the most recent write(),
+ # i.e., write()s do not get erased.
+ a.reset()
+ m.update("foo\nbar")
+ m.update("baz")
+ self.assertEquals(a.get(), ['foo\nbar', '\b\b\b', 'baz'])
+
+ def test_verbose(self):
+ a = ArrayStream()
+ m = metered_stream.MeteredStream(verbose=True, stream=a)
+ self.assertTrue(a.empty())
+ m.write("foo")
+ self.assertEquals(a.get(), ['foo'])
+
+ m.update("bar")
+ # FIXME: figure out how to test that this went to the logger. Is this
+ # good enough?
+ self.assertEquals(a.get(), ['foo'])
+
+ m.progress("dropped")
+ self.assertEquals(a.get(), ['foo'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/printing.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/printing.py
new file mode 100644
index 0000000..91d49c6
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/printing.py
@@ -0,0 +1,500 @@
+#!/usr/bin/env python
+# Copyright (C) 2010 Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Package that handles non-debug, non-file output for run-webkit-tests.""" + +import logging +import optparse +import os +import pdb + +from webkitpy.layout_tests.layout_package import metered_stream +from webkitpy.layout_tests.layout_package import test_expectations + +_log = logging.getLogger("webkitpy.layout_tests.printer") + +TestExpectationsFile = test_expectations.TestExpectationsFile + +NUM_SLOW_TESTS_TO_LOG = 10 + +PRINT_DEFAULT = ("misc,one-line-progress,one-line-summary,unexpected," + "unexpected-results,updates") +PRINT_EVERYTHING = ("actual,config,expected,misc,one-line-progress," + "one-line-summary,slowest,timing,unexpected," + "unexpected-results,updates") + +HELP_PRINTING = """ +Output for run-webkit-tests is controlled by a comma-separated list of +values passed to --print. Values either influence the overall output, or +the output at the beginning of the run, during the run, or at the end: + +Overall options: + nothing don't print anything. This overrides every other option + everything print everything (except the trace-* options and the + detailed-progress option, see below for the full list ) + misc print miscellaneous things like blank lines + +At the beginning of the run: + config print the test run configuration + expected print a summary of what is expected to happen + (# passes, # failures, etc.) + +During the run: + detailed-progress print one dot per test completed + one-line-progress print a one-line progress bar + unexpected print any unexpected results as they occur + updates print updates on which stage is executing + trace-everything print detailed info on every test's results + (baselines, expectation, time it took to run). If + this is specified it will override the '*-progress' + options, the 'trace-unexpected' option, and the + 'unexpected' option. + trace-unexpected like 'trace-everything', but only for tests with + unexpected results. If this option is specified, + it will override the 'unexpected' option. + +At the end of the run: + actual print a summary of the actual results + slowest print %(slowest)d slowest tests and the time they took + timing print timing statistics + unexpected-results print a list of the tests with unexpected results + one-line-summary print a one-line summary of the run + +Notes: + - 'detailed-progress' can only be used if running in a single thread + (using --child-processes=1) or a single queue of tests (using + --experimental-fully-parallel). If these conditions aren't true, + 'one-line-progress' will be used instead. + - If both 'detailed-progress' and 'one-line-progress' are specified (and + both are possible), 'detailed-progress' will be used. + - If 'nothing' is specified, it overrides all of the other options. 
+ - Specifying --verbose is equivalent to --print everything plus it + changes the format of the log messages to add timestamps and other + information. If you specify --verbose and --print X, then X overrides + the --print everything implied by --verbose. + +--print 'everything' is equivalent to --print '%(everything)s'. + +The default is to --print '%(default)s'. +""" % {'slowest': NUM_SLOW_TESTS_TO_LOG, 'everything': PRINT_EVERYTHING, + 'default': PRINT_DEFAULT} + + +def print_options(): + return [ + # Note: we use print_options rather than just 'print' because print + # is a reserved word. + optparse.make_option("--print", dest="print_options", + help=("controls print output of test run. " + "Use --help-printing for more.")), + optparse.make_option("--help-printing", action="store_true", + help="show detailed help on controlling print output"), + optparse.make_option("-v", "--verbose", action="store_true", + default=False, help="include debug-level logging"), + + # FIXME: we should remove this; it's pretty much obsolete with the + # --print trace-everything option. + optparse.make_option("--sources", action="store_true", + help=("show expected result file path for each test " + "(implies --verbose)")), + ] + + +def configure_logging(options, meter): + """Configures the logging system.""" + log_fmt = '%(message)s' + log_datefmt = '%y%m%d %H:%M:%S' + log_level = logging.INFO + if options.verbose: + log_fmt = ('%(asctime)s %(filename)s:%(lineno)-4d %(levelname)s ' + '%(message)s') + log_level = logging.DEBUG + + logging.basicConfig(level=log_level, format=log_fmt, + datefmt=log_datefmt, stream=meter) + + +def parse_print_options(print_options, verbose, child_processes, + is_fully_parallel): + """Parse the options provided to --print and dedup and rank them. + + Returns + a set() of switches that govern how logging is done + + """ + if print_options: + switches = set(print_options.split(',')) + elif verbose: + switches = set(PRINT_EVERYTHING.split(',')) + else: + switches = set(PRINT_DEFAULT.split(',')) + + if 'nothing' in switches: + return set() + + if (child_processes != 1 and not is_fully_parallel and + 'detailed-progress' in switches): + _log.warn("Can only print 'detailed-progress' if running " + "with --child-processes=1 or " + "with --experimental-fully-parallel. " + "Using 'one-line-progress' instead.") + switches.discard('detailed-progress') + switches.add('one-line-progress') + + if 'everything' in switches: + switches.discard('everything') + switches.update(set(PRINT_EVERYTHING.split(','))) + + if 'detailed-progress' in switches: + switches.discard('one-line-progress') + + if 'trace-everything' in switches: + switches.discard('detailed-progress') + switches.discard('one-line-progress') + switches.discard('trace-unexpected') + switches.discard('unexpected') + + if 'trace-unexpected' in switches: + switches.discard('unexpected') + + return switches + + +class Printer(object): + """Class handling all non-debug-logging printing done by run-webkit-tests. + + Printing from run-webkit-tests falls into two buckets: general or + regular output that is read only by humans and can be changed at any + time, and output that is parsed by buildbots (and humans) and hence + must be changed more carefully and in coordination with the buildbot + parsing code (in chromium.org's buildbot/master.chromium/scripts/master/ + log_parser/webkit_test_command.py script). 
+
+ By default the buildbot-parsed code gets logged to stdout, and regular
+ output gets logged to stderr."""
+ def __init__(self, port, options, regular_output, buildbot_output,
+ child_processes, is_fully_parallel):
+ """
+ Args:
+ port interface to port-specific routines
+ options OptionParser object with command line settings
+ regular_output stream to which output intended only for humans
+ should be written
+ buildbot_output stream to which output intended to be read by
+ the buildbots (and humans) should be written
+ child_processes number of parallel threads running (usually
+ controlled by --child-processes)
+ is_fully_parallel are the tests running in a single queue, or
+ in shards (usually controlled by
+ --experimental-fully-parallel)
+
+ Note that the last two args are separate rather than bundled into
+ the options structure so that this object does not assume any flags
+ set in options that weren't returned from logging_options(), above.
+ The two are used to determine whether or not we can sensibly use
+ the 'detailed-progress' option, or can only use 'one-line-progress'.
+ """
+ self._buildbot_stream = buildbot_output
+ self._options = options
+ self._port = port
+ self._stream = regular_output
+
+ # These are used for --print detailed-progress to track status by
+ # directory.
+ self._current_dir = None
+ self._current_progress_str = ""
+ self._current_test_number = 0
+
+ self._meter = metered_stream.MeteredStream(options.verbose,
+ regular_output)
+ configure_logging(self._options, self._meter)
+
+ self.switches = parse_print_options(options.print_options,
+ options.verbose, child_processes, is_fully_parallel)
+
+ # These two routines just hide the implementation of the switches.
+ def disabled(self, option):
+ return not option in self.switches
+
+ def enabled(self, option):
+ return option in self.switches
+
+ def help_printing(self):
+ self._write(HELP_PRINTING)
+
+ def print_actual(self, msg):
+ if self.disabled('actual'):
+ return
+ self._buildbot_stream.write("%s\n" % msg)
+
+ def print_config(self, msg):
+ self.write(msg, 'config')
+
+ def print_expected(self, msg):
+ self.write(msg, 'expected')
+
+ def print_timing(self, msg):
+ self.write(msg, 'timing')
+
+ def print_one_line_summary(self, total, expected):
+ """Print a one-line summary of the test run to stdout.
+
+ Args:
+ total: total number of tests run
+ expected: number of expected results
+ """
+ if self.disabled('one-line-summary'):
+ return
+
+ unexpected = total - expected
+ if unexpected == 0:
+ self._write("All %d tests ran as expected." % expected)
+ elif expected == 1:
+ self._write("1 test ran as expected, %d didn't:" % unexpected)
+ else:
+ self._write("%d tests ran as expected, %d didn't:" %
+ (expected, unexpected))
+ self._write("")
+
+ def print_test_result(self, result, expected, exp_str, got_str):
+ """Print the result of the test as determined by --print."""
+ if (self.enabled('trace-everything') or
+ self.enabled('trace-unexpected') and not expected):
+ self._print_test_trace(result, exp_str, got_str)
+ elif (not expected and self.enabled('unexpected') and
+ self.disabled('detailed-progress')):
+ # Note: 'detailed-progress' handles unexpected results internally,
+ # so we skip it here.
+ self._print_unexpected_test_result(result)
+
+ def _print_test_trace(self, result, exp_str, got_str):
+ """Print detailed results of a test (triggered by --print trace-*).
+ For each test, print:
+ - location of the expected baselines
+ - expected results
+ - actual result
+ - timing info
+ """
+ filename = result.filename
+ test_name = self._port.relative_test_filename(filename)
+ self._write('trace: %s' % test_name)
+ self._write(' txt: %s' %
+ self._port.relative_test_filename(
+ self._port.expected_filename(filename, '.txt')))
+ png_file = self._port.expected_filename(filename, '.png')
+ if os.path.exists(png_file):
+ self._write(' png: %s' %
+ self._port.relative_test_filename(png_file))
+ else:
+ self._write(' png: <none>')
+ self._write(' exp: %s' % exp_str)
+ self._write(' got: %s' % got_str)
+ self._write(' took: %-.3f' % result.test_run_time)
+ self._write('')
+
+ def _print_unexpected_test_result(self, result):
+ """Prints one unexpected test result line."""
+ desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result.type][0]
+ self.write(" %s -> unexpected %s" %
+ (self._port.relative_test_filename(result.filename),
+ desc), "unexpected")
+
+ def print_progress(self, result_summary, retrying, test_list):
+ """Print progress through the tests as determined by --print."""
+ if self.enabled('detailed-progress'):
+ self._print_detailed_progress(result_summary, test_list)
+ elif self.enabled('one-line-progress'):
+ self._print_one_line_progress(result_summary, retrying)
+ else:
+ return
+
+ if result_summary.remaining == 0:
+ self._meter.update('')
+
+ def _print_one_line_progress(self, result_summary, retrying):
+ """Displays the progress through the test run."""
+ percent_complete = 100 * (result_summary.expected +
+ result_summary.unexpected) / result_summary.total
+ action = "Testing"
+ if retrying:
+ action = "Retrying"
+ self._meter.progress("%s (%d%%): %d ran as expected, %d didn't,"
+ " %d left" % (action, percent_complete, result_summary.expected,
+ result_summary.unexpected, result_summary.remaining))
+
+ def _print_detailed_progress(self, result_summary, test_list):
+ """Display detailed progress output where we print the directory name
+ and one dot for each completed test. This is triggered by
+ "--print detailed-progress"."""
+ if self._current_test_number == len(test_list):
+ return
+
+ next_test = test_list[self._current_test_number]
+ next_dir = os.path.dirname(
+ self._port.relative_test_filename(next_test))
+ if self._current_progress_str == "":
+ self._current_progress_str = "%s: " % (next_dir)
+ self._current_dir = next_dir
+
+ while next_test in result_summary.results:
+ if next_dir != self._current_dir:
+ self._meter.write("%s\n" % (self._current_progress_str))
+ self._current_progress_str = "%s: ." % (next_dir)
+ self._current_dir = next_dir
+ else:
+ self._current_progress_str += "."
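+ # An unexpected result (checked below) flushes the dots printed so
+ # far and reports the failing test on its own line.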
+ + if (next_test in result_summary.unexpected_results and + self.enabled('unexpected')): + self._meter.write("%s\n" % self._current_progress_str) + test_result = result_summary.results[next_test] + self._print_unexpected_test_result(test_result) + self._current_progress_str = "%s: " % self._current_dir + + self._current_test_number += 1 + if self._current_test_number == len(test_list): + break + + next_test = test_list[self._current_test_number] + next_dir = os.path.dirname( + self._port.relative_test_filename(next_test)) + + if result_summary.remaining: + remain_str = " (%d)" % (result_summary.remaining) + self._meter.progress("%s%s" % (self._current_progress_str, + remain_str)) + else: + self._meter.progress("%s" % (self._current_progress_str)) + + def print_unexpected_results(self, unexpected_results): + """Prints a list of the unexpected results to the buildbot stream.""" + if self.disabled('unexpected-results'): + return + + passes = {} + flaky = {} + regressions = {} + + for test, results in unexpected_results['tests'].iteritems(): + actual = results['actual'].split(" ") + expected = results['expected'].split(" ") + if actual == ['PASS']: + if 'CRASH' in expected: + _add_to_dict_of_lists(passes, + 'Expected to crash, but passed', + test) + elif 'TIMEOUT' in expected: + _add_to_dict_of_lists(passes, + 'Expected to timeout, but passed', + test) + else: + _add_to_dict_of_lists(passes, + 'Expected to fail, but passed', + test) + elif len(actual) > 1: + # We group flaky tests by the first actual result we got. + _add_to_dict_of_lists(flaky, actual[0], test) + else: + _add_to_dict_of_lists(regressions, results['actual'], test) + + if len(passes) or len(flaky) or len(regressions): + self._buildbot_stream.write("\n") + + if len(passes): + for key, tests in passes.iteritems(): + self._buildbot_stream.write("%s: (%d)\n" % (key, len(tests))) + tests.sort() + for test in tests: + self._buildbot_stream.write(" %s\n" % test) + self._buildbot_stream.write("\n") + self._buildbot_stream.write("\n") + + if len(flaky): + descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS + for key, tests in flaky.iteritems(): + result = TestExpectationsFile.EXPECTATIONS[key.lower()] + self._buildbot_stream.write("Unexpected flakiness: %s (%d)\n" + % (descriptions[result][1], len(tests))) + tests.sort() + + for test in tests: + result = unexpected_results['tests'][test] + actual = result['actual'].split(" ") + expected = result['expected'].split(" ") + result = TestExpectationsFile.EXPECTATIONS[key.lower()] + new_expectations_list = list(set(actual) | set(expected)) + self._buildbot_stream.write(" %s = %s\n" % + (test, " ".join(new_expectations_list))) + self._buildbot_stream.write("\n") + self._buildbot_stream.write("\n") + + if len(regressions): + descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS + for key, tests in regressions.iteritems(): + result = TestExpectationsFile.EXPECTATIONS[key.lower()] + self._buildbot_stream.write( + "Regressions: Unexpected %s : (%d)\n" % ( + descriptions[result][1], len(tests))) + tests.sort() + for test in tests: + self._buildbot_stream.write(" %s = %s\n" % (test, key)) + self._buildbot_stream.write("\n") + self._buildbot_stream.write("\n") + + if len(unexpected_results['tests']) and self._options.verbose: + self._buildbot_stream.write("%s\n" % ("-" * 78)) + + def print_update(self, msg): + if self.disabled('updates'): + return + self._meter.update(msg) + + def write(self, msg, option="misc"): + if self.disabled(option): + return + self._write(msg) + + def 
_write(self, msg): + # FIXME: we could probably get away with calling _log.info() all of + # the time, but there doesn't seem to be a good way to test the output + # from the logger :(. + if self._options.verbose: + _log.info(msg) + elif msg == "": + self._meter.write("\n") + else: + self._meter.write(msg) + +# +# Utility routines used by the Controller class +# + + +def _add_to_dict_of_lists(dict, key, value): + dict.setdefault(key, []).append(value) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/printing_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/printing_unittest.py new file mode 100644 index 0000000..8e6aa8f --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/printing_unittest.py @@ -0,0 +1,463 @@ +#!/usr/bin/python +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+"""Unit tests for printing.py."""
+
+import os
+import optparse
+import pdb
+import sys
+import unittest
+import logging
+
+from webkitpy.common import array_stream
+from webkitpy.layout_tests import port
+from webkitpy.layout_tests.layout_package import printing
+from webkitpy.layout_tests.layout_package import dump_render_tree_thread
+from webkitpy.layout_tests.layout_package import test_expectations
+from webkitpy.layout_tests.layout_package import test_failures
+from webkitpy.layout_tests import run_webkit_tests
+
+
+def get_options(args):
+ print_options = printing.print_options()
+ option_parser = optparse.OptionParser(option_list=print_options)
+ return option_parser.parse_args(args)
+
+
+def get_result(filename, result_type=test_expectations.PASS, run_time=0):
+ failures = []
+ if result_type == test_expectations.TIMEOUT:
+ failures = [test_failures.FailureTimeout()]
+ elif result_type == test_expectations.CRASH:
+ failures = [test_failures.FailureCrash()]
+ return dump_render_tree_thread.TestResult(filename, failures, run_time,
+ total_time_for_all_diffs=0,
+ time_for_diffs=0)
+
+
+def get_result_summary(port_obj, test_files, expectations_str):
+ expectations = test_expectations.TestExpectations(
+ port_obj, test_files, expectations_str,
+ port_obj.test_platform_name(), is_debug_mode=False,
+ is_lint_mode=False, tests_are_present=False)
+
+ rs = run_webkit_tests.ResultSummary(expectations, test_files)
+ return rs, expectations
+
+
+class TestUtilityFunctions(unittest.TestCase):
+ def test_configure_logging(self):
+ # FIXME: We need to figure out how to reset the basic logger.
+ # FIXME: If other testing classes call logging.basicConfig() then
+ # FIXME: these calls become no-ops and we can't control the
+ # FIXME: configuration to test things properly.
+ options, args = get_options([])
+ stream = array_stream.ArrayStream()
+ printing.configure_logging(options, stream)
+ logging.info("this should be logged")
+ # self.assertFalse(stream.empty())
+
+ stream.reset()
+ logging.debug("this should not be logged")
+ # self.assertTrue(stream.empty())
+
+ stream.reset()
+ options, args = get_options(['--verbose'])
+ printing.configure_logging(options, stream)
+ logging.debug("this should be logged")
+ # self.assertFalse(stream.empty())
+
+ def test_print_options(self):
+ options, args = get_options([])
+ self.assertTrue(options is not None)
+
+
+class Testprinter(unittest.TestCase):
+ def get_printer(self, args=None, single_threaded=False,
+ is_fully_parallel=False):
+ printing_options = printing.print_options()
+ option_parser = optparse.OptionParser(option_list=printing_options)
+ options, args = option_parser.parse_args(args)
+ self._port = port.get('test', options)
+ nproc = 2
+ if single_threaded:
+ nproc = 1
+
+ regular_output = array_stream.ArrayStream()
+ buildbot_output = array_stream.ArrayStream()
+ printer = printing.Printer(self._port, options, regular_output,
+ buildbot_output, nproc,
+ is_fully_parallel)
+ return printer, regular_output, buildbot_output
+
+ def test_help_printer(self):
+ # Here and below we'll call the "regular" printer err and the
+ # buildbot printer out; this corresponds to how things run on the
+ # bots with stderr and stdout.
+ printer, err, out = self.get_printer()
+
+ # This routine should print something to stdout. Testing what it is
+ # is kind of pointless.
+ printer.help_printing() + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + def do_switch_tests(self, method_name, switch, to_buildbot, + message='hello', exp_err=None, exp_bot=None): + def do_helper(method_name, switch, message, exp_err, exp_bot): + printer, err, bot = self.get_printer(['--print', switch]) + getattr(printer, method_name)(message) + self.assertEqual(err.get(), exp_err) + self.assertEqual(bot.get(), exp_bot) + + if to_buildbot: + if exp_err is None: + exp_err = [] + if exp_bot is None: + exp_bot = [message + "\n"] + else: + if exp_err is None: + exp_err = [message] + if exp_bot is None: + exp_bot = [] + do_helper(method_name, 'nothing', 'hello', [], []) + do_helper(method_name, switch, 'hello', exp_err, exp_bot) + do_helper(method_name, 'everything', 'hello', exp_err, exp_bot) + + def test_print_actual(self): + # Actual results need to be logged to the buildbot's stream. + self.do_switch_tests('print_actual', 'actual', to_buildbot=True) + + def test_print_actual_buildbot(self): + # FIXME: Test that the format of the actual results matches what the + # buildbot is expecting. + pass + + def test_print_config(self): + self.do_switch_tests('print_config', 'config', to_buildbot=False) + + def test_print_expected(self): + self.do_switch_tests('print_expected', 'expected', to_buildbot=False) + + def test_print_timing(self): + self.do_switch_tests('print_timing', 'timing', to_buildbot=False) + + def test_print_update(self): + # Note that there shouldn't be a carriage return here; updates() + # are meant to be overwritten. + self.do_switch_tests('print_update', 'updates', to_buildbot=False, + message='hello', exp_err=['hello']) + + def test_print_one_line_summary(self): + printer, err, out = self.get_printer(['--print', 'nothing']) + printer.print_one_line_summary(1, 1) + self.assertTrue(err.empty()) + + printer, err, out = self.get_printer(['--print', 'one-line-summary']) + printer.print_one_line_summary(1, 1) + self.assertEquals(err.get(), ["All 1 tests ran as expected.", "\n"]) + + printer, err, out = self.get_printer(['--print', 'everything']) + printer.print_one_line_summary(1, 1) + self.assertEquals(err.get(), ["All 1 tests ran as expected.", "\n"]) + + err.reset() + printer.print_one_line_summary(2, 1) + self.assertEquals(err.get(), + ["1 test ran as expected, 1 didn't:", "\n"]) + + err.reset() + printer.print_one_line_summary(3, 2) + self.assertEquals(err.get(), + ["2 tests ran as expected, 1 didn't:", "\n"]) + + def test_print_test_result(self): + result = get_result('foo.html') + printer, err, out = self.get_printer(['--print', 'nothing']) + printer.print_test_result(result, expected=False, exp_str='', + got_str='') + self.assertTrue(err.empty()) + + printer, err, out = self.get_printer(['--print', 'unexpected']) + printer.print_test_result(result, expected=True, exp_str='', + got_str='') + self.assertTrue(err.empty()) + printer.print_test_result(result, expected=False, exp_str='', + got_str='') + self.assertEquals(err.get(), + [' foo.html -> unexpected pass']) + + printer, err, out = self.get_printer(['--print', 'everything']) + printer.print_test_result(result, expected=True, exp_str='', + got_str='') + self.assertTrue(err.empty()) + + printer.print_test_result(result, expected=False, exp_str='', + got_str='') + self.assertEquals(err.get(), + [' foo.html -> unexpected pass']) + + printer, err, out = self.get_printer(['--print', 'nothing']) + printer.print_test_result(result, expected=False, exp_str='', + got_str='') + self.assertTrue(err.empty()) + + 
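+ # 'trace-unexpected' should only trace tests whose results were
+ # not what was expected.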
printer, err, out = self.get_printer(['--print', + 'trace-unexpected']) + printer.print_test_result(result, expected=True, exp_str='', + got_str='') + self.assertTrue(err.empty()) + + err.reset() + printer.print_test_result(result, expected=False, exp_str='', + got_str='') + self.assertFalse(err.empty()) + + printer, err, out = self.get_printer(['--print', 'trace-everything']) + printer.print_test_result(result, expected=True, exp_str='', + got_str='') + self.assertFalse(err.empty()) + + err.reset() + printer.print_test_result(result, expected=False, exp_str='', + got_str='') + + def test_print_progress(self): + test_files = ['foo.html', 'bar.html'] + expectations = '' + + # test that we print nothing + printer, err, out = self.get_printer(['--print', 'nothing']) + rs, exp = get_result_summary(self._port, test_files, expectations) + + printer.print_progress(rs, False, test_files) + self.assertTrue(out.empty()) + self.assertTrue(err.empty()) + + printer.print_progress(rs, True, test_files) + self.assertTrue(out.empty()) + self.assertTrue(err.empty()) + + # test regular functionality + printer, err, out = self.get_printer(['--print', + 'one-line-progress']) + printer.print_progress(rs, False, test_files) + self.assertTrue(out.empty()) + self.assertFalse(err.empty()) + + err.reset() + out.reset() + printer.print_progress(rs, True, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + def test_print_progress__detailed(self): + test_files = ['pass/pass.html', 'pass/timeout.html', 'fail/crash.html'] + expectations = 'pass/timeout.html = TIMEOUT' + + # first, test that it is disabled properly + # should still print one-line-progress + printer, err, out = self.get_printer( + ['--print', 'detailed-progress'], single_threaded=False) + rs, exp = get_result_summary(self._port, test_files, expectations) + printer.print_progress(rs, False, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + # now test the enabled paths + printer, err, out = self.get_printer( + ['--print', 'detailed-progress'], single_threaded=True) + rs, exp = get_result_summary(self._port, test_files, expectations) + printer.print_progress(rs, False, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + err.reset() + out.reset() + printer.print_progress(rs, True, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + rs.add(get_result('pass/pass.html', test_expectations.TIMEOUT), False) + rs.add(get_result('pass/timeout.html'), True) + rs.add(get_result('fail/crash.html', test_expectations.CRASH), True) + err.reset() + out.reset() + printer.print_progress(rs, False, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + # We only clear the meter when retrying w/ detailed-progress. 
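+ # (so the retry pass below should add no new output to err)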
+ err.reset() + out.reset() + printer.print_progress(rs, True, test_files) + self.assertEqual(err.get(), []) + self.assertTrue(out.empty()) + + printer, err, out = self.get_printer( + ['--print', 'detailed-progress,unexpected'], single_threaded=True) + rs, exp = get_result_summary(self._port, test_files, expectations) + printer.print_progress(rs, False, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + err.reset() + out.reset() + printer.print_progress(rs, True, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + rs.add(get_result('pass/pass.html', test_expectations.TIMEOUT), False) + rs.add(get_result('pass/timeout.html'), True) + rs.add(get_result('fail/crash.html', test_expectations.CRASH), True) + err.reset() + out.reset() + printer.print_progress(rs, False, test_files) + self.assertFalse(err.empty()) + self.assertTrue(out.empty()) + + # We only clear the meter when retrying w/ detailed-progress. + err.reset() + out.reset() + printer.print_progress(rs, True, test_files) + self.assertEqual(err.get(), []) + self.assertTrue(out.empty()) + + def test_write(self): + printer, err, out = self.get_printer(['--print', 'nothing']) + printer.write("foo") + self.assertTrue(err.empty()) + + printer, err, out = self.get_printer(['--print', 'misc']) + printer.write("foo") + self.assertFalse(err.empty()) + err.reset() + printer.write("foo", "config") + self.assertTrue(err.empty()) + + printer, err, out = self.get_printer(['--print', 'everything']) + printer.write("foo") + self.assertFalse(err.empty()) + err.reset() + printer.write("foo", "config") + self.assertFalse(err.empty()) + + def test_print_unexpected_results(self): + # This routine is the only one that prints stuff that the bots + # care about. + def get_unexpected_results(expected, passing, flaky): + rs, exp = get_result_summary(self._port, test_files, expectations) + if expected: + rs.add(get_result('pass/pass.html', test_expectations.PASS), + expected) + rs.add(get_result('pass/timeout.html', + test_expectations.TIMEOUT), expected) + rs.add(get_result('fail/crash.html', test_expectations.CRASH), + expected) + elif passing: + rs.add(get_result('pass/pass.html'), expected) + rs.add(get_result('pass/timeout.html'), expected) + rs.add(get_result('fail/crash.html'), expected) + else: + rs.add(get_result('pass/pass.html', test_expectations.TIMEOUT), + expected) + rs.add(get_result('pass/timeout.html', + test_expectations.CRASH), expected) + rs.add(get_result('fail/crash.html', + test_expectations.TIMEOUT), + expected) + retry = rs + if flaky: + retry, exp = get_result_summary(self._port, test_files, + expectations) + retry.add(get_result('pass/pass.html'), True) + retry.add(get_result('pass/timeout.html'), True) + retry.add(get_result('fail/crash.html'), True) + unexpected_results = run_webkit_tests.summarize_unexpected_results( + self._port, exp, rs, retry) + return unexpected_results + + test_files = ['pass/pass.html', 'pass/timeout.html', 'fail/crash.html'] + expectations = '' + + printer, err, out = self.get_printer(['--print', 'nothing']) + ur = get_unexpected_results(expected=False, passing=False, flaky=False) + printer.print_unexpected_results(ur) + self.assertTrue(err.empty()) + self.assertTrue(out.empty()) + + printer, err, out = self.get_printer(['--print', + 'unexpected-results']) + + # test everything running as expected + ur = get_unexpected_results(expected=True, passing=False, flaky=False) + printer.print_unexpected_results(ur) + self.assertTrue(err.empty()) + 
self.assertTrue(out.empty()) + + # test failures + err.reset() + out.reset() + ur = get_unexpected_results(expected=False, passing=False, flaky=False) + printer.print_unexpected_results(ur) + self.assertTrue(err.empty()) + self.assertFalse(out.empty()) + + # test unexpected flaky results + err.reset() + out.reset() + ur = get_unexpected_results(expected=False, passing=True, flaky=False) + printer.print_unexpected_results(ur) + self.assertTrue(err.empty()) + self.assertFalse(out.empty()) + + # test unexpected passes + err.reset() + out.reset() + ur = get_unexpected_results(expected=False, passing=False, flaky=True) + printer.print_unexpected_results(ur) + self.assertTrue(err.empty()) + self.assertFalse(out.empty()) + + err.reset() + out.reset() + printer, err, out = self.get_printer(['--print', 'everything']) + ur = get_unexpected_results(expected=False, passing=False, flaky=False) + printer.print_unexpected_results(ur) + self.assertTrue(err.empty()) + self.assertFalse(out.empty()) + + def test_print_unexpected_results_buildbot(self): + # FIXME: Test that print_unexpected_results() produces the printer the + # buildbot is expecting. + pass + +if __name__ == '__main__': + unittest.main() diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/test_files.py b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/test_files.py index 6754fa6..8f79505 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/test_files.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/layout_package/test_files.py @@ -62,6 +62,7 @@ def gather_test_files(port, paths): paths_to_walk = set() # if paths is empty, provide a pre-defined list. if paths: + _log.debug("Gathering tests from: %s relative to %s" % (paths, port.layout_tests_dir())) for path in paths: # If there's an * in the name, assume it's a glob pattern. path = os.path.join(port.layout_tests_dir(), path) @@ -71,6 +72,7 @@ def gather_test_files(port, paths): else: paths_to_walk.add(path) else: + _log.debug("Gathering tests from: %s" % port.layout_tests_dir()) paths_to_walk.add(port.layout_tests_dir()) # Now walk all the paths passed in on the command line and get filenames diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/apache_http_server.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/apache_http_server.py index 1dd5b93..46617f6 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/apache_http_server.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/apache_http_server.py @@ -29,6 +29,10 @@ """A class to start/stop the apache http server used by layout tests.""" + +from __future__ import with_statement + +import codecs import logging import optparse import os @@ -151,7 +155,9 @@ class LayoutTestApacheHttpd(http_server_base.HttpServerBase): """ httpd_config = self._port_obj._path_to_apache_config_file() httpd_config_copy = os.path.join(output_dir, "httpd.conf") - httpd_conf = open(httpd_config).read() + # httpd.conf is always utf-8 according to http://archive.apache.org/gnats/10125 + with codecs.open(httpd_config, "r", "utf-8") as httpd_config_file: + httpd_conf = httpd_config_file.read() if self._is_cygwin(): # This is a gross hack, but it lets us use the upstream .conf file # and our checked in cygwin. 
This tells the server the root @@ -164,9 +170,8 @@ class LayoutTestApacheHttpd(http_server_base.HttpServerBase): httpd_conf = httpd_conf.replace('ServerRoot "/usr"', 'ServerRoot "%s"' % self._get_cygwin_path(cygusr)) - f = open(httpd_config_copy, 'wb') - f.write(httpd_conf) - f.close() + with codecs.open(httpd_config_copy, "w", "utf-8") as file: + file.write(httpd_conf) if self._is_cygwin(): return self._get_cygwin_path(httpd_config_copy) @@ -186,6 +191,9 @@ class LayoutTestApacheHttpd(http_server_base.HttpServerBase): # Use shell=True because we join the arguments into a string for # the sake of Window/Cygwin and it needs quoting that breaks # shell=False. + # FIXME: We should not need to be joining shell arguments into strings. + # shell=True is a trail of tears. + # Note: Not thread safe: http://bugs.python.org/issue2320 self._httpd_proc = subprocess.Popen(self._start_cmd, stderr=subprocess.PIPE, shell=True) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py index fb6fddf..25946af 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/base.py @@ -34,7 +34,7 @@ import cgi import difflib import errno import os -import subprocess +import shlex import sys import time @@ -49,7 +49,19 @@ from webkitpy.common.system.executive import Executive, ScriptError _log = logutils.get_logger(__file__) -# Python bug workaround. See Port.wdiff_text() for an explanation. +# Python's Popen has a bug that causes any pipes opened to a +# process that can't be executed to be leaked. Since this +# code is specifically designed to tolerate exec failures +# to gracefully handle cases where wdiff is not installed, +# the bug results in a massive file descriptor leak. As a +# workaround, if an exec failure is ever experienced for +# wdiff, assume it's not available. This will leak one +# file descriptor but that's better than leaking each time +# wdiff would be run. +# +# http://mail.python.org/pipermail/python-list/ +# 2008-August/505753.html +# http://bugs.python.org/issue3210 _wdiff_available = True _pretty_patch_available = True @@ -135,17 +147,13 @@ class Port(object): result = True try: - if subprocess.call(cmd) == 0: + if self._executive.run_command(cmd, return_exit_code=True) == 0: return False except OSError, e: if e.errno == errno.ENOENT or e.errno == errno.EACCES: _compare_available = False else: raise e - except ValueError: - # work around a race condition in Python 2.4's implementation - # of subprocess.Popen. See http://bugs.python.org/issue1199282 . - pass return result def diff_text(self, expected_text, actual_text, @@ -313,6 +321,8 @@ class Port(object): if not self._webkit_base_dir: abspath = os.path.abspath(__file__) self._webkit_base_dir = abspath[0:abspath.find('WebKitTools')] + _log.debug("Using WebKit root: %s" % self._webkit_base_dir) + return os.path.join(self._webkit_base_dir, *comps) # FIXME: Callers should eventually move to scm.script_path. 
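The module-level _wdiff_available and _pretty_patch_available flags implement the probe-once pattern the comment above describes: the first exec failure marks the tool unavailable, so later calls return early instead of leaking another pipe. A self-contained sketch of the same pattern, with hypothetical names (run_tool is not part of webkitpy):

import errno
import subprocess

_tool_available = True  # Assume available until an exec failure proves otherwise.


def run_tool(cmd):
    global _tool_available
    if not _tool_available:
        # Fail fast: returning early avoids leaking one pipe per call
        # (http://bugs.python.org/issue3210).
        return ""
    try:
        return subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    except OSError, e:
        if e.errno in (errno.ENOENT, errno.EACCES):
            # Exec failed; remember that, so we leak one file descriptor
            # in total rather than one per attempted run.
            _tool_available = False
            return ""
        raise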
@@ -413,9 +423,10 @@ class Port(object):
 results_filename in a users' browser."""
 raise NotImplementedError('Port.show_html_results_file')
 
- def start_driver(self, png_path, options):
- """Starts a new test Driver and returns a handle to the object."""
- raise NotImplementedError('Port.start_driver')
+ def create_driver(self, png_path, options):
+ """Return a newly created base.Driver subclass for starting/stopping
+ the test driver."""
+ raise NotImplementedError('Port.create_driver')
 
 def start_helper(self):
 """If a port needs to reconfigure graphics settings or do other
@@ -519,66 +530,68 @@ class Port(object):
 expectations, determining search paths, and logging information."""
 raise NotImplementedError('Port.version')
 
+ _WDIFF_DEL = '##WDIFF_DEL##'
+ _WDIFF_ADD = '##WDIFF_ADD##'
+ _WDIFF_END = '##WDIFF_END##'
+
+ def _format_wdiff_output_as_html(self, wdiff):
+ wdiff = cgi.escape(wdiff)
+ wdiff = wdiff.replace(self._WDIFF_DEL, "<span class=del>")
+ wdiff = wdiff.replace(self._WDIFF_ADD, "<span class=add>")
+ wdiff = wdiff.replace(self._WDIFF_END, "</span>")
+ html = "<head><style>.del { background: #faa; } "
+ html += ".add { background: #afa; }</style></head>"
+ html += "<pre>%s</pre>" % wdiff
+ return html
+
+ def _wdiff_command(self, actual_filename, expected_filename):
+ executable = self._path_to_wdiff()
+ return [executable,
+ "--start-delete=%s" % self._WDIFF_DEL,
+ "--end-delete=%s" % self._WDIFF_END,
+ "--start-insert=%s" % self._WDIFF_ADD,
+ "--end-insert=%s" % self._WDIFF_END,
+ actual_filename,
+ expected_filename]
+
+ @staticmethod
+ def _handle_wdiff_error(script_error):
+ # Exit 1 means the files differed, any other exit code is an error.
+ if script_error.exit_code != 1:
+ raise script_error
+
+ def _run_wdiff(self, actual_filename, expected_filename):
+ """Runs wdiff and may throw exceptions.
+ This is mostly a hook for unit testing."""
+ # Diffs are treated as binary as they may include multiple files
+ # with conflicting encodings. Thus we do not decode the output.
+ command = self._wdiff_command(actual_filename, expected_filename)
+ wdiff = self._executive.run_command(command, decode_output=False,
+ error_handler=self._handle_wdiff_error)
+ return self._format_wdiff_output_as_html(wdiff)
+
 def wdiff_text(self, actual_filename, expected_filename):
 """Returns a string of HTML indicating the word-level diff of the
 contents of the two filenames. Returns an empty string if word-level
 diffing isn't available."""
- executable = self._path_to_wdiff()
- cmd = [executable,
- '--start-delete=##WDIFF_DEL##',
- '--end-delete=##WDIFF_END##',
- '--start-insert=##WDIFF_ADD##',
- '--end-insert=##WDIFF_END##',
- actual_filename,
- expected_filename]
- # FIXME: Why not just check os.exists(executable) once?
- global _wdiff_available
- result = ''
+ global _wdiff_available # See explanation at top of file.
+ if not _wdiff_available:
+ return ""
 try:
- # Python's Popen has a bug that causes any pipes opened to a
- # process that can't be executed to be leaked. Since this
- # code is specifically designed to tolerate exec failures
- # to gracefully handle cases where wdiff is not installed,
- # the bug results in a massive file descriptor leak. As a
- # workaround, if an exec failure is ever experienced for
- # wdiff, assume it's not available. This will leak one
- # file descriptor but that's better than leaking each time
- # wdiff would be run.
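The new _format_wdiff_output_as_html() in the hunk above is easy to exercise in isolation. The following standalone restatement (module-level constants instead of class attributes, otherwise the same logic) reproduces the output asserted by the new base_unittest.py:

import cgi

_WDIFF_DEL = '##WDIFF_DEL##'
_WDIFF_ADD = '##WDIFF_ADD##'
_WDIFF_END = '##WDIFF_END##'


def format_wdiff_output_as_html(wdiff):
    # Escape first so diff content cannot inject markup, then rewrite
    # the wdiff markers into styled spans.
    wdiff = cgi.escape(wdiff)
    wdiff = wdiff.replace(_WDIFF_DEL, "<span class=del>")
    wdiff = wdiff.replace(_WDIFF_ADD, "<span class=add>")
    wdiff = wdiff.replace(_WDIFF_END, "</span>")
    html = "<head><style>.del { background: #faa; } "
    html += ".add { background: #afa; }</style></head>"
    html += "<pre>%s</pre>" % wdiff
    return html


marked_up = "%sfoo%s%sbar%s" % (_WDIFF_DEL, _WDIFF_END, _WDIFF_ADD, _WDIFF_END)
print format_wdiff_output_as_html(marked_up)
# ...<pre><span class=del>foo</span><span class=add>bar</span></pre>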
- #
- # http://mail.python.org/pipermail/python-list/
- # 2008-August/505753.html
- # http://bugs.python.org/issue3210
- #
- # It also has a threading bug, so we don't output wdiff if
- # the Popen raises a ValueError.
- # http://bugs.python.org/issue1236
- if _wdiff_available:
- try:
- # FIXME: Use Executive() here.
- wdiff = subprocess.Popen(cmd,
- stdout=subprocess.PIPE).communicate()[0]
- except ValueError, e:
- # Working around a race in Python 2.4's implementation
- # of Popen().
- wdiff = ''
- wdiff = cgi.escape(wdiff)
- wdiff = wdiff.replace('##WDIFF_DEL##', '<span class=del>')
- wdiff = wdiff.replace('##WDIFF_ADD##', '<span class=add>')
- wdiff = wdiff.replace('##WDIFF_END##', '</span>')
- result = '<head><style>.del { background: #faa; } '
- result += '.add { background: #afa; }</style></head>'
- result += '<pre>' + wdiff + '</pre>'
+ # It's possible to raise a ScriptError if we pass wdiff invalid paths.
+ return self._run_wdiff(actual_filename, expected_filename)
 except OSError, e:
- if (e.errno == errno.ENOENT or e.errno == errno.EACCES or
- e.errno == errno.ECHILD):
+ if e.errno in [errno.ENOENT, errno.EACCES, errno.ECHILD]:
+ # Silently ignore cases where wdiff is missing.
 _wdiff_available = False
- else:
- raise e
- return result
+ return ""
+ raise
+ assert(False) # Should never be reached.
 
 _pretty_patch_error_html = "Failed to run PrettyPatch, see error console."
 
 def pretty_patch_text(self, diff_path):
+ # FIXME: Much of this function could move to prettypatch.rb
 global _pretty_patch_available
 if not _pretty_patch_available:
 return self._pretty_patch_error_html
@@ -586,7 +599,9 @@ class Port(object):
 prettify_path = os.path.join(pretty_patch_path, "prettify.rb")
 command = ["ruby", "-I", pretty_patch_path, prettify_path, diff_path]
 try:
- return self._executive.run_command(command)
+ # Diffs are treated as binary (we pass decode_output=False) as they
+ # may contain multiple files of conflicting encodings.
+ return self._executive.run_command(command, decode_output=False)
 except OSError, e:
 # If the system is missing ruby log the error and stop trying.
 _pretty_patch_available = False
@@ -718,6 +733,24 @@ class Port(object):
 specified in the __init__() call."""
 raise NotImplementedError('Driver.run_test')
 
+ # FIXME: This is static so we can test it w/o creating a Base instance.
+ @classmethod
+ def _command_wrapper(cls, wrapper_option):
+ # Hook for injecting valgrind or other runtime instrumentation,
+ # used by e.g. tools/valgrind/valgrind_tests.py.
+ wrapper = []
+ browser_wrapper = os.environ.get("BROWSER_WRAPPER", None)
+ if browser_wrapper:
+ # FIXME: There seems to be no reason to use BROWSER_WRAPPER over --wrapper.
+ # Remove this code any time after the date listed below.
+ _log.error("BROWSER_WRAPPER is deprecated, please use --wrapper instead.")
+ _log.error("BROWSER_WRAPPER will be removed any time after June 1st 2010 and your scripts will break.")
+ wrapper += [browser_wrapper]
+
+ if wrapper_option:
+ wrapper += shlex.split(wrapper_option)
+ return wrapper
+
 def poll(self):
 """Returns None if the Driver is still running. Returns the
 returncode if it has exited."""
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/base_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/base_unittest.py
new file mode 100644
index 0000000..f821353
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/base_unittest.py
@@ -0,0 +1,126 @@
+# Copyright (C) 2010 Google Inc. All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import base +import unittest +import tempfile + +from webkitpy.common.system.executive import Executive, ScriptError +from webkitpy.thirdparty.mock import Mock + + +class PortTest(unittest.TestCase): + + def test_format_wdiff_output_as_html(self): + output = "OUTPUT %s %s %s" % (base.Port._WDIFF_DEL, base.Port._WDIFF_ADD, base.Port._WDIFF_END) + html = base.Port()._format_wdiff_output_as_html(output) + expected_html = "<head><style>.del { background: #faa; } .add { background: #afa; }</style></head><pre>OUTPUT <span class=del> <span class=add> </span></pre>" + self.assertEqual(html, expected_html) + + def test_wdiff_command(self): + port = base.Port() + port._path_to_wdiff = lambda: "/path/to/wdiff" + command = port._wdiff_command("/actual/path", "/expected/path") + expected_command = [ + "/path/to/wdiff", + "--start-delete=##WDIFF_DEL##", + "--end-delete=##WDIFF_END##", + "--start-insert=##WDIFF_ADD##", + "--end-insert=##WDIFF_END##", + "/actual/path", + "/expected/path", + ] + self.assertEqual(command, expected_command) + + def _file_with_contents(self, contents, encoding="utf-8"): + new_file = tempfile.NamedTemporaryFile() + new_file.write(contents.encode(encoding)) + new_file.flush() + return new_file + + def test_run_wdiff(self): + executive = Executive() + # This may fail on some systems. We could ask the port + # object for the wdiff path, but since we don't know what + # port object to use, this is sufficient for now. 
+ try: + wdiff_path = executive.run_command(["which", "wdiff"]).rstrip() + except Exception, e: + wdiff_path = None + + port = base.Port() + port._path_to_wdiff = lambda: wdiff_path + + if wdiff_path: + # "with tempfile.NamedTemporaryFile() as actual" does not seem to work in Python 2.5 + actual = self._file_with_contents(u"foo") + expected = self._file_with_contents(u"bar") + wdiff = port._run_wdiff(actual.name, expected.name) + expected_wdiff = "<head><style>.del { background: #faa; } .add { background: #afa; }</style></head><pre><span class=del>foo</span><span class=add>bar</span></pre>" + self.assertEqual(wdiff, expected_wdiff) + # Running the full wdiff_text method should give the same result. + base._wdiff_available = True # In case it's somehow already disabled. + wdiff = port.wdiff_text(actual.name, expected.name) + self.assertEqual(wdiff, expected_wdiff) + # wdiff should still be available after running wdiff_text with a valid diff. + self.assertTrue(base._wdiff_available) + actual.close() + expected.close() + + # Bogus paths should raise a script error. + self.assertRaises(ScriptError, port._run_wdiff, "/does/not/exist", "/does/not/exist2") + self.assertRaises(ScriptError, port.wdiff_text, "/does/not/exist", "/does/not/exist2") + # wdiff will still be available after running wdiff_text with invalid paths. + self.assertTrue(base._wdiff_available) + base._wdiff_available = True + + # If wdiff does not exist _run_wdiff should throw an OSError. + port._path_to_wdiff = lambda: "/invalid/path/to/wdiff" + self.assertRaises(OSError, port._run_wdiff, "foo", "bar") + + # wdiff_text should not throw an error if wdiff does not exist. + self.assertEqual(port.wdiff_text("foo", "bar"), "") + # However wdiff should not be available after running wdiff_text if wdiff is missing. + self.assertFalse(base._wdiff_available) + base._wdiff_available = True + + +class DriverTest(unittest.TestCase): + + def _assert_wrapper(self, wrapper_string, expected_wrapper): + wrapper = base.Driver._command_wrapper(wrapper_string) + self.assertEqual(wrapper, expected_wrapper) + + def test_command_wrapper(self): + self._assert_wrapper(None, []) + self._assert_wrapper("valgrind", ["valgrind"]) + + # Validate that shlex works as expected. 
+ command_with_spaces = "valgrind --smc-check=\"check with spaces!\" --foo" + expected_parse = ["valgrind", "--smc-check=check with spaces!", "--foo"] + self._assert_wrapper(command_with_spaces, expected_parse) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py index 8bae2a9..bcbd498 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium.py @@ -29,6 +29,9 @@ """Chromium implementations of the Port interface.""" +from __future__ import with_statement + +import codecs import logging import os import shutil @@ -41,6 +44,8 @@ import webbrowser import base import http_server +from webkitpy.common.system.executive import Executive + # FIXME: To use the DRT-based version of this file, we need to be able to # run the webkit code, which uses server_process, which requires UNIX-style # non-blocking I/O with selects(), which requires fcntl() which doesn't exist @@ -77,8 +82,8 @@ def check_file_exists(path_to_file, file_description, override_step=None, class ChromiumPort(base.Port): """Abstract base class for Chromium implementations of the Port class.""" - def __init__(self, port_name=None, options=None): - base.Port.__init__(self, port_name, options) + def __init__(self, port_name=None, options=None, **kwargs): + base.Port.__init__(self, port_name, options, **kwargs) self._chromium_base_dir = None def baseline_path(self): @@ -115,10 +120,8 @@ class ChromiumPort(base.Port): return result def check_sys_deps(self, needs_http): - dump_render_tree_binary_path = self._path_to_driver() - proc = subprocess.Popen([dump_render_tree_binary_path, - '--check-layout-test-sys-deps']) - if proc.wait(): + cmd = [self._path_to_driver(), '--check-layout-test-sys-deps'] + if self._executive.run_command(cmd, return_exit_code=True): _log.error('System dependencies check failed.') _log.error('To override, invoke with --nocheck-sys-deps') _log.error('') @@ -140,6 +143,7 @@ class ChromiumPort(base.Port): abspath = os.path.abspath(__file__) offset = abspath.find('third_party') if offset == -1: + # FIXME: This seems like the wrong error to throw. 
raise AssertionError('could not find Chromium base dir from ' + abspath) self._chromium_base_dir = abspath[0:offset] @@ -169,20 +173,23 @@ class ChromiumPort(base.Port): def show_results_html_file(self, results_filename): uri = self.filename_to_uri(results_filename) if self._options.use_drt: + # FIXME: This should use User.open_url webbrowser.open(uri, new=1) else: + # Note: Not thread safe: http://bugs.python.org/issue2320 subprocess.Popen([self._path_to_driver(), uri]) - def start_driver(self, image_path, options): + def create_driver(self, image_path, options): """Starts a new Driver and returns a handle to it.""" if self._options.use_drt: - return webkit.WebKitDriver(self, image_path, options) - return ChromiumDriver(self, image_path, options) + return webkit.WebKitDriver(self, image_path, options, executive=self._executive) + return ChromiumDriver(self, image_path, options, executive=self._executive) def start_helper(self): helper_path = self._path_to_helper() if helper_path: _log.debug("Starting layout helper %s" % helper_path) + # Note: Not thread safe: http://bugs.python.org/issue2320 self._helper = subprocess.Popen([helper_path], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None) is_ready = self._helper.stdout.readline() @@ -194,6 +201,8 @@ class ChromiumPort(base.Port): _log.debug("Stopping layout test helper") self._helper.stdin.write("x\n") self._helper.stdin.close() + # wait() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 self._helper.wait() def test_base_platform_names(self): @@ -204,19 +213,20 @@ class ChromiumPort(base.Port): Basically this string should contain the equivalent of a test_expectations file. See test_expectations.py for more details.""" - expectations_file = self.path_to_test_expectations_file() - return file(expectations_file, "r").read() + expectations_path = self.path_to_test_expectations_file() + with codecs.open(expectations_path, "r", "utf-8") as file: + return file.read() def test_expectations_overrides(self): try: - overrides_file = self.path_from_chromium_base('webkit', 'tools', + overrides_path = self.path_from_chromium_base('webkit', 'tools', 'layout_tests', 'test_expectations.txt') except AssertionError: return None - if os.path.exists(overrides_file): - return file(overrides_file, "r").read() - else: + if not os.path.exists(overrides_path): return None + with codecs.open(overrides_path, "r", "utf-8") as file: + return file.read() def test_platform_names(self): return self.test_base_platform_names() + ('win-xp', @@ -265,29 +275,26 @@ class ChromiumPort(base.Port): class ChromiumDriver(base.Driver): - """Abstract interface for the DumpRenderTree interface.""" + """Abstract interface for test_shell.""" - def __init__(self, port, image_path, options): + def __init__(self, port, image_path, options, executive=Executive()): self._port = port - self._options = options self._configuration = port._options.configuration + # FIXME: _options is very confusing, because it's not an Options() element. + # FIXME: These don't need to be passed into the constructor, but could rather + # be passed into .start() + self._options = options self._image_path = image_path + self._executive = executive + def start(self): + # FIXME: Should be an error to call this method twice. cmd = [] - # Hook for injecting valgrind or other runtime instrumentation, - # used by e.g. tools/valgrind/valgrind_tests.py. 
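The hunk continuing below removes the old str.split() handling of the wrapper option, which (as its own comment concedes) splits quoted strings incorrectly; the new _command_wrapper() uses shlex.split() instead. A quick runnable comparison, mirroring the wrapper string from base_unittest.py:

import shlex

wrapper = 'valgrind --smc-check="check with spaces!" --foo'
print wrapper.split()
# ['valgrind', '--smc-check="check', 'with', 'spaces!"', '--foo']  (wrong)
print shlex.split(wrapper)
# ['valgrind', '--smc-check=check with spaces!', '--foo']  (what we want)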
- wrapper = os.environ.get("BROWSER_WRAPPER", None) - if wrapper != None: - cmd += [wrapper] - if self._port._options.wrapper: - # This split() isn't really what we want -- it incorrectly will - # split quoted strings within the wrapper argument -- but in - # practice it shouldn't come up and the --help output warns - # about it anyway. - cmd += self._options.wrapper.split() - cmd += [port._path_to_driver(), '--layout-tests'] - if options: - cmd += options + # FIXME: We should not be grabbing at self._port._options.wrapper directly. + cmd += self._command_wrapper(self._port._options.wrapper) + cmd += [self._port._path_to_driver(), '--layout-tests'] + if self._options: + cmd += self._options # We need to pass close_fds=True to work around Python bug #2320 # (otherwise we can hang when we kill DumpRenderTree when we are running @@ -299,12 +306,42 @@ class ChromiumDriver(base.Driver): stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=close_flag) + def poll(self): + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 return self._proc.poll() def returncode(self): return self._proc.returncode + def _write_command_and_read_line(self, input=None): + """Returns a tuple: (line, did_crash)""" + try: + if input: + if isinstance(input, unicode): + # TestShell expects utf-8 + input = input.encode("utf-8") + self._proc.stdin.write(input) + # DumpRenderTree text output is always UTF-8. However some tests + # (e.g. webarchive) may spit out binary data instead of text so we + # don't bother to decode the output (for either DRT or test_shell). + line = self._proc.stdout.readline() + # We could assert() here that line correctly decodes as UTF-8. + return (line, False) + except IOError, e: + _log.error("IOError communicating w/ test_shell: " + str(e)) + return (None, True) + + def _test_shell_command(self, uri, timeoutms, checksum): + cmd = uri + if timeoutms: + cmd += ' ' + str(timeoutms) + if checksum: + cmd += ' ' + checksum + cmd += "\n" + return cmd + def run_test(self, uri, timeoutms, checksum): output = [] error = [] @@ -314,26 +351,16 @@ class ChromiumDriver(base.Driver): actual_checksum = None start_time = time.time() - cmd = uri - if timeoutms: - cmd += ' ' + str(timeoutms) - if checksum: - cmd += ' ' + checksum - cmd += "\n" - try: - self._proc.stdin.write(cmd) - line = self._proc.stdout.readline() - except IOError, e: - _log.error("IOError communicating w/ test_shell: " + str(e)) - crash = True + cmd = self._test_shell_command(uri, timeoutms, checksum) + (line, crash) = self._write_command_and_read_line(input=cmd) while not crash and line.rstrip() != "#EOF": # Make sure we haven't crashed. if line == '' and self.poll() is not None: # This is hex code 0xc000001d, which is used for abrupt # termination. This happens if we hit ctrl+c from the prompt - # and we happen to be waiting on the DumpRenderTree. + # and we happen to be waiting on test_shell. # sdoyon: Not sure for which OS and in what circumstances the # above code is valid. 
What works for me under Linux to detect # ctrl+c is for the subprocess returncode to be negative @@ -361,11 +388,7 @@ class ChromiumDriver(base.Driver): else: error.append(line) - try: - line = self._proc.stdout.readline() - except IOError, e: - _log.error("IOError while reading: " + str(e)) - crash = True + (line, crash) = self._write_command_and_read_line(input=None) return (crash, timeout, actual_checksum, ''.join(output), ''.join(error)) @@ -379,17 +402,18 @@ class ChromiumDriver(base.Driver): if sys.platform not in ('win32', 'cygwin'): # Closing stdin/stdout/stderr hangs sometimes on OS X, # (see __init__(), above), and anyway we don't want to hang - # the harness if DumpRenderTree is buggy, so we wait a couple - # seconds to give DumpRenderTree a chance to clean up, but then + # the harness if test_shell is buggy, so we wait a couple + # seconds to give test_shell a chance to clean up, but then # force-kill the process if necessary. KILL_TIMEOUT = 3.0 timeout = time.time() + KILL_TIMEOUT + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 while self._proc.poll() is None and time.time() < timeout: time.sleep(0.1) + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 if self._proc.poll() is None: _log.warning('stopping test driver timed out, ' 'killing it') - null = open(os.devnull, "w") - subprocess.Popen(["kill", "-9", - str(self._proc.pid)], stderr=null) - null.close() + self._executive.kill_process(self._proc.pid) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py index 9a595f2..a01bd14 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_linux.py @@ -31,9 +31,7 @@ import logging import os -import platform import signal -import subprocess import chromium @@ -122,15 +120,9 @@ class ChromiumLinuxPort(chromium.ChromiumPort): _log.error(' Please install using: "sudo apt-get install ' 'wdiff"') _log.error('') + # FIXME: The ChromiumMac port always returns True. return result - - def _kill_all_process(self, process_name): - null = open(os.devnull) - subprocess.call(['killall', '-TERM', '-u', os.getenv('USER'), - process_name], stderr=null) - null.close() - def _path_to_apache(self): if self._is_redhat_based(): return '/usr/sbin/httpd' @@ -187,8 +179,8 @@ class ChromiumLinuxPort(chromium.ChromiumPort): # TODO(mmoss) This isn't ideal, since it could conflict with # lighttpd processes not started by http_server.py, # but good enough for now. 
- self._kill_all_process('lighttpd') - self._kill_all_process('apache2') + self._executive.kill_all("lighttpd") + self._executive.kill_all("apache2") else: try: os.kill(server_pid, signal.SIGTERM) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py index d5e1757..4ead26f 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac.py @@ -33,10 +33,11 @@ import logging import os import platform import signal -import subprocess import chromium +from webkitpy.common.system.executive import Executive + _log = logging.getLogger("webkitpy.layout_tests.port.chromium_mac") @@ -66,6 +67,15 @@ class ChromiumMacPort(chromium.ChromiumPort): 'MacBuildInstructions') return result + def default_child_processes(self): + # FIXME: we need to run single-threaded for now. See + # https://bugs.webkit.org/show_bug.cgi?id=38553. Unfortunately this + # routine is called right before the logger is configured, so if we + # try to _log.warning(), it gets thrown away. + import sys + sys.stderr.write("Defaulting to one child - see https://bugs.webkit.org/show_bug.cgi?id=38553\n") + return 1 + def driver_name(self): """name for this port's equivalent of DumpRenderTree.""" if self._options.use_drt: @@ -99,33 +109,18 @@ class ChromiumMacPort(chromium.ChromiumPort): return self.path_from_chromium_base('xcodebuild', *comps) def _check_wdiff_install(self): - f = open(os.devnull, 'w') - rcode = 0 try: - rcode = subprocess.call(['wdiff'], stderr=f) + # We're ignoring the return and always returning True + self._executive.run_command([self._path_to_wdiff()], error_handler=Executive.ignore_error) except OSError: _log.warning('wdiff not found. Install using MacPorts or some ' 'other means') - pass - f.close() return True def _lighttpd_path(self, *comps): return self.path_from_chromium_base('third_party', 'lighttpd', 'mac', *comps) - def _kill_all_process(self, process_name): - """Kill any processes running under this name.""" - # On Mac OS X 10.6, killall has a new constraint: -SIGNALNAME or - # -SIGNALNUMBER must come first. Example problem: - # $ killall -u $USER -TERM lighttpd - # killall: illegal option -- T - # Use of the earlier -TERM placement is just fine on 10.5. - null = open(os.devnull) - subprocess.call(['killall', '-TERM', '-u', os.getenv('USER'), - process_name], stderr=null) - null.close() - def _path_to_apache(self): return '/usr/sbin/httpd' @@ -177,8 +172,8 @@ class ChromiumMacPort(chromium.ChromiumPort): # TODO(mmoss) This isn't ideal, since it could conflict with # lighttpd processes not started by http_server.py, # but good enough for now. - self._kill_all_process('lighttpd') - self._kill_all_process('httpd') + self._executive.kill_all('lighttpd') + self._executive.kill_all('httpd') else: try: os.kill(server_pid, signal.SIGTERM) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac_unittest.py new file mode 100644 index 0000000..d63faa0 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_mac_unittest.py @@ -0,0 +1,40 @@ +# Copyright (C) 2010 Google Inc. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import chromium_mac +import unittest + +from webkitpy.thirdparty.mock import Mock + + +class ChromiumMacPortTest(unittest.TestCase): + + def test_check_wdiff_install(self): + port = chromium_mac.ChromiumMacPort() + # Currently is always true, just logs if missing. + self.assertTrue(port._check_wdiff_install()) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_unittest.py new file mode 100644 index 0000000..95d6378 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_unittest.py @@ -0,0 +1,80 @@ +# Copyright (C) 2010 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import chromium
+import unittest
+import StringIO
+
+from webkitpy.thirdparty.mock import Mock
+
+
+class ChromiumDriverTest(unittest.TestCase):
+
+ def setUp(self):
+ mock_port = Mock()
+ # FIXME: The Driver should not be grabbing at port._options!
+ mock_port._options = Mock()
+ mock_port._options.wrapper = ""
+ self.driver = chromium.ChromiumDriver(mock_port, image_path=None, options=None)
+
+ def test_test_shell_command(self):
+ expected_command = "test.html 2 checksum\n"
+ self.assertEqual(self.driver._test_shell_command("test.html", 2, "checksum"), expected_command)
+
+ def _assert_write_command_and_read_line(self, input=None, expected_line=None, expected_stdin=None, expected_crash=False):
+ if not expected_stdin:
+ if input:
+ expected_stdin = input
+ else:
+ # We reset stdin, so we should expect stdin.getvalue() == ""
+ expected_stdin = ""
+ self.driver._proc.stdin = StringIO.StringIO()
+ line, did_crash = self.driver._write_command_and_read_line(input)
+ self.assertEqual(self.driver._proc.stdin.getvalue(), expected_stdin)
+ self.assertEqual(line, expected_line)
+ self.assertEqual(did_crash, expected_crash)
+
+ def test_write_command_and_read_line(self):
+ self.driver._proc = Mock()
+ # Set up to read 3 lines before we get an IOError
+ self.driver._proc.stdout = StringIO.StringIO("first\nsecond\nthird\n")
+
+ unicode_input = u"I \u2661 Unicode"
+ utf8_input = unicode_input.encode("utf-8")
+ # Test unicode input conversion to utf-8
+ self._assert_write_command_and_read_line(input=unicode_input, expected_stdin=utf8_input, expected_line="first\n")
+ # Test str() input.
+ self._assert_write_command_and_read_line(input="foo", expected_line="second\n")
+ # Test input=None
+ self._assert_write_command_and_read_line(expected_line="third\n")
+ # Test reading from a closed/empty stream.
+ # reading from a StringIO does not raise IOError like a real file would, so raise IOError manually.
+ def mock_readline():
+ raise IOError
+ self.driver._proc.stdout.readline = mock_readline
+ self._assert_write_command_and_read_line(expected_crash=True)
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py
index 2e3de85..302af86 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/chromium_win.py
@@ -31,9 +31,6 @@
 import logging
 import os
-import platform
-import signal
-import subprocess
 import sys
 
 import chromium
@@ -151,11 +148,7 @@ class ChromiumWinPort(chromium.ChromiumPort):
 Args:
 server_pid: The process ID of the running server.
""" - subprocess.Popen(('taskkill.exe', '/f', '/im', 'LightTPD.exe'), - stdin=open(os.devnull, 'r'), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).wait() - subprocess.Popen(('taskkill.exe', '/f', '/im', 'httpd.exe'), - stdin=open(os.devnull, 'r'), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).wait() + # FIXME: Why are we ignoring server_pid and calling + # _kill_all instead of Executive.kill_process(pid)? + self._executive.kill_all("LightTPD.exe") + self._executive.kill_all("httpd.exe") diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/dryrun.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/dryrun.py index 7a6717f..2cbb1b9 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/dryrun.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/dryrun.py @@ -116,7 +116,7 @@ class DryRunPort(object): def stop_websocket_server(self): pass - def start_driver(self, image_path, options): + def create_driver(self, image_path, options): return DryrunDriver(self, image_path, options) @@ -153,6 +153,9 @@ class DryrunDriver(base.Driver): hash = None return (False, False, hash, text_output, None) + def start(self): + pass + def stop(self): pass diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/gtk.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/gtk.py index de5e28a..59dc1d9 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/gtk.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/gtk.py @@ -30,7 +30,6 @@ import logging import os -import subprocess from webkitpy.layout_tests.port.webkit import WebKitPort @@ -61,12 +60,6 @@ class GtkPort(WebKitPort): return os.path.join(self.layout_tests_dir(), 'http', 'conf', 'apache2-debian-httpd.conf') - def _kill_all_process(self, process_name): - null = open(os.devnull) - subprocess.call(['killall', '-TERM', '-u', os.getenv('USER'), - process_name], stderr=null) - null.close() - def _shut_down_http_server(self, server_pid): """Shut down the httpd web server. Blocks until it's fully shut down. @@ -79,7 +72,7 @@ class GtkPort(WebKitPort): # FIXME: This isn't ideal, since it could conflict with # lighttpd processes not started by http_server.py, # but good enough for now. - self._kill_all_process('apache2') + self._executive.kill_all('apache2') else: try: os.kill(server_pid, signal.SIGTERM) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/http_server.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/http_server.py index cc434bc..fbe47e3 100755 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/http_server.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/http_server.py @@ -29,7 +29,9 @@ """A class to help start/stop the lighttpd server used by layout tests.""" +from __future__ import with_statement +import codecs import logging import optparse import os @@ -114,11 +116,14 @@ class Lighttpd(http_server_base.HttpServerBase): self.remove_log_files(self._output_dir, "error.log-") # Write out the config - f = file(base_conf_file, 'rb') - base_conf = f.read() - f.close() - - f = file(out_conf_file, 'wb') + with codecs.open(base_conf_file, "r", "utf-8") as file: + base_conf = file.read() + + # FIXME: This should be re-worked so that this block can + # use with open() instead of a manual file.close() call. + # lighttpd.conf files seem to be UTF-8 without BOM: + # http://redmine.lighttpd.net/issues/992 + f = codecs.open(out_conf_file, "w", "utf-8") f.write(base_conf) # Write out our cgi handlers. 
Run perl through env so that it @@ -205,9 +210,11 @@ class Lighttpd(http_server_base.HttpServerBase): if sys.platform == 'win32' and self._register_cygwin: setup_mount = self._port_obj.path_from_chromium_base('third_party', 'cygwin', 'setup_mount.bat') + # FIXME: Should use Executive.run_command subprocess.Popen(setup_mount).wait() _log.debug('Starting http server') + # FIXME: Should use Executive.run_command self._process = subprocess.Popen(start_cmd, env=env) # Wait for server to start. @@ -234,5 +241,7 @@ class Lighttpd(http_server_base.HttpServerBase): self._port_obj._shut_down_http_server(httpd_pid) if self._process: + # wait() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 self._process.wait() self._process = None diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/mac.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/mac.py index cf4daa8..350b088 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/mac.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/mac.py @@ -30,15 +30,8 @@ import logging import os -import pdb import platform -import re -import shutil import signal -import subprocess -import sys -import time -import webbrowser import webkitpy.common.system.ospath as ospath import webkitpy.layout_tests.port.server_process as server_process @@ -131,18 +124,6 @@ class MacPort(WebKitPort): "platform/win", ] - # FIXME: This doesn't have anything to do with WebKit. - def _kill_all_process(self, process_name): - # On Mac OS X 10.6, killall has a new constraint: -SIGNALNAME or - # -SIGNALNUMBER must come first. Example problem: - # $ killall -u $USER -TERM lighttpd - # killall: illegal option -- T - # Use of the earlier -TERM placement is just fine on 10.5. - null = open(os.devnull) - subprocess.call(['killall', '-TERM', '-u', os.getenv('USER'), - process_name], stderr=null) - null.close() - def _path_to_apache_config_file(self): return os.path.join(self.layout_tests_dir(), 'http', 'conf', 'apache2-httpd.conf') @@ -160,7 +141,7 @@ class MacPort(WebKitPort): # FIXME: This isn't ideal, since it could conflict with # lighttpd processes not started by http_server.py, # but good enough for now. 
- self._kill_all_process('httpd') + self._executive.kill_all('httpd') else: try: os.kill(server_pid, signal.SIGTERM) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/mac_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/mac_unittest.py index e47a4a4..ae7d40c 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/mac_unittest.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/mac_unittest.py @@ -40,7 +40,7 @@ class MacTest(unittest.TestCase): relative_paths = [path[len(port.path_from_webkit_base()):] for path in skipped_paths] self.assertEqual(relative_paths, ['LayoutTests/platform/mac-leopard/Skipped', 'LayoutTests/platform/mac/Skipped']) - example_skipped_file = """ + example_skipped_file = u""" # <rdar://problem/5647952> fast/events/mouseout-on-window.html needs mac DRT to issue mouse out events fast/events/mouseout-on-window.html diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/qt.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/qt.py index 67cdefe..9032a24 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/qt.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/qt.py @@ -30,7 +30,6 @@ import logging import os -import subprocess import signal from webkitpy.layout_tests.port.webkit import WebKitPort @@ -62,12 +61,6 @@ class QtPort(WebKitPort): return os.path.join(self.layout_tests_dir(), 'http', 'conf', 'apache2-debian-httpd.conf') - def _kill_all_process(self, process_name): - null = open(os.devnull) - subprocess.call(['killall', '-TERM', '-u', os.getenv('USER'), - process_name], stderr=null) - null.close() - def _shut_down_http_server(self, server_pid): """Shut down the httpd web server. Blocks until it's fully shut down. @@ -80,7 +73,7 @@ class QtPort(WebKitPort): # FIXME: This isn't ideal, since it could conflict with # lighttpd processes not started by http_server.py, # but good enough for now. - self._kill_all_process('apache2') + self._executive.kill_all('apache2') else: try: os.kill(server_pid, signal.SIGTERM) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/server_process.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/server_process.py index f1c6d73..62ca693 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/server_process.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/server_process.py @@ -38,6 +38,8 @@ import subprocess import sys import time +from webkitpy.common.system.executive import Executive + _log = logging.getLogger("webkitpy.layout_tests.port.server_process") @@ -48,12 +50,13 @@ class ServerProcess: indefinitely. 
The class also handles transparently restarting processes as necessary to keep issuing commands.""" - def __init__(self, port_obj, name, cmd, env=None): + def __init__(self, port_obj, name, cmd, env=None, executive=Executive()): self._port = port_obj self._name = name self._cmd = cmd self._env = env self._reset() + self._executive = executive def _reset(self): self._proc = None @@ -66,6 +69,7 @@ class ServerProcess: if self._proc: raise ValueError("%s already running" % self._name) self._reset() + # close_fds is a workaround for http://bugs.python.org/issue2320 close_fds = sys.platform not in ('win32', 'cygwin') self._proc = subprocess.Popen(self._cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -100,6 +104,8 @@ class ServerProcess: """Check to see if the underlying process is running; returns None if it still is (wrapper around subprocess.poll).""" if self._proc: + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 return self._proc.poll() return None @@ -164,6 +170,8 @@ class ServerProcess: select_fds = (out_fd, err_fd) deadline = time.time() + timeout while not self.timed_out and not self.crashed: + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 if self._proc.poll() != None: self.crashed = True self.handle_interrupt() @@ -210,14 +218,15 @@ class ServerProcess: # force-kill the process if necessary. KILL_TIMEOUT = 3.0 timeout = time.time() + KILL_TIMEOUT + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 while self._proc.poll() is None and time.time() < timeout: time.sleep(0.1) + # poll() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 if self._proc.poll() is None: _log.warning('stopping %s timed out, killing it' % self._name) - null = open(os.devnull, "w") - subprocess.Popen(["kill", "-9", - str(self._proc.pid)], stderr=null) - null.close() + self._executive.kill_process(self._proc.pid) _log.warning('killed') self._reset() diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/test.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/test.py index edef485..5d563cd 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/test.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/test.py @@ -66,6 +66,13 @@ class TestPort(base.Port): expected_filename, actual_filename): return '' + def relative_test_filename(self, filename): + return filename + + def expected_filename(self, filename, suffix): + (basename, ext) = os.path.splitext(filename) + return basename + '.' 
+ suffix + def name(self): return self._name @@ -81,7 +88,7 @@ class TestPort(base.Port): def show_results_html_file(self, filename): pass - def start_driver(self, image_path, options): + def create_driver(self, image_path, options): return TestDriver(image_path, options, self) def start_http_server(self): @@ -132,5 +139,8 @@ class TestDriver(base.Driver): def run_test(self, uri, timeoutms, image_hash): return (False, False, image_hash, '', None) + def start(self): + pass + def stop(self): pass diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/webkit.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/webkit.py index f2f5237..ada83ce 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/webkit.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/webkit.py @@ -29,18 +29,21 @@ """WebKit implementations of the Port interface.""" + +from __future__ import with_statement + +import codecs import logging import os -import pdb -import platform import re import shutil import signal -import subprocess import sys import time import webbrowser +from webkitpy.common.system.executive import Executive + import webkitpy.common.system.ospath as ospath import webkitpy.layout_tests.port.base as base import webkitpy.layout_tests.port.server_process as server_process @@ -51,8 +54,8 @@ _log = logging.getLogger("webkitpy.layout_tests.port.webkit") class WebKitPort(base.Port): """WebKit implementation of the Port class.""" - def __init__(self, port_name=None, options=None): - base.Port.__init__(self, port_name, options) + def __init__(self, port_name=None, options=None, **kwargs): + base.Port.__init__(self, port_name, options, **kwargs) self._cached_build_root = None self._cached_apache_path = None @@ -134,9 +137,11 @@ class WebKitPort(base.Port): sp = server_process.ServerProcess(self, 'ImageDiff', command) actual_length = os.stat(actual_filename).st_size - actual_file = open(actual_filename).read() + with open(actual_filename) as file: + actual_file = file.read() expected_length = os.stat(expected_filename).st_size - expected_file = open(expected_filename).read() + with open(expected_filename) as file: + expected_file = file.read() sp.write('Content-Length: %d\n%sContent-Length: %d\n%s' % (actual_length, actual_file, expected_length, expected_file)) @@ -165,7 +170,8 @@ class WebKitPort(base.Port): if m.group(2) == 'passed': result = False elif output and diff_filename: - open(diff_filename, 'w').write(output) # FIXME: This leaks a file handle. + with open(diff_filename, 'w') as file: + file.write(output) elif sp.timed_out: _log.error("ImageDiff timed out on %s" % expected_filename) elif sp.crashed: @@ -187,8 +193,8 @@ class WebKitPort(base.Port): # FIXME: We should open results in the version of WebKit we built. 
webbrowser.open(uri, new=1) - def start_driver(self, image_path, options): - return WebKitDriver(self, image_path, options) + def create_driver(self, image_path, options): + return WebKitDriver(self, image_path, options, executive=self._executive) def test_base_platform_names(self): # At the moment we don't use test platform names, but we have @@ -252,17 +258,16 @@ class WebKitPort(base.Port): if not os.path.exists(filename): _log.warn("Failed to open Skipped file: %s" % filename) continue - skipped_file = file(filename) - tests_to_skip.extend(self._tests_from_skipped_file(skipped_file)) - skipped_file.close() + with codecs.open(filename, "r", "utf-8") as skipped_file: + tests_to_skip.extend(self._tests_from_skipped_file(skipped_file)) return tests_to_skip def test_expectations(self): # The WebKit mac port uses a combination of a test_expectations file # and 'Skipped' files. - expectations_file = self.path_to_test_expectations_file() - expectations = file(expectations_file, "r").read() - return expectations + self._skips() + expectations_path = self.path_to_test_expectations_file() + with codecs.open(expectations_path, "r", "utf-8") as file: + return file.read() + self._skips() def _skips(self): # Each Skipped file contains a list of files @@ -341,28 +346,17 @@ class WebKitPort(base.Port): class WebKitDriver(base.Driver): """WebKit implementation of the DumpRenderTree interface.""" - def __init__(self, port, image_path, driver_options): + def __init__(self, port, image_path, driver_options, executive=Executive()): self._port = port - self._driver_options = driver_options + # FIXME: driver_options is never used. self._image_path = image_path + def start(self): command = [] - # Hook for injecting valgrind or other runtime instrumentation, - # used by e.g. tools/valgrind/valgrind_tests.py. - wrapper = os.environ.get("BROWSER_WRAPPER", None) - if wrapper != None: - command += [wrapper] - if self._port._options.wrapper: - # This split() isn't really what we want -- it incorrectly will - # split quoted strings within the wrapper argument -- but in - # practice it shouldn't come up and the --help output warns - # about it anyway. - # FIXME: Use a real shell parser. - command += self._options.wrapper.split() - - command += [port._path_to_driver(), '-'] - - if image_path: + # FIXME: We should not be grabbing at self._port._options.wrapper directly. + command += self._command_wrapper(self._port._options.wrapper) + command += [self._port._path_to_driver(), '-'] + if self._image_path: command.append('--pixel-tests') environment = os.environ environment['DYLD_FRAMEWORK_PATH'] = self._port._build_path() @@ -391,13 +385,12 @@ class WebKitDriver(base.Driver): command += "'" + image_hash command += "\n" - # pdb.set_trace() self._server_process.write(command) have_seen_content_type = False actual_image_hash = None - output = '' - image = '' + output = str() # Use a byte array for output, even though it should be UTF-8. + image = str() timeout = int(timeoutms) / 1000.0 deadline = time.time() + timeout @@ -409,6 +402,10 @@ class WebKitDriver(base.Driver): have_seen_content_type): have_seen_content_type = True else: + # Note: Text output from DumpRenderTree is always UTF-8. + # However, some tests (e.g. webarchives) spit out binary + # data instead of text. So to make things simple, we + # always treat the output as binary. 
                output += line
            line = self._server_process.read_line(timeout)
            timeout = deadline - time.time()
@@ -433,14 +430,24 @@ class WebKitDriver(base.Driver):
            line = self._server_process.read_line(timeout)

        if self._image_path and len(self._image_path):
-            image_file = file(self._image_path, "wb")
-            image_file.write(image)
-            image_file.close()
+            with open(self._image_path, "wb") as image_file:
+                image_file.write(image)
+
+        error_lines = self._server_process.error.splitlines()
+        # FIXME: This is a hack. It is unclear why we sometimes do not
+        # get any error lines from the server_process; probably we are
+        # not flushing stderr.
+        if error_lines and error_lines[-1] == "#EOF":
+            error_lines.pop()  # Remove the expected "#EOF"
+        error = "\n".join(error_lines)
+        # FIXME: This seems like the wrong section of code to be doing
+        # this reset in.
+        self._server_process.error = ""
        return (self._server_process.crashed,
                self._server_process.timed_out,
                actual_image_hash,
                output,
-                self._server_process.error)
+                error)

    def stop(self):
        if self._server_process:
diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py
index a9ba160..ad557bd 100644
--- a/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py
+++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/websocket_server.py
@@ -30,6 +30,9 @@
 """A class to help start/stop the PyWebSocket server used by layout tests."""

+from __future__ import with_statement
+
+import codecs
 import logging
 import optparse
 import os
@@ -151,7 +154,7 @@ class PyWebSocket(http_server.Lighttpd):
            error_log = os.path.join(self._output_dir, log_file_name + "-err.txt")

        output_log = os.path.join(self._output_dir, log_file_name + "-out.txt")
-        self._wsout = open(output_log, "w")
+        self._wsout = codecs.open(output_log, "w", "utf-8")

        python_interp = sys.executable
        pywebsocket_base = os.path.join(
@@ -204,6 +207,7 @@ class PyWebSocket(http_server.Lighttpd):
                        self._server_name, self._port))
        _log.debug('cmdline: %s' % ' '.join(start_cmd))
        # FIXME: We should direct this call through Executive for testing.
+        # Note: Not thread safe: http://bugs.python.org/issue2320
        self._process = subprocess.Popen(start_cmd,
                                         stdin=open(os.devnull, 'r'),
                                         stdout=self._wsout,
@@ -216,7 +220,7 @@ class PyWebSocket(http_server.Lighttpd):
            url = 'http'
        url = url + '://127.0.0.1:%d/' % self._port
        if not url_is_alive(url):
-            fp = open(output_log)
+            fp = codecs.open(output_log, "r", "utf-8")
            try:
                for line in fp:
                    _log.error(line)
@@ -231,9 +235,8 @@ class PyWebSocket(http_server.Lighttpd):
            raise PyWebSocketNotStarted(
                'Failed to start %s server.'
% self._server_name) if self._pidfile: - f = open(self._pidfile, 'w') - f.write("%d" % self._process.pid) - f.close() + with codecs.open(self._pidfile, "w", "ascii") as file: + file.write("%d" % self._process.pid) def stop(self, force=False): if not force and not self.is_running(): @@ -243,9 +246,8 @@ class PyWebSocket(http_server.Lighttpd): if self._process: pid = self._process.pid elif self._pidfile: - f = open(self._pidfile) - pid = int(f.read().strip()) - f.close() + with codecs.open(self._pidfile, "r", "ascii") as file: + pid = int(file.read().strip()) if not pid: raise PyWebSocketNotFound( @@ -256,6 +258,8 @@ class PyWebSocket(http_server.Lighttpd): Executive().kill_process(pid) if self._process: + # wait() is not threadsafe and can throw OSError due to: + # http://bugs.python.org/issue1731717 self._process.wait() self._process = None diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/port/win.py b/WebKitTools/Scripts/webkitpy/layout_tests/port/win.py index 2bf692b..3b7a817 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/port/win.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/port/win.py @@ -30,7 +30,6 @@ import logging import os -import subprocess from webkitpy.layout_tests.port.webkit import WebKitPort @@ -69,7 +68,4 @@ class WinPort(WebKitPort): """ # Looks like we ignore server_pid. # Copy/pasted from chromium-win. - subprocess.Popen(('taskkill.exe', '/f', '/im', 'httpd.exe'), - stdin=open(os.devnull, 'r'), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).wait() + self._executive.kill_all("httpd.exe") diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/rebaseline_chromium_webkit_tests.py b/WebKitTools/Scripts/webkitpy/layout_tests/rebaseline_chromium_webkit_tests.py index b972154..211ce93 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/rebaseline_chromium_webkit_tests.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/rebaseline_chromium_webkit_tests.py @@ -41,6 +41,9 @@ The script does the following for each platform specified: At the end, the script generates a html that compares old and new baselines. """ +from __future__ import with_statement + +import codecs import copy import logging import optparse @@ -55,6 +58,8 @@ import urllib import webbrowser import zipfile +from webkitpy.common.system.executive import run_command + import port from layout_package import test_expectations from test_types import image_diff @@ -93,7 +98,9 @@ def run_shell_with_return_code(command, print_output=False): """ # Use a shell for subcommands on Windows to get a PATH search. + # FIXME: shell=True is a trail of tears, and should be removed. use_shell = sys.platform.startswith('win') + # Note: Not thread safe: http://bugs.python.org/issue2320 p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=use_shell) if print_output: @@ -278,10 +285,10 @@ class Rebaseliner(object): def get_rebaselining_tests(self): return self._rebaselining_tests + # FIXME: Callers should use scm.py instead. def _get_repo_type(self): """Get the repository type that client is using.""" - output, return_code = run_shell_with_return_code(['svn', 'info'], - False) + return_code = run_command(['svn', 'info'], return_exit_code=True) if return_code == 0: return REPO_SVN @@ -598,12 +605,14 @@ class Rebaseliner(object): os.remove(backup_file) _log.info('Saving original file to "%s"', backup_file) os.rename(path, backup_file) - f = open(path, "w") - f.write(new_expectations) - f.close() + # FIXME: What encoding are these files? + # Or is new_expectations always a byte array? 
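            # (Illustration only, not part of this patch: if new_expectations is
            # unicode text rather than a byte string -- the question the FIXME
            # above leaves open -- the encoding-explicit form would be:
            #
            #     with codecs.open(path, "w", "utf-8") as file:
            #         file.write(new_expectations)
            #
            # codecs is already imported at the top of this script.)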
+ with open(path, "w") as file: + file.write(new_expectations) else: _log.info('No test was rebaselined so nothing to remove.') + # FIXME: Callers should move to SCM.add instead. def _svn_add(self, filename): """Add the file to SVN repository. @@ -715,9 +724,10 @@ class Rebaseliner(object): base_file = get_result_file_fullpath(self._options.html_directory, baseline_filename, self._platform, 'old') - f = open(base_file, 'wb') - f.write(output) - f.close() + # FIXME: This assumes run_shell returns a byte array. + # We should be using an explicit encoding here. + with open(base_file, "wb") as file: + file.write(output) _log.info(' Html: created old baseline file: "%s".', base_file) @@ -748,9 +758,9 @@ class Rebaseliner(object): diff_file = get_result_file_fullpath( self._options.html_directory, baseline_filename, self._platform, 'diff') - f = open(diff_file, 'wb') - f.write(output) - f.close() + # FIXME: This assumes run_shell returns a byte array, not unicode() + with open(diff_file, 'wb') as file: + file.write(output) _log.info(' Html: created baseline diff file: "%s".', diff_file) @@ -835,9 +845,8 @@ class HtmlGenerator(object): 'body': html_body}) _log.debug(html) - f = open(self._html_file, 'w') - f.write(html) - f.close() + with codecs.open(self._html_file, "w", "utf-8") as file: + file.write(html) _log.info('Baseline comparison html generated at "%s"', self._html_file) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests.py b/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests.py index 73195b3..456c6f3 100755 --- a/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests.py @@ -64,7 +64,7 @@ import traceback from layout_package import test_expectations from layout_package import json_layout_results_generator -from layout_package import metered_stream +from layout_package import printing from layout_package import test_failures from layout_package import dump_render_tree_thread from layout_package import test_files @@ -80,33 +80,6 @@ import port _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests") -# dummy value used for command-line explicitness to disable defaults -LOG_NOTHING = 'nothing' - -# Display the one-line progress bar (% completed) while testing -LOG_PROGRESS = 'progress' - -# Indicates that we want detailed progress updates in the output (prints -# directory-by-directory feedback). -LOG_DETAILED_PROGRESS = 'detailed-progress' - -# Log the one-line summary at the end of the run -LOG_SUMMARY = 'summary' - -# "Trace" the test - log the expected result, the actual result, and the -# baselines used -LOG_TRACE = 'trace' - -# Log any unexpected results while running (instead of just at the end). -LOG_UNEXPECTED = 'unexpected' -LOG_UNEXPECTED_RESULTS = 'unexpected-results' - -LOG_VALUES = ",".join(("actual", "config", LOG_DETAILED_PROGRESS, "expected", - LOG_NOTHING, LOG_PROGRESS, LOG_SUMMARY, "timing", - LOG_UNEXPECTED, LOG_UNEXPECTED_RESULTS)) -LOG_DEFAULT_VALUE = ",".join((LOG_DETAILED_PROGRESS, LOG_SUMMARY, - LOG_UNEXPECTED, LOG_UNEXPECTED_RESULTS)) - # Builder base URL where we have the archived test results. 
BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/" @@ -181,7 +154,7 @@ class ResultSummary(object): """ self.tests_by_expectation[result.type].add(result.filename) - self.results[result.filename] = result.type + self.results[result.filename] = result self.remaining -= 1 if len(result.failures): self.failures[result.filename] = result.failures @@ -192,6 +165,83 @@ class ResultSummary(object): self.unexpected += 1 +def summarize_unexpected_results(port_obj, expectations, result_summary, + retry_summary): + """Summarize any unexpected results as a dict. + + FIXME: split this data structure into a separate class? + + Args: + port_obj: interface to port-specific hooks + expectations: test_expectations.TestExpectations object + result_summary: summary object from initial test runs + retry_summary: summary object from final test run of retried tests + Returns: + A dictionary containing a summary of the unexpected results from the + run, with the following fields: + 'version': a version indicator (1 in this version) + 'fixable': # of fixable tests (NOW - PASS) + 'skipped': # of skipped tests (NOW & SKIPPED) + 'num_regressions': # of non-flaky failures + 'num_flaky': # of flaky failures + 'num_passes': # of unexpected passes + 'tests': a dict of tests -> {'expected': '...', 'actual': '...'} + """ + results = {} + results['version'] = 1 + + tbe = result_summary.tests_by_expectation + tbt = result_summary.tests_by_timeline + results['fixable'] = len(tbt[test_expectations.NOW] - + tbe[test_expectations.PASS]) + results['skipped'] = len(tbt[test_expectations.NOW] & + tbe[test_expectations.SKIP]) + + num_passes = 0 + num_flaky = 0 + num_regressions = 0 + keywords = {} + for k, v in TestExpectationsFile.EXPECTATIONS.iteritems(): + keywords[v] = k.upper() + + tests = {} + for filename, result in result_summary.unexpected_results.iteritems(): + # Note that if a test crashed in the original run, we ignore + # whether or not it crashed when we retried it (if we retried it), + # and always consider the result not flaky. + test = port_obj.relative_test_filename(filename) + expected = expectations.get_expectations_string(filename) + actual = [keywords[result]] + + if result == test_expectations.PASS: + num_passes += 1 + elif result == test_expectations.CRASH: + num_regressions += 1 + else: + if filename not in retry_summary.unexpected_results: + actual.extend(expectations.get_expectations_string( + filename).split(" ")) + num_flaky += 1 + else: + retry_result = retry_summary.unexpected_results[filename] + if result != retry_result: + actual.append(keywords[retry_result]) + num_flaky += 1 + else: + num_regressions += 1 + + tests[test] = {} + tests[test]['expected'] = expected + tests[test]['actual'] = " ".join(actual) + + results['tests'] = tests + results['num_passes'] = num_passes + results['num_flaky'] = num_flaky + results['num_regressions'] = num_regressions + + return results + + class TestRunner: """A class for managing running a series of tests on a series of layout test files.""" @@ -204,19 +254,17 @@ class TestRunner: # in DumpRenderTree. DEFAULT_TEST_TIMEOUT_MS = 6 * 1000 - NUM_RETRY_ON_UNEXPECTED_FAILURE = 1 - - def __init__(self, port, options, meter): + def __init__(self, port, options, printer): """Initialize test runner data structures. Args: port: an object implementing port-specific options: a dictionary of command line options - meter: a MeteredStream object to record updates to. + printer: a Printer object to record updates to. 
""" self._port = port self._options = options - self._meter = meter + self._printer = printer # disable wss server. need to install pyOpenSSL on buildbots. # self._websocket_secure_server = websocket_server.PyWebSocket( @@ -230,13 +278,10 @@ class TestRunner: self._test_files_list = None self._result_queue = Queue.Queue() - # These are used for --log detailed-progress to track status by - # directory. - self._current_dir = None - self._current_progress_str = "" - self._current_test_number = 0 + self._retrying = False - self._retries = 0 + # Hack for dumping threads on the bots + self._last_thread_dump = None def __del__(self): _log.debug("flushing stdout") @@ -278,19 +323,20 @@ class TestRunner: else: raise err - def prepare_lists_and_print_output(self, write): + def prepare_lists_and_print_output(self): """Create appropriate subsets of test lists and returns a ResultSummary object. Also prints expected test counts. - - Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. """ # Remove skipped - both fixable and ignored - files from the # top-level list of files to test. num_all_test_files = len(self._test_files) - write("Found: %d tests" % (len(self._test_files))) + self._printer.print_expected("Found: %d tests" % + (len(self._test_files))) + if not num_all_test_files: + _log.critical("No tests to run.") + sys.exit(1) + skipped = set() if num_all_test_files > 1 and not self._options.force: skipped = self._expectations.get_tests_with_result_type( @@ -353,7 +399,7 @@ class TestRunner: tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % ( (slice_end - slice_start), slice_start, slice_end, num_tests) - write(tests_run_msg) + self._printer.print_expected(tests_run_msg) # If we reached the end and we don't have enough tests, we run some # from the beginning. 
@@ -362,14 +408,13 @@ class TestRunner: extra = 1 + chunk_len - (slice_end - slice_start) extra_msg = (' last chunk is partial, appending [0:%d]' % extra) - write(extra_msg) + self._printer.print_expected(extra_msg) tests_run_msg += "\n" + extra_msg files.extend(test_files[0:extra]) tests_run_filename = os.path.join(self._options.results_directory, "tests_run.txt") - tests_run_file = open(tests_run_filename, "w") - tests_run_file.write(tests_run_msg + "\n") - tests_run_file.close() + with codecs.open(tests_run_filename, "w", "utf-8") as file: + file.write(tests_run_msg + "\n") len_skip_chunk = int(len(files) * len(skipped) / float(len(self._test_files))) @@ -395,17 +440,18 @@ class TestRunner: result_summary = ResultSummary(self._expectations, self._test_files | skip_chunk) - self._print_expected_results_of_type(write, result_summary, + self._print_expected_results_of_type(result_summary, test_expectations.PASS, "passes") - self._print_expected_results_of_type(write, result_summary, + self._print_expected_results_of_type(result_summary, test_expectations.FAIL, "failures") - self._print_expected_results_of_type(write, result_summary, + self._print_expected_results_of_type(result_summary, test_expectations.FLAKY, "flaky") - self._print_expected_results_of_type(write, result_summary, + self._print_expected_results_of_type(result_summary, test_expectations.SKIP, "skipped") if self._options.force: - write('Running all tests, including skips (--force)') + self._printer.print_expected('Running all tests, including ' + 'skips (--force)') else: # Note that we don't actually run the skipped tests (they were # subtracted out of self._test_files, above), but we stub out the @@ -416,7 +462,7 @@ class TestRunner: time_for_diffs=0) result.type = test_expectations.SKIP result_summary.add(result, expected=True) - write("") + self._printer.print_expected('') return result_summary @@ -580,6 +626,29 @@ class TestRunner: """Returns whether we should run all the tests in the main thread.""" return int(self._options.child_processes) == 1 + def _dump_thread_states(self): + for thread_id, stack in sys._current_frames().items(): + # FIXME: Python 2.6 has thread.ident which we could + # use to map from thread_id back to thread.name + print "\n# Thread: %d" % thread_id + for filename, lineno, name, line in traceback.extract_stack(stack): + print 'File: "%s", line %d, in %s' % (filename, lineno, name) + if line: + print " %s" % (line.strip()) + + def _dump_thread_states_if_necessary(self): + # HACK: Dump thread states every minute to figure out what's + # hanging on the bots. + if not self._options.verbose: + return + dump_threads_every = 60 # Dump every minute + if not self._last_thread_dump: + self._last_thread_dump = time.time() + time_since_last_dump = time.time() - self._last_thread_dump + if time_since_last_dump > dump_threads_every: + self._dump_thread_states() + self._last_thread_dump = time.time() + def _run_tests(self, file_list, result_summary): """Runs the tests in the file_list. @@ -594,14 +663,15 @@ class TestRunner: in the form {filename:filename, test_run_time:test_run_time} result_summary: summary object to populate with the results """ + # FIXME: We should use webkitpy.tool.grammar.pluralize here. plural = "" if self._options.child_processes > 1: plural = "s" - self._meter.update('Starting %s%s ...' % - (self._port.driver_name(), plural)) + self._printer.print_update('Starting %s%s ...' 
 %
+                                   (self._port.driver_name(), plural))
        threads = self._instantiate_dump_render_tree_threads(file_list,
                                                             result_summary)
-        self._meter.update("Starting testing ...")
+        self._printer.print_update("Starting testing ...")

        # Wait for the threads to finish and collect test failures.
        failures = {}
@@ -609,21 +679,28 @@ class TestRunner:
        individual_test_timings = []
        thread_timings = []
        try:
+            # Loop through all the threads waiting for them to finish.
            for thread in threads:
+                # FIXME: We'll end up waiting on the first thread the whole
+                # time. That means we won't notice exceptions on other
+                # threads until the first one exits.
+                # We should instead use a while True: outer loop and then
+                # loop through the threads joining and checking isAlive
+                # and get_exception_info, exiting on any exception.
                while thread.isAlive():
-                    # Let it timeout occasionally so it can notice a
-                    # KeyboardInterrupt. Actually, the timeout doesn't
-                    # really matter: apparently it suffices to not use
-                    # an indefinite blocking join for it to
-                    # be interruptible by KeyboardInterrupt.
+                    # Wake the main thread every 0.1 seconds so we
+                    # can call update_summary in a timely fashion.
                    thread.join(0.1)
+                    # HACK: Used for debugging threads on the bots.
+                    self._dump_thread_states_if_necessary()
                    self.update_summary(result_summary)
+
+                # This thread is done, save off the timing information.
                thread_timings.append({'name': thread.getName(),
                                       'num_tests': thread.get_num_tests(),
                                       'total_time': thread.get_total_time()})
                test_timings.update(thread.get_directory_timing_stats())
-                individual_test_timings.extend(
-                    thread.get_test_results())
+                individual_test_timings.extend(thread.get_test_results())
        except KeyboardInterrupt:
            for thread in threads:
                thread.cancel()
@@ -637,7 +714,9 @@ class TestRunner:
                # would be assumed to have passed.
                raise exception_info[0], exception_info[1], exception_info[2]

-        # Make sure we pick up any remaining tests.
+        # FIXME: This update_summary call seems unnecessary.
+        # Calls are already made right after join() above,
+        # as well as from the individual threads themselves.
        self.update_summary(result_summary)
        return (thread_timings, test_timings, individual_test_timings)

@@ -645,7 +724,7 @@ class TestRunner:
        """Returns whether the test runner needs an HTTP server."""
        return self._contains_tests(self.HTTP_SUBDIR)

-    def run(self, result_summary, print_results):
+    def run(self, result_summary):
        """Run all our tests on all our test files.

        For each test file, we run each test type. If there are any failures,

        Args:
          result_summary: a summary object tracking the test results.
-          print_results: whether or not to print the summary at the end

        Return:
          The number of unexpected results (0 == success)

        start_time = time.time()

        if self.needs_http():
-            self._meter.update('Starting HTTP server ...')
+            self._printer.print_update('Starting HTTP server ...')
+
            self._port.start_http_server()

        if self._contains_tests(self.WEBSOCKET_SUBDIR):
-            self._meter.update('Starting WebSocket server ...')
+            self._printer.print_update('Starting WebSocket server ...')
            self._port.start_websocket_server()
            # self._websocket_secure_server.Start()

@@ -678,53 +757,34 @@ class TestRunner:
        # we want to treat even a potentially flaky crash as an error.
failures = self._get_failures(result_summary, include_crashes=False) retry_summary = result_summary - while (self._retries < self.NUM_RETRY_ON_UNEXPECTED_FAILURE and - len(failures)): + while (len(failures) and self._options.retry_failures and + not self._retrying): _log.info('') - _log.info("Retrying %d unexpected failure(s)" % len(failures)) + _log.info("Retrying %d unexpected failure(s) ..." % len(failures)) _log.info('') - self._retries += 1 + self._retrying = True retry_summary = ResultSummary(self._expectations, failures.keys()) self._run_tests(failures.keys(), retry_summary) failures = self._get_failures(retry_summary, include_crashes=True) end_time = time.time() - write = create_logging_writer(self._options, 'timing') - self._print_timing_statistics(write, end_time - start_time, - thread_timings, test_timings, - individual_test_timings, - result_summary) + self._print_timing_statistics(end_time - start_time, + thread_timings, test_timings, + individual_test_timings, + result_summary) - self._meter.update("") - - if self._options.verbose: - # We write this block to stdout for compatibility with the - # buildbot log parser, which only looks at stdout, not stderr :( - write = lambda s: sys.stdout.write("%s\n" % s) - else: - write = create_logging_writer(self._options, 'actual') - - self._print_result_summary(write, result_summary) + self._print_result_summary(result_summary) sys.stdout.flush() sys.stderr.flush() - # This summary data gets written to stdout regardless of log level - # (unless of course we're printing nothing). - if print_results: - if (LOG_DETAILED_PROGRESS in self._options.log or - (LOG_UNEXPECTED in self._options.log and - result_summary.total != result_summary.expected)): - print - if LOG_SUMMARY in self._options.log: - self._print_one_line_summary(result_summary.total, + self._printer.print_one_line_summary(result_summary.total, result_summary.expected) - unexpected_results = self._summarize_unexpected_results(result_summary, - retry_summary) - if LOG_UNEXPECTED_RESULTS in self._options.log: - self._print_unexpected_results(unexpected_results) + unexpected_results = summarize_unexpected_results(self._port, + self._expectations, result_summary, retry_summary) + self._printer.print_unexpected_results(unexpected_results) # Write the same data to log files. self._write_json_files(unexpected_results, result_summary, @@ -746,112 +806,16 @@ class TestRunner: result = self._result_queue.get_nowait() except Queue.Empty: return + expected = self._expectations.matches_an_expected_result( result.filename, result.type, self._options.pixel_tests) result_summary.add(result, expected) - self._print_test_results(result, expected, result_summary) - - def _print_test_results(self, result, expected, result_summary): - "Print the result of the test as determined by the --log switches." - if LOG_TRACE in self._options.log: - self._print_test_trace(result) - elif (LOG_DETAILED_PROGRESS in self._options.log and - (self._options.experimental_fully_parallel or - self._is_single_threaded())): - self._print_detailed_progress(result_summary) - else: - if (not expected and LOG_UNEXPECTED in self._options.log): - self._print_unexpected_test_result(result) - self._print_one_line_progress(result_summary) - - def _print_test_trace(self, result): - """Print detailed results of a test (triggered by --log trace). 
- For each test, print: - - location of the expected baselines - - expected results - - actual result - - timing info - """ - filename = result.filename - test_name = self._port.relative_test_filename(filename) - _log.info('trace: %s' % test_name) - _log.info(' txt: %s' % - self._port.relative_test_filename( - self._port.expected_filename(filename, '.txt'))) - png_file = self._port.expected_filename(filename, '.png') - if os.path.exists(png_file): - _log.info(' png: %s' % - self._port.relative_test_filename(filename)) - else: - _log.info(' png: <none>') - _log.info(' exp: %s' % - self._expectations.get_expectations_string(filename)) - _log.info(' got: %s' % - self._expectations.expectation_to_string(result.type)) - _log.info(' took: %-.3f' % result.test_run_time) - _log.info('') - - def _print_one_line_progress(self, result_summary): - """Displays the progress through the test run.""" - percent_complete = 100 * (result_summary.expected + - result_summary.unexpected) / result_summary.total - action = "Testing" - if self._retries > 0: - action = "Retrying" - self._meter.progress("%s (%d%%): %d ran as expected, %d didn't," - " %d left" % (action, percent_complete, result_summary.expected, - result_summary.unexpected, result_summary.remaining)) - - def _print_detailed_progress(self, result_summary): - """Display detailed progress output where we print the directory name - and one dot for each completed test. This is triggered by - "--log detailed-progress".""" - if self._current_test_number == len(self._test_files_list): - return - - next_test = self._test_files_list[self._current_test_number] - next_dir = os.path.dirname( - self._port.relative_test_filename(next_test)) - if self._current_progress_str == "": - self._current_progress_str = "%s: " % (next_dir) - self._current_dir = next_dir - - while next_test in result_summary.results: - if next_dir != self._current_dir: - self._meter.write("%s\n" % (self._current_progress_str)) - self._current_progress_str = "%s: ." % (next_dir) - self._current_dir = next_dir - else: - self._current_progress_str += "." 
- - if (next_test in result_summary.unexpected_results and - LOG_UNEXPECTED in self._options.log): - result = result_summary.unexpected_results[next_test] - self._meter.write("%s\n" % self._current_progress_str) - self._print_unexpected_test_result(next_test, result) - self._current_progress_str = "%s: " % self._current_dir - - self._current_test_number += 1 - if self._current_test_number == len(self._test_files_list): - break - - next_test = self._test_files_list[self._current_test_number] - next_dir = os.path.dirname( - self._port.relative_test_filename(next_test)) - - if result_summary.remaining: - remain_str = " (%d)" % (result_summary.remaining) - self._meter.progress("%s%s" % - (self._current_progress_str, remain_str)) - else: - self._meter.progress("%s\n" % (self._current_progress_str)) - - def _print_unexpected_test_result(self, result): - """Prints one unexpected test result line.""" - desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result.type][0] - self._meter.write(" %s -> unexpected %s\n" % - (self._port.relative_test_filename(result.filename), - desc)) + exp_str = self._expectations.get_expectations_string( + result.filename) + got_str = self._expectations.expectation_to_string(result.type) + self._printer.print_test_result(result, expected, exp_str, got_str) + self._printer.print_progress(result_summary, self._retrying, + self._test_files_list) def _get_failures(self, result_summary, include_crashes): """Filters a dict of results and returns only the failures. @@ -874,80 +838,6 @@ class TestRunner: return failed_results - def _summarize_unexpected_results(self, result_summary, retry_summary): - """Summarize any unexpected results as a dict. - - TODO(dpranke): split this data structure into a separate class? - - Args: - result_summary: summary object from initial test runs - retry_summary: summary object from final test run of retried tests - Returns: - A dictionary containing a summary of the unexpected results from the - run, with the following fields: - 'version': a version indicator (1 in this version) - 'fixable': # of fixable tests (NOW - PASS) - 'skipped': # of skipped tests (NOW & SKIPPED) - 'num_regressions': # of non-flaky failures - 'num_flaky': # of flaky failures - 'num_passes': # of unexpected passes - 'tests': a dict of tests -> {'expected': '...', 'actual': '...'} - """ - results = {} - results['version'] = 1 - - tbe = result_summary.tests_by_expectation - tbt = result_summary.tests_by_timeline - results['fixable'] = len(tbt[test_expectations.NOW] - - tbe[test_expectations.PASS]) - results['skipped'] = len(tbt[test_expectations.NOW] & - tbe[test_expectations.SKIP]) - - num_passes = 0 - num_flaky = 0 - num_regressions = 0 - keywords = {} - for k, v in TestExpectationsFile.EXPECTATIONS.iteritems(): - keywords[v] = k.upper() - - tests = {} - for filename, result in result_summary.unexpected_results.iteritems(): - # Note that if a test crashed in the original run, we ignore - # whether or not it crashed when we retried it (if we retried it), - # and always consider the result not flaky. 
- test = self._port.relative_test_filename(filename) - expected = self._expectations.get_expectations_string(filename) - actual = [keywords[result]] - - if result == test_expectations.PASS: - num_passes += 1 - elif result == test_expectations.CRASH: - num_regressions += 1 - else: - if filename not in retry_summary.unexpected_results: - actual.extend( - self._expectations.get_expectations_string( - filename).split(" ")) - num_flaky += 1 - else: - retry_result = retry_summary.unexpected_results[filename] - if result != retry_result: - actual.append(keywords[retry_result]) - num_flaky += 1 - else: - num_regressions += 1 - - tests[test] = {} - tests[test]['expected'] = expected - tests[test]['actual'] = " ".join(actual) - - results['tests'] = tests - results['num_passes'] = num_passes - results['num_flaky'] = num_flaky - results['num_regressions'] = num_regressions - - return results - def _write_json_files(self, unexpected_results, result_summary, individual_test_timings): """Writes the results of the test run as JSON files into the results @@ -966,22 +856,19 @@ class TestRunner: individual_test_timings: list of test times (used by the flakiness dashboard). """ - _log.debug("Writing JSON files in %s." % - self._options.results_directory) - unexpected_file = open(os.path.join(self._options.results_directory, - "unexpected_results.json"), "w") - unexpected_file.write(simplejson.dumps(unexpected_results, - sort_keys=True, indent=2)) - unexpected_file.close() + results_directory = self._options.results_directory + _log.debug("Writing JSON files in %s." % results_directory) + unexpected_json_path = os.path.join(results_directory, "unexpected_results.json") + with codecs.open(unexpected_json_path, "w", "utf-8") as file: + simplejson.dump(unexpected_results, file, sort_keys=True, indent=2) # Write a json file of the test_expectations.txt file for the layout # tests dashboard. - expectations_file = open(os.path.join(self._options.results_directory, - "expectations.json"), "w") + expectations_path = os.path.join(results_directory, "expectations.json") expectations_json = \ self._expectations.get_expectations_json_for_all_platforms() - expectations_file.write("ADD_EXPECTATIONS(" + expectations_json + ");") - expectations_file.close() + with codecs.open(expectations_path, "w", "utf-8") as file: + file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json) json_layout_results_generator.JSONLayoutResultsGenerator( self._port, self._options.builder_name, self._options.build_name, @@ -991,13 +878,11 @@ class TestRunner: _log.debug("Finished writing JSON files.") - def _print_expected_results_of_type(self, write, result_summary, + def _print_expected_results_of_type(self, result_summary, result_type, result_type_str): """Print the number of the tests in a given result class. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. result_summary - the object containing all the results to report on result_type - the particular result type to report in the summary. result_type_str - a string description of the result_type. 
@@ -1012,8 +897,9 @@ class TestRunner: fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd defer, %%%dd wontfix)" % (self._num_digits(now), self._num_digits(defer), self._num_digits(wontfix))) - write(fmtstr % (len(tests), result_type_str, len(tests & now), - len(tests & defer), len(tests & wontfix))) + self._printer.print_expected(fmtstr % + (len(tests), result_type_str, len(tests & now), + len(tests & defer), len(tests & wontfix))) def _num_digits(self, num): """Returns the number of digits needed to represent the length of a @@ -1023,43 +909,39 @@ class TestRunner: ndigits = int(math.log10(len(num))) + 1 return ndigits - def _print_timing_statistics(self, write, total_time, thread_timings, + def _print_timing_statistics(self, total_time, thread_timings, directory_test_timings, individual_test_timings, result_summary): """Record timing-specific information for the test run. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. total_time: total elapsed time (in seconds) for the test run thread_timings: wall clock time each thread ran for directory_test_timings: timing by directory individual_test_timings: timing by file result_summary: summary object for the test run """ - write("Test timing:") - write(" %6.2f total testing time" % total_time) - write("") - write("Thread timing:") + self._printer.print_timing("Test timing:") + self._printer.print_timing(" %6.2f total testing time" % total_time) + self._printer.print_timing("") + self._printer.print_timing("Thread timing:") cuml_time = 0 for t in thread_timings: - write(" %10s: %5d tests, %6.2f secs" % + self._printer.print_timing(" %10s: %5d tests, %6.2f secs" % (t['name'], t['num_tests'], t['total_time'])) cuml_time += t['total_time'] - write(" %6.2f cumulative, %6.2f optimal" % + self._printer.print_timing(" %6.2f cumulative, %6.2f optimal" % (cuml_time, cuml_time / int(self._options.child_processes))) - write("") + self._printer.print_timing("") - self._print_aggregate_test_statistics(write, individual_test_timings) - self._print_individual_test_times(write, individual_test_timings, + self._print_aggregate_test_statistics(individual_test_timings) + self._print_individual_test_times(individual_test_timings, result_summary) - self._print_directory_timings(write, directory_test_timings) + self._print_directory_timings(directory_test_timings) - def _print_aggregate_test_statistics(self, write, individual_test_timings): + def _print_aggregate_test_statistics(self, individual_test_timings): """Prints aggregate statistics (e.g. median, mean, etc.) for all tests. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. individual_test_timings: List of dump_render_tree_thread.TestStats for all tests. 
""" @@ -1081,23 +963,21 @@ class TestRunner: times_per_test_type[test_type].append( time_for_diffs[test_type]) - self._print_statistics_for_test_timings(write, + self._print_statistics_for_test_timings( "PER TEST TIME IN TESTSHELL (seconds):", times_for_dump_render_tree) - self._print_statistics_for_test_timings(write, + self._print_statistics_for_test_timings( "PER TEST DIFF PROCESSING TIMES (seconds):", times_for_diff_processing) for test_type in test_types: - self._print_statistics_for_test_timings(write, + self._print_statistics_for_test_timings( "PER TEST TIMES BY TEST TYPE: %s" % test_type, times_per_test_type[test_type]) - def _print_individual_test_times(self, write, individual_test_timings, + def _print_individual_test_times(self, individual_test_timings, result_summary): """Prints the run times for slow, timeout and crash tests. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. individual_test_timings: List of dump_render_tree_thread.TestStats for all tests. result_summary: summary object for test run @@ -1119,53 +999,52 @@ class TestRunner: slow_tests.append(test_tuple) if filename in result_summary.failures: - result = result_summary.results[filename] + result = result_summary.results[filename].type if (result == test_expectations.TIMEOUT or result == test_expectations.CRASH): is_timeout_crash_or_slow = True timeout_or_crash_tests.append(test_tuple) if (not is_timeout_crash_or_slow and - num_printed < self._options.num_slow_tests_to_log): + num_printed < printing.NUM_SLOW_TESTS_TO_LOG): num_printed = num_printed + 1 unexpected_slow_tests.append(test_tuple) - write("") - self._print_test_list_timing(write, "%s slowest tests that are not " + self._printer.print_timing("") + self._print_test_list_timing("%s slowest tests that are not " "marked as SLOW and did not timeout/crash:" % - self._options.num_slow_tests_to_log, unexpected_slow_tests) - write("") - self._print_test_list_timing(write, "Tests marked as SLOW:", - slow_tests) - write("") - self._print_test_list_timing(write, "Tests that timed out or crashed:", + printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests) + self._printer.print_timing("") + self._print_test_list_timing("Tests marked as SLOW:", slow_tests) + self._printer.print_timing("") + self._print_test_list_timing("Tests that timed out or crashed:", timeout_or_crash_tests) - write("") + self._printer.print_timing("") - def _print_test_list_timing(self, write, title, test_list): + def _print_test_list_timing(self, title, test_list): """Print timing info for each test. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. title: section heading test_list: tests that fall in this section """ - write(title) + if self._printer.disabled('slowest'): + return + + self._printer.print_timing(title) for test_tuple in test_list: filename = test_tuple.filename[len( self._port.layout_tests_dir()) + 1:] filename = filename.replace('\\', '/') test_run_time = round(test_tuple.test_run_time, 1) - write(" %s took %s seconds" % (filename, test_run_time)) + self._printer.print_timing(" %s took %s seconds" % + (filename, test_run_time)) - def _print_directory_timings(self, write, directory_test_timings): + def _print_directory_timings(self, directory_test_timings): """Print timing info by directory for any directories that take > 10 seconds to run. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. 
directory_test_timing: time info for each directory """ timings = [] @@ -1175,25 +1054,24 @@ class TestRunner: num_tests)) timings.sort() - write("Time to process slowest subdirectories:") + self._printer.print_timing("Time to process slowest subdirectories:") min_seconds_to_print = 10 for timing in timings: if timing[0] > min_seconds_to_print: - write(" %s took %s seconds to run %s tests." % (timing[1], - timing[0], timing[2])) - write("") + self._printer.print_timing( + " %s took %s seconds to run %s tests." % (timing[1], + timing[0], timing[2])) + self._printer.print_timing("") - def _print_statistics_for_test_timings(self, write, title, timings): + def _print_statistics_for_test_timings(self, title, timings): """Prints the median, mean and standard deviation of the values in timings. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. title: Title for these timings. timings: A list of floats representing times. """ - write(title) + self._printer.print_timing(title) timings.sort() num_tests = len(timings) @@ -1215,19 +1093,17 @@ class TestRunner: sum_of_deviations = math.pow(time - mean, 2) std_deviation = math.sqrt(sum_of_deviations / num_tests) - write(" Median: %6.3f" % median) - write(" Mean: %6.3f" % mean) - write(" 90th percentile: %6.3f" % percentile90) - write(" 99th percentile: %6.3f" % percentile99) - write(" Standard dev: %6.3f" % std_deviation) - write("") - - def _print_result_summary(self, write, result_summary): + self._printer.print_timing(" Median: %6.3f" % median) + self._printer.print_timing(" Mean: %6.3f" % mean) + self._printer.print_timing(" 90th percentile: %6.3f" % percentile90) + self._printer.print_timing(" 99th percentile: %6.3f" % percentile99) + self._printer.print_timing(" Standard dev: %6.3f" % std_deviation) + self._printer.print_timing("") + + def _print_result_summary(self, result_summary): """Print a short summary about how many tests passed. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. result_summary: information to log """ failed = len(result_summary.failures) @@ -1239,30 +1115,29 @@ class TestRunner: if total > 0: pct_passed = float(passed) * 100 / total - write("") - write("=> Results: %d/%d tests passed (%.1f%%)" % + self._printer.print_actual("") + self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" % (passed, total, pct_passed)) - write("") - self._print_result_summary_entry(write, result_summary, + self._printer.print_actual("") + self._print_result_summary_entry(result_summary, test_expectations.NOW, "Tests to be fixed for the current release") - write("") - self._print_result_summary_entry(write, result_summary, + self._printer.print_actual("") + self._print_result_summary_entry(result_summary, test_expectations.DEFER, "Tests we'll fix in the future if they fail (DEFER)") - write("") - self._print_result_summary_entry(write, result_summary, + self._printer.print_actual("") + self._print_result_summary_entry(result_summary, test_expectations.WONTFIX, "Tests that will only be fixed if they crash (WONTFIX)") + self._printer.print_actual("") - def _print_result_summary_entry(self, write, result_summary, timeline, + def _print_result_summary_entry(self, result_summary, timeline, heading): """Print a summary block of results for a particular timeline of test. Args: - write: A callback to write info to (e.g., a LoggingWriter) or - sys.stdout.write. 
result_summary: summary to print results for timeline: the timeline to print results for (NOT, WONTFIX, etc.) heading: a textual description of the timeline @@ -1271,7 +1146,7 @@ class TestRunner: not_passing = (total - len(result_summary.tests_by_expectation[test_expectations.PASS] & result_summary.tests_by_timeline[timeline])) - write("=> %s (%d):" % (heading, not_passing)) + self._printer.print_actual("=> %s (%d):" % (heading, not_passing)) for result in TestExpectationsFile.EXPECTATION_ORDER: if result == test_expectations.PASS: @@ -1281,94 +1156,34 @@ class TestRunner: desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result] if not_passing and len(results): pct = len(results) * 100.0 / not_passing - write(" %5d %-24s (%4.1f%%)" % (len(results), - desc[len(results) != 1], pct)) - - def _print_one_line_summary(self, total, expected): - """Print a one-line summary of the test run to stdout. + self._printer.print_actual(" %5d %-24s (%4.1f%%)" % + (len(results), desc[len(results) != 1], pct)) - Args: - total: total number of tests run - expected: number of expected results + def _results_html(self, test_files, failures, title="Test Failures", override_time=None): """ - unexpected = total - expected - if unexpected == 0: - print "All %d tests ran as expected." % expected - elif expected == 1: - print "1 test ran as expected, %d didn't:" % unexpected - else: - print "%d tests ran as expected, %d didn't:" % (expected, - unexpected) - - def _print_unexpected_results(self, unexpected_results): - """Prints any unexpected results in a human-readable form to stdout.""" - passes = {} - flaky = {} - regressions = {} - - if len(unexpected_results['tests']): - print "" - - for test, results in unexpected_results['tests'].iteritems(): - actual = results['actual'].split(" ") - expected = results['expected'].split(" ") - if actual == ['PASS']: - if 'CRASH' in expected: - _add_to_dict_of_lists(passes, - 'Expected to crash, but passed', - test) - elif 'TIMEOUT' in expected: - _add_to_dict_of_lists(passes, - 'Expected to timeout, but passed', - test) - else: - _add_to_dict_of_lists(passes, - 'Expected to fail, but passed', - test) - elif len(actual) > 1: - # We group flaky tests by the first actual result we got. 
- _add_to_dict_of_lists(flaky, actual[0], test) - else: - _add_to_dict_of_lists(regressions, results['actual'], test) - - if len(passes): - for key, tests in passes.iteritems(): - print "%s: (%d)" % (key, len(tests)) - tests.sort() - for test in tests: - print " %s" % test - print - - if len(flaky): - descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS - for key, tests in flaky.iteritems(): - result = TestExpectationsFile.EXPECTATIONS[key.lower()] - print "Unexpected flakiness: %s (%d)" % ( - descriptions[result][1], len(tests)) - tests.sort() - - for test in tests: - result = unexpected_results['tests'][test] - actual = result['actual'].split(" ") - expected = result['expected'].split(" ") - result = TestExpectationsFile.EXPECTATIONS[key.lower()] - new_expectations_list = list(set(actual) | set(expected)) - print " %s = %s" % (test, " ".join(new_expectations_list)) - print - - if len(regressions): - descriptions = TestExpectationsFile.EXPECTATION_DESCRIPTIONS - for key, tests in regressions.iteritems(): - result = TestExpectationsFile.EXPECTATIONS[key.lower()] - print "Regressions: Unexpected %s : (%d)" % ( - descriptions[result][1], len(tests)) - tests.sort() - for test in tests: - print " %s = %s" % (test, key) - print - - if len(unexpected_results['tests']) and self._options.verbose: - print "-" * 78 + test_files = a list of file paths + failures = dictionary mapping test paths to failure objects + title = title printed at top of test + override_time = current time (used by unit tests) + """ + page = """<html> + <head> + <title>Layout Test Results (%(time)s)</title> + </head> + <body> + <h2>%(title)s (%(time)s)</h2> + """ % {'title': title, 'time': override_time or time.asctime()} + + for test_file in sorted(test_files): + test_name = self._port.relative_test_filename(test_file) + test_url = self._port.filename_to_uri(test_file) + page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name) + test_failures = failures.get(test_file, []) + for failure in test_failures: + page += u" %s<br/>" % failure.result_html_output(test_name) + page += "</p>\n" + page += "</body></html>\n" + return page def _write_results_html_file(self, result_summary): """Write results.html which is a summary of tests that failed. 
@@ -1382,8 +1197,10 @@ class TestRunner: """ # test failures if self._options.full_results_html: + results_title = "Test Failures" test_files = result_summary.failures.keys() else: + results_title = "Unexpected Test Failures" unexpected_failures = self._get_failures(result_summary, include_crashes=True) test_files = unexpected_failures.keys() @@ -1392,30 +1209,10 @@ class TestRunner: out_filename = os.path.join(self._options.results_directory, "results.html") - out_file = open(out_filename, 'w') - # header - if self._options.full_results_html: - h2 = "Test Failures" - else: - h2 = "Unexpected Test Failures" - out_file.write("<html><head><title>Layout Test Results (%(time)s)" - "</title></head><body><h2>%(h2)s (%(time)s)</h2>\n" - % {'h2': h2, 'time': time.asctime()}) + with codecs.open(out_filename, "w", "utf-8") as results_file: + html = self._results_html(test_files, result_summary.failures, results_title) + results_file.write(html) - test_files.sort() - for test_file in test_files: - test_failures = result_summary.failures.get(test_file, []) - out_file.write("<p><a href='%s'>%s</a><br />\n" - % (self._port.filename_to_uri(test_file), - self._port.relative_test_filename(test_file))) - for failure in test_failures: - out_file.write(" %s<br/>" - % failure.result_html_output( - self._port.relative_test_filename(test_file))) - out_file.write("</p>\n") - - # footer - out_file.write("</body></html>\n") return True def _show_results_html_file(self): @@ -1425,64 +1222,49 @@ class TestRunner: self._port.show_results_html_file(results_filename) -def _add_to_dict_of_lists(dict, key, value): - dict.setdefault(key, []).append(value) - - def read_test_files(files): tests = [] for file in files: - for line in open(file): + # FIXME: This could be cleaner using a list comprehension. + for line in codecs.open(file, "r", "utf-8"): line = test_expectations.strip_comments(line) if line: tests.append(line) return tests -def create_logging_writer(options, log_option): - """Returns a write() function that will write the string to _log.info() - if comp was specified in --log or if --verbose is true. Otherwise the - message is dropped. - - Args: - options: list of command line options from optparse - log_option: option to match in options.log in order for the messages - to be logged (e.g., 'actual' or 'expected') - """ - if options.verbose or log_option in options.log.split(","): - return _log.info - return lambda str: 1 - - -def main(options, args, print_results=True): +def run(port_obj, options, args, regular_output=sys.stderr, + buildbot_output=sys.stdout): """Run the tests. Args: + port_obj: Port object for port-specific behavior options: a dictionary of command line options args: a list of sub directories or files to test - print_results: whether or not to log anything to stdout. - Set to false by the unit tests + regular_output: a stream-like object that we can send logging/debug + output to + buildbot_output: a stream-like object that we can write all output that + is intended to be parsed by the buildbot to Returns: the number of unexpected results that occurred, or -1 if there is an error. """ - if options.sources: - options.verbose = True + # Configure the printing subsystem for printing output, logging debug + # info, and tracing tests. - # Set up our logging format. 
-    meter = metered_stream.MeteredStream(options.verbose, sys.stderr)
-    log_fmt = '%(message)s'
-    log_datefmt = '%y%m%d %H:%M:%S'
-    log_level = logging.INFO
-    if options.verbose:
-        log_fmt = ('%(asctime)s %(filename)s:%(lineno)-4d %(levelname)s '
-                   '%(message)s')
-        log_level = logging.DEBUG
-    logging.basicConfig(level=log_level, format=log_fmt, datefmt=log_datefmt,
-                        stream=meter)
+    if not options.child_processes:
+        # FIXME: Investigate perf/flakiness impact of using cpu_count + 1.
+        options.child_processes = port_obj.default_child_processes()
+
+    printer = printing.Printer(port_obj, options, regular_output=regular_output,
+                               buildbot_output=buildbot_output,
+                               child_processes=int(options.child_processes),
+                               is_fully_parallel=options.experimental_fully_parallel)
+    if options.help_printing:
+        printer.help_printing()
+        return 0

-    port_obj = port.get(options.platform, options)
     executive = Executive()

     if not options.configuration:
@@ -1504,14 +1286,13 @@ def main(options, args, print_results=True):
        options.results_directory = port_obj.results_directory()

    last_unexpected_results = []
-    if options.print_unexpected_results or options.retry_unexpected_results:
+    if options.print_last_failures or options.retest_last_failures:
        unexpected_results_filename = os.path.join(
            options.results_directory, "unexpected_results.json")
-        f = file(unexpected_results_filename)
-        results = simplejson.load(f)
-        f.close()
+        with codecs.open(unexpected_results_filename, "r", "utf-8") as file:
+            results = simplejson.load(file)
        last_unexpected_results = results['tests'].keys()
-        if options.print_unexpected_results:
+        if options.print_last_failures:
            print "\n".join(last_unexpected_results) + "\n"
            return 0

@@ -1519,8 +1300,8 @@ def main(options, args, print_results=True):
        # Just clobber the actual test results directories since the other
        # files in the results directory are explicitly used for cross-run
        # tracking.
-        meter.update("Clobbering old results in %s" %
-                     options.results_directory)
+        printer.print_update("Clobbering old results in %s" %
+                             options.results_directory)
        layout_tests_dir = port_obj.layout_tests_dir()
        possible_dirs = os.listdir(layout_tests_dir)
        for dirname in possible_dirs:
            shutil.rmtree(os.path.join(options.results_directory, dirname),
                          ignore_errors=True)

-    if not options.child_processes:
-        # FIXME: Investigate perf/flakiness impact of using cpu_count + 1.
- options.child_processes = port_obj.default_child_processes() - - write = create_logging_writer(options, 'config') - if options.child_processes == 1: - write("Running one %s" % port_obj.driver_name) - else: - write("Running %s %ss in parallel" % ( - options.child_processes, port_obj.driver_name())) - if not options.time_out_ms: if options.configuration == "Debug": options.time_out_ms = str(2 * TestRunner.DEFAULT_TEST_TIMEOUT_MS) @@ -1546,8 +1316,14 @@ def main(options, args, print_results=True): options.time_out_ms = str(TestRunner.DEFAULT_TEST_TIMEOUT_MS) options.slow_time_out_ms = str(5 * int(options.time_out_ms)) - write("Regular timeout: %s, slow test timeout: %s" % - (options.time_out_ms, options.slow_time_out_ms)) + printer.print_config("Regular timeout: %s, slow test timeout: %s" % + (options.time_out_ms, options.slow_time_out_ms)) + + if int(options.child_processes) == 1: + printer.print_config("Running one %s" % port_obj.driver_name()) + else: + printer.print_config("Running %s %ss in parallel" % ( + options.child_processes, port_obj.driver_name())) # Include all tests if none are specified. new_args = [] @@ -1564,9 +1340,9 @@ def main(options, args, print_results=True): # Create the output directory if it doesn't already exist. port_obj.maybe_make_directory(options.results_directory) - meter.update("Collecting tests ...") + printer.print_update("Collecting tests ...") - test_runner = TestRunner(port_obj, options, meter) + test_runner = TestRunner(port_obj, options, printer) test_runner.gather_file_paths(paths) if options.lint_test_files: @@ -1576,43 +1352,43 @@ def main(options, args, print_results=True): for platform_name in port_obj.test_platform_names(): test_runner.parse_expectations(platform_name, is_debug_mode=True) test_runner.parse_expectations(platform_name, is_debug_mode=False) - meter.update("") - print ("If there are no fail messages, errors or exceptions, then the " - "lint succeeded.") + printer.write("") + _log.info("If there are no fail messages, errors or exceptions, " + "then the lint succeeded.") return 0 - write = create_logging_writer(options, "config") - write("Using port '%s'" % port_obj.name()) - write("Placing test results in %s" % options.results_directory) + printer.print_config("Using port '%s'" % port_obj.name()) + printer.print_config("Placing test results in %s" % + options.results_directory) if options.new_baseline: - write("Placing new baselines in %s" % port_obj.baseline_path()) - write("Using %s build" % options.configuration) + printer.print_config("Placing new baselines in %s" % + port_obj.baseline_path()) + printer.print_config("Using %s build" % options.configuration) if options.pixel_tests: - write("Pixel tests enabled") + printer.print_config("Pixel tests enabled") else: - write("Pixel tests disabled") - write("") + printer.print_config("Pixel tests disabled") + printer.print_config("") - meter.update("Parsing expectations ...") + printer.print_update("Parsing expectations ...") test_runner.parse_expectations(port_obj.test_platform_name(), options.configuration == 'Debug') - meter.update("Checking build ...") + printer.print_update("Checking build ...") if not port_obj.check_build(test_runner.needs_http()): return -1 - meter.update("Starting helper ...") + printer.print_update("Starting helper ...") port_obj.start_helper() # Check that the system dependencies (themes, fonts, ...) are correct. 
if not options.nocheck_sys_deps: - meter.update("Checking system dependencies ...") + printer.print_update("Checking system dependencies ...") if not port_obj.check_sys_deps(test_runner.needs_http()): return -1 - meter.update("Preparing tests ...") - write = create_logging_writer(options, "expected") - result_summary = test_runner.prepare_lists_and_print_output(write) + printer.print_update("Preparing tests ...") + result_summary = test_runner.prepare_lists_and_print_output() port_obj.setup_test_run() @@ -1622,7 +1398,7 @@ def main(options, args, print_results=True): if options.fuzzy_pixel_tests: test_runner.add_test_type(fuzzy_image_diff.FuzzyImageDiff) - num_unexpected_results = test_runner.run(result_summary, print_results) + num_unexpected_results = test_runner.run(result_summary) port_obj.stop_helper() @@ -1634,8 +1410,10 @@ def _compat_shim_callback(option, opt_str, value, parser): print "Ignoring unsupported option: %s" % opt_str -def _compat_shim_option(option_name, nargs=0): - return optparse.make_option(option_name, action="callback", callback=_compat_shim_callback, nargs=nargs, help="Ignored, for old-run-webkit-tests compat only.") +def _compat_shim_option(option_name, **kwargs): + return optparse.make_option(option_name, action="callback", + callback=_compat_shim_callback, + help="Ignored, for old-run-webkit-tests compat only.", **kwargs) def parse_args(args=None): @@ -1659,22 +1437,7 @@ def parse_args(args=None): # old-run-webkit-tests also accepts -c, --configuration CONFIGURATION. ] - logging_options = [ - optparse.make_option("--log", action="store", - default=LOG_DEFAULT_VALUE, - help=("log various types of data. The argument value should be a " - "comma-separated list of values from: %s (defaults to " - "--log %s)" % (LOG_VALUES, LOG_DEFAULT_VALUE))), - optparse.make_option("-v", "--verbose", action="store_true", - default=False, help="include debug-level logging"), - optparse.make_option("--sources", action="store_true", - help="show expected result file path for each test " + - "(implies --verbose)"), - # old-run-webkit-tests has a --slowest option which just prints - # the slowest 10. - optparse.make_option("--num-slow-tests-to-log", default=50, - help="Number of slow tests whose timings to print."), - ] + print_options = printing.print_options() # FIXME: These options should move onto the ChromiumPort. chromium_options = [ @@ -1706,7 +1469,7 @@ def parse_args(args=None): _compat_shim_option("--use-remote-links-to-tests"), # FIXME: NRWT doesn't need this option as much since failures are # designed to be cheap. We eventually plan to add this support. - _compat_shim_option("--exit-after-n-failures", nargs=1), + _compat_shim_option("--exit-after-n-failures", nargs=1, type="int"), ] results_options = [ @@ -1815,12 +1578,18 @@ def parse_args(args=None): # Exit after the first N failures instead of running all tests # FIXME: consider: --iterations n # Number of times to run the set of tests (e.g. 
ABCABCABC) - optparse.make_option("--print-unexpected-results", action="store_true", - default=False, help="print the tests in the last run that " - "had unexpected results."), - optparse.make_option("--retry-unexpected-results", action="store_true", - default=False, help="re-try the tests in the last run that " - "had unexpected results."), + optparse.make_option("--print-last-failures", action="store_true", + default=False, help="Print the tests in the last run that " + "had unexpected failures (or passes)."), + optparse.make_option("--retest-last-failures", action="store_true", + default=False, help="re-test the tests in the last run that " + "had unexpected failures (or passes)."), + optparse.make_option("--retry-failures", action="store_true", + default=True, + help="Re-try any tests that produce unexpected results (default)"), + optparse.make_option("--no-retry-failures", action="store_false", + dest="retry_failures", + help="Don't re-try any tests that produce unexpected results."), ] misc_options = [ @@ -1841,13 +1610,23 @@ def parse_args(args=None): help=("The build number of the builder running this script.")), ] - option_list = (configuration_options + logging_options + + option_list = (configuration_options + print_options + chromium_options + results_options + test_options + misc_options + results_json_options + old_run_webkit_tests_compat) option_parser = optparse.OptionParser(option_list=option_list) - return option_parser.parse_args(args) -if '__main__' == __name__: + options, args = option_parser.parse_args(args) + if options.sources: + options.verbose = True + + return options, args + + +def main(): options, args = parse_args() - sys.exit(main(options, args)) + port_obj = port.get(options.platform, options) + return run(port_obj, options, args) + +if '__main__' == __name__: + sys.exit(main()) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests_unittest.py b/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests_unittest.py index 9fe0e74..cd72fa3 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests_unittest.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests_unittest.py @@ -33,14 +33,32 @@ import os import sys import unittest -import webkitpy.layout_tests.run_webkit_tests as run_webkit_tests +from webkitpy.common import array_stream +from webkitpy.layout_tests import port +from webkitpy.layout_tests import run_webkit_tests +from webkitpy.thirdparty.mock import Mock -def passing_run(args): + +def passing_run(args, port_obj=None, logging_included=False): + if not logging_included: + args.extend(['--print', 'nothing']) options, args = run_webkit_tests.parse_args(args) - res = run_webkit_tests.main(options, args, False) + if port_obj is None: + port_obj = port.get(options.platform, options) + res = run_webkit_tests.run(port_obj, options, args) return res == 0 +def logging_run(args): + options, args = run_webkit_tests.parse_args(args) + port_obj = port.get(options.platform, options) + buildbot_output = array_stream.ArrayStream() + regular_output = array_stream.ArrayStream() + res = run_webkit_tests.run(port_obj, options, args, + buildbot_output=buildbot_output, + regular_output=regular_output) + return (res, buildbot_output, regular_output) + class MainTest(unittest.TestCase): def test_fast(self): @@ -53,9 +71,44 @@ class MainTest(unittest.TestCase): 'fast/html/article-element.html'])) self.assertTrue(passing_run(['--platform', 'test', '--child-processes', '1', - '--log', 'unexpected', + '--print', 'unexpected', 
'fast/html'])) + def test_child_processes(self): + (res, buildbot_output, regular_output) = logging_run( + ['--platform', 'test', '--print', 'config', '--child-processes', + '1', 'fast/html']) + self.assertTrue('Running one DumpRenderTree' + in regular_output.get()) + + (res, buildbot_output, regular_output) = logging_run( + ['--platform', 'test', '--print', 'config', '--child-processes', + '2', 'fast/html']) + self.assertTrue('Running 2 DumpRenderTrees in parallel' + in regular_output.get()) + + + +class TestRunnerTest(unittest.TestCase): + def test_results_html(self): + mock_port = Mock() + mock_port.relative_test_filename = lambda name: name + mock_port.filename_to_uri = lambda name: name + + runner = run_webkit_tests.TestRunner(port=mock_port, options=Mock(), printer=Mock()) + expected_html = u"""<html> + <head> + <title>Layout Test Results (time)</title> + </head> + <body> + <h2>Title (time)</h2> + <p><a href='test_path'>test_path</a><br /> +</p> +</body></html> +""" + html = runner._results_html(["test_path"], {}, "Title", override_time="time") + self.assertEqual(html, expected_html) + class DryrunTest(unittest.TestCase): def test_basics(self): diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/test_types/image_diff.py b/WebKitTools/Scripts/webkitpy/layout_tests/test_types/image_diff.py index b414358..b37f4b3 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/test_types/image_diff.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/test_types/image_diff.py @@ -34,6 +34,9 @@ match, returns FailureImageHashMismatch and outputs both hashes into the layout test results directory. """ +from __future__ import with_statement + +import codecs import errno import logging import os @@ -78,11 +81,10 @@ class ImageDiff(test_type_base.TestTypeBase): png_path: path to the actual PNG result file checksum: value of the actual checksum result """ - png_file = open(png_path, "rb") - png_data = png_file.read() - png_file.close() - self._save_baseline_data(filename, png_data, ".png") - self._save_baseline_data(filename, checksum, ".checksum") + with open(png_path, "rb") as png_file: + png_data = png_file.read() + self._save_baseline_data(filename, png_data, ".png", encoding=None) + self._save_baseline_data(filename, checksum, ".checksum", encoding="ascii") def _create_image_diff(self, port, filename, configuration): """Creates the visual diff of the expected/actual PNGs. @@ -140,8 +142,10 @@ class ImageDiff(test_type_base.TestTypeBase): _log.debug('Using %s' % expected_hash_file) _log.debug('Using %s' % expected_png_file) + # FIXME: We repeat this pattern often, we should share code. try: - expected_hash = open(expected_hash_file, "r").read() + with codecs.open(expected_hash_file, "r", "ascii") as file: + expected_hash = file.read() except IOError, e: if errno.ENOENT != e.errno: raise @@ -152,6 +156,7 @@ class ImageDiff(test_type_base.TestTypeBase): # Report a missing expected PNG file. 
self.write_output_files(port, filename, '.checksum', test_args.hash, expected_hash, + encoding="ascii", print_text_diffs=False) self._copy_output_png(filename, test_args.png_path, '-actual.png') failures.append(test_failures.FailureMissingImage(self)) @@ -162,6 +167,7 @@ class ImageDiff(test_type_base.TestTypeBase): self.write_output_files(port, filename, '.checksum', test_args.hash, expected_hash, + encoding="ascii", print_text_diffs=False) self._copy_output_png(filename, test_args.png_path, '-actual.png') self._copy_output_png(filename, expected_png_file, '-expected.png') diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/test_types/test_type_base.py b/WebKitTools/Scripts/webkitpy/layout_tests/test_types/test_type_base.py index 4c99be0..cf0b9ec 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/test_types/test_type_base.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/test_types/test_type_base.py @@ -32,6 +32,9 @@ Also defines the TestArguments "struct" to pass them additional arguments. """ +from __future__ import with_statement + +import codecs import cgi import errno import logging @@ -90,7 +93,7 @@ class TestTypeBase(object): self._port.relative_test_filename(filename)) self._port.maybe_make_directory(os.path.split(output_filename)[0]) - def _save_baseline_data(self, filename, data, modifier): + def _save_baseline_data(self, filename, data, modifier, encoding): """Saves a new baseline file into the port's baseline directory. The file will be named simply "<test>-expected<modifier>", suitable for @@ -112,7 +115,7 @@ class TestTypeBase(object): self._port.maybe_make_directory(output_dir) output_path = os.path.join(output_dir, output_file) _log.debug('writing new baseline to "%s"' % (output_path)) - self._write_into_file_at_path(output_path, data) + self._write_into_file_at_path(output_path, data, encoding) def output_filename(self, filename, modifier): """Returns a filename inside the output dir that contains modifier. @@ -150,13 +153,15 @@ class TestTypeBase(object): """ raise NotImplemented - def _write_into_file_at_path(self, file_path, contents): - file = open(file_path, "wb") - file.write(contents) - file.close() + def _write_into_file_at_path(self, file_path, contents, encoding): + """This method assumes that byte_array is already encoded + into the right format.""" + with codecs.open(file_path, "w", encoding=encoding) as file: + file.write(contents) def write_output_files(self, port, filename, file_type, - output, expected, print_text_diffs=False): + output, expected, encoding, + print_text_diffs=False): """Writes the test output, the expected output and optionally the diff between the two to files in the results directory. @@ -175,10 +180,12 @@ class TestTypeBase(object): self._make_output_directory(filename) actual_filename = self.output_filename(filename, self.FILENAME_SUFFIX_ACTUAL + file_type) expected_filename = self.output_filename(filename, self.FILENAME_SUFFIX_EXPECTED + file_type) + # FIXME: This function is poorly designed. We should be passing in some sort of + # encoding information from the callers. 
if output: - self._write_into_file_at_path(actual_filename, output) + self._write_into_file_at_path(actual_filename, output, encoding) if expected: - self._write_into_file_at_path(expected_filename, expected) + self._write_into_file_at_path(expected_filename, expected, encoding) if not output or not expected: return @@ -186,16 +193,19 @@ class TestTypeBase(object): if not print_text_diffs: return + # Note: We pass encoding=None for all diff writes, as we treat diff + # output as binary. Diff output may contain multiple files in + # conflicting encodings. diff = port.diff_text(expected, output, expected_filename, actual_filename) diff_filename = self.output_filename(filename, self.FILENAME_SUFFIX_DIFF + file_type) - self._write_into_file_at_path(diff_filename, diff) + self._write_into_file_at_path(diff_filename, diff, encoding=None) # Shell out to wdiff to get colored inline diffs. wdiff = port.wdiff_text(expected_filename, actual_filename) wdiff_filename = self.output_filename(filename, self.FILENAME_SUFFIX_WDIFF) - self._write_into_file_at_path(wdiff_filename, wdiff) + self._write_into_file_at_path(wdiff_filename, wdiff, encoding=None) # Use WebKit's PrettyPatch.rb to get an HTML diff. pretty_patch = port.pretty_patch_text(diff_filename) pretty_patch_filename = self.output_filename(filename, self.FILENAME_SUFFIX_PRETTY_PATCH) - self._write_into_file_at_path(pretty_patch_filename, pretty_patch) + self._write_into_file_at_path(pretty_patch_filename, pretty_patch, encoding=None) diff --git a/WebKitTools/Scripts/webkitpy/layout_tests/test_types/text_diff.py b/WebKitTools/Scripts/webkitpy/layout_tests/test_types/text_diff.py index 8f7907c..9fed474 100644 --- a/WebKitTools/Scripts/webkitpy/layout_tests/test_types/text_diff.py +++ b/WebKitTools/Scripts/webkitpy/layout_tests/test_types/text_diff.py @@ -33,6 +33,9 @@ If the output doesn't match, returns FailureTextMismatch and outputs the diff files into the layout test results directory. """ +from __future__ import with_statement + +import codecs import errno import logging import os.path @@ -43,12 +46,6 @@ from webkitpy.layout_tests.test_types import test_type_base _log = logging.getLogger("webkitpy.layout_tests.test_types.text_diff") -def is_render_tree_dump(data): - """Returns true if data appears to be a render tree dump as opposed to a - plain text dump.""" - return data.find("RenderView at (0,0)") != -1 - - class TestTextDiff(test_type_base.TestTypeBase): def get_normalized_output_text(self, output): @@ -70,8 +67,14 @@ class TestTextDiff(test_type_base.TestTypeBase): return self.get_normalized_text(expected_filename) def get_normalized_text(self, filename): + # FIXME: We repeat this pattern often, we should share code. try: - text = open(filename).read() + # NOTE: -expected.txt files are ALWAYS utf-8. However, + # we do not decode the output from DRT, so we should not + # decode the -expected.txt values either to allow comparisons. + with codecs.open(filename, "r", encoding=None) as file: + text = file.read() + # We could assert that the text is valid utf-8. except IOError, e: if errno.ENOENT != e.errno: raise @@ -87,7 +90,10 @@ class TestTextDiff(test_type_base.TestTypeBase): # If we're generating a new baseline, we pass. if test_args.new_baseline: - self._save_baseline_data(filename, output, ".txt") + # Although all test_shell/DumpRenderTree output should be utf-8, + # we do not ever decode it inside run-webkit-tests. For some tests + # DumpRenderTree may not output utf-8 text (e.g. webarchives). 
+ self._save_baseline_data(filename, output, ".txt", encoding=None) return failures # Normalize text to diff @@ -99,7 +105,8 @@ class TestTextDiff(test_type_base.TestTypeBase): if port.compare_text(output, expected): # Text doesn't match, write output files. self.write_output_files(port, filename, ".txt", output, - expected, print_text_diffs=True) + expected, encoding=None, + print_text_diffs=True) if expected == '': failures.append(test_failures.FailureMissingResult(self)) diff --git a/WebKitTools/Scripts/webkitpy/style/checker.py b/WebKitTools/Scripts/webkitpy/style/checker.py index 84ae3da..59a3d39 100644 --- a/WebKitTools/Scripts/webkitpy/style/checker.py +++ b/WebKitTools/Scripts/webkitpy/style/checker.py @@ -30,20 +30,19 @@ """Front end of some style-checker modules.""" -import codecs import logging import os.path import sys +from checkers.common import categories as CommonCategories +from checkers.common import CarriageReturnChecker +from checkers.cpp import CppChecker +from checkers.python import PythonChecker +from checkers.text import TextChecker from error_handlers import DefaultStyleErrorHandler from filter import FilterConfiguration from optparser import ArgumentParser from optparser import DefaultCommandOptionValues -from processors.common import categories as CommonCategories -from processors.common import CarriageReturnProcessor -from processors.cpp import CppProcessor -from processors.python import PythonProcessor -from processors.text import TextProcessor from webkitpy.style_references import parse_patch from webkitpy.style_references import configure_logging as _configure_logging @@ -158,11 +157,51 @@ _PATH_RULES_SPECIFIER = [ ] +_CPP_FILE_EXTENSIONS = [ + 'c', + 'cpp', + 'h', + ] + +_PYTHON_FILE_EXTENSION = 'py' + +# FIXME: Include 'vcproj' files as text files after creating a mechanism +# for exempting them from the carriage-return checker (since they +# are Windows-only files). +_TEXT_FILE_EXTENSIONS = [ + 'ac', + 'cc', + 'cgi', + 'css', + 'exp', + 'flex', + 'gyp', + 'gypi', + 'html', + 'idl', + 'in', + 'js', + 'mm', + 'php', + 'pl', + 'pm', + 'pri', + 'pro', + 'rb', + 'sh', + 'txt', +# 'vcproj', # See FIXME above. + 'wm', + 'xhtml', + 'y', + ] + + +# Files to skip that are less obvious. +# # Some files should be skipped when checking style. For example, # WebKit maintains some files in Mozilla style on purpose to ease # future merges. -# -# Include a warning for skipped files that are less obvious. _SKIPPED_FILES_WITH_WARNING = [ # The Qt API and tests do not follow WebKit style. # They follow Qt style. :) @@ -175,11 +214,12 @@ _SKIPPED_FILES_WITH_WARNING = [ ] -# Don't include a warning for skipped files that are more common -# and more obvious. +# Files to skip that are more common or obvious. +# +# This list should be in addition to files with FileType.NONE. Files +# with FileType.NONE are automatically skipped without warning. _SKIPPED_FILES_WITHOUT_WARNING = [ "LayoutTests/", - ".pyc", ] @@ -192,8 +232,8 @@ _MAX_REPORTS_PER_CATEGORY = { def _all_categories(): """Return the set of all categories used by check-webkit-style.""" - # Take the union across all processors. - categories = CommonCategories.union(CppProcessor.categories) + # Take the union across all checkers. + categories = CommonCategories.union(CppChecker.categories) # FIXME: Consider adding all of the pep8 categories. 
Since they # are not too meaningful for documentation purposes, for @@ -221,7 +261,7 @@ def check_webkit_style_parser(): def check_webkit_style_configuration(options): - """Return a StyleCheckerConfiguration instance for check-webkit-style. + """Return a StyleProcessorConfiguration instance for check-webkit-style. Args: options: A CommandOptionValues instance. @@ -232,7 +272,7 @@ def check_webkit_style_configuration(options): path_specific=_PATH_RULES_SPECIFIER, user_rules=options.filter_rules) - return StyleCheckerConfiguration(filter_configuration=filter_configuration, + return StyleProcessorConfiguration(filter_configuration=filter_configuration, max_reports_per_category=_MAX_REPORTS_PER_CATEGORY, min_confidence=options.min_confidence, output_format=options.output_format, @@ -330,34 +370,17 @@ def configure_logging(stream, logger=None, is_verbose=False): # Enum-like idiom class FileType: - NONE = 1 + NONE = 0 # FileType.NONE evaluates to False. # Alphabetize remaining types - CPP = 2 - PYTHON = 3 - TEXT = 4 + CPP = 1 + PYTHON = 2 + TEXT = 3 -class ProcessorDispatcher(object): +class CheckerDispatcher(object): """Supports determining whether and how to check style, based on path.""" - cpp_file_extensions = ( - 'c', - 'cpp', - 'h', - ) - - text_file_extensions = ( - 'css', - 'html', - 'idl', - 'js', - 'mm', - 'php', - 'pm', - 'txt', - ) - def _file_extension(self, file_path): """Return the file extension without the leading dot.""" return os.path.splitext(file_path)[1].lstrip(".") @@ -371,6 +394,16 @@ class ProcessorDispatcher(object): def should_skip_without_warning(self, file_path): """Return whether the given file should be skipped without a warning.""" + if not self._file_type(file_path): # FileType.NONE. + return True + # Since "LayoutTests" is in _SKIPPED_FILES_WITHOUT_WARNING, make + # an exception to prevent files like "LayoutTests/ChangeLog" and + # "LayoutTests/ChangeLog-2009-06-16" from being skipped. + # + # FIXME: Figure out a good way to avoid having to add special logic + # for this special case. + if os.path.basename(file_path).startswith('ChangeLog'): + return False for skipped_file in _SKIPPED_FILES_WITHOUT_WARNING: if file_path.find(skipped_file) >= 0: return True @@ -380,7 +413,7 @@ class ProcessorDispatcher(object): """Return the file type corresponding to the given file.""" file_extension = self._file_extension(file_path) - if (file_extension in self.cpp_file_extensions) or (file_path == '-'): + if (file_extension in _CPP_FILE_EXTENSIONS) or (file_path == '-'): # FIXME: Do something about the comment below and the issue it # raises since cpp_style already relies on the extension. # @@ -388,28 +421,28 @@ class ProcessorDispatcher(object): # reading from stdin, cpp_style tests should not rely on # the extension. 
return FileType.CPP - elif file_extension == "py": + elif file_extension == _PYTHON_FILE_EXTENSION: return FileType.PYTHON - elif ("ChangeLog" in file_path or + elif (os.path.basename(file_path).startswith('ChangeLog') or (not file_extension and "WebKitTools/Scripts/" in file_path) or - file_extension in self.text_file_extensions): + file_extension in _TEXT_FILE_EXTENSIONS): return FileType.TEXT else: return FileType.NONE - def _create_processor(self, file_type, file_path, handle_style_error, - min_confidence): - """Instantiate and return a style processor based on file type.""" + def _create_checker(self, file_type, file_path, handle_style_error, + min_confidence): + """Instantiate and return a style checker based on file type.""" if file_type == FileType.NONE: - processor = None + checker = None elif file_type == FileType.CPP: file_extension = self._file_extension(file_path) - processor = CppProcessor(file_path, file_extension, - handle_style_error, min_confidence) + checker = CppChecker(file_path, file_extension, + handle_style_error, min_confidence) elif file_type == FileType.PYTHON: - processor = PythonProcessor(file_path, handle_style_error) + checker = PythonChecker(file_path, handle_style_error) elif file_type == FileType.TEXT: - processor = TextProcessor(file_path, handle_style_error) + checker = TextChecker(file_path, handle_style_error) else: raise ValueError('Invalid file type "%(file_type)s": the only valid file types ' "are %(NONE)s, %(CPP)s, and %(TEXT)s." @@ -418,24 +451,24 @@ class ProcessorDispatcher(object): "CPP": FileType.CPP, "TEXT": FileType.TEXT}) - return processor + return checker - def dispatch_processor(self, file_path, handle_style_error, min_confidence): - """Instantiate and return a style processor based on file path.""" + def dispatch(self, file_path, handle_style_error, min_confidence): + """Instantiate and return a style checker based on file path.""" file_type = self._file_type(file_path) - processor = self._create_processor(file_type, - file_path, - handle_style_error, - min_confidence) - return processor + checker = self._create_checker(file_type, + file_path, + handle_style_error, + min_confidence) + return checker # FIXME: Remove the stderr_write attribute from this class and replace # its use with calls to a logging module logger. -class StyleCheckerConfiguration(object): +class StyleProcessorConfiguration(object): - """Stores configuration values for the StyleChecker class. + """Stores configuration values for the StyleProcessor class. Attributes: min_confidence: An integer between 1 and 5 inclusive that is the @@ -455,7 +488,7 @@ class StyleCheckerConfiguration(object): min_confidence, output_format, stderr_write): - """Create a StyleCheckerConfiguration instance. + """Create a StyleProcessorConfiguration instance. Args: filter_configuration: A FilterConfiguration instance. The default @@ -528,7 +561,13 @@ class ProcessorBase(object): """The base class for processors of lists of lines.""" def should_process(self, file_path): - """Return whether the file at file_path should be processed.""" + """Return whether the file at file_path should be processed. + + The TextFileReader class calls this method prior to reading in + the lines of a file. Use this method, for example, to prevent + the style checker from reading binary files into memory. 
+ + """ raise NotImplementedError('Subclasses should implement.') def process(self, lines, file_path, **kwargs): @@ -540,7 +579,7 @@ class ProcessorBase(object): **kwargs: This argument signifies that the process() method of subclasses of ProcessorBase may support additional keyword arguments. - For example, a style processor's process() method + For example, a style checker's check() method may support a "reportable_lines" parameter that represents the line numbers of the lines for which style errors should be reported. @@ -549,209 +588,129 @@ class ProcessorBase(object): raise NotImplementedError('Subclasses should implement.') -# FIXME: Modify this class to start using the TextFileReader class in -# webkitpy/style/filereader.py. This probably means creating -# a StyleProcessor class that inherits from ProcessorBase. -class StyleChecker(object): +class StyleProcessor(ProcessorBase): - """Supports checking style in files and patches. + """A ProcessorBase for checking style. - Attributes: - error_count: An integer that is the total number of reported - errors for the lifetime of this StyleChecker - instance. - file_count: An integer that is the total number of processed - files. Note that the number of skipped files is - included in this value. + Attributes: + error_count: An integer that is the total number of reported + errors for the lifetime of this instance. """ - def __init__(self, configuration): - """Create a StyleChecker instance. + def __init__(self, configuration, mock_dispatcher=None, + mock_increment_error_count=None, + mock_carriage_checker_class=None): + """Create an instance. Args: - configuration: A StyleCheckerConfiguration instance that controls - the behavior of style checking. + configuration: A StyleProcessorConfiguration instance. + mock_dispatcher: A mock CheckerDispatcher instance. This + parameter is for unit testing. Defaults to a + CheckerDispatcher instance. + mock_increment_error_count: A mock error-count incrementer. + mock_carriage_checker_class: A mock class for checking and + transforming carriage returns. + This parameter is for unit testing. + Defaults to CarriageReturnChecker. """ - self._configuration = configuration - self.error_count = 0 - self.file_count = 0 - - def _increment_error_count(self): - """Increment the total count of reported errors.""" - self.error_count += 1 - - def _read_lines(self, file_path): - """Read the file at a path, and return its lines. - - Raises: - IOError: if the file does not exist or cannot be read. - - """ - # Support the UNIX convention of using "-" for stdin. - if file_path == '-': - file = codecs.StreamReaderWriter(sys.stdin, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace') + if mock_dispatcher is None: + dispatcher = CheckerDispatcher() else: - # We do not open the file with universal newline support - # (codecs does not support it anyway), so the resulting - # lines contain trailing "\r" characters if we are reading - # a file with CRLF endings. - file = codecs.open(file_path, 'r', 'utf8', 'replace') - - contents = file.read() - - lines = contents.split("\n") - return lines - - def _process_file(self, processor, file_path, handle_style_error): - """Process the file using the given style processor.""" - try: - lines = self._read_lines(file_path) - except IOError: - message = 'Could not read file. Skipping: "%s"' % file_path - _log.warn(message) - return - - # Check for and remove trailing carriage returns ("\r"). 
- # - # FIXME: We should probably use the SVN "eol-style" property - # or a white list to decide whether or not to do - # the carriage-return check. Originally, we did the - # check only if (os.linesep != '\r\n'). - carriage_return_processor = CarriageReturnProcessor(handle_style_error) - lines = carriage_return_processor.process(lines) - - processor.process(lines) + dispatcher = mock_dispatcher - def check_paths(self, paths, mock_check_file=None, mock_os=None): - """Check style in the given files or directories. + if mock_increment_error_count is None: + # The following blank line is present to avoid flagging by pep8.py. - Args: - paths: A list of file paths and directory paths. - mock_check_file: A mock of self.check_file for unit testing. - mock_os: A mock os for unit testing. - - """ - check_file = self.check_file if mock_check_file is None else \ - mock_check_file - os_module = os if mock_os is None else mock_os - - for path in paths: - if os_module.path.isdir(path): - self._check_directory(directory=path, - check_file=check_file, - mock_os_walk=os_module.walk) - else: - check_file(path) + def increment_error_count(): + """Increment the total count of reported errors.""" + self.error_count += 1 + else: + increment_error_count = mock_increment_error_count - def _check_directory(self, directory, check_file, mock_os_walk=None): - """Check style in all files in a directory, recursively. + if mock_carriage_checker_class is None: + # This needs to be a class rather than an instance since the + # process() method instantiates one using parameters. + carriage_checker_class = CarriageReturnChecker + else: + carriage_checker_class = mock_carriage_checker_class - Args: - directory: A path to a directory. - check_file: The function to use in place of self.check_file(). - mock_os_walk: A mock os.walk for unit testing. + self.error_count = 0 - """ - os_walk = os.walk if mock_os_walk is None else mock_os_walk + self._carriage_checker_class = carriage_checker_class + self._configuration = configuration + self._dispatcher = dispatcher + self._increment_error_count = increment_error_count - for dir_path, dir_names, file_names in os_walk(directory): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - check_file(file_path) + def should_process(self, file_path): + """Return whether the file should be checked for style.""" + if self._dispatcher.should_skip_without_warning(file_path): + return False + if self._dispatcher.should_skip_with_warning(file_path): + _log.warn('File exempt from style guide. Skipping: "%s"' + % file_path) + return False + return True - def check_file(self, file_path, line_numbers=None, - mock_handle_style_error=None, - mock_os_path_exists=None, - mock_process_file=None): - """Check style in the given file. + def process(self, lines, file_path, line_numbers=None): + """Check the given lines for style. - Args: + Arguments: + lines: A list of all lines in the file to check. file_path: The path of the file to process. If possible, the path should be relative to the source root. Otherwise, path-specific logic may not behave as expected. - line_numbers: An array of line numbers of the lines for which + line_numbers: A list of line numbers of the lines for which style errors should be reported, or None if errors - for all lines should be reported. Normally, this - array contains the line numbers corresponding to the - modified lines of a patch. - mock_handle_style_error: A unit-testing replacement for the function - to call when a style error occurs. 
Defaults - to a DefaultStyleErrorHandler instance. - mock_os_path_exists: A unit-test replacement for os.path.exists. - This parameter should only be used for unit - tests. - mock_process_file: The function to call to process the file. This - parameter should be used only for unit tests. - Defaults to the file processing method of this - class. - - Raises: - SystemExit: if the file does not exist. + for all lines should be reported. When not None, this + list normally contains the line numbers corresponding + to the modified lines of a patch. """ - if mock_handle_style_error is None: - increment = self._increment_error_count - handle_style_error = DefaultStyleErrorHandler( - configuration=self._configuration, - file_path=file_path, - increment_error_count=increment, - line_numbers=line_numbers) - else: - handle_style_error = mock_handle_style_error - - os_path_exists = (os.path.exists if mock_os_path_exists is None else - mock_os_path_exists) - process_file = (self._process_file if mock_process_file is None else - mock_process_file) - - if not os_path_exists(file_path) and file_path != "-": - _log.error("File does not exist: %s" % file_path) - sys.exit(1) + _log.debug("Checking style: " + file_path) - _log.debug("Checking: " + file_path) + style_error_handler = DefaultStyleErrorHandler( + configuration=self._configuration, + file_path=file_path, + increment_error_count=self._increment_error_count, + line_numbers=line_numbers) - self.file_count += 1 + carriage_checker = self._carriage_checker_class(style_error_handler) - dispatcher = ProcessorDispatcher() - - if dispatcher.should_skip_without_warning(file_path): - return - if dispatcher.should_skip_with_warning(file_path): - _log.warn('File exempt from style guide. Skipping: "%s"' - % file_path) - return + # FIXME: We should probably use the SVN "eol-style" property + # or a white list to decide whether or not to do + # the carriage-return check. Originally, we did the + # check only if (os.linesep != '\r\n'). + # + # Check for and remove trailing carriage returns ("\r"). + lines = carriage_checker.check(lines) min_confidence = self._configuration.min_confidence - processor = dispatcher.dispatch_processor(file_path, - handle_style_error, - min_confidence) - if processor is None: - _log.debug('File not a recognized type to check. Skipping: "%s"' - % file_path) - return + checker = self._dispatcher.dispatch(file_path, + style_error_handler, + min_confidence) + + if checker is None: + raise AssertionError("File should not be checked: '%s'" % file_path) - _log.debug("Using class: " + processor.__class__.__name__) + _log.debug("Using class: " + checker.__class__.__name__) - process_file(processor, file_path, handle_style_error) + checker.check(lines) -class PatchChecker(object): +class PatchReader(object): """Supports checking style in patches.""" - def __init__(self, style_checker): - """Create a PatchChecker instance. + def __init__(self, text_file_reader): + """Create a PatchReader instance. Args: - style_checker: A StyleChecker instance. + text_file_reader: A TextFileReader instance. """ - self._file_checker = style_checker + self._text_file_reader = text_file_reader def check(self, patch_string): """Check style in the given patch.""" @@ -775,5 +734,5 @@ class PatchChecker(object): # This optimization also prevents the program from exiting # due to a deleted file. 
if line_numbers: - self._file_checker.check_file(file_path=path, - line_numbers=line_numbers) + self._text_file_reader.process_file(file_path=path, + line_numbers=line_numbers) diff --git a/WebKitTools/Scripts/webkitpy/style/checker_unittest.py b/WebKitTools/Scripts/webkitpy/style/checker_unittest.py index 401a7b3..6e1eaa2 100755 --- a/WebKitTools/Scripts/webkitpy/style/checker_unittest.py +++ b/WebKitTools/Scripts/webkitpy/style/checker_unittest.py @@ -49,18 +49,21 @@ from checker import _all_categories from checker import check_webkit_style_configuration from checker import check_webkit_style_parser from checker import configure_logging -from checker import ProcessorDispatcher -from checker import PatchChecker -from checker import StyleChecker -from checker import StyleCheckerConfiguration +from checker import CheckerDispatcher +from checker import PatchReader +from checker import ProcessorBase +from checker import StyleProcessor +from checker import StyleProcessorConfiguration +from checkers.cpp import CppChecker +from checkers.python import PythonChecker +from checkers.text import TextChecker +from error_handlers import DefaultStyleErrorHandler from filter import validate_filter_rules from filter import FilterConfiguration from optparser import ArgumentParser from optparser import CommandOptionValues -from processors.cpp import CppProcessor -from processors.python import PythonProcessor -from processors.text import TextProcessor from webkitpy.common.system.logtesting import LoggingTestCase +from webkitpy.style.filereader import TextFileReader class ConfigureLoggingTestBase(unittest.TestCase): @@ -265,16 +268,17 @@ class CheckWebKitStyleFunctionTest(unittest.TestCase): parser = check_webkit_style_parser() -class ProcessorDispatcherSkipTest(unittest.TestCase): +class CheckerDispatcherSkipTest(unittest.TestCase): - """Tests the "should skip" methods of the ProcessorDispatcher class.""" + """Tests the "should skip" methods of the CheckerDispatcher class.""" + + def setUp(self): + self._dispatcher = CheckerDispatcher() def test_should_skip_with_warning(self): """Test should_skip_with_warning().""" - dispatcher = ProcessorDispatcher() - # Check a non-skipped file. - self.assertFalse(dispatcher.should_skip_with_warning("foo.txt")) + self.assertFalse(self._dispatcher.should_skip_with_warning("foo.txt")) # Check skipped files. paths_to_skip = [ @@ -289,51 +293,72 @@ class ProcessorDispatcherSkipTest(unittest.TestCase): ] for path in paths_to_skip: - self.assertTrue(dispatcher.should_skip_with_warning(path), + self.assertTrue(self._dispatcher.should_skip_with_warning(path), "Checking: " + path) - def test_should_skip_without_warning(self): - """Test should_skip_without_warning().""" - dispatcher = ProcessorDispatcher() - - # Check a non-skipped file. - self.assertFalse(dispatcher.should_skip_without_warning("foo.txt")) - - # Check skipped files. - paths_to_skip = [ - # LayoutTests folder - "LayoutTests/foo.txt", - ] + def _assert_should_skip_without_warning(self, path, is_checker_none, + expected): + # Check the file type before asserting the return value. 
+ checker = self._dispatcher.dispatch(file_path=path, + handle_style_error=None, + min_confidence=3) + message = 'while checking: %s' % path + self.assertEquals(checker is None, is_checker_none, message) + self.assertEquals(self._dispatcher.should_skip_without_warning(path), + expected, message) + + def test_should_skip_without_warning__true(self): + """Test should_skip_without_warning() for True return values.""" + # Check a file with NONE file type. + path = 'foo.asdf' # Non-sensical file extension. + self._assert_should_skip_without_warning(path, + is_checker_none=True, + expected=True) + + # Check files with non-NONE file type. These examples must be + # drawn from the _SKIPPED_FILES_WITHOUT_WARNING configuration + # variable. + path = os.path.join('LayoutTests', 'foo.txt') + self._assert_should_skip_without_warning(path, + is_checker_none=False, + expected=True) + + def test_should_skip_without_warning__false(self): + """Test should_skip_without_warning() for False return values.""" + paths = ['foo.txt', + os.path.join('LayoutTests', 'ChangeLog'), + ] - for path in paths_to_skip: - self.assertTrue(dispatcher.should_skip_without_warning(path), - "Checking: " + path) + for path in paths: + self._assert_should_skip_without_warning(path, + is_checker_none=False, + expected=False) -class ProcessorDispatcherDispatchTest(unittest.TestCase): +class CheckerDispatcherDispatchTest(unittest.TestCase): - """Tests dispatch_processor() method of ProcessorDispatcher class.""" + """Tests dispatch() method of CheckerDispatcher class.""" def mock_handle_style_error(self): pass - def dispatch_processor(self, file_path): - """Call dispatch_processor() with the given file path.""" - dispatcher = ProcessorDispatcher() - processor = dispatcher.dispatch_processor(file_path, - self.mock_handle_style_error, - min_confidence=3) - return processor - - def assert_processor_none(self, file_path): - """Assert that the dispatched processor is None.""" - processor = self.dispatch_processor(file_path) - self.assertTrue(processor is None, 'Checking: "%s"' % file_path) - - def assert_processor(self, file_path, expected_class): - """Assert the type of the dispatched processor.""" - processor = self.dispatch_processor(file_path) - got_class = processor.__class__ + def dispatch(self, file_path): + """Call dispatch() with the given file path.""" + dispatcher = CheckerDispatcher() + checker = dispatcher.dispatch(file_path, + self.mock_handle_style_error, + min_confidence=3) + return checker + + def assert_checker_none(self, file_path): + """Assert that the dispatched checker is None.""" + checker = self.dispatch(file_path) + self.assertTrue(checker is None, 'Checking: "%s"' % file_path) + + def assert_checker(self, file_path, expected_class): + """Assert the type of the dispatched checker.""" + checker = self.dispatch(file_path) + got_class = checker.__class__ self.assertEquals(got_class, expected_class, 'For path "%(file_path)s" got %(got_class)s when ' "expecting %(expected_class)s." 
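For orientation, the dispatch rules these tests exercise boil down to a small extension lookup. Below is a minimal standalone sketch of that logic; the extension lists are abbreviated here, and the full tables are the _CPP_FILE_EXTENSIONS / _PYTHON_FILE_EXTENSION / _TEXT_FILE_EXTENSIONS constants added earlier in this patch:

    import os.path

    # Abbreviated stand-ins for the extension tables defined in checker.py.
    _CPP = ['c', 'cpp', 'h']
    _PY = 'py'
    _TEXT = ['css', 'html', 'txt']

    def file_type(file_path):
        # Mirrors CheckerDispatcher._file_type(): an extension lookup with two
        # special cases -- "-" (stdin) is checked as C++, and any file whose
        # basename starts with "ChangeLog" is checked as text.
        extension = os.path.splitext(file_path)[1].lstrip('.')
        if extension in _CPP or file_path == '-':
            return 'CPP'
        elif extension == _PY:
            return 'PYTHON'
        elif (os.path.basename(file_path).startswith('ChangeLog') or
              (not extension and 'WebKitTools/Scripts/' in file_path) or
              extension in _TEXT):
            return 'TEXT'
        return 'NONE'

    assert file_type('foo.cpp') == 'CPP'
    assert file_type('LayoutTests/ChangeLog-2009-06-16') == 'TEXT'
    assert file_type('foo.asdf') == 'NONE'  # Nonsensical extension: skipped.

Note that files mapping to NONE are exactly the ones should_skip_without_warning() drops silently, which is why the tests above check the file type and the skip decision together.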
@@ -341,17 +366,17 @@ class ProcessorDispatcherDispatchTest(unittest.TestCase): "got_class": got_class, "expected_class": expected_class}) - def assert_processor_cpp(self, file_path): - """Assert that the dispatched processor is a CppProcessor.""" - self.assert_processor(file_path, CppProcessor) + def assert_checker_cpp(self, file_path): + """Assert that the dispatched checker is a CppChecker.""" + self.assert_checker(file_path, CppChecker) - def assert_processor_python(self, file_path): - """Assert that the dispatched processor is a PythonProcessor.""" - self.assert_processor(file_path, PythonProcessor) + def assert_checker_python(self, file_path): + """Assert that the dispatched checker is a PythonChecker.""" + self.assert_checker(file_path, PythonChecker) - def assert_processor_text(self, file_path): - """Assert that the dispatched processor is a TextProcessor.""" - self.assert_processor(file_path, TextProcessor) + def assert_checker_text(self, file_path): + """Assert that the dispatched checker is a TextChecker.""" + self.assert_checker(file_path, TextChecker) def test_cpp_paths(self): """Test paths that should be checked as C++.""" @@ -363,26 +388,26 @@ class ProcessorDispatcherDispatchTest(unittest.TestCase): ] for path in paths: - self.assert_processor_cpp(path) + self.assert_checker_cpp(path) - # Check processor attributes on a typical input. + # Check checker attributes on a typical input. file_base = "foo" file_extension = "c" file_path = file_base + "." + file_extension - self.assert_processor_cpp(file_path) - processor = self.dispatch_processor(file_path) - self.assertEquals(processor.file_extension, file_extension) - self.assertEquals(processor.file_path, file_path) - self.assertEquals(processor.handle_style_error, self.mock_handle_style_error) - self.assertEquals(processor.min_confidence, 3) + self.assert_checker_cpp(file_path) + checker = self.dispatch(file_path) + self.assertEquals(checker.file_extension, file_extension) + self.assertEquals(checker.file_path, file_path) + self.assertEquals(checker.handle_style_error, self.mock_handle_style_error) + self.assertEquals(checker.min_confidence, 3) # Check "-" for good measure. file_base = "-" file_extension = "" file_path = file_base - self.assert_processor_cpp(file_path) - processor = self.dispatch_processor(file_path) - self.assertEquals(processor.file_extension, file_extension) - self.assertEquals(processor.file_path, file_path) + self.assert_checker_cpp(file_path) + checker = self.dispatch(file_path) + self.assertEquals(checker.file_extension, file_extension) + self.assertEquals(checker.file_path, file_path) def test_python_paths(self): """Test paths that should be checked as Python.""" @@ -392,63 +417,81 @@ class ProcessorDispatcherDispatchTest(unittest.TestCase): ] for path in paths: - self.assert_processor_python(path) + self.assert_checker_python(path) - # Check processor attributes on a typical input. + # Check checker attributes on a typical input. file_base = "foo" file_extension = "css" file_path = file_base + "." 
+ file_extension - self.assert_processor_text(file_path) - processor = self.dispatch_processor(file_path) - self.assertEquals(processor.file_path, file_path) - self.assertEquals(processor.handle_style_error, + self.assert_checker_text(file_path) + checker = self.dispatch(file_path) + self.assertEquals(checker.file_path, file_path) + self.assertEquals(checker.handle_style_error, self.mock_handle_style_error) def test_text_paths(self): """Test paths that should be checked as text.""" paths = [ "ChangeLog", + "ChangeLog-2009-06-16", + "foo.ac", + "foo.cc", + "foo.cgi", "foo.css", + "foo.exp", + "foo.flex", + "foo.gyp", + "foo.gypi", "foo.html", "foo.idl", + "foo.in", "foo.js", "foo.mm", "foo.php", + "foo.pl", "foo.pm", + "foo.pri", + "foo.pro", + "foo.rb", + "foo.sh", "foo.txt", - "FooChangeLog.bak", - "WebCore/ChangeLog", - "WebCore/inspector/front-end/inspector.js", - "WebKitTools/Scripts/check-webkit-style", + "foo.wm", + "foo.xhtml", + "foo.y", + os.path.join("WebCore", "ChangeLog"), + os.path.join("WebCore", "inspector", "front-end", "inspector.js"), + os.path.join("WebKitTools", "Scripts", "check-webkit-style"), ] for path in paths: - self.assert_processor_text(path) + self.assert_checker_text(path) - # Check processor attributes on a typical input. + # Check checker attributes on a typical input. file_base = "foo" file_extension = "css" file_path = file_base + "." + file_extension - self.assert_processor_text(file_path) - processor = self.dispatch_processor(file_path) - self.assertEquals(processor.file_path, file_path) - self.assertEquals(processor.handle_style_error, self.mock_handle_style_error) + self.assert_checker_text(file_path) + checker = self.dispatch(file_path) + self.assertEquals(checker.file_path, file_path) + self.assertEquals(checker.handle_style_error, self.mock_handle_style_error) def test_none_paths(self): """Test paths that have no file type..""" paths = [ "Makefile", + "foo.asdf", # Non-sensical file extension. 
"foo.png", "foo.exe", + "foo.vcproj", ] for path in paths: - self.assert_processor_none(path) + self.assert_checker_none(path) -class StyleCheckerConfigurationTest(unittest.TestCase): +class StyleProcessorConfigurationTest(unittest.TestCase): - """Tests the StyleCheckerConfiguration class.""" + """Tests the StyleProcessorConfiguration class.""" def setUp(self): self._error_messages = [] @@ -458,11 +501,11 @@ class StyleCheckerConfigurationTest(unittest.TestCase): self._error_messages.append(message) def _style_checker_configuration(self, output_format="vs7"): - """Return a StyleCheckerConfiguration instance for testing.""" + """Return a StyleProcessorConfiguration instance for testing.""" base_rules = ["-whitespace", "+whitespace/tab"] filter_configuration = FilterConfiguration(base_rules=base_rules) - return StyleCheckerConfiguration( + return StyleProcessorConfiguration( filter_configuration=filter_configuration, max_reports_per_category={"whitespace/newline": 1}, min_confidence=3, @@ -512,277 +555,246 @@ class StyleCheckerConfigurationTest(unittest.TestCase): ["foo.h(100): message [whitespace/tab] [5]\n"]) -class StyleCheckerTest(unittest.TestCase): +class StyleProcessor_EndToEndTest(LoggingTestCase): - """Test the StyleChecker class.""" + """Test the StyleProcessor class with an emphasis on end-to-end tests.""" - def _mock_stderr_write(self, message): - pass + def setUp(self): + LoggingTestCase.setUp(self) + self._messages = [] - def _style_checker(self, configuration): - return StyleChecker(configuration) + def _mock_stderr_write(self, message): + """Save a message so it can later be asserted.""" + self._messages.append(message) def test_init(self): """Test __init__ constructor.""" - configuration = StyleCheckerConfiguration( + configuration = StyleProcessorConfiguration( filter_configuration=FilterConfiguration(), max_reports_per_category={}, min_confidence=3, output_format="vs7", stderr_write=self._mock_stderr_write) + processor = StyleProcessor(configuration) - style_checker = self._style_checker(configuration) - - self.assertEquals(style_checker._configuration, configuration) - self.assertEquals(style_checker.error_count, 0) - self.assertEquals(style_checker.file_count, 0) - - -class StyleCheckerCheckFileBase(LoggingTestCase): - - def setUp(self): - LoggingTestCase.setUp(self) - self.warning_messages = "" - - def mock_stderr_write(self, warning_message): - self.warning_messages += warning_message - - def _style_checker_configuration(self): - return StyleCheckerConfiguration( - filter_configuration=FilterConfiguration(), - max_reports_per_category={"whitespace/newline": 1}, - min_confidence=3, - output_format="vs7", - stderr_write=self.mock_stderr_write) + self.assertEquals(processor.error_count, 0) + self.assertEquals(self._messages, []) + def test_process(self): + configuration = StyleProcessorConfiguration( + filter_configuration=FilterConfiguration(), + max_reports_per_category={}, + min_confidence=3, + output_format="vs7", + stderr_write=self._mock_stderr_write) + processor = StyleProcessor(configuration) -class StyleCheckerCheckFileTest(StyleCheckerCheckFileBase): + processor.process(lines=['line1', 'Line with tab:\t'], + file_path='foo.txt') + self.assertEquals(processor.error_count, 1) + expected_messages = ['foo.txt(2): Line contains tab character. ' + '[whitespace/tab] [5]\n'] + self.assertEquals(self._messages, expected_messages) - """Test the check_file() method of the StyleChecker class. 
- The check_file() method calls its process_file parameter when - given a file that should not be skipped. +class StyleProcessor_CodeCoverageTest(LoggingTestCase): - The "got_*" attributes of this class are the parameters passed - to process_file by calls to check_file() made by this test - class. These attributes allow us to check the parameter values - passed internally to the process_file function. + """Test the StyleProcessor class with an emphasis on code coverage. - Attributes: - got_file_path: The file_path parameter passed by check_file() - to its process_file parameter. - got_handle_style_error: The handle_style_error parameter passed - by check_file() to its process_file - parameter. - got_processor: The processor parameter passed by check_file() to - its process_file parameter. - warning_messages: A string containing all of the warning messages - written to the mock_stderr_write method of - this class. + This class makes heavy use of mock objects. """ - def setUp(self): - StyleCheckerCheckFileBase.setUp(self) - self.got_file_path = None - self.got_handle_style_error = None - self.got_processor = None - def mock_handle_style_error(self): - pass - - def mock_os_path_exists(self, path): - # We deliberately make it so that this method returns False unless - # the caller has made an effort to put "does_exist" in the path. - return path.find("does_exist") > -1 - - def mock_process_file(self, processor, file_path, handle_style_error): - """A mock _process_file(). + class MockDispatchedChecker(object): - See the documentation for this class for more information - on this function. + """A mock checker dispatched by the MockDispatcher.""" - """ - self.got_file_path = file_path - self.got_handle_style_error = handle_style_error - self.got_processor = processor - - def assert_attributes(self, - expected_file_path, - expected_handle_style_error, - expected_processor, - expected_warning_messages): - """Assert that the attributes of this class equal the given values.""" - self.assertEquals(self.got_file_path, expected_file_path) - self.assertEquals(self.got_handle_style_error, expected_handle_style_error) - self.assertEquals(self.got_processor, expected_processor) - self.assertEquals(self.warning_messages, expected_warning_messages) - - def call_check_file(self, file_path): - """Call the check_file() method of a test StyleChecker instance.""" - # Confirm that the attributes are reset. - self.assert_attributes(None, None, None, "") - - configuration = self._style_checker_configuration() + def __init__(self, file_path, min_confidence, style_error_handler): + self.file_path = file_path + self.min_confidence = min_confidence + self.style_error_handler = style_error_handler - style_checker = StyleChecker(configuration) + def check(self, lines): + self.lines = lines - style_checker.check_file(file_path=file_path, - mock_handle_style_error=self.mock_handle_style_error, - mock_os_path_exists=self.mock_os_path_exists, - mock_process_file=self.mock_process_file) + class MockDispatcher(object): - self.assertEquals(style_checker.file_count, 1) + """A mock CheckerDispatcher class.""" - def test_check_file_does_not_exist(self): - file_path = "file_does_not_exist.txt" - - # Confirm that the file does not exist. - self.assertFalse(self.mock_os_path_exists(file_path)) + def __init__(self): + self.dispatched_checker = None - # Check the outcome. 
- self.assertRaises(SystemExit, self.call_check_file, file_path) - self.assertLog(["ERROR: File does not exist: " - "file_does_not_exist.txt\n"]) + def should_skip_with_warning(self, file_path): + return file_path.endswith('skip_with_warning.txt') - def test_check_file_stdin(self): - file_path = "-" + def should_skip_without_warning(self, file_path): + return file_path.endswith('skip_without_warning.txt') - # Confirm that the file does not exist. - self.assertFalse(self.mock_os_path_exists(file_path)) + def dispatch(self, file_path, style_error_handler, min_confidence): + if file_path.endswith('do_not_process.txt'): + return None - # Check the outcome. - self.call_check_file(file_path) - expected_processor = CppProcessor(file_path, - "", - self.mock_handle_style_error, 3) - self.assert_attributes(file_path, - self.mock_handle_style_error, - expected_processor, - "") + checker = StyleProcessor_CodeCoverageTest.MockDispatchedChecker( + file_path, + min_confidence, + style_error_handler) - def test_check_file_on_skip_without_warning(self): - """Test check_file() for a skipped-without-warning file.""" + # Save the dispatched checker so the current test case has a + # way to access and check it. + self.dispatched_checker = checker - file_path = "LayoutTests/does_exist/foo.txt" + return checker - dispatcher = ProcessorDispatcher() - # Confirm that the input file is truly a skipped-without-warning file. - self.assertTrue(dispatcher.should_skip_without_warning(file_path)) + def setUp(self): + LoggingTestCase.setUp(self) + # We can pass an error-message swallower here because error message + # output is tested instead in the end-to-end test case above. + configuration = StyleProcessorConfiguration( + filter_configuration=FilterConfiguration(), + max_reports_per_category={"whitespace/newline": 1}, + min_confidence=3, + output_format="vs7", + stderr_write=self._swallow_stderr_message) + + mock_carriage_checker_class = self._create_carriage_checker_class() + mock_dispatcher = self.MockDispatcher() + # We do not need to use a real incrementer here because error-count + # incrementing is tested instead in the end-to-end test case above. + mock_increment_error_count = self._do_nothing + + processor = StyleProcessor(configuration=configuration, + mock_carriage_checker_class=mock_carriage_checker_class, + mock_dispatcher=mock_dispatcher, + mock_increment_error_count=mock_increment_error_count) + + self._configuration = configuration + self._mock_dispatcher = mock_dispatcher + self._processor = processor + + def _do_nothing(self): + # We provide this function so the caller can pass it to the + # StyleProcessor constructor. This lets us assert the equality of + # the DefaultStyleErrorHandler instance generated by the process() + # method with an expected instance. + pass - # Check the outcome. - self.call_check_file(file_path) - self.assert_attributes(None, None, None, "") + def _swallow_stderr_message(self, message): + """Swallow a message passed to stderr.write().""" + # This is a mock stderr.write() for passing to the constructor + # of the StyleProcessorConfiguration class. + pass - def test_check_file_on_skip_with_warning(self): - """Test check_file() for a skipped-with-warning file.""" + def _create_carriage_checker_class(self): - file_path = "does_exist/gtk2drawing.c" + # Create a reference to self with a new name so its name does not + # conflict with the self introduced below. + test_case = self - dispatcher = ProcessorDispatcher() - # Check that the input file is truly a skipped-with-warning file. 
- self.assertTrue(dispatcher.should_skip_with_warning(file_path)) + class MockCarriageChecker(object): - # Check the outcome. - self.call_check_file(file_path) - self.assert_attributes(None, None, None, "") - self.assertLog(["WARNING: File exempt from style guide. " - 'Skipping: "does_exist/gtk2drawing.c"\n']) + """A mock carriage-return checker.""" - def test_check_file_on_non_skipped(self): + def __init__(self, style_error_handler): + self.style_error_handler = style_error_handler - # We use a C++ file since by using a CppProcessor, we can check - # that all of the possible information is getting passed to - # process_file (in particular, the min_confidence parameter). - file_base = "foo_does_exist" - file_extension = "cpp" - file_path = file_base + "." + file_extension + # This gives the current test case access to the + # instantiated carriage checker. + test_case.carriage_checker = self - dispatcher = ProcessorDispatcher() - # Check that the input file is truly a C++ file. - self.assertEquals(dispatcher._file_type(file_path), style.FileType.CPP) + def check(self, lines): + # Save the lines so the current test case has a way to access + # and check them. + self.lines = lines - # Check the outcome. - self.call_check_file(file_path) + return lines - expected_processor = CppProcessor(file_path, file_extension, self.mock_handle_style_error, 3) + return MockCarriageChecker - self.assert_attributes(file_path, - self.mock_handle_style_error, - expected_processor, - "") + def test_should_process__skip_without_warning(self): + """Test should_process() for a skip-without-warning file.""" + file_path = "foo/skip_without_warning.txt" + self.assertFalse(self._processor.should_process(file_path)) -class StyleCheckerCheckPathsTest(unittest.TestCase): + def test_should_process__skip_with_warning(self): + """Test should_process() for a skip-with-warning file.""" + file_path = "foo/skip_with_warning.txt" - """Test the check_paths() method of the StyleChecker class.""" + self.assertFalse(self._processor.should_process(file_path)) - class MockOs(object): + self.assertLog(['WARNING: File exempt from style guide. ' + 'Skipping: "foo/skip_with_warning.txt"\n']) - class MockPath(object): + def test_should_process__true_result(self): + """Test should_process() for a file that should be processed.""" + file_path = "foo/skip_process.txt" - """A mock os.path.""" + self.assertTrue(self._processor.should_process(file_path)) - def isdir(self, path): - return path == "directory" + def test_process__checker_dispatched(self): + """Test the process() method for a path with a dispatched checker.""" + file_path = 'foo.txt' + lines = ['line1', 'line2'] + line_numbers = [100] - def __init__(self): - self.path = self.MockPath() + expected_error_handler = DefaultStyleErrorHandler( + configuration=self._configuration, + file_path=file_path, + increment_error_count=self._do_nothing, + line_numbers=line_numbers) - def walk(self, directory): - """A mock of os.walk.""" - if directory == "directory": - dirs = [("dir_path1", [], ["file1", "file2"]), - ("dir_path2", [], ["file3"])] - return dirs - return None + self._processor.process(lines=lines, + file_path=file_path, + line_numbers=line_numbers) - def setUp(self): - self._checked_files = [] + # Check that the carriage-return checker was instantiated correctly + # and was passed lines correctly. 
+ carriage_checker = self.carriage_checker + self.assertEquals(carriage_checker.style_error_handler, + expected_error_handler) + self.assertEquals(carriage_checker.lines, ['line1', 'line2']) - def _mock_check_file(self, file): - self._checked_files.append(file) + # Check that the style checker was dispatched correctly and was + # passed lines correctly. + checker = self._mock_dispatcher.dispatched_checker + self.assertEquals(checker.file_path, 'foo.txt') + self.assertEquals(checker.min_confidence, 3) + self.assertEquals(checker.style_error_handler, expected_error_handler) - def test_check_paths(self): - """Test StyleChecker.check_paths().""" - checker = StyleChecker(configuration=None) - mock_check_file = self._mock_check_file - mock_os = self.MockOs() + self.assertEquals(checker.lines, ['line1', 'line2']) - # Confirm that checked files is empty at the outset. - self.assertEquals(self._checked_files, []) - checker.check_paths(["path1", "directory"], - mock_check_file=mock_check_file, - mock_os=mock_os) - self.assertEquals(self._checked_files, - ["path1", - os.path.join("dir_path1", "file1"), - os.path.join("dir_path1", "file2"), - os.path.join("dir_path2", "file3")]) + def test_process__no_checker_dispatched(self): + """Test the process() method for a path with no dispatched checker.""" + path = os.path.join('foo', 'do_not_process.txt') + self.assertRaises(AssertionError, self._processor.process, + lines=['line1', 'line2'], file_path=path, + line_numbers=[100]) -class PatchCheckerTest(unittest.TestCase): +class PatchReaderTest(unittest.TestCase): - """Test the PatchChecker class.""" + """Test the PatchReader class.""" - class MockStyleChecker(object): + class MockTextFileReader(object): def __init__(self): - self.checked_files = [] + self.passed_to_process_file = [] """A list of (file_path, line_numbers) pairs.""" - def check_file(self, file_path, line_numbers): - self.checked_files.append((file_path, line_numbers)) + def process_file(self, file_path, line_numbers): + self.passed_to_process_file.append((file_path, line_numbers)) def setUp(self): - style_checker = self.MockStyleChecker() - self._style_checker = style_checker - self._patch_checker = PatchChecker(style_checker) + file_reader = self.MockTextFileReader() + self._file_reader = file_reader + self._patch_checker = PatchReader(file_reader) def _call_check_patch(self, patch_string): self._patch_checker.check(patch_string) - def _assert_checked(self, checked_files): - self.assertEquals(self._style_checker.checked_files, checked_files) + def _assert_checked(self, passed_to_process_file): + self.assertEquals(self._file_reader.passed_to_process_file, + passed_to_process_file) def test_check_patch(self): # The modified line_numbers array for this patch is: [2]. 
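The comment above alludes to PatchReader deriving the modified line numbers from the patch text. As a rough sketch of that idea (an illustrative reimplementation, not the actual webkitpy diff-parsing code; the helper name is hypothetical), the new-file line numbers of added lines can be collected like this:

import re

def modified_line_numbers(patch_string):
    """Collect new-file line numbers of lines added by a unified diff."""
    line_numbers = []
    new_file_line = None
    for line in patch_string.splitlines():
        matched = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
        if matched:
            # Start of a hunk: group 1 is the first new-file line number.
            new_file_line = int(matched.group(1))
        elif new_file_line is None:
            continue  # Still in the file header ("---"/"+++" lines).
        elif line.startswith("+"):
            line_numbers.append(new_file_line)
            new_file_line += 1
        elif line.startswith("-"):
            pass  # Removed lines do not advance the new-file counter.
        else:
            new_file_line += 1  # Context line.
    return line_numbers

For a simple one-hunk patch like the one this test describes, such a helper would produce the [2] mentioned in the comment.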
diff --git a/WebKitTools/Scripts/webkitpy/style/processors/__init__.py b/WebKitTools/Scripts/webkitpy/style/checkers/__init__.py index ef65bee..ef65bee 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/__init__.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/__init__.py diff --git a/WebKitTools/Scripts/webkitpy/style/processors/common.py b/WebKitTools/Scripts/webkitpy/style/checkers/common.py index 30b8fed..a2d933f 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/common.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/common.py @@ -23,7 +23,7 @@ """Supports style checking not specific to any one file type.""" -# FIXME: Test this list in the same way that the list of CppProcessor +# FIXME: Test this list in the same way that the list of CppChecker # categories is tested, for example by checking that all of its # elements appear in the unit tests. This should probably be done # after moving the relevant cpp_unittest.ErrorCollector code @@ -33,14 +33,14 @@ categories = set([ ]) -class CarriageReturnProcessor(object): +class CarriageReturnChecker(object): """Supports checking for and handling carriage returns.""" def __init__(self, handle_style_error): self._handle_style_error = handle_style_error - def process(self, lines): + def check(self, lines): """Check for and strip trailing carriage returns from lines.""" for line_number in range(len(lines)): if not lines[line_number].endswith("\r"): diff --git a/WebKitTools/Scripts/webkitpy/style/processors/common_unittest.py b/WebKitTools/Scripts/webkitpy/style/checkers/common_unittest.py index 3dde7b9..b67b7b0 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/common_unittest.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/common_unittest.py @@ -24,16 +24,16 @@ import unittest -from common import CarriageReturnProcessor +from common import CarriageReturnChecker -# FIXME: The unit tests for the cpp, text, and common processors should +# FIXME: The unit tests for the cpp, text, and common checkers should # share supporting test code. This can include, for example, the # mock style error handling code and the code to check that all -# of a processor's categories are covered by the unit tests. +# of a checker's categories are covered by the unit tests. # Such shared code can be located in a shared test file, perhaps # even this file. -class CarriageReturnProcessorTest(unittest.TestCase): +class CarriageReturnCheckerTest(unittest.TestCase): """Tests check_no_carriage_return().""" @@ -55,8 +55,8 @@ class CarriageReturnProcessorTest(unittest.TestCase): """Process the given line and assert that the result is correct.""" handle_style_error = self._mock_style_error_handler - processor = CarriageReturnProcessor(handle_style_error) - output_lines = processor.process(input_lines) + checker = CarriageReturnChecker(handle_style_error) + output_lines = checker.check(input_lines) # Check both the return value and error messages. self.assertEquals(output_lines, expected_lines) @@ -82,7 +82,7 @@ class CarriageReturnProcessorTest(unittest.TestCase): []) def test_carriage_in_middle(self): - # The CarriageReturnProcessor checks only the final character + # The CarriageReturnChecker checks only the final character # of each line. 
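# Condensed from the common.py hunk above, the renamed class is small
# enough to sketch inline; the error category and message below are
# illustrative stand-ins, not necessarily webkitpy's exact strings:
#
#     class CarriageReturnChecker(object):
#         def __init__(self, handle_style_error):
#             self._handle_style_error = handle_style_error
#
#         def check(self, lines):
#             # Only a trailing "\r" is reported and stripped; carriage
#             # returns in the middle of a line are left alone.
#             for line_number in range(len(lines)):
#                 if not lines[line_number].endswith("\r"):
#                     continue
#                 self._handle_style_error(line_number + 1,
#                                          "whitespace/carriage_return",
#                                          1,
#                                          "Unexpected carriage return (\\r).")
#                 lines[line_number] = lines[line_number].rstrip("\r")
#             return lines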
self.assert_carriage_return(["carriage\r in a string"],
 ["carriage\r in a string"],
diff --git a/WebKitTools/Scripts/webkitpy/style/processors/cpp.py b/WebKitTools/Scripts/webkitpy/style/checkers/cpp.py
index 23be9f9..3e787d6 100644
--- a/WebKitTools/Scripts/webkitpy/style/processors/cpp.py
+++ b/WebKitTools/Scripts/webkitpy/style/checkers/cpp.py
@@ -2499,6 +2499,10 @@ def check_identifier_name_in_declaration(filename, line_number, line, error):
 and not modified_identifier == "const_iterator"):
 error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
+ # Check for variables named 'l'; these are too easy to confuse with '1' in some fonts.
+ if modified_identifier == 'l':
+ error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")
+
 # There can be only one declaration in non-for-control statements.
 if control_statement:
 return
@@ -2511,7 +2515,6 @@ def check_identifier_name_in_declaration(filename, line_number, line, error):
 number_of_identifiers += 1
 line = line[matched.end():]
-
 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern, error):
 """Checks for a C-style cast by looking for the pattern.
@@ -2877,7 +2880,7 @@ def _process_lines(filename, file_extension, lines, error, min_confidence):
 check_for_new_line_at_eof(lines, error)
-class CppProcessor(object):
+class CppChecker(object):
 """Processes C++ lines for checking style."""
@@ -2952,7 +2955,7 @@ class CppProcessor(object):
 def __init__(self, file_path, file_extension, handle_style_error, min_confidence):
- """Create a CppProcessor instance.
+ """Create a CppChecker instance.
 Args:
 file_extension: A string that is the file extension, without
@@ -2966,7 +2969,7 @@ class CppProcessor(object):
 # Useful for unit testing.
 def __eq__(self, other):
- """Return whether this CppProcessor instance is equal to another."""
+ """Return whether this CppChecker instance is equal to another."""
 if self.file_extension != other.file_extension:
 return False
 if self.file_path != other.file_path:
@@ -2983,12 +2986,12 @@ class CppProcessor(object):
 # Python does not automatically deduce __ne__() from __eq__().
 return not self.__eq__(other)
- def process(self, lines):
+ def check(self, lines):
 _process_lines(self.file_path, self.file_extension, lines,
 self.handle_style_error, self.min_confidence)
# FIXME: Remove this function (requires refactoring unit tests).
def process_file_data(filename, file_extension, lines, error, min_confidence):
- processor = CppProcessor(filename, file_extension, error, min_confidence)
- processor.process(lines)
+ checker = CppChecker(filename, file_extension, error, min_confidence)
+ checker.check(lines)
diff --git a/WebKitTools/Scripts/webkitpy/style/processors/cpp_unittest.py b/WebKitTools/Scripts/webkitpy/style/checkers/cpp_unittest.py
index 0a3fe08..5a5aabd 100644
--- a/WebKitTools/Scripts/webkitpy/style/processors/cpp_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/style/checkers/cpp_unittest.py
@@ -42,13 +42,13 @@ import random
import re
import unittest
import cpp as cpp_style
-from cpp import CppProcessor
+from cpp import CppChecker
# This class works as an error collector and replaces cpp_style.Error
# function for the unit tests. We also verify each category we see
# is in STYLE_CATEGORIES, to help keep that list up to date.
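# In miniature, an "error collector" is just a callable that records
# what a checker reports instead of printing it. A minimal stand-in
# with the same (line_number, category, confidence, message) signature
# used by the handle_style_error callbacks in this patch:
#
#     class MinimalErrorCollector(object):
#         def __init__(self):
#             self.errors = []
#
#         def __call__(self, line_number, category, confidence, message):
#             self.errors.append((line_number, category, confidence, message))
#
# The real ErrorCollector below additionally verifies each reported
# category against the known category list, as the comment above says.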
class ErrorCollector: - _all_style_categories = CppProcessor.categories + _all_style_categories = CppChecker.categories # This is a list including all categories seen in any unit test. _seen_style_categories = {} @@ -3538,7 +3538,8 @@ class WebKitStyleTest(CppStyleTestBase): 'foo.h') def test_names(self): - name_error_message = " is incorrectly named. Don't use underscores in your identifier names. [readability/naming] [4]" + name_underscore_error_message = " is incorrectly named. Don't use underscores in your identifier names. [readability/naming] [4]" + name_tooshort_error_message = " is incorrectly named. Don't use the single letter 'l' as an identifier name. [readability/naming] [4]" # Basic cases from WebKit style guide. self.assert_lint('struct Data;', '') @@ -3546,60 +3547,65 @@ class WebKitStyleTest(CppStyleTestBase): self.assert_lint('class HTMLDocument;', '') self.assert_lint('String mimeType();', '') self.assert_lint('size_t buffer_size;', - 'buffer_size' + name_error_message) + 'buffer_size' + name_underscore_error_message) self.assert_lint('short m_length;', '') self.assert_lint('short _length;', - '_length' + name_error_message) + '_length' + name_underscore_error_message) self.assert_lint('short length_;', - 'length_' + name_error_message) + 'length_' + name_underscore_error_message) self.assert_lint('unsigned _length;', - '_length' + name_error_message) + '_length' + name_underscore_error_message) self.assert_lint('unsigned int _length;', - '_length' + name_error_message) + '_length' + name_underscore_error_message) self.assert_lint('unsigned long long _length;', - '_length' + name_error_message) + '_length' + name_underscore_error_message) + + # Variable name 'l' is easy to confuse with '1' + self.assert_lint('int l;', 'l' + name_tooshort_error_message) + self.assert_lint('size_t l;', 'l' + name_tooshort_error_message) + self.assert_lint('long long l;', 'l' + name_tooshort_error_message) # Pointers, references, functions, templates, and adjectives. 
self.assert_lint('char* under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('const int UNDER_SCORE;', - 'UNDER_SCORE' + name_error_message) + 'UNDER_SCORE' + name_underscore_error_message) self.assert_lint('static inline const char const& const under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('WebCore::RenderObject* under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('int func_name();', - 'func_name' + name_error_message) + 'func_name' + name_underscore_error_message) self.assert_lint('RefPtr<RenderObject*> under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('WTF::Vector<WTF::RefPtr<const RenderObject* const> > under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('int under_score[];', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('struct dirent* under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('long under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('long long under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('long double under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('long long int under_score;', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) # Declarations in control statement. self.assert_lint('if (int under_score = 42) {', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('else if (int under_score = 42) {', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('for (int under_score = 42; cond; i++) {', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('while (foo & under_score = bar) {', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('for (foo * under_score = p; cond; i++) {', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('for (foo * under_score; cond; i++) {', - 'under_score' + name_error_message) + 'under_score' + name_underscore_error_message) self.assert_lint('while (foo & value_in_thirdparty_library) {', '') self.assert_lint('while (foo * value_in_thirdparty_library) {', '') self.assert_lint('if (mli && S_OK == mli->foo()) {', '') @@ -3607,38 +3613,38 @@ class WebKitStyleTest(CppStyleTestBase): # More member variables and functions. self.assert_lint('int SomeClass::s_validName', '') self.assert_lint('int m_under_score;', - 'm_under_score' + name_error_message) + 'm_under_score' + name_underscore_error_message) self.assert_lint('int SomeClass::s_under_score = 0;', - 'SomeClass::s_under_score' + name_error_message) + 'SomeClass::s_under_score' + name_underscore_error_message) self.assert_lint('int SomeClass::under_score = 0;', - 'SomeClass::under_score' + name_error_message) + 'SomeClass::under_score' + name_underscore_error_message) # Other statements. 
self.assert_lint('return INT_MAX;', '')
 self.assert_lint('return_t under_score;',
- 'under_score' + name_error_message)
+ 'under_score' + name_underscore_error_message)
 self.assert_lint('goto under_score;',
- 'under_score' + name_error_message)
+ 'under_score' + name_underscore_error_message)
 self.assert_lint('delete static_cast<Foo*>(p);', '')
 # Multiple variables in one line.
 self.assert_lint('void myFunction(int variable1, int another_variable);',
- 'another_variable' + name_error_message)
+ 'another_variable' + name_underscore_error_message)
 self.assert_lint('int variable1, another_variable;',
- 'another_variable' + name_error_message)
+ 'another_variable' + name_underscore_error_message)
 self.assert_lint('int first_variable, secondVariable;',
- 'first_variable' + name_error_message)
+ 'first_variable' + name_underscore_error_message)
 self.assert_lint('void my_function(int variable_1, int variable_2);',
- ['my_function' + name_error_message,
- 'variable_1' + name_error_message,
- 'variable_2' + name_error_message])
+ ['my_function' + name_underscore_error_message,
+ 'variable_1' + name_underscore_error_message,
+ 'variable_2' + name_underscore_error_message])
 self.assert_lint('for (int variable_1, variable_2;;) {',
- ['variable_1' + name_error_message,
- 'variable_2' + name_error_message])
+ ['variable_1' + name_underscore_error_message,
+ 'variable_2' + name_underscore_error_message])
 # There is an exception for op code functions but only in the JavaScriptCore directory.
 self.assert_lint('void this_op_code(int var1, int var2)', '', 'JavaScriptCore/foo.cpp')
- self.assert_lint('void this_op_code(int var1, int var2)', 'this_op_code' + name_error_message)
+ self.assert_lint('void this_op_code(int var1, int var2)', 'this_op_code' + name_underscore_error_message)
 # GObject requires certain magical names in class declarations.
 self.assert_lint('void webkit_dom_object_init();', '')
@@ -3656,7 +3662,7 @@ class WebKitStyleTest(CppStyleTestBase):
 # Bitfields.
 self.assert_lint('unsigned _fillRule : 1;',
- '_fillRule' + name_error_message)
+ '_fillRule' + name_underscore_error_message)
 def test_comments(self):
@@ -3673,52 +3679,52 @@ class WebKitStyleTest(CppStyleTestBase):
 pass
-class CppProcessorTest(unittest.TestCase):
+class CppCheckerTest(unittest.TestCase):
- """Tests CppProcessor class."""
+ """Tests CppChecker class."""
 def mock_handle_style_error(self):
 pass
- def _processor(self):
- return CppProcessor("foo", "h", self.mock_handle_style_error, 3)
+ def _checker(self):
+ return CppChecker("foo", "h", self.mock_handle_style_error, 3)
 def test_init(self):
 """Test __init__ constructor."""
- processor = self._processor()
- self.assertEquals(processor.file_extension, "h")
- self.assertEquals(processor.file_path, "foo")
- self.assertEquals(processor.handle_style_error, self.mock_handle_style_error)
- self.assertEquals(processor.min_confidence, 3)
+ checker = self._checker()
+ self.assertEquals(checker.file_extension, "h")
+ self.assertEquals(checker.file_path, "foo")
+ self.assertEquals(checker.handle_style_error, self.mock_handle_style_error)
+ self.assertEquals(checker.min_confidence, 3)
 def test_eq(self):
 """Test __eq__ equality function."""
- processor1 = self._processor()
- processor2 = self._processor()
+ checker1 = self._checker()
+ checker2 = self._checker()
 # == calls __eq__.
- self.assertTrue(processor1 == processor2)
+ self.assertTrue(checker1 == checker2)
 def mock_handle_style_error2(self):
 pass
 # Verify that a difference in any argument causes equality to fail.
- processor = CppProcessor("foo", "h", self.mock_handle_style_error, 3) - self.assertFalse(processor == CppProcessor("bar", "h", self.mock_handle_style_error, 3)) - self.assertFalse(processor == CppProcessor("foo", "c", self.mock_handle_style_error, 3)) - self.assertFalse(processor == CppProcessor("foo", "h", mock_handle_style_error2, 3)) - self.assertFalse(processor == CppProcessor("foo", "h", self.mock_handle_style_error, 4)) + checker = CppChecker("foo", "h", self.mock_handle_style_error, 3) + self.assertFalse(checker == CppChecker("bar", "h", self.mock_handle_style_error, 3)) + self.assertFalse(checker == CppChecker("foo", "c", self.mock_handle_style_error, 3)) + self.assertFalse(checker == CppChecker("foo", "h", mock_handle_style_error2, 3)) + self.assertFalse(checker == CppChecker("foo", "h", self.mock_handle_style_error, 4)) def test_ne(self): """Test __ne__ inequality function.""" - processor1 = self._processor() - processor2 = self._processor() + checker1 = self._checker() + checker2 = self._checker() # != calls __ne__. # By default, __ne__ always returns true on different objects. # Thus, just check the distinguishing case to verify that the # code defines __ne__. - self.assertFalse(processor1 != processor2) + self.assertFalse(checker1 != checker2) def tearDown(): diff --git a/WebKitTools/Scripts/webkitpy/style/processors/python.py b/WebKitTools/Scripts/webkitpy/style/checkers/python.py index 8ab936d..70d4450 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/python.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/python.py @@ -25,7 +25,7 @@ from ...style_references import pep8 -class PythonProcessor(object): +class PythonChecker(object): """Processes text lines for checking style.""" @@ -33,7 +33,7 @@ class PythonProcessor(object): self._file_path = file_path self._handle_style_error = handle_style_error - def process(self, lines): + def check(self, lines): # Initialize pep8.options, which is necessary for # Checker.check_all() to execute. 
pep8.process_options(arglist=[self._file_path]) diff --git a/WebKitTools/Scripts/webkitpy/style/processors/python_unittest.py b/WebKitTools/Scripts/webkitpy/style/checkers/python_unittest.py index 3ce3311..e003eb8 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/python_unittest.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/python_unittest.py @@ -25,25 +25,25 @@ import os import unittest -from python import PythonProcessor +from python import PythonChecker -class PythonProcessorTest(unittest.TestCase): +class PythonCheckerTest(unittest.TestCase): - """Tests the PythonProcessor class.""" + """Tests the PythonChecker class.""" def test_init(self): """Test __init__() method.""" def _mock_handle_style_error(self): pass - processor = PythonProcessor("foo.txt", _mock_handle_style_error) - self.assertEquals(processor._file_path, "foo.txt") - self.assertEquals(processor._handle_style_error, + checker = PythonChecker("foo.txt", _mock_handle_style_error) + self.assertEquals(checker._file_path, "foo.txt") + self.assertEquals(checker._handle_style_error, _mock_handle_style_error) - def test_process(self): - """Test process() method.""" + def test_check(self): + """Test check() method.""" errors = [] def _mock_handle_style_error(line_number, category, confidence, @@ -54,8 +54,8 @@ class PythonProcessorTest(unittest.TestCase): current_dir = os.path.dirname(__file__) file_path = os.path.join(current_dir, "python_unittest_input.py") - processor = PythonProcessor(file_path, _mock_handle_style_error) - processor.process(lines=[]) + checker = PythonChecker(file_path, _mock_handle_style_error) + checker.check(lines=[]) self.assertEquals(len(errors), 1) self.assertEquals(errors[0], diff --git a/WebKitTools/Scripts/webkitpy/style/processors/python_unittest_input.py b/WebKitTools/Scripts/webkitpy/style/checkers/python_unittest_input.py index 9f1d118..9f1d118 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/python_unittest_input.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/python_unittest_input.py diff --git a/WebKitTools/Scripts/webkitpy/style/processors/text.py b/WebKitTools/Scripts/webkitpy/style/checkers/text.py index 307e5b8..0d03938 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/text.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/text.py @@ -30,7 +30,7 @@ """Checks WebKit style for text files.""" -class TextProcessor(object): +class TextChecker(object): """Processes text lines for checking style.""" @@ -38,7 +38,7 @@ class TextProcessor(object): self.file_path = file_path self.handle_style_error = handle_style_error - def process(self, lines): + def check(self, lines): lines = (["// adjust line numbers to make the first line 1."] + lines) # FIXME: share with cpp_style. @@ -51,6 +51,6 @@ class TextProcessor(object): # FIXME: Remove this function (requires refactoring unit tests). 
def process_file_data(filename, lines, error): - processor = TextProcessor(filename, error) - processor.process(lines) + checker = TextChecker(filename, error) + checker.check(lines) diff --git a/WebKitTools/Scripts/webkitpy/style/processors/text_unittest.py b/WebKitTools/Scripts/webkitpy/style/checkers/text_unittest.py index 62f825b..ced49a9 100644 --- a/WebKitTools/Scripts/webkitpy/style/processors/text_unittest.py +++ b/WebKitTools/Scripts/webkitpy/style/checkers/text_unittest.py @@ -32,7 +32,7 @@ import unittest import text as text_style -from text import TextProcessor +from text import TextChecker class TextStyleTestCase(unittest.TestCase): """TestCase for text_style.py""" @@ -76,18 +76,18 @@ class TextStyleTestCase(unittest.TestCase): '\tReviewed by NOBODY.'], 3) -class TextProcessorTest(unittest.TestCase): +class TextCheckerTest(unittest.TestCase): - """Tests TextProcessor class.""" + """Tests TextChecker class.""" def mock_handle_style_error(self): pass def test_init(self): """Test __init__ constructor.""" - processor = TextProcessor("foo.txt", self.mock_handle_style_error) - self.assertEquals(processor.file_path, "foo.txt") - self.assertEquals(processor.handle_style_error, self.mock_handle_style_error) + checker = TextChecker("foo.txt", self.mock_handle_style_error) + self.assertEquals(checker.file_path, "foo.txt") + self.assertEquals(checker.handle_style_error, self.mock_handle_style_error) if __name__ == '__main__': diff --git a/WebKitTools/Scripts/webkitpy/style/error_handlers.py b/WebKitTools/Scripts/webkitpy/style/error_handlers.py index 5666bfb..0bede24 100644 --- a/WebKitTools/Scripts/webkitpy/style/error_handlers.py +++ b/WebKitTools/Scripts/webkitpy/style/error_handlers.py @@ -63,7 +63,7 @@ class DefaultStyleErrorHandler(object): Args: file_path: The path to the file containing the error. This is used for reporting to the user. - configuration: A StyleCheckerConfiguration instance. + configuration: A StyleProcessorConfiguration instance. increment_error_count: A function that takes no arguments and increments the total count of reportable errors. 
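To make the increment_error_count contract above concrete, a caller can hand the handler a bound method of a simple counter object. The sketch below is illustrative (ErrorCountTracker is a hypothetical name; configuration stands for a StyleProcessorConfiguration built as in the tests in this patch):

class ErrorCountTracker(object):
    """Counts reportable style errors."""

    def __init__(self):
        self.error_count = 0

    def increment_error_count(self):
        # Takes no arguments, per the docstring above.
        self.error_count += 1

tracker = ErrorCountTracker()
handler = DefaultStyleErrorHandler(configuration=configuration,
                                   file_path="foo.cpp",
                                   increment_error_count=tracker.increment_error_count,
                                   line_numbers=[100])

After the checkers run, tracker.error_count holds the number of errors the handler chose to report.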
diff --git a/WebKitTools/Scripts/webkitpy/style/error_handlers_unittest.py b/WebKitTools/Scripts/webkitpy/style/error_handlers_unittest.py
index 05e725a..23619cc 100644
--- a/WebKitTools/Scripts/webkitpy/style/error_handlers_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/style/error_handlers_unittest.py
@@ -25,7 +25,7 @@
 import unittest
-from checker import StyleCheckerConfiguration
+from checker import StyleProcessorConfiguration
 from error_handlers import DefaultStyleErrorHandler
 from filter import FilterConfiguration
@@ -51,11 +51,11 @@ class DefaultStyleErrorHandlerTest(unittest.TestCase):
 self._error_messages.append(message)
 def _style_checker_configuration(self):
- """Return a StyleCheckerConfiguration instance for testing."""
+ """Return a StyleProcessorConfiguration instance for testing."""
 base_rules = ["-whitespace", "+whitespace/tab"]
 filter_configuration = FilterConfiguration(base_rules=base_rules)
- return StyleCheckerConfiguration(
+ return StyleProcessorConfiguration(
 filter_configuration=filter_configuration,
 max_reports_per_category={"whitespace/tab": 2},
 min_confidence=3,
diff --git a/WebKitTools/Scripts/webkitpy/style/filereader.py b/WebKitTools/Scripts/webkitpy/style/filereader.py
index 081e6dc..48455b3 100644
--- a/WebKitTools/Scripts/webkitpy/style/filereader.py
+++ b/WebKitTools/Scripts/webkitpy/style/filereader.py
@@ -103,6 +103,10 @@ class TextFileReader(object):
 """
 self.file_count += 1
+ if not os.path.exists(file_path) and file_path != "-":
+ _log.error("File does not exist: '%s'" % file_path)
+ sys.exit(1)
+
 if not self._processor.should_process(file_path):
 _log.debug("Skipping file: '%s'" % file_path)
 return
@@ -111,10 +115,6 @@ class TextFileReader(object):
 try:
 lines = self._read_lines(file_path)
 except IOError, err:
- if not os.path.exists(file_path):
- _log.error("File does not exist: '%s'" % file_path)
- sys.exit(1)
-
 message = ("Could not read file. Skipping: '%s'\n %s"
 % (file_path, err))
 _log.warn(message)
diff --git a/WebKitTools/Scripts/webkitpy/style/filereader_unittest.py b/WebKitTools/Scripts/webkitpy/style/filereader_unittest.py
index 8d1a159..558ec5a 100644
--- a/WebKitTools/Scripts/webkitpy/style/filereader_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/style/filereader_unittest.py
@@ -22,6 +22,9 @@
 """Contains unit tests for filereader.py."""
+from __future__ import with_statement
+
+import codecs
 import os
 import shutil
 import tempfile
@@ -67,14 +70,12 @@ class TextFileReaderTest(LoggingTestCase):
 LoggingTestCase.tearDown(self)
 shutil.rmtree(self._temp_dir)
- def _create_file(self, rel_path, text):
+ def _create_file(self, rel_path, text, encoding="utf-8"):
 """Create a file with given text and return the path to the file."""
+ # FIXME: There are better/more secure APIs for creating tmp file paths.
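 # One such API is tempfile.mkstemp, which creates the file
 # atomically under an unpredictable name, along the lines of
 # (the suffix argument here is illustrative):
 #
 #     import tempfile
 #     fd, file_path = tempfile.mkstemp(suffix=".txt",
 #                                      dir=self._temp_dir)
 #     os.close(fd)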
file_path = os.path.join(self._temp_dir, rel_path)
-
- file = open(file_path, 'w')
- file.write(text)
- file.close()
-
+ with codecs.open(file_path, "w", encoding) as file:
+ file.write(text)
 return file_path
 def _passed_to_processor(self):
@@ -86,10 +87,6 @@ class TextFileReaderTest(LoggingTestCase):
 self.assertEquals(passed_to_processor, self._passed_to_processor())
 self.assertEquals(file_count, self._file_reader.file_count)
- def test_process_file__should_not_process(self):
- self._file_reader.process_file('should_not_process.txt')
- self._assert_file_reader([], 1)
-
 def test_process_file__does_not_exist(self):
 try:
 self._file_reader.process_file('does_not_exist.txt')
@@ -121,6 +118,12 @@
 self._assert_file_reader([], 1)
+ def test_process_file__should_not_process(self):
+ file_path = self._create_file('should_not_process.txt', 'contents')
+
+ self._file_reader.process_file(file_path)
+ self._assert_file_reader([], 1)
+
 def test_process_file__multiple_lines(self):
 file_path = self._create_file('foo.txt', 'line one\r\nline two\n')
@@ -128,6 +131,13 @@
 processed = [(['line one\r', 'line two', ''], file_path, None)]
 self._assert_file_reader(processed, 1)
+ def test_process_file__file_stdin(self):
+ file_path = self._create_file('-', 'file contents')
+
+ self._file_reader.process_file(file_path=file_path, test_kwarg='foo')
+ processed = [(['file contents'], file_path, 'foo')]
+ self._assert_file_reader(processed, 1)
+
 def test_process_file__with_kwarg(self):
 file_path = self._create_file('foo.txt', 'file contents')
diff --git a/WebKitTools/Scripts/webkitpy/style/optparser.py b/WebKitTools/Scripts/webkitpy/style/optparser.py
index 576c16a..bb4788a 100644
--- a/WebKitTools/Scripts/webkitpy/style/optparser.py
+++ b/WebKitTools/Scripts/webkitpy/style/optparser.py
@@ -147,7 +147,8 @@ class CommandOptionValues(object):
 git_commit=None,
 is_verbose=False,
 min_confidence=1,
- output_format="emacs"):
+ output_format="emacs",
+ squash=False):
 if filter_rules is None:
 filter_rules = []
@@ -166,6 +167,7 @@ class CommandOptionValues(object):
 self.is_verbose = is_verbose
 self.min_confidence = min_confidence
 self.output_format = output_format
+ self.squash = squash
 # Useful for unit testing.
 def __eq__(self, other):
@@ -180,6 +182,8 @@ class CommandOptionValues(object):
 return False
 if self.output_format != other.output_format:
 return False
+ if self.squash != other.squash:
+ return False
 return True
@@ -214,6 +218,8 @@ class ArgumentPrinter(object):
 flags['filter'] = ",".join(filter_rules)
 if options.git_commit:
 flags['git-commit'] = options.git_commit
+ if options.squash:
+ flags['squash'] = options.squash
 flag_string = ''
 # Alphabetizing lets us unit test this method.
@@ -303,9 +309,10 @@ class ArgumentParser(object):
 parser.add_option("-f", "--filter-rules", metavar="RULES",
 dest="filter_value", help=filter_help)
- git_help = "check all changes after the given git commit."
- parser.add_option("-g", "--git-commit", "--git-diff", "--git-since",
- metavar="COMMIT", dest="git_since", help=git_help,)
+ git_commit_help = ("check all changes in the given git commit. "
+ "Use 'commit_id..' to check all changes after commit_id")
+ parser.add_option("-g", "--git-diff", "--git-commit",
+ metavar="COMMIT", dest="git_commit", help=git_commit_help,)
 min_confidence_help = ("set the minimum confidence of style errors "
 "to report.
Can be an integer 1-5, with 1 "
@@ -323,6 +330,14 @@ class ArgumentParser(object):
 dest="output_format", default=default_output_format,
 help=output_format_help)
+ squash_help = ("All diffs from the remote branch are checked. "
+ "If excluded, prompts whether to squash when there are multiple commits.")
+ parser.add_option("-s", "--squash", action="store_true", dest="squash", help=squash_help)
+
+ squash_help = ("Only working copy diffs are checked. "
+ "If excluded, prompts whether to squash when there are multiple commits.")
+ parser.add_option("--no-squash", action="store_false", dest="squash", help=squash_help)
+
 verbose_help = "enable verbose logging."
 parser.add_option("-v", "--verbose", dest="is_verbose", default=False,
 action="store_true", help=verbose_help)
@@ -407,7 +422,7 @@ class ArgumentParser(object):
 (options, paths) = self._parser.parse_args(args=args)
 filter_value = options.filter_value
- git_commit = options.git_since
+ git_commit = options.git_commit
 is_verbose = options.is_verbose
 min_confidence = options.min_confidence
 output_format = options.output_format
@@ -423,14 +438,6 @@ class ArgumentParser(object):
 self._parse_error('You cannot provide both paths and a git '
 'commit at the same time.')
- # FIXME: Add unit tests.
- if git_commit and '..' in git_commit:
- # FIXME: If the range is a "...", the code should find the common
- # ancestor and start there. See git diff --help for how
- # "..." usually works.
- self._parse_error('invalid --git-commit option: option does '
- 'not support ranges "..": %s' % git_commit)
-
 min_confidence = int(min_confidence)
 if (min_confidence < 1) or (min_confidence > 5):
 self._parse_error('option --min-confidence: invalid integer: '
@@ -451,7 +458,8 @@ class ArgumentParser(object):
 git_commit=git_commit,
 is_verbose=is_verbose,
 min_confidence=min_confidence,
- output_format=output_format)
+ output_format=output_format,
+ squash=options.squash)
 return (paths, options)
diff --git a/WebKitTools/Scripts/webkitpy/style/optparser_unittest.py b/WebKitTools/Scripts/webkitpy/style/optparser_unittest.py
index 1c525c6..b7e3eda 100644
--- a/WebKitTools/Scripts/webkitpy/style/optparser_unittest.py
+++ b/WebKitTools/Scripts/webkitpy/style/optparser_unittest.py
@@ -114,10 +114,6 @@ class ArgumentParserTest(LoggingTestCase):
 self.assertRaises(SystemExit, parse, ['--bad'])
 self.assertLog(['ERROR: no such option: --bad\n'])
- self.assertRaises(SystemExit, parse, ['--git-diff=aa..bb'])
- self.assertLog(['ERROR: invalid --git-commit option: '
- 'option does not support ranges "..": aa..bb\n'])
-
 self.assertRaises(SystemExit, parse, ['--min-confidence=bad'])
 self.assertLog(['ERROR: option --min-confidence: '
 "invalid integer value: 'bad'\n"])
@@ -173,8 +169,6 @@ class ArgumentParserTest(LoggingTestCase):
 self.assertEquals(options.git_commit, 'commit')
 (files, options) = parse(['--git-diff=commit'])
 self.assertEquals(options.git_commit, 'commit')
- (files, options) = parse(['--git-since=commit'])
- self.assertEquals(options.git_commit, 'commit')
 (files, options) = parse(['--verbose'])
 self.assertEquals(options.is_verbose, True)
diff --git a/WebKitTools/Scripts/webkitpy/style_references.py b/WebKitTools/Scripts/webkitpy/style_references.py
index ba2806e..1bf087d 100644
--- a/WebKitTools/Scripts/webkitpy/style_references.py
+++ b/WebKitTools/Scripts/webkitpy/style_references.py
@@ -75,9 +75,6 @@ class WebKitCheckout(object):
 """Return the checkout root as an absolute path."""
 return self._scm.checkout_root
- def create_patch(self):
- return self._scm.create_patch()
- - def create_patch_since_local_commit(self, commit): - return self._scm.create_patch_since_local_commit(commit) + def create_patch(self, git_commit, squash): + return self._scm.create_patch(git_commit, squash) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py index f1e5334..c052f00 100644 --- a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py +++ b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py @@ -24,6 +24,9 @@ """Autoinstalls third-party code required by WebKit.""" +from __future__ import with_statement + +import codecs import os from webkitpy.common.system.autoinstall import AutoInstaller @@ -88,10 +91,7 @@ installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/pyth readme_path = os.path.join(autoinstalled_dir, "README") if not os.path.exists(readme_path): - file = open(readme_path, "w") - try: + with codecs.open(readme_path, "w", "ascii") as file: file.write("This directory is auto-generated by WebKit and is " "safe to delete.\nIt contains needed third-party Python " "packages automatically downloaded from the web.") - finally: - file.close() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url new file mode 100644 index 0000000..4186aee --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.mechanize.url @@ -0,0 +1 @@ +http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url new file mode 100644 index 0000000..0fb1ef6 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/.pep8.py.url @@ -0,0 +1 @@ +http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README new file mode 100644 index 0000000..1d68cf3 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/README @@ -0,0 +1,2 @@ +This directory is auto-generated by WebKit and is safe to delete. +It contains needed third-party Python packages automatically downloaded from the web.
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py new file mode 100644 index 0000000..c1e4c6d --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/__init__.py @@ -0,0 +1 @@ +# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url new file mode 100644 index 0000000..c723abf --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/.ClientForm.py.url @@ -0,0 +1 @@ +http://pypi.python.org/packages/source/C/ClientForm/ClientForm-0.2.10.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py new file mode 100644 index 0000000..a622de7 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/ClientForm.py @@ -0,0 +1,3401 @@ +"""HTML form handling for web clients. + +ClientForm is a Python module for handling HTML forms on the client +side, useful for parsing HTML forms, filling them in and returning the +completed forms to the server. It has developed from a port of Gisle +Aas' Perl module HTML::Form, from the libwww-perl library, but the +interface is not the same. + +The most useful docstring is the one for HTMLForm. + +RFC 1866: HTML 2.0 +RFC 1867: Form-based File Upload in HTML +RFC 2388: Returning Values from Forms: multipart/form-data +HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX) +HTML 4.01 Specification, W3C Recommendation 24 December 1999 + + +Copyright 2002-2007 John J. Lee <jjl@pobox.com> +Copyright 2005 Gary Poster +Copyright 2005 Zope Corporation +Copyright 1998-2000 Gisle Aas. + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +# XXX +# Remove parser testing hack +# safeUrl()-ize action +# Switch to unicode throughout (would be 0.3.x) +# See Wichert Akkerman's 2004-01-22 message to c.l.py. +# Add charset parameter to Content-type headers? How to find value?? +# Add some more functional tests +# Especially single and multiple file upload on the internet. +# Does file upload work when name is missing? Sourceforge tracker form +# doesn't like it. Check standards, and test with Apache. Test +# binary upload with Apache. +# mailto submission & enctype text/plain +# I'm not going to fix this unless somebody tells me what real servers +# that want this encoding actually expect: If enctype is +# application/x-www-form-urlencoded and there's a FILE control present. +# Strictly, it should be 'name=data' (see HTML 4.01 spec., section +# 17.13.2), but I send "name=" ATM. What about multiple file upload?? + +# Would be nice, but I'm not going to do it myself: +# ------------------------------------------------- +# Maybe a 0.4.x? +# Replace by_label etc. with moniker / selector concept. Allows, eg., +# a choice between selection by value / id / label / element +# contents. Or choice between matching labels exactly or by +# substring. Etc. +# Remove deprecated methods. +# ...what else? +# Work on DOMForm. +# XForms? Don't know if there's a need here. 
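Before the vendored module body begins, a quick orientation: typical client-side use of ClientForm, per its own documentation, looks like the sketch below (the URL and the "q" control name are placeholders):

import urllib2
import ClientForm

response = urllib2.urlopen("http://example.com/search.html")
forms = ClientForm.ParseResponse(response, backwards_compat=False)
form = forms[0]
form["q"] = "webkit"      # Fill in a text control by name.
request = form.click()    # Build a urllib2.Request for the submit.
reply = urllib2.urlopen(request)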
+ +__all__ = ['AmbiguityError', 'CheckboxControl', 'Control', + 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm', + 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl', + 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label', + 'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile', + 'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl', + 'RadioControl', 'ScalarControl', 'SelectControl', + 'SubmitButtonControl', 'SubmitControl', 'TextControl', + 'TextareaControl', 'XHTMLCompatibleFormParser'] + +try: True +except NameError: + True = 1 + False = 0 + +try: bool +except NameError: + def bool(expr): + if expr: return True + else: return False + +try: + import logging + import inspect +except ImportError: + def debug(msg, *args, **kwds): + pass +else: + _logger = logging.getLogger("ClientForm") + OPTIMIZATION_HACK = True + + def debug(msg, *args, **kwds): + if OPTIMIZATION_HACK: + return + + caller_name = inspect.stack()[1][3] + extended_msg = '%%s %s' % msg + extended_args = (caller_name,)+args + debug = _logger.debug(extended_msg, *extended_args, **kwds) + + def _show_debug_messages(): + global OPTIMIZATION_HACK + OPTIMIZATION_HACK = False + _logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + _logger.addHandler(handler) + +import sys, urllib, urllib2, types, mimetools, copy, urlparse, \ + htmlentitydefs, re, random +from cStringIO import StringIO + +import sgmllib +# monkeypatch to fix http://www.python.org/sf/803422 :-( +sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") + +# HTMLParser.HTMLParser is recent, so live without it if it's not available +# (also, sgmllib.SGMLParser is much more tolerant of bad HTML) +try: + import HTMLParser +except ImportError: + HAVE_MODULE_HTMLPARSER = False +else: + HAVE_MODULE_HTMLPARSER = True + +try: + import warnings +except ImportError: + def deprecation(message, stack_offset=0): + pass +else: + def deprecation(message, stack_offset=0): + warnings.warn(message, DeprecationWarning, stacklevel=3+stack_offset) + +VERSION = "0.2.10" + +CHUNK = 1024 # size of chunks fed to parser, in bytes + +DEFAULT_ENCODING = "latin-1" + +class Missing: pass + +_compress_re = re.compile(r"\s+") +def compress_text(text): return _compress_re.sub(" ", text.strip()) + +def normalize_line_endings(text): + return re.sub(r"(?:(?<!\r)\n)|(?:\r(?!\n))", "\r\n", text) + + +# This version of urlencode is from my Python 1.5.2 back-port of the +# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence +# of pairs instead of a mapping -- the 2.0 version only accepts a mapping. +def urlencode(query,doseq=False,): + """Encode a sequence of two-element tuples or dictionary into a URL query \ +string. + + If any values in the query arg are sequences and doseq is true, each + sequence element is converted to a separate parameter. + + If the query arg is a sequence of two-element tuples, the order of the + parameters in the output will match the order of parameters in the + input. + """ + + if hasattr(query,"items"): + # mapping objects + query = query.items() + else: + # it's a bother at times that strings and string-like objects are + # sequences... 
+ try: + # non-sequence items should not work with len() + x = len(query) + # non-empty strings will fail this + if len(query) and type(query[0]) != types.TupleType: + raise TypeError() + # zero-length sequences of all types will get here and succeed, + # but that's a minor nit - since the original implementation + # allowed empty dicts that type of behavior probably should be + # preserved for consistency + except TypeError: + ty,va,tb = sys.exc_info() + raise TypeError("not a valid non-string sequence or mapping " + "object", tb) + + l = [] + if not doseq: + # preserve old behavior + for k, v in query: + k = urllib.quote_plus(str(k)) + v = urllib.quote_plus(str(v)) + l.append(k + '=' + v) + else: + for k, v in query: + k = urllib.quote_plus(str(k)) + if type(v) == types.StringType: + v = urllib.quote_plus(v) + l.append(k + '=' + v) + elif type(v) == types.UnicodeType: + # is there a reasonable way to convert to ASCII? + # encode generates a string, but "replace" or "ignore" + # lose information and "strict" can raise UnicodeError + v = urllib.quote_plus(v.encode("ASCII","replace")) + l.append(k + '=' + v) + else: + try: + # is this a sufficient test for sequence-ness? + x = len(v) + except TypeError: + # not a sequence + v = urllib.quote_plus(str(v)) + l.append(k + '=' + v) + else: + # loop over the sequence + for elt in v: + l.append(k + '=' + urllib.quote_plus(str(elt))) + return '&'.join(l) + +def unescape(data, entities, encoding=DEFAULT_ENCODING): + if data is None or "&" not in data: + return data + + def replace_entities(match, entities=entities, encoding=encoding): + ent = match.group() + if ent[1] == "#": + return unescape_charref(ent[2:-1], encoding) + + repl = entities.get(ent) + if repl is not None: + if type(repl) != type(""): + try: + repl = repl.encode(encoding) + except UnicodeError: + repl = ent + else: + repl = ent + + return repl + + return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data) + +def unescape_charref(data, encoding): + name, base = data, 10 + if name.startswith("x"): + name, base= name[1:], 16 + uc = unichr(int(name, base)) + if encoding is None: + return uc + else: + try: + repl = uc.encode(encoding) + except UnicodeError: + repl = "&#%s;" % data + return repl + +def get_entitydefs(): + import htmlentitydefs + from codecs import latin_1_decode + entitydefs = {} + try: + htmlentitydefs.name2codepoint + except AttributeError: + entitydefs = {} + for name, char in htmlentitydefs.entitydefs.items(): + uc = latin_1_decode(char)[0] + if uc.startswith("&#") and uc.endswith(";"): + uc = unescape_charref(uc[2:-1], None) + entitydefs["&%s;" % name] = uc + else: + for name, codepoint in htmlentitydefs.name2codepoint.items(): + entitydefs["&%s;" % name] = unichr(codepoint) + return entitydefs + + +def issequence(x): + try: + x[0] + except (TypeError, KeyError): + return False + except IndexError: + pass + return True + +def isstringlike(x): + try: x+"" + except: return False + else: return True + + +def choose_boundary(): + """Return a string usable as a multipart boundary.""" + # follow IE and firefox + nonce = "".join([str(random.randint(0, sys.maxint-1)) for i in 0,1,2]) + return "-"*27 + nonce + +# This cut-n-pasted MimeWriter from standard library is here so can add +# to HTTP headers rather than message body when appropriate. It also uses +# \r\n in place of \n. This is a bit nasty. +class MimeWriter: + + """Generic MIME writer. 
+ + Methods: + + __init__() + addheader() + flushheaders() + startbody() + startmultipartbody() + nextpart() + lastpart() + + A MIME writer is much more primitive than a MIME parser. It + doesn't seek around on the output file, and it doesn't use large + amounts of buffer space, so you have to write the parts in the + order they should occur on the output file. It does buffer the + headers you add, allowing you to rearrange their order. + + General usage is: + + f = <open the output file> + w = MimeWriter(f) + ...call w.addheader(key, value) 0 or more times... + + followed by either: + + f = w.startbody(content_type) + ...call f.write(data) for body data... + + or: + + w.startmultipartbody(subtype) + for each part: + subwriter = w.nextpart() + ...use the subwriter's methods to create the subpart... + w.lastpart() + + The subwriter is another MimeWriter instance, and should be + treated in the same way as the toplevel MimeWriter. This way, + writing recursive body parts is easy. + + Warning: don't forget to call lastpart()! + + XXX There should be more state so calls made in the wrong order + are detected. + + Some special cases: + + - startbody() just returns the file passed to the constructor; + but don't use this knowledge, as it may be changed. + + - startmultipartbody() actually returns a file as well; + this can be used to write the initial 'if you can read this your + mailer is not MIME-aware' message. + + - If you call flushheaders(), the headers accumulated so far are + written out (and forgotten); this is useful if you don't need a + body part at all, e.g. for a subpart of type message/rfc822 + that's (mis)used to store some header-like information. + + - Passing a keyword argument 'prefix=<flag>' to addheader(), + start*body() affects where the header is inserted; 0 means + append at the end, 1 means insert at the start; default is + append for addheader(), but insert for start*body(), which use + it to determine where the Content-type header goes. + + """ + + def __init__(self, fp, http_hdrs=None): + self._http_hdrs = http_hdrs + self._fp = fp + self._headers = [] + self._boundary = [] + self._first_part = True + + def addheader(self, key, value, prefix=0, + add_to_http_hdrs=0): + """ + prefix is ignored if add_to_http_hdrs is true. + """ + lines = value.split("\r\n") + while lines and not lines[-1]: del lines[-1] + while lines and not lines[0]: del lines[0] + if add_to_http_hdrs: + value = "".join(lines) + # 2.2 urllib2 doesn't normalize header case + self._http_hdrs.append((key.capitalize(), value)) + else: + for i in range(1, len(lines)): + lines[i] = " " + lines[i].strip() + value = "\r\n".join(lines) + "\r\n" + line = key.title() + ": " + value + if prefix: + self._headers.insert(0, line) + else: + self._headers.append(line) + + def flushheaders(self): + self._fp.writelines(self._headers) + self._headers = [] + + def startbody(self, ctype=None, plist=[], prefix=1, + add_to_http_hdrs=0, content_type=1): + """ + prefix is ignored if add_to_http_hdrs is true. 
+ """ + if content_type and ctype: + for name, value in plist: + ctype = ctype + ';\r\n %s=%s' % (name, value) + self.addheader("Content-Type", ctype, prefix=prefix, + add_to_http_hdrs=add_to_http_hdrs) + self.flushheaders() + if not add_to_http_hdrs: self._fp.write("\r\n") + self._first_part = True + return self._fp + + def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1, + add_to_http_hdrs=0, content_type=1): + boundary = boundary or choose_boundary() + self._boundary.append(boundary) + return self.startbody("multipart/" + subtype, + [("boundary", boundary)] + plist, + prefix=prefix, + add_to_http_hdrs=add_to_http_hdrs, + content_type=content_type) + + def nextpart(self): + boundary = self._boundary[-1] + if self._first_part: + self._first_part = False + else: + self._fp.write("\r\n") + self._fp.write("--" + boundary + "\r\n") + return self.__class__(self._fp) + + def lastpart(self): + if self._first_part: + self.nextpart() + boundary = self._boundary.pop() + self._fp.write("\r\n--" + boundary + "--\r\n") + + +class LocateError(ValueError): pass +class AmbiguityError(LocateError): pass +class ControlNotFoundError(LocateError): pass +class ItemNotFoundError(LocateError): pass + +class ItemCountError(ValueError): pass + +# for backwards compatibility, ParseError derives from exceptions that were +# raised by versions of ClientForm <= 0.2.5 +if HAVE_MODULE_HTMLPARSER: + SGMLLIB_PARSEERROR = sgmllib.SGMLParseError + class ParseError(sgmllib.SGMLParseError, + HTMLParser.HTMLParseError, + ): + pass +else: + if hasattr(sgmllib, "SGMLParseError"): + SGMLLIB_PARSEERROR = sgmllib.SGMLParseError + class ParseError(sgmllib.SGMLParseError): + pass + else: + SGMLLIB_PARSEERROR = RuntimeError + class ParseError(RuntimeError): + pass + + +class _AbstractFormParser: + """forms attribute contains HTMLForm instances on completion.""" + # thanks to Moshe Zadka for an example of sgmllib/htmllib usage + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + if entitydefs is None: + entitydefs = get_entitydefs() + self._entitydefs = entitydefs + self._encoding = encoding + + self.base = None + self.forms = [] + self.labels = [] + self._current_label = None + self._current_form = None + self._select = None + self._optgroup = None + self._option = None + self._textarea = None + + # forms[0] will contain all controls that are outside of any form + # self._global_form is an alias for self.forms[0] + self._global_form = None + self.start_form([]) + self.end_form() + self._current_form = self._global_form = self.forms[0] + + def do_base(self, attrs): + debug("%s", attrs) + for key, value in attrs: + if key == "href": + self.base = self.unescape_attr_if_required(value) + + def end_body(self): + debug("") + if self._current_label is not None: + self.end_label() + if self._current_form is not self._global_form: + self.end_form() + + def start_form(self, attrs): + debug("%s", attrs) + if self._current_form is not self._global_form: + raise ParseError("nested FORMs") + name = None + action = None + enctype = "application/x-www-form-urlencoded" + method = "GET" + d = {} + for key, value in attrs: + if key == "name": + name = self.unescape_attr_if_required(value) + elif key == "action": + action = self.unescape_attr_if_required(value) + elif key == "method": + method = self.unescape_attr_if_required(value.upper()) + elif key == "enctype": + enctype = self.unescape_attr_if_required(value.lower()) + d[key] = self.unescape_attr_if_required(value) + controls = [] + self._current_form = (name, 
action, method, enctype), d, controls + + def end_form(self): + debug("") + if self._current_label is not None: + self.end_label() + if self._current_form is self._global_form: + raise ParseError("end of FORM before start") + self.forms.append(self._current_form) + self._current_form = self._global_form + + def start_select(self, attrs): + debug("%s", attrs) + if self._select is not None: + raise ParseError("nested SELECTs") + if self._textarea is not None: + raise ParseError("SELECT inside TEXTAREA") + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + + self._select = d + self._add_label(d) + + self._append_select_control({"__select": d}) + + def end_select(self): + debug("") + if self._select is None: + raise ParseError("end of SELECT before start") + + if self._option is not None: + self._end_option() + + self._select = None + + def start_optgroup(self, attrs): + debug("%s", attrs) + if self._select is None: + raise ParseError("OPTGROUP outside of SELECT") + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + + self._optgroup = d + + def end_optgroup(self): + debug("") + if self._optgroup is None: + raise ParseError("end of OPTGROUP before start") + self._optgroup = None + + def _start_option(self, attrs): + debug("%s", attrs) + if self._select is None: + raise ParseError("OPTION outside of SELECT") + if self._option is not None: + self._end_option() + + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + + self._option = {} + self._option.update(d) + if (self._optgroup and self._optgroup.has_key("disabled") and + not self._option.has_key("disabled")): + self._option["disabled"] = None + + def _end_option(self): + debug("") + if self._option is None: + raise ParseError("end of OPTION before start") + + contents = self._option.get("contents", "").strip() + self._option["contents"] = contents + if not self._option.has_key("value"): + self._option["value"] = contents + if not self._option.has_key("label"): + self._option["label"] = contents + # stuff dict of SELECT HTML attrs into a special private key + # (gets deleted again later) + self._option["__select"] = self._select + self._append_select_control(self._option) + self._option = None + + def _append_select_control(self, attrs): + debug("%s", attrs) + controls = self._current_form[2] + name = self._select.get("name") + controls.append(("select", name, attrs)) + + def start_textarea(self, attrs): + debug("%s", attrs) + if self._textarea is not None: + raise ParseError("nested TEXTAREAs") + if self._select is not None: + raise ParseError("TEXTAREA inside SELECT") + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + self._add_label(d) + + self._textarea = d + + def end_textarea(self): + debug("") + if self._textarea is None: + raise ParseError("end of TEXTAREA before start") + controls = self._current_form[2] + name = self._textarea.get("name") + controls.append(("textarea", name, self._textarea)) + self._textarea = None + + def start_label(self, attrs): + debug("%s", attrs) + if self._current_label: + self.end_label() + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + taken = bool(d.get("for")) # empty id is invalid + d["__text"] = "" + d["__taken"] = taken + if taken: + self.labels.append(d) + self._current_label = d + + def end_label(self): + debug("") + label = self._current_label + if label is None: + # something is ugly in the HTML, but we're ignoring it + return + self._current_label 
= None + # if it is staying around, it is True in all cases + del label["__taken"] + + def _add_label(self, d): + #debug("%s", d) + if self._current_label is not None: + if not self._current_label["__taken"]: + self._current_label["__taken"] = True + d["__label"] = self._current_label + + def handle_data(self, data): + debug("%s", data) + + if self._option is not None: + # self._option is a dictionary of the OPTION element's HTML + # attributes, but it has two special keys, one of which is the + # special "contents" key contains text between OPTION tags (the + # other is the "__select" key: see the end_option method) + map = self._option + key = "contents" + elif self._textarea is not None: + map = self._textarea + key = "value" + data = normalize_line_endings(data) + # not if within option or textarea + elif self._current_label is not None: + map = self._current_label + key = "__text" + else: + return + + if data and not map.has_key(key): + # according to + # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break + # immediately after start tags or immediately before end tags must + # be ignored, but real browsers only ignore a line break after a + # start tag, so we'll do that. + if data[0:2] == "\r\n": + data = data[2:] + elif data[0:1] in ["\n", "\r"]: + data = data[1:] + map[key] = data + else: + map[key] = map[key] + data + + def do_button(self, attrs): + debug("%s", attrs) + d = {} + d["type"] = "submit" # default + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + controls = self._current_form[2] + + type = d["type"] + name = d.get("name") + # we don't want to lose information, so use a type string that + # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON} + # e.g. type for BUTTON/RESET is "resetbutton" + # (type for INPUT/RESET is "reset") + type = type+"button" + self._add_label(d) + controls.append((type, name, d)) + + def do_input(self, attrs): + debug("%s", attrs) + d = {} + d["type"] = "text" # default + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + controls = self._current_form[2] + + type = d["type"] + name = d.get("name") + self._add_label(d) + controls.append((type, name, d)) + + def do_isindex(self, attrs): + debug("%s", attrs) + d = {} + for key, val in attrs: + d[key] = self.unescape_attr_if_required(val) + controls = self._current_form[2] + + self._add_label(d) + # isindex doesn't have type or name HTML attributes + controls.append(("isindex", None, d)) + + def handle_entityref(self, name): + #debug("%s", name) + self.handle_data(unescape( + '&%s;' % name, self._entitydefs, self._encoding)) + + def handle_charref(self, name): + #debug("%s", name) + self.handle_data(unescape_charref(name, self._encoding)) + + def unescape_attr(self, name): + #debug("%s", name) + return unescape(name, self._entitydefs, self._encoding) + + def unescape_attrs(self, attrs): + #debug("%s", attrs) + escaped_attrs = {} + for key, val in attrs.items(): + try: + val.items + except AttributeError: + escaped_attrs[key] = self.unescape_attr(val) + else: + # e.g. "__select" -- yuck! 
+ escaped_attrs[key] = self.unescape_attrs(val) + return escaped_attrs + + def unknown_entityref(self, ref): self.handle_data("&%s;" % ref) + def unknown_charref(self, ref): self.handle_data("&#%s;" % ref) + + +if not HAVE_MODULE_HTMLPARSER: + class XHTMLCompatibleFormParser: + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + raise ValueError("HTMLParser could not be imported") +else: + class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser): + """Good for XHTML, bad for tolerance of incorrect HTML.""" + # thanks to Michael Howitz for this! + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + HTMLParser.HTMLParser.__init__(self) + _AbstractFormParser.__init__(self, entitydefs, encoding) + + def feed(self, data): + try: + HTMLParser.HTMLParser.feed(self, data) + except HTMLParser.HTMLParseError, exc: + raise ParseError(exc) + + def start_option(self, attrs): + _AbstractFormParser._start_option(self, attrs) + + def end_option(self): + _AbstractFormParser._end_option(self) + + def handle_starttag(self, tag, attrs): + try: + method = getattr(self, "start_" + tag) + except AttributeError: + try: + method = getattr(self, "do_" + tag) + except AttributeError: + pass # unknown tag + else: + method(attrs) + else: + method(attrs) + + def handle_endtag(self, tag): + try: + method = getattr(self, "end_" + tag) + except AttributeError: + pass # unknown tag + else: + method() + + def unescape(self, name): + # Use the entitydefs passed into constructor, not + # HTMLParser.HTMLParser's entitydefs. + return self.unescape_attr(name) + + def unescape_attr_if_required(self, name): + return name # HTMLParser.HTMLParser already did it + def unescape_attrs_if_required(self, attrs): + return attrs # ditto + + def close(self): + HTMLParser.HTMLParser.close(self) + self.end_body() + + +class _AbstractSgmllibParser(_AbstractFormParser): + + def do_option(self, attrs): + _AbstractFormParser._start_option(self, attrs) + + if sys.version_info[:2] >= (2,5): + # we override this attr to decode hex charrefs + entity_or_charref = re.compile( + '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)') + def convert_entityref(self, name): + return unescape("&%s;" % name, self._entitydefs, self._encoding) + def convert_charref(self, name): + return unescape_charref("%s" % name, self._encoding) + def unescape_attr_if_required(self, name): + return name # sgmllib already did it + def unescape_attrs_if_required(self, attrs): + return attrs # ditto + else: + def unescape_attr_if_required(self, name): + return self.unescape_attr(name) + def unescape_attrs_if_required(self, attrs): + return self.unescape_attrs(attrs) + + +class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser): + """Good for tolerance of incorrect HTML, bad for XHTML.""" + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + sgmllib.SGMLParser.__init__(self) + _AbstractFormParser.__init__(self, entitydefs, encoding) + + def feed(self, data): + try: + sgmllib.SGMLParser.feed(self, data) + except SGMLLIB_PARSEERROR, exc: + raise ParseError(exc) + + def close(self): + sgmllib.SGMLParser.close(self) + self.end_body() + + +# sigh, must support mechanize by allowing dynamic creation of classes based on +# its bundled copy of BeautifulSoup (which was necessary because of dependency +# problems) + +def _create_bs_classes(bs, + icbinbs, + ): + class _AbstractBSFormParser(_AbstractSgmllibParser): + bs_base_class = None + def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): + 
_AbstractFormParser.__init__(self, entitydefs, encoding) + self.bs_base_class.__init__(self) + def handle_data(self, data): + _AbstractFormParser.handle_data(self, data) + self.bs_base_class.handle_data(self, data) + def feed(self, data): + try: + self.bs_base_class.feed(self, data) + except SGMLLIB_PARSEERROR, exc: + raise ParseError(exc) + def close(self): + self.bs_base_class.close(self) + self.end_body() + + class RobustFormParser(_AbstractBSFormParser, bs): + """Tries to be highly tolerant of incorrect HTML.""" + pass + RobustFormParser.bs_base_class = bs + class NestingRobustFormParser(_AbstractBSFormParser, icbinbs): + """Tries to be highly tolerant of incorrect HTML. + + Different from RobustFormParser in that it more often guesses nesting + above missing end tags (see BeautifulSoup docs). + + """ + pass + NestingRobustFormParser.bs_base_class = icbinbs + + return RobustFormParser, NestingRobustFormParser + +try: + if sys.version_info[:2] < (2, 2): + raise ImportError # BeautifulSoup uses generators + import BeautifulSoup +except ImportError: + pass +else: + RobustFormParser, NestingRobustFormParser = _create_bs_classes( + BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup + ) + __all__ += ['RobustFormParser', 'NestingRobustFormParser'] + + +#FormParser = XHTMLCompatibleFormParser # testing hack +#FormParser = RobustFormParser # testing hack + + +def ParseResponseEx(response, + select_default=False, + form_parser_class=FormParser, + request_class=urllib2.Request, + entitydefs=None, + encoding=DEFAULT_ENCODING, + + # private + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, + ): + """Identical to ParseResponse, except that: + + 1. The returned list contains an extra item. The first form in the list + contains all controls not contained in any FORM element. + + 2. The arguments ignore_errors and backwards_compat have been removed. + + 3. Backwards-compatibility mode (backwards_compat=True) is not available. + """ + return _ParseFileEx(response, response.geturl(), + select_default, + False, + form_parser_class, + request_class, + entitydefs, + False, + encoding, + _urljoin=_urljoin, + _urlparse=_urlparse, + _urlunparse=_urlunparse, + ) + +def ParseFileEx(file, base_uri, + select_default=False, + form_parser_class=FormParser, + request_class=urllib2.Request, + entitydefs=None, + encoding=DEFAULT_ENCODING, + + # private + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, + ): + """Identical to ParseFile, except that: + + 1. The returned list contains an extra item. The first form in the list + contains all controls not contained in any FORM element. + + 2. The arguments ignore_errors and backwards_compat have been removed. + + 3. Backwards-compatibility mode (backwards_compat=True) is not available. + """ + return _ParseFileEx(file, base_uri, + select_default, + False, + form_parser_class, + request_class, + entitydefs, + False, + encoding, + _urljoin=_urljoin, + _urlparse=_urlparse, + _urlunparse=_urlunparse, + ) + +def ParseResponse(response, *args, **kwds): + """Parse HTTP response and return a list of HTMLForm instances. + + The return value of urllib2.urlopen can be conveniently passed to this + function as the response parameter. + + ClientForm.ParseError is raised on parse errors. 
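+
+ A short sketch of typical use (the URL here is hypothetical):
+
+ import urllib2, ClientForm
+ response = urllib2.urlopen("http://example.com/search")
+ forms = ClientForm.ParseResponse(response, backwards_compat=False)
+ form = forms[0]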
+
+ response: file-like object (supporting read() method) with a method
+ geturl(), returning the URI of the HTTP response
+ select_default: for multiple-selection SELECT controls and RADIO controls,
+ pick the first item as the default if none are selected in the HTML
+ form_parser_class: class to instantiate and use to pass markup to
+ request_class: class to return from .click() method (default is
+ urllib2.Request)
+ entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
+ definitions (a sensible default is used)
+ encoding: character encoding used for encoding numeric character references
+ when matching link text. ClientForm does not attempt to find the encoding
+ in a META HTTP-EQUIV attribute in the document itself (mechanize, for
+ example, does do that and will pass the correct value to ClientForm using
+ this parameter).
+
+ backwards_compat: boolean that determines whether the returned HTMLForm
+ objects are backwards-compatible with old code. If backwards_compat is
+ true:
+
+ - ClientForm 0.1 code will continue to work as before.
+
+ - Label searches that do not specify a nr (number or count) will always
+ get the first match, even if other controls match. If
+ backwards_compat is False, label searches that have ambiguous results
+ will raise an AmbiguityError.
+
+ - Item label matching is done by strict string comparison rather than
+ substring matching.
+
+ - De-selecting individual list items is allowed even if the Item is
+ disabled.
+
+ The backwards_compat argument will be deprecated in a future release.
+
+ Pass a true value for select_default if you want the behaviour specified by
+ RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
+ RADIO or multiple-selection SELECT control if none were selected in the
+ HTML. Most browsers (including Microsoft Internet Explorer (IE) and
+ Netscape Navigator) instead leave all items unselected in these cases. The
+ W3C HTML 4.0 standard leaves this behaviour undefined in the case of
+ multiple-selection SELECT controls, but insists that at least one RADIO
+ button should be checked at all times, in contradiction to browser
+ behaviour.
+
+ There is a choice of parsers. ClientForm.XHTMLCompatibleFormParser (uses
+ HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
+ sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
+ Note that HTMLParser is only available in Python 2.2 and later. You can
+ pass your own class in here as a hack to work around bad HTML, but at your
+ own risk: there is no well-defined interface.
+
+ """
+ return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:]
+
+def ParseFile(file, base_uri, *args, **kwds):
+ """Parse HTML and return a list of HTMLForm instances.
+
+ ClientForm.ParseError is raised on parse errors.
+
+ file: file-like object (supporting read() method) containing HTML with zero
+ or more forms to be parsed
+ base_uri: the URI of the document (note that the base URI used to submit
+ the form will be that given in the BASE element if present, not that of
+ the document)
+
+ For the other arguments and further details, see ParseResponse.__doc__.
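+
+ For example, a minimal sketch (the file name and base URI are
+ hypothetical):
+
+ forms = ClientForm.ParseFile(open("page.html"), "http://example.com/")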
+ + """ + return _ParseFileEx(file, base_uri, *args, **kwds)[1:] + +def _ParseFileEx(file, base_uri, + select_default=False, + ignore_errors=False, + form_parser_class=FormParser, + request_class=urllib2.Request, + entitydefs=None, + backwards_compat=True, + encoding=DEFAULT_ENCODING, + _urljoin=urlparse.urljoin, + _urlparse=urlparse.urlparse, + _urlunparse=urlparse.urlunparse, + ): + if backwards_compat: + deprecation("operating in backwards-compatibility mode", 1) + fp = form_parser_class(entitydefs, encoding) + while 1: + data = file.read(CHUNK) + try: + fp.feed(data) + except ParseError, e: + e.base_uri = base_uri + raise + if len(data) != CHUNK: break + fp.close() + if fp.base is not None: + # HTML BASE element takes precedence over document URI + base_uri = fp.base + labels = [] # Label(label) for label in fp.labels] + id_to_labels = {} + for l in fp.labels: + label = Label(l) + labels.append(label) + for_id = l["for"] + coll = id_to_labels.get(for_id) + if coll is None: + id_to_labels[for_id] = [label] + else: + coll.append(label) + forms = [] + for (name, action, method, enctype), attrs, controls in fp.forms: + if action is None: + action = base_uri + else: + action = _urljoin(base_uri, action) + # would be nice to make HTMLForm class (form builder) pluggable + form = HTMLForm( + action, method, enctype, name, attrs, request_class, + forms, labels, id_to_labels, backwards_compat) + form._urlparse = _urlparse + form._urlunparse = _urlunparse + for ii in range(len(controls)): + type, name, attrs = controls[ii] + # index=ii*10 allows ImageControl to return multiple ordered pairs + form.new_control( + type, name, attrs, select_default=select_default, index=ii*10) + forms.append(form) + for form in forms: + form.fixup() + return forms + + +class Label: + def __init__(self, attrs): + self.id = attrs.get("for") + self._text = attrs.get("__text").strip() + self._ctext = compress_text(self._text) + self.attrs = attrs + self._backwards_compat = False # maintained by HTMLForm + + def __getattr__(self, name): + if name == "text": + if self._backwards_compat: + return self._text + else: + return self._ctext + return getattr(Label, name) + + def __setattr__(self, name, value): + if name == "text": + # don't see any need for this, so make it read-only + raise AttributeError("text attribute is read-only") + self.__dict__[name] = value + + def __str__(self): + return "<Label(id=%r, text=%r)>" % (self.id, self.text) + + +def _get_label(attrs): + text = attrs.get("__label") + if text is not None: + return Label(text) + else: + return None + +class Control: + """An HTML form control. + + An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm + are accessed using the HTMLForm.find_control method or the + HTMLForm.controls attribute. + + Control instances are usually constructed using the ParseFile / + ParseResponse functions. If you use those functions, you can ignore the + rest of this paragraph. A Control is only properly initialised after the + fixup method has been called. In fact, this is only strictly necessary for + ListControl instances. This is necessary because ListControls are built up + from ListControls each containing only a single item, and their initial + value(s) can only be known after the sequence is complete. + + The types and values that are acceptable for assignment to the value + attribute are defined by subclasses. + + If the disabled attribute is true, this represents the state typically + represented by browsers by 'greying out' a control. 
If the disabled + attribute is true, the Control will raise AttributeError if an attempt is + made to change its value. In addition, the control will not be considered + 'successful' as defined by the W3C HTML 4 standard -- ie. it will + contribute no data to the return value of the HTMLForm.click* methods. To + enable a control, set the disabled attribute to a false value. + + If the readonly attribute is true, the Control will raise AttributeError if + an attempt is made to change its value. To make a control writable, set + the readonly attribute to a false value. + + All controls have the disabled and readonly attributes, not only those that + may have the HTML attributes of the same names. + + On assignment to the value attribute, the following exceptions are raised: + TypeError, AttributeError (if the value attribute should not be assigned + to, because the control is disabled, for example) and ValueError. + + If the name or value attributes are None, or the value is an empty list, or + if the control is disabled, the control is not successful. + + Public attributes: + + type: string describing type of control (see the keys of the + HTMLForm.type2class dictionary for the allowable values) (readonly) + name: name of control (readonly) + value: current value of control (subclasses may allow a single value, a + sequence of values, or either) + disabled: disabled state + readonly: readonly state + id: value of id HTML attribute + + """ + def __init__(self, type, name, attrs, index=None): + """ + type: string describing type of control (see the keys of the + HTMLForm.type2class dictionary for the allowable values) + name: control name + attrs: HTML attributes of control's HTML element + + """ + raise NotImplementedError() + + def add_to_form(self, form): + self._form = form + form.controls.append(self) + + def fixup(self): + pass + + def is_of_kind(self, kind): + raise NotImplementedError() + + def clear(self): + raise NotImplementedError() + + def __getattr__(self, name): raise NotImplementedError() + def __setattr__(self, name, value): raise NotImplementedError() + + def pairs(self): + """Return list of (key, value) pairs suitable for passing to urlencode. + """ + return [(k, v) for (i, k, v) in self._totally_ordered_pairs()] + + def _totally_ordered_pairs(self): + """Return list of (key, value, index) tuples. + + Like pairs, but allows preserving correct ordering even where several + controls are involved. + + """ + raise NotImplementedError() + + def _write_mime_data(self, mw, name, value): + """Write data for a subitem of this control to a MimeWriter.""" + # called by HTMLForm + mw2 = mw.nextpart() + mw2.addheader("Content-Disposition", + 'form-data; name="%s"' % name, 1) + f = mw2.startbody(prefix=0) + f.write(value) + + def __str__(self): + raise NotImplementedError() + + def get_labels(self): + """Return all labels (Label instances) for this control. + + If the control was surrounded by a <label> tag, that will be the first + label; all other labels, connected by 'for' and 'id', are in the order + that appear in the HTML. + + """ + res = [] + if self._label: + res.append(self._label) + if self.id: + res.extend(self._form._id_to_labels.get(self.id, ())) + return res + + +#--------------------------------------------------- +class ScalarControl(Control): + """Control whose value is not restricted to one of a prescribed set. + + Some ScalarControls don't accept any value attribute. Otherwise, takes a + single value, which must be string-like. 
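+
+ For example (a sketch, assuming a parsed form with a text control named
+ "q"):
+
+ control = form.find_control("q")
+ control.value = "some text" # non-string values raise TypeError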
+ + Additional read-only public attribute: + + attrs: dictionary mapping the names of original HTML attributes of the + control to their values + + """ + def __init__(self, type, name, attrs, index=None): + self._index = index + self._label = _get_label(attrs) + self.__dict__["type"] = type.lower() + self.__dict__["name"] = name + self._value = attrs.get("value") + self.disabled = attrs.has_key("disabled") + self.readonly = attrs.has_key("readonly") + self.id = attrs.get("id") + + self.attrs = attrs.copy() + + self._clicked = False + + self._urlparse = urlparse.urlparse + self._urlunparse = urlparse.urlunparse + + def __getattr__(self, name): + if name == "value": + return self.__dict__["_value"] + else: + raise AttributeError("%s instance has no attribute '%s'" % + (self.__class__.__name__, name)) + + def __setattr__(self, name, value): + if name == "value": + if not isstringlike(value): + raise TypeError("must assign a string") + elif self.readonly: + raise AttributeError("control '%s' is readonly" % self.name) + elif self.disabled: + raise AttributeError("control '%s' is disabled" % self.name) + self.__dict__["_value"] = value + elif name in ("name", "type"): + raise AttributeError("%s attribute is readonly" % name) + else: + self.__dict__[name] = value + + def _totally_ordered_pairs(self): + name = self.name + value = self.value + if name is None or value is None or self.disabled: + return [] + return [(self._index, name, value)] + + def clear(self): + if self.readonly: + raise AttributeError("control '%s' is readonly" % self.name) + self.__dict__["_value"] = None + + def __str__(self): + name = self.name + value = self.value + if name is None: name = "<None>" + if value is None: value = "<None>" + + infos = [] + if self.disabled: infos.append("disabled") + if self.readonly: infos.append("readonly") + info = ", ".join(infos) + if info: info = " (%s)" % info + + return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info) + + +#--------------------------------------------------- +class TextControl(ScalarControl): + """Textual input control. + + Covers: + + INPUT/TEXT + INPUT/PASSWORD + INPUT/HIDDEN + TEXTAREA + + """ + def __init__(self, type, name, attrs, index=None): + ScalarControl.__init__(self, type, name, attrs, index) + if self.type == "hidden": self.readonly = True + if self._value is None: + self._value = "" + + def is_of_kind(self, kind): return kind == "text" + +#--------------------------------------------------- +class FileControl(ScalarControl): + """File upload with INPUT TYPE=FILE. + + The value attribute of a FileControl is always None. Use add_file instead. 
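+
+ For example (a sketch; the control name, file and content type are
+ hypothetical):
+
+ control = form.find_control("upload", type="file")
+ control.add_file(open("report.txt"), "text/plain", "report.txt")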
+ + Additional public method: add_file + + """ + + def __init__(self, type, name, attrs, index=None): + ScalarControl.__init__(self, type, name, attrs, index) + self._value = None + self._upload_data = [] + + def is_of_kind(self, kind): return kind == "file" + + def clear(self): + if self.readonly: + raise AttributeError("control '%s' is readonly" % self.name) + self._upload_data = [] + + def __setattr__(self, name, value): + if name in ("value", "name", "type"): + raise AttributeError("%s attribute is readonly" % name) + else: + self.__dict__[name] = value + + def add_file(self, file_object, content_type=None, filename=None): + if not hasattr(file_object, "read"): + raise TypeError("file-like object must have read method") + if content_type is not None and not isstringlike(content_type): + raise TypeError("content type must be None or string-like") + if filename is not None and not isstringlike(filename): + raise TypeError("filename must be None or string-like") + if content_type is None: + content_type = "application/octet-stream" + self._upload_data.append((file_object, content_type, filename)) + + def _totally_ordered_pairs(self): + # XXX should it be successful even if unnamed? + if self.name is None or self.disabled: + return [] + return [(self._index, self.name, "")] + + def _write_mime_data(self, mw, _name, _value): + # called by HTMLForm + # assert _name == self.name and _value == '' + if len(self._upload_data) < 2: + if len(self._upload_data) == 0: + file_object = StringIO() + content_type = "application/octet-stream" + filename = "" + else: + file_object, content_type, filename = self._upload_data[0] + if filename is None: + filename = "" + mw2 = mw.nextpart() + fn_part = '; filename="%s"' % filename + disp = 'form-data; name="%s"%s' % (self.name, fn_part) + mw2.addheader("Content-Disposition", disp, prefix=1) + fh = mw2.startbody(content_type, prefix=0) + fh.write(file_object.read()) + else: + # multiple files + mw2 = mw.nextpart() + disp = 'form-data; name="%s"' % self.name + mw2.addheader("Content-Disposition", disp, prefix=1) + fh = mw2.startmultipartbody("mixed", prefix=0) + for file_object, content_type, filename in self._upload_data: + mw3 = mw2.nextpart() + if filename is None: + filename = "" + fn_part = '; filename="%s"' % filename + disp = "file%s" % fn_part + mw3.addheader("Content-Disposition", disp, prefix=1) + fh2 = mw3.startbody(content_type, prefix=0) + fh2.write(file_object.read()) + mw2.lastpart() + + def __str__(self): + name = self.name + if name is None: name = "<None>" + + if not self._upload_data: + value = "<No files added>" + else: + value = [] + for file, ctype, filename in self._upload_data: + if filename is None: + value.append("<Unnamed file>") + else: + value.append(filename) + value = ", ".join(value) + + info = [] + if self.disabled: info.append("disabled") + if self.readonly: info.append("readonly") + info = ", ".join(info) + if info: info = " (%s)" % info + + return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info) + + +#--------------------------------------------------- +class IsindexControl(ScalarControl): + """ISINDEX control. + + ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really + part of regular HTML forms at all, and predates it. You're only allowed + one ISINDEX per HTML document. ISINDEX and regular form submission are + mutually exclusive -- either submit a form, or the ISINDEX. 
+ + Having said this, since ISINDEX controls may appear in forms (which is + probably bad HTML), ParseFile / ParseResponse will include them in the + HTMLForm instances it returns. You can set the ISINDEX's value, as with + any other control (but note that ISINDEX controls have no name, so you'll + need to use the type argument of set_value!). When you submit the form, + the ISINDEX will not be successful (ie., no data will get returned to the + server as a result of its presence), unless you click on the ISINDEX + control, in which case the ISINDEX gets submitted instead of the form: + + form.set_value("my isindex value", type="isindex") + urllib2.urlopen(form.click(type="isindex")) + + ISINDEX elements outside of FORMs are ignored. If you want to submit one + by hand, do it like so: + + url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value")) + result = urllib2.urlopen(url) + + """ + def __init__(self, type, name, attrs, index=None): + ScalarControl.__init__(self, type, name, attrs, index) + if self._value is None: + self._value = "" + + def is_of_kind(self, kind): return kind in ["text", "clickable"] + + def _totally_ordered_pairs(self): + return [] + + def _click(self, form, coord, return_type, request_class=urllib2.Request): + # Relative URL for ISINDEX submission: instead of "foo=bar+baz", + # want "bar+baz". + # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is + # deprecated in 4.01, but it should still say how to submit it). + # Submission of ISINDEX is explained in the HTML 3.2 spec, though. + parts = self._urlparse(form.action) + rest, (query, frag) = parts[:-2], parts[-2:] + parts = rest + (urllib.quote_plus(self.value), None) + url = self._urlunparse(parts) + req_data = url, None, [] + + if return_type == "pairs": + return [] + elif return_type == "request_data": + return req_data + else: + return request_class(url) + + def __str__(self): + value = self.value + if value is None: value = "<None>" + + infos = [] + if self.disabled: infos.append("disabled") + if self.readonly: infos.append("readonly") + info = ", ".join(infos) + if info: info = " (%s)" % info + + return "<%s(%s)%s>" % (self.__class__.__name__, value, info) + + +#--------------------------------------------------- +class IgnoreControl(ScalarControl): + """Control that we're not interested in. + + Covers: + + INPUT/RESET + BUTTON/RESET + INPUT/BUTTON + BUTTON/BUTTON + + These controls are always unsuccessful, in the terminology of HTML 4 (ie. + they never require any information to be returned to the server). + + BUTTON/BUTTON is used to generate events for script embedded in HTML. + + The value attribute of IgnoreControl is always None. 
+ + """ + def __init__(self, type, name, attrs, index=None): + ScalarControl.__init__(self, type, name, attrs, index) + self._value = None + + def is_of_kind(self, kind): return False + + def __setattr__(self, name, value): + if name == "value": + raise AttributeError( + "control '%s' is ignored, hence read-only" % self.name) + elif name in ("name", "type"): + raise AttributeError("%s attribute is readonly" % name) + else: + self.__dict__[name] = value + + +#--------------------------------------------------- +# ListControls + +# helpers and subsidiary classes + +class Item: + def __init__(self, control, attrs, index=None): + label = _get_label(attrs) + self.__dict__.update({ + "name": attrs["value"], + "_labels": label and [label] or [], + "attrs": attrs, + "_control": control, + "disabled": attrs.has_key("disabled"), + "_selected": False, + "id": attrs.get("id"), + "_index": index, + }) + control.items.append(self) + + def get_labels(self): + """Return all labels (Label instances) for this item. + + For items that represent radio buttons or checkboxes, if the item was + surrounded by a <label> tag, that will be the first label; all other + labels, connected by 'for' and 'id', are in the order that appear in + the HTML. + + For items that represent select options, if the option had a label + attribute, that will be the first label. If the option has contents + (text within the option tags) and it is not the same as the label + attribute (if any), that will be a label. There is nothing in the + spec to my knowledge that makes an option with an id unable to be the + target of a label's for attribute, so those are included, if any, for + the sake of consistency and completeness. + + """ + res = [] + res.extend(self._labels) + if self.id: + res.extend(self._control._form._id_to_labels.get(self.id, ())) + return res + + def __getattr__(self, name): + if name=="selected": + return self._selected + raise AttributeError(name) + + def __setattr__(self, name, value): + if name == "selected": + self._control._set_selected_state(self, value) + elif name == "disabled": + self.__dict__["disabled"] = bool(value) + else: + raise AttributeError(name) + + def __str__(self): + res = self.name + if self.selected: + res = "*" + res + if self.disabled: + res = "(%s)" % res + return res + + def __repr__(self): + # XXX appending the attrs without distinguishing them from name and id + # is silly + attrs = [("name", self.name), ("id", self.id)]+self.attrs.items() + return "<%s %s>" % ( + self.__class__.__name__, + " ".join(["%s=%r" % (k, v) for k, v in attrs]) + ) + +def disambiguate(items, nr, **kwds): + msgs = [] + for key, value in kwds.items(): + msgs.append("%s=%r" % (key, value)) + msg = " ".join(msgs) + if not items: + raise ItemNotFoundError(msg) + if nr is None: + if len(items) > 1: + raise AmbiguityError(msg) + nr = 0 + if len(items) <= nr: + raise ItemNotFoundError(msg) + return items[nr] + +class ListControl(Control): + """Control representing a sequence of items. + + The value attribute of a ListControl represents the successful list items + in the control. The successful list items are those that are selected and + not disabled. + + ListControl implements both list controls that take a length-1 value + (single-selection) and those that take length >1 values + (multiple-selection). + + ListControls accept sequence values only. Some controls only accept + sequences of length 0 or 1 (RADIO, and single-selection SELECT). + In those cases, ItemCountError is raised if len(sequence) > 1. 
CHECKBOXes + and multiple-selection SELECTs (those having the "multiple" HTML attribute) + accept sequences of any length. + + Note the following mistake: + + control.value = some_value + assert control.value == some_value # not necessarily true + + The reason for this is that the value attribute always gives the list items + in the order they were listed in the HTML. + + ListControl items can also be referred to by their labels instead of names. + Use the label argument to .get(), and the .set_value_by_label(), + .get_value_by_label() methods. + + Note that, rather confusingly, though SELECT controls are represented in + HTML by SELECT elements (which contain OPTION elements, representing + individual list items), CHECKBOXes and RADIOs are not represented by *any* + element. Instead, those controls are represented by a collection of INPUT + elements. For example, this is a SELECT control, named "control1": + + <select name="control1"> + <option>foo</option> + <option value="1">bar</option> + </select> + + and this is a CHECKBOX control, named "control2": + + <input type="checkbox" name="control2" value="foo" id="cbe1"> + <input type="checkbox" name="control2" value="bar" id="cbe2"> + + The id attribute of a CHECKBOX or RADIO ListControl is always that of its + first element (for example, "cbe1" above). + + + Additional read-only public attribute: multiple. + + """ + + # ListControls are built up by the parser from their component items by + # creating one ListControl per item, consolidating them into a single + # master ListControl held by the HTMLForm: + + # -User calls form.new_control(...) + # -Form creates Control, and calls control.add_to_form(self). + # -Control looks for a Control with the same name and type in the form, + # and if it finds one, merges itself with that control by calling + # control.merge_control(self). The first Control added to the form, of + # a particular name and type, is the only one that survives in the + # form. + # -Form calls control.fixup for all its controls. ListControls in the + # form know they can now safely pick their default values. + + # To create a ListControl without an HTMLForm, use: + + # control.merge_control(new_control) + + # (actually, it's much easier just to use ParseFile) + + _label = None + + def __init__(self, type, name, attrs={}, select_default=False, + called_as_base_class=False, index=None): + """ + select_default: for RADIO and multiple-selection SELECT controls, pick + the first item as the default if no 'selected' HTML attribute is + present + + """ + if not called_as_base_class: + raise NotImplementedError() + + self.__dict__["type"] = type.lower() + self.__dict__["name"] = name + self._value = attrs.get("value") + self.disabled = False + self.readonly = False + self.id = attrs.get("id") + self._closed = False + + # As Controls are merged in with .merge_control(), self.attrs will + # refer to each Control in turn -- always the most recently merged + # control. Each merged-in Control instance corresponds to a single + # list item: see ListControl.__doc__. + self.items = [] + self._form = None + + self._select_default = select_default + self._clicked = False + + def clear(self): + self.value = [] + + def is_of_kind(self, kind): + if kind == "list": + return True + elif kind == "multilist": + return bool(self.multiple) + elif kind == "singlelist": + return not self.multiple + else: + return False + + def get_items(self, name=None, label=None, id=None, + exclude_disabled=False): + """Return matching items by name or label. 
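+
+ For example (a sketch, assuming a CHECKBOX control named "cheeses"):
+
+ control = form.find_control("cheeses")
+ cheddars = control.get_items(label="cheddar")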
+
+ For argument docs, see the docstring for .get()
+
+ """
+ if name is not None and not isstringlike(name):
+ raise TypeError("item name must be string-like")
+ if label is not None and not isstringlike(label):
+ raise TypeError("item label must be string-like")
+ if id is not None and not isstringlike(id):
+ raise TypeError("item id must be string-like")
+ items = [] # order is important
+ compat = self._form.backwards_compat
+ for o in self.items:
+ if exclude_disabled and o.disabled:
+ continue
+ if name is not None and o.name != name:
+ continue
+ if label is not None:
+ for l in o.get_labels():
+ if ((compat and l.text == label) or
+ (not compat and l.text.find(label) > -1)):
+ break
+ else:
+ continue
+ if id is not None and o.id != id:
+ continue
+ items.append(o)
+ return items
+
+ def get(self, name=None, label=None, id=None, nr=None,
+ exclude_disabled=False):
+ """Return item by name or label, disambiguating if necessary with nr.
+
+ All arguments must be passed by name, with the exception of 'name',
+ which may be used as a positional argument.
+
+ If name is specified, then the item must have the indicated name.
+
+ If label is specified, then the item must have a label whose
+ whitespace-compressed, stripped, text substring-matches the indicated
+ label string (eg. label="please choose" will match
+ " Do please choose an item ").
+
+ If id is specified, then the item must have the indicated id.
+
+ nr is an optional 0-based index of the items matching the query.
+
+ If nr is the default None value and more than one item is found, raises
+ AmbiguityError (unless the HTMLForm instance's backwards_compat
+ attribute is true).
+
+ If no item is found, or if items are found but nr is specified and not
+ found, raises ItemNotFoundError.
+
+ Optionally excludes disabled items.
+
+ """
+ if nr is None and self._form.backwards_compat:
+ nr = 0 # :-/
+ items = self.get_items(name, label, id, exclude_disabled)
+ return disambiguate(items, nr, name=name, label=label, id=id)
+
+ def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
+ # strictly for use by deprecated methods
+ if by_label:
+ name, label = None, name
+ else:
+ name, label = name, None
+ return self.get(name, label, nr=nr, exclude_disabled=exclude_disabled)
+
+ def toggle(self, name, by_label=False, nr=None):
+ """Deprecated: given a name or label and optional disambiguating index
+ nr, toggle the matching item's selection.
+
+ Selecting items follows the behavior described in the docstring of the
+ 'get' method.
+
+ If the item is disabled, or this control is disabled or readonly,
+ raise AttributeError.
+
+ """
+ deprecation(
+ "item = control.get(...); item.selected = not item.selected")
+ o = self._get(name, by_label, nr)
+ self._set_selected_state(o, not o.selected)
+
+ def set(self, selected, name, by_label=False, nr=None):
+ """Deprecated: given a name or label and optional disambiguating index
+ nr, set the matching item's selection to the bool value of selected.
+
+ Selecting items follows the behavior described in the docstring of the
+ 'get' method.
+
+ If the item is disabled, or this control is disabled or readonly,
+ raise AttributeError.
+ + """ + deprecation( + "control.get(...).selected = <boolean>") + self._set_selected_state(self._get(name, by_label, nr), selected) + + def _set_selected_state(self, item, action): + # action: + # bool False: off + # bool True: on + if self.disabled: + raise AttributeError("control '%s' is disabled" % self.name) + if self.readonly: + raise AttributeError("control '%s' is readonly" % self.name) + action == bool(action) + compat = self._form.backwards_compat + if not compat and item.disabled: + raise AttributeError("item is disabled") + else: + if compat and item.disabled and action: + raise AttributeError("item is disabled") + if self.multiple: + item.__dict__["_selected"] = action + else: + if not action: + item.__dict__["_selected"] = False + else: + for o in self.items: + o.__dict__["_selected"] = False + item.__dict__["_selected"] = True + + def toggle_single(self, by_label=None): + """Deprecated: toggle the selection of the single item in this control. + + Raises ItemCountError if the control does not contain only one item. + + by_label argument is ignored, and included only for backwards + compatibility. + + """ + deprecation( + "control.items[0].selected = not control.items[0].selected") + if len(self.items) != 1: + raise ItemCountError( + "'%s' is not a single-item control" % self.name) + item = self.items[0] + self._set_selected_state(item, not item.selected) + + def set_single(self, selected, by_label=None): + """Deprecated: set the selection of the single item in this control. + + Raises ItemCountError if the control does not contain only one item. + + by_label argument is ignored, and included only for backwards + compatibility. + + """ + deprecation( + "control.items[0].selected = <boolean>") + if len(self.items) != 1: + raise ItemCountError( + "'%s' is not a single-item control" % self.name) + self._set_selected_state(self.items[0], selected) + + def get_item_disabled(self, name, by_label=False, nr=None): + """Get disabled state of named list item in a ListControl.""" + deprecation( + "control.get(...).disabled") + return self._get(name, by_label, nr).disabled + + def set_item_disabled(self, disabled, name, by_label=False, nr=None): + """Set disabled state of named list item in a ListControl. + + disabled: boolean disabled state + + """ + deprecation( + "control.get(...).disabled = <boolean>") + self._get(name, by_label, nr).disabled = disabled + + def set_all_items_disabled(self, disabled): + """Set disabled state of all list items in a ListControl. + + disabled: boolean disabled state + + """ + for o in self.items: + o.disabled = disabled + + def get_item_attrs(self, name, by_label=False, nr=None): + """Return dictionary of HTML attributes for a single ListControl item. + + The HTML element types that describe list items are: OPTION for SELECT + controls, INPUT for the rest. These elements have HTML attributes that + you may occasionally want to know about -- for example, the "alt" HTML + attribute gives a text string describing the item (graphical browsers + usually display this as a tooltip). + + The returned dictionary maps HTML attribute names to values. The names + and values are taken from the original HTML. 
+ + """ + deprecation( + "control.get(...).attrs") + return self._get(name, by_label, nr).attrs + + def close_control(self): + self._closed = True + + def add_to_form(self, form): + assert self._form is None or form == self._form, ( + "can't add control to more than one form") + self._form = form + if self.name is None: + # always count nameless elements as separate controls + Control.add_to_form(self, form) + else: + for ii in range(len(form.controls)-1, -1, -1): + control = form.controls[ii] + if control.name == self.name and control.type == self.type: + if control._closed: + Control.add_to_form(self, form) + else: + control.merge_control(self) + break + else: + Control.add_to_form(self, form) + + def merge_control(self, control): + assert bool(control.multiple) == bool(self.multiple) + # usually, isinstance(control, self.__class__) + self.items.extend(control.items) + + def fixup(self): + """ + ListControls are built up from component list items (which are also + ListControls) during parsing. This method should be called after all + items have been added. See ListControl.__doc__ for the reason this is + required. + + """ + # Need to set default selection where no item was indicated as being + # selected by the HTML: + + # CHECKBOX: + # Nothing should be selected. + # SELECT/single, SELECT/multiple and RADIO: + # RFC 1866 (HTML 2.0): says first item should be selected. + # W3C HTML 4.01 Specification: says that client behaviour is + # undefined in this case. For RADIO, exactly one must be selected, + # though which one is undefined. + # Both Netscape and Microsoft Internet Explorer (IE) choose first + # item for SELECT/single. However, both IE5 and Mozilla (both 1.0 + # and Firebird 0.6) leave all items unselected for RADIO and + # SELECT/multiple. + + # Since both Netscape and IE all choose the first item for + # SELECT/single, we do the same. OTOH, both Netscape and IE + # leave SELECT/multiple with nothing selected, in violation of RFC 1866 + # (but not in violation of the W3C HTML 4 standard); the same is true + # of RADIO (which *is* in violation of the HTML 4 standard). We follow + # RFC 1866 if the _select_default attribute is set, and Netscape and IE + # otherwise. RFC 1866 and HTML 4 are always violated insofar as you + # can deselect all items in a RadioControl. 
+ + for o in self.items: + # set items' controls to self, now that we've merged + o.__dict__["_control"] = self + + def __getattr__(self, name): + if name == "value": + compat = self._form.backwards_compat + if self.name is None: + return [] + return [o.name for o in self.items if o.selected and + (not o.disabled or compat)] + else: + raise AttributeError("%s instance has no attribute '%s'" % + (self.__class__.__name__, name)) + + def __setattr__(self, name, value): + if name == "value": + if self.disabled: + raise AttributeError("control '%s' is disabled" % self.name) + if self.readonly: + raise AttributeError("control '%s' is readonly" % self.name) + self._set_value(value) + elif name in ("name", "type", "multiple"): + raise AttributeError("%s attribute is readonly" % name) + else: + self.__dict__[name] = value + + def _set_value(self, value): + if value is None or isstringlike(value): + raise TypeError("ListControl, must set a sequence") + if not value: + compat = self._form.backwards_compat + for o in self.items: + if not o.disabled or compat: + o.selected = False + elif self.multiple: + self._multiple_set_value(value) + elif len(value) > 1: + raise ItemCountError( + "single selection list, must set sequence of " + "length 0 or 1") + else: + self._single_set_value(value) + + def _get_items(self, name, target=1): + all_items = self.get_items(name) + items = [o for o in all_items if not o.disabled] + if len(items) < target: + if len(all_items) < target: + raise ItemNotFoundError( + "insufficient items with name %r" % name) + else: + raise AttributeError( + "insufficient non-disabled items with name %s" % name) + on = [] + off = [] + for o in items: + if o.selected: + on.append(o) + else: + off.append(o) + return on, off + + def _single_set_value(self, value): + assert len(value) == 1 + on, off = self._get_items(value[0]) + assert len(on) <= 1 + if not on: + off[0].selected = True + + def _multiple_set_value(self, value): + compat = self._form.backwards_compat + turn_on = [] # transactional-ish + turn_off = [item for item in self.items if + item.selected and (not item.disabled or compat)] + names = {} + for nn in value: + if nn in names.keys(): + names[nn] += 1 + else: + names[nn] = 1 + for name, count in names.items(): + on, off = self._get_items(name, count) + for i in range(count): + if on: + item = on[0] + del on[0] + del turn_off[turn_off.index(item)] + else: + item = off[0] + del off[0] + turn_on.append(item) + for item in turn_off: + item.selected = False + for item in turn_on: + item.selected = True + + def set_value_by_label(self, value): + """Set the value of control by item labels. + + value is expected to be an iterable of strings that are substrings of + the item labels that should be selected. Before substring matching is + performed, the original label text is whitespace-compressed + (consecutive whitespace characters are converted to a single space + character) and leading and trailing whitespace is stripped. Ambiguous + labels are accepted without complaint if the form's backwards_compat is + True; otherwise, it will not complain as long as all ambiguous labels + share the same item name (e.g. OPTION value). 
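+
+ For example (a sketch, using the SELECT control from
+ SelectControl.__doc__, whose items carry the labels "2002", "2001" and
+ "2000"):
+
+ form.find_control("year").set_value_by_label(["2002"])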
+ + """ + if isstringlike(value): + raise TypeError(value) + if not self.multiple and len(value) > 1: + raise ItemCountError( + "single selection list, must set sequence of " + "length 0 or 1") + items = [] + for nn in value: + found = self.get_items(label=nn) + if len(found) > 1: + if not self._form.backwards_compat: + # ambiguous labels are fine as long as item names (e.g. + # OPTION values) are same + opt_name = found[0].name + if [o for o in found[1:] if o.name != opt_name]: + raise AmbiguityError(nn) + else: + # OK, we'll guess :-( Assume first available item. + found = found[:1] + for o in found: + # For the multiple-item case, we could try to be smarter, + # saving them up and trying to resolve, but that's too much. + if self._form.backwards_compat or o not in items: + items.append(o) + break + else: # all of them are used + raise ItemNotFoundError(nn) + # now we have all the items that should be on + # let's just turn everything off and then back on. + self.value = [] + for o in items: + o.selected = True + + def get_value_by_label(self): + """Return the value of the control as given by normalized labels.""" + res = [] + compat = self._form.backwards_compat + for o in self.items: + if (not o.disabled or compat) and o.selected: + for l in o.get_labels(): + if l.text: + res.append(l.text) + break + else: + res.append(None) + return res + + def possible_items(self, by_label=False): + """Deprecated: return the names or labels of all possible items. + + Includes disabled items, which may be misleading for some use cases. + + """ + deprecation( + "[item.name for item in self.items]") + if by_label: + res = [] + for o in self.items: + for l in o.get_labels(): + if l.text: + res.append(l.text) + break + else: + res.append(None) + return res + return [o.name for o in self.items] + + def _totally_ordered_pairs(self): + if self.disabled or self.name is None: + return [] + else: + return [(o._index, self.name, o.name) for o in self.items + if o.selected and not o.disabled] + + def __str__(self): + name = self.name + if name is None: name = "<None>" + + display = [str(o) for o in self.items] + + infos = [] + if self.disabled: infos.append("disabled") + if self.readonly: infos.append("readonly") + info = ", ".join(infos) + if info: info = " (%s)" % info + + return "<%s(%s=[%s])%s>" % (self.__class__.__name__, + name, ", ".join(display), info) + + +class RadioControl(ListControl): + """ + Covers: + + INPUT/RADIO + + """ + def __init__(self, type, name, attrs, select_default=False, index=None): + attrs.setdefault("value", "on") + ListControl.__init__(self, type, name, attrs, select_default, + called_as_base_class=True, index=index) + self.__dict__["multiple"] = False + o = Item(self, attrs, index) + o.__dict__["_selected"] = attrs.has_key("checked") + + def fixup(self): + ListControl.fixup(self) + found = [o for o in self.items if o.selected and not o.disabled] + if not found: + if self._select_default: + for o in self.items: + if not o.disabled: + o.selected = True + break + else: + # Ensure only one item selected. Choose the last one, + # following IE and Firefox. 
+ for o in found[:-1]: + o.selected = False + + def get_labels(self): + return [] + +class CheckboxControl(ListControl): + """ + Covers: + + INPUT/CHECKBOX + + """ + def __init__(self, type, name, attrs, select_default=False, index=None): + attrs.setdefault("value", "on") + ListControl.__init__(self, type, name, attrs, select_default, + called_as_base_class=True, index=index) + self.__dict__["multiple"] = True + o = Item(self, attrs, index) + o.__dict__["_selected"] = attrs.has_key("checked") + + def get_labels(self): + return [] + + +class SelectControl(ListControl): + """ + Covers: + + SELECT (and OPTION) + + + OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance. + + SELECT control values and labels are subject to some messy defaulting + rules. For example, if the HTML representation of the control is: + + <SELECT name=year> + <OPTION value=0 label="2002">current year</OPTION> + <OPTION value=1>2001</OPTION> + <OPTION>2000</OPTION> + </SELECT> + + The items, in order, have labels "2002", "2001" and "2000", whereas their + names (the OPTION values) are "0", "1" and "2000" respectively. Note that + the value of the last OPTION in this example defaults to its contents, as + specified by RFC 1866, as do the labels of the second and third OPTIONs. + + The OPTION labels are sometimes more meaningful than the OPTION values, + which can make for more maintainable code. + + Additional read-only public attribute: attrs + + The attrs attribute is a dictionary of the original HTML attributes of the + SELECT element. Other ListControls do not have this attribute, because in + other cases the control as a whole does not correspond to any single HTML + element. control.get(...).attrs may be used as usual to get at the HTML + attributes of the HTML elements corresponding to individual list items (for + SELECT controls, these are OPTION elements). + + Another special case is that the Item.attrs dictionaries have a special key + "contents" which does not correspond to any real HTML attribute, but rather + contains the contents of the OPTION element: + + <OPTION>this bit</OPTION> + + """ + # HTML attributes here are treated slightly differently from other list + # controls: + # -The SELECT HTML attributes dictionary is stuffed into the OPTION + # HTML attributes dictionary under the "__select" key. + # -The content of each OPTION element is stored under the special + # "contents" key of the dictionary. + # After all this, the dictionary is passed to the SelectControl constructor + # as the attrs argument, as usual. However: + # -The first SelectControl constructed when building up a SELECT control + # has a constructor attrs argument containing only the __select key -- so + # this SelectControl represents an empty SELECT control. + # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and + # the __select dictionary containing the SELECT HTML-attributes. 
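+ # For example (a sketch), the attrs argument for the second OPTION of the
+ # SELECT shown in the docstring above would look roughly like:
+ # {"__select": {"name": "year"}, "value": "1",
+ # "contents": "2001", "label": "2001"}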
+ + def __init__(self, type, name, attrs, select_default=False, index=None): + # fish out the SELECT HTML attributes from the OPTION HTML attributes + # dictionary + self.attrs = attrs["__select"].copy() + self.__dict__["_label"] = _get_label(self.attrs) + self.__dict__["id"] = self.attrs.get("id") + self.__dict__["multiple"] = self.attrs.has_key("multiple") + # the majority of the contents, label, and value dance already happened + contents = attrs.get("contents") + attrs = attrs.copy() + del attrs["__select"] + + ListControl.__init__(self, type, name, self.attrs, select_default, + called_as_base_class=True, index=index) + self.disabled = self.attrs.has_key("disabled") + self.readonly = self.attrs.has_key("readonly") + if attrs.has_key("value"): + # otherwise it is a marker 'select started' token + o = Item(self, attrs, index) + o.__dict__["_selected"] = attrs.has_key("selected") + # add 'label' label and contents label, if different. If both are + # provided, the 'label' label is used for display in HTML + # 4.0-compliant browsers (and any lower spec? not sure) while the + # contents are used for display in older or less-compliant + # browsers. We make label objects for both, if the values are + # different. + label = attrs.get("label") + if label: + o._labels.append(Label({"__text": label})) + if contents and contents != label: + o._labels.append(Label({"__text": contents})) + elif contents: + o._labels.append(Label({"__text": contents})) + + def fixup(self): + ListControl.fixup(self) + # Firefox doesn't exclude disabled items from those considered here + # (i.e. from 'found', for both branches of the if below). Note that + # IE6 doesn't support the disabled attribute on OPTIONs at all. + found = [o for o in self.items if o.selected] + if not found: + if not self.multiple or self._select_default: + for o in self.items: + if not o.disabled: + was_disabled = self.disabled + self.disabled = False + try: + o.selected = True + finally: + o.disabled = was_disabled + break + elif not self.multiple: + # Ensure only one item selected. Choose the last one, + # following IE and Firefox. + for o in found[:-1]: + o.selected = False + + +#--------------------------------------------------- +class SubmitControl(ScalarControl): + """ + Covers: + + INPUT/SUBMIT + BUTTON/SUBMIT + + """ + def __init__(self, type, name, attrs, index=None): + ScalarControl.__init__(self, type, name, attrs, index) + # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it + # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem + # to define this. + if self.value is None: self.value = "" + self.readonly = True + + def get_labels(self): + res = [] + if self.value: + res.append(Label({"__text": self.value})) + res.extend(ScalarControl.get_labels(self)) + return res + + def is_of_kind(self, kind): return kind == "clickable" + + def _click(self, form, coord, return_type, request_class=urllib2.Request): + self._clicked = coord + r = form._switch_click(return_type, request_class) + self._clicked = False + return r + + def _totally_ordered_pairs(self): + if not self._clicked: + return [] + return ScalarControl._totally_ordered_pairs(self) + + +#--------------------------------------------------- +class ImageControl(SubmitControl): + """ + Covers: + + INPUT/IMAGE + + Coordinates are specified using one of the HTMLForm.click* methods. 
+ + """ + def __init__(self, type, name, attrs, index=None): + SubmitControl.__init__(self, type, name, attrs, index) + self.readonly = False + + def _totally_ordered_pairs(self): + clicked = self._clicked + if self.disabled or not clicked: + return [] + name = self.name + if name is None: return [] + pairs = [ + (self._index, "%s.x" % name, str(clicked[0])), + (self._index+1, "%s.y" % name, str(clicked[1])), + ] + value = self._value + if value: + pairs.append((self._index+2, name, value)) + return pairs + + get_labels = ScalarControl.get_labels + +# aliases, just to make str(control) and str(form) clearer +class PasswordControl(TextControl): pass +class HiddenControl(TextControl): pass +class TextareaControl(TextControl): pass +class SubmitButtonControl(SubmitControl): pass + + +def is_listcontrol(control): return control.is_of_kind("list") + + +class HTMLForm: + """Represents a single HTML <form> ... </form> element. + + A form consists of a sequence of controls that usually have names, and + which can take on various values. The values of the various types of + controls represent variously: text, zero-or-one-of-many or many-of-many + choices, and files to be uploaded. Some controls can be clicked on to + submit the form, and clickable controls' values sometimes include the + coordinates of the click. + + Forms can be filled in with data to be returned to the server, and then + submitted, using the click method to generate a request object suitable for + passing to urllib2.urlopen (or the click_request_data or click_pairs + methods if you're not using urllib2). + + import ClientForm + forms = ClientForm.ParseFile(html, base_uri) + form = forms[0] + + form["query"] = "Python" + form.find_control("nr_results").get("lots").selected = True + + response = urllib2.urlopen(form.click()) + + Usually, HTMLForm instances are not created directly. Instead, the + ParseFile or ParseResponse factory functions are used. If you do construct + HTMLForm objects yourself, however, note that an HTMLForm instance is only + properly initialised after the fixup method has been called (ParseFile and + ParseResponse do this for you). See ListControl.__doc__ for the reason + this is required. + + Indexing a form (form["control_name"]) returns the named Control's value + attribute. Assignment to a form index (form["control_name"] = something) + is equivalent to assignment to the named Control's value attribute. If you + need to be more specific than just supplying the control's name, use the + set_value and get_value methods. + + ListControl values are lists of item names (specifically, the names of the + items that are selected and not disabled, and hence are "successful" -- ie. + cause data to be returned to the server). The list item's name is the + value of the corresponding HTML element's"value" attribute. + + Example: + + <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT> + <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT> + + defines a CHECKBOX control with name "cheeses" which has two items, named + "leicester" and "cheddar". + + Another example: + + <SELECT name="more_cheeses"> + <OPTION>1</OPTION> + <OPTION value="2" label="CHEDDAR">cheddar</OPTION> + </SELECT> + + defines a SELECT control with name "more_cheeses" which has two items, + named "1" and "2" (because the OPTION element's value HTML attribute + defaults to the element contents -- see SelectControl.__doc__ for more on + these defaulting rules). 
+ + To select, deselect or otherwise manipulate individual list items, use the + HTMLForm.find_control() and ListControl.get() methods. To set the whole + value, do as for any other control: use indexing or the set_/get_value + methods. + + Example: + + # select *only* the item named "cheddar" + form["cheeses"] = ["cheddar"] + # select "cheddar", leave other items unaffected + form.find_control("cheeses").get("cheddar").selected = True + + Some controls (RADIO and SELECT without the multiple attribute) can only + have zero or one items selected at a time. Some controls (CHECKBOX and + SELECT with the multiple attribute) can have multiple items selected at a + time. To set the whole value of a ListControl, assign a sequence to a form + index: + + form["cheeses"] = ["cheddar", "leicester"] + + If the ListControl is not multiple-selection, the assigned list must be of + length one. + + To check if a control has an item, if an item is selected, or if an item is + successful (selected and not disabled), respectively: + + "cheddar" in [item.name for item in form.find_control("cheeses").items] + "cheddar" in [item.name for item in form.find_control("cheeses").items and + item.selected] + "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses")) + + Note that some list items may be disabled (see below). + + Note the following mistake: + + form[control_name] = control_value + assert form[control_name] == control_value # not necessarily true + + The reason for this is that form[control_name] always gives the list items + in the order they were listed in the HTML. + + List items (hence list values, too) can be referred to in terms of list + item labels rather than list item names using the appropriate label + arguments. Note that each item may have several labels. + + The question of default values of OPTION contents, labels and values is + somewhat complicated: see SelectControl.__doc__ and + ListControl.get_item_attrs.__doc__ if you think you need to know. + + Controls can be disabled or readonly. In either case, the control's value + cannot be changed until you clear those flags (see example below). + Disabled is the state typically represented by browsers by 'greying out' a + control. Disabled controls are not 'successful' -- they don't cause data + to get returned to the server. Readonly controls usually appear in + browsers as read-only text boxes. Readonly controls are successful. List + items can also be disabled. Attempts to select or deselect disabled items + fail with AttributeError. + + If a lot of controls are readonly, it can be useful to do this: + + form.set_all_readonly(False) + + To clear a control's value attribute, so that it is not successful (until a + value is subsequently set): + + form.clear("cheeses") + + More examples: + + control = form.find_control("cheeses") + control.disabled = False + control.readonly = False + control.get("gruyere").disabled = True + control.items[0].selected = True + + See the various Control classes for further documentation. Many methods + take name, type, kind, id, label and nr arguments to specify the control to + be operated on: see HTMLForm.find_control.__doc__. + + ControlNotFoundError (subclass of ValueError) is raised if the specified + control can't be found. This includes occasions where a non-ListControl + is found, but the method (set, for example) requires a ListControl. + ItemNotFoundError (subclass of ValueError) is raised if a list item can't + be found. 
ItemCountError (subclass of ValueError) is raised if an attempt
+    is made to select more than one item and the control doesn't allow that,
+    or set/get_single are called and the control contains more than one item.
+    AttributeError is raised if a control or item is readonly or disabled and
+    an attempt is made to alter its value.
+
+    Security note: Remember that any passwords you store in HTMLForm
+    instances will be saved to disk in the clear if you pickle them (directly
+    or indirectly).  The simplest solution to this is to avoid pickling
+    HTMLForm objects.  You could also pickle before filling in any password,
+    or just set the password to "" before pickling.
+
+
+    Public attributes:
+
+    action: full (absolute URI) form action
+    method: "GET" or "POST"
+    enctype: form transfer encoding MIME type
+    name: name of form (None if no name was specified)
+    attrs: dictionary mapping original HTML form attributes to their values
+
+    controls: list of Control instances; do not alter this list
+     (instead, call form.new_control to make a Control and add it to the
+     form, or control.add_to_form if you already have a Control instance)
+
+
+
+    Methods for form filling:
+    -------------------------
+
+    Most of these methods have very similar arguments.  See
+    HTMLForm.find_control.__doc__ for details of the name, type, kind, label
+    and nr arguments.
+
+    def find_control(self,
+                     name=None, type=None, kind=None, id=None, predicate=None,
+                     nr=None, label=None)
+
+    get_value(name=None, type=None, kind=None, id=None, nr=None,
+              by_label=False,  # by_label is deprecated
+              label=None)
+    set_value(value,
+              name=None, type=None, kind=None, id=None, nr=None,
+              by_label=False,  # by_label is deprecated
+              label=None)
+
+    clear_all()
+    clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
+
+    set_all_readonly(readonly)
+
+
+    Method applying only to FileControls:
+
+    add_file(file_object,
+             content_type="application/octet-stream", filename=None,
+             name=None, id=None, nr=None, label=None)
+
+
+    Methods applying only to clickable controls:
+
+    click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
+    click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
+                       label=None)
+    click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
+
+    """
+
+    type2class = {
+        "text": TextControl,
+        "password": PasswordControl,
+        "hidden": HiddenControl,
+        "textarea": TextareaControl,
+
+        "isindex": IsindexControl,
+
+        "file": FileControl,
+
+        "button": IgnoreControl,
+        "buttonbutton": IgnoreControl,
+        "reset": IgnoreControl,
+        "resetbutton": IgnoreControl,
+
+        "submit": SubmitControl,
+        "submitbutton": SubmitButtonControl,
+        "image": ImageControl,
+
+        "radio": RadioControl,
+        "checkbox": CheckboxControl,
+        "select": SelectControl,
+        }
+
+#---------------------------------------------------
+# Initialisation.  Use ParseResponse / ParseFile instead.
+
+    def __init__(self, action, method="GET",
+                 enctype="application/x-www-form-urlencoded",
+                 name=None, attrs=None,
+                 request_class=urllib2.Request,
+                 forms=None, labels=None, id_to_labels=None,
+                 backwards_compat=True):
+        """
+        In the usual case, use ParseResponse (or ParseFile) to create new
+        HTMLForm objects.
+
+        action: full (absolute URI) form action
+        method: "GET" or "POST"
+        enctype: form transfer encoding MIME type
+        name: name of form
+        attrs: dictionary mapping original HTML form attributes to their values
+
+        """
+        self.action = action
+        self.method = method
+        self.enctype = enctype
+        self.name = name
+        if attrs is not None:
+            self.attrs = attrs.copy()
+        else:
+            self.attrs = {}
+        self.controls = []
+        self._request_class = request_class
+
+        # these attributes are used by zope.testbrowser
+        self._forms = forms  # this is a semi-public API!
+        self._labels = labels  # this is a semi-public API!
+        self._id_to_labels = id_to_labels  # this is a semi-public API!
+
+        self.backwards_compat = backwards_compat  # note __setattr__
+
+        self._urlunparse = urlparse.urlunparse
+        self._urlparse = urlparse.urlparse
+
+    def __getattr__(self, name):
+        if name == "backwards_compat":
+            return self._backwards_compat
+        return getattr(HTMLForm, name)
+
+    def __setattr__(self, name, value):
+        # yuck
+        if name == "backwards_compat":
+            name = "_backwards_compat"
+            value = bool(value)
+            for cc in self.controls:
+                try:
+                    items = cc.items
+                except AttributeError:
+                    continue
+                else:
+                    for ii in items:
+                        for ll in ii.get_labels():
+                            ll._backwards_compat = value
+        self.__dict__[name] = value
+
+    def new_control(self, type, name, attrs,
+                    ignore_unknown=False, select_default=False, index=None):
+        """Adds a new control to the form.
+
+        This is usually called by ParseFile and ParseResponse.  Don't call it
+        yourself unless you're building your own Control instances.
+
+        Note that controls representing lists of items are built up from
+        controls holding only a single list item.  See ListControl.__doc__ for
+        further information.
+
+        type: type of control (see Control.__doc__ for a list)
+        attrs: HTML attributes of control
+        ignore_unknown: if true, use a dummy Control instance for controls of
+         unknown type; otherwise, use a TextControl
+        select_default: for RADIO and multiple-selection SELECT controls, pick
+         the first item as the default if no 'selected' HTML attribute is
+         present (this defaulting happens when the HTMLForm.fixup method is
+         called)
+        index: index of corresponding element in HTML (see
+         MoreFormTests.test_interspersed_controls for motivation)
+
+        """
+        type = type.lower()
+        klass = self.type2class.get(type)
+        if klass is None:
+            if ignore_unknown:
+                klass = IgnoreControl
+            else:
+                klass = TextControl
+
+        a = attrs.copy()
+        if issubclass(klass, ListControl):
+            control = klass(type, name, a, select_default, index)
+        else:
+            control = klass(type, name, a, index)
+
+        if type == "select" and len(attrs) == 1:
+            for ii in range(len(self.controls)-1, -1, -1):
+                ctl = self.controls[ii]
+                if ctl.type == "select":
+                    ctl.close_control()
+                    break
+
+        control.add_to_form(self)
+        control._urlparse = self._urlparse
+        control._urlunparse = self._urlunparse
+
+    def fixup(self):
+        """Normalise form after all controls have been added.
+
+        This is usually called by ParseFile and ParseResponse.  Don't call it
+        yourself unless you're building your own Control instances.
+
+        This method should only be called once, after all controls have been
+        added to the form.
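A hedged sketch of hand-construction (the URL and control name are invented); ParseFile and ParseResponse normally do all of this for you:

    form = HTMLForm("http://example.com/search", method="GET")
    form.new_control("text", "q", {})
    form.fixup()
    form["q"] = "webkit"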
+ + """ + for control in self.controls: + control.fixup() + self.backwards_compat = self._backwards_compat + +#--------------------------------------------------- + def __str__(self): + header = "%s%s %s %s" % ( + (self.name and self.name+" " or ""), + self.method, self.action, self.enctype) + rep = [header] + for control in self.controls: + rep.append(" %s" % str(control)) + return "<%s>" % "\n".join(rep) + +#--------------------------------------------------- +# Form-filling methods. + + def __getitem__(self, name): + return self.find_control(name).value + def __contains__(self, name): + return bool(self.find_control(name)) + def __setitem__(self, name, value): + control = self.find_control(name) + try: + control.value = value + except AttributeError, e: + raise ValueError(str(e)) + + def get_value(self, + name=None, type=None, kind=None, id=None, nr=None, + by_label=False, # by_label is deprecated + label=None): + """Return value of control. + + If only name and value arguments are supplied, equivalent to + + form[name] + + """ + if by_label: + deprecation("form.get_value_by_label(...)") + c = self.find_control(name, type, kind, id, label=label, nr=nr) + if by_label: + try: + meth = c.get_value_by_label + except AttributeError: + raise NotImplementedError( + "control '%s' does not yet support by_label" % c.name) + else: + return meth() + else: + return c.value + def set_value(self, value, + name=None, type=None, kind=None, id=None, nr=None, + by_label=False, # by_label is deprecated + label=None): + """Set value of control. + + If only name and value arguments are supplied, equivalent to + + form[name] = value + + """ + if by_label: + deprecation("form.get_value_by_label(...)") + c = self.find_control(name, type, kind, id, label=label, nr=nr) + if by_label: + try: + meth = c.set_value_by_label + except AttributeError: + raise NotImplementedError( + "control '%s' does not yet support by_label" % c.name) + else: + meth(value) + else: + c.value = value + def get_value_by_label( + self, name=None, type=None, kind=None, id=None, label=None, nr=None): + """ + + All arguments should be passed by name. + + """ + c = self.find_control(name, type, kind, id, label=label, nr=nr) + return c.get_value_by_label() + + def set_value_by_label( + self, value, + name=None, type=None, kind=None, id=None, label=None, nr=None): + """ + + All arguments should be passed by name. + + """ + c = self.find_control(name, type, kind, id, label=label, nr=nr) + c.set_value_by_label(value) + + def set_all_readonly(self, readonly): + for control in self.controls: + control.readonly = bool(readonly) + + def clear_all(self): + """Clear the value attributes of all controls in the form. + + See HTMLForm.clear.__doc__. + + """ + for control in self.controls: + control.clear() + + def clear(self, + name=None, type=None, kind=None, id=None, nr=None, label=None): + """Clear the value attribute of a control. + + As a result, the affected control will not be successful until a value + is subsequently set. AttributeError is raised on readonly controls. + + """ + c = self.find_control(name, type, kind, id, label=label, nr=nr) + c.clear() + + +#--------------------------------------------------- +# Form-filling methods applying only to ListControls. 
+
+    def possible_items(self,  # deprecated
+                       name=None, type=None, kind=None, id=None,
+                       nr=None, by_label=False, label=None):
+        """Return a list of all values that the specified control can take."""
+        c = self._find_list_control(name, type, kind, id, label, nr)
+        return c.possible_items(by_label)
+
+    def set(self, selected, item_name,  # deprecated
+            name=None, type=None, kind=None, id=None, nr=None,
+            by_label=False, label=None):
+        """Select / deselect named list item.
+
+        selected: boolean selected state
+
+        """
+        self._find_list_control(name, type, kind, id, label, nr).set(
+            selected, item_name, by_label)
+    def toggle(self, item_name,  # deprecated
+               name=None, type=None, kind=None, id=None, nr=None,
+               by_label=False, label=None):
+        """Toggle selected state of named list item."""
+        self._find_list_control(name, type, kind, id, label, nr).toggle(
+            item_name, by_label)
+
+    def set_single(self, selected,  # deprecated
+                   name=None, type=None, kind=None, id=None,
+                   nr=None, by_label=None, label=None):
+        """Select / deselect list item in a control having only one item.
+
+        If the control has multiple list items, ItemCountError is raised.
+
+        This is just a convenience method, so you don't need to know the item's
+        name -- the item name in these single-item controls is usually
+        something meaningless like "1" or "on".
+
+        For example, if a checkbox has a single item named "on", the following
+        two calls are equivalent:
+
+        control.toggle("on")
+        control.toggle_single()
+
+        """  # by_label ignored and deprecated
+        self._find_list_control(
+            name, type, kind, id, label, nr).set_single(selected)
+    def toggle_single(self, name=None, type=None, kind=None, id=None,
+                      nr=None, by_label=None, label=None):  # deprecated
+        """Toggle selected state of list item in control having only one item.
+
+        The rest is as for HTMLForm.set_single.__doc__.
+
+        """  # by_label ignored and deprecated
+        self._find_list_control(name, type, kind, id, label, nr).toggle_single()
+
+#---------------------------------------------------
+# Form-filling method applying only to FileControls.
+
+    def add_file(self, file_object, content_type=None, filename=None,
+                 name=None, id=None, nr=None, label=None):
+        """Add a file to be uploaded.
+
+        file_object: file-like object (with read method) from which to read
+         data to upload
+        content_type: MIME content type of data to upload
+        filename: filename to pass to server
+
+        If filename is None, no filename is sent to the server.
+
+        If content_type is None, the content type is guessed based on the
+        filename and the data read from the file object.
+
+        XXX
+        At the moment, guessed content type is always application/octet-stream.
+        Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
+        plain text.
+
+        Note the following useful HTML attributes of file upload controls (see
+        HTML 4.01 spec, section 17):
+
+        accept: comma-separated list of content types that the server will
+         handle correctly; you can use this to filter out non-conforming files
+        size: XXX IIRC, this is indicative of whether form wants multiple or
+         single files
+        maxlength: XXX hint of max content length in bytes?
+
+        """
+        self.find_control(name, "file", id=id, label=label, nr=nr).add_file(
+            file_object, content_type, filename)
+
+#---------------------------------------------------
+# Form submission methods, applying only to clickable controls.
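A hedged example of the add_file method above, ahead of the submission methods below; the field name and file are invented:

    fh = open("report.txt")
    form.add_file(fh, content_type="text/plain",
                  filename="report.txt", name="upload")
    # the data is only read when a click* method builds the request body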
+
+    def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
+              request_class=urllib2.Request,
+              label=None):
+        """Return request that would result from clicking on a control.
+
+        The request object is a urllib2.Request instance, which you can pass to
+        urllib2.urlopen (or ClientCookie.urlopen).
+
+        Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
+        IMAGEs) can be clicked.
+
+        Will click on the first clickable control, subject to the name, type
+        and nr arguments (as for find_control).  If no name, type, id or number
+        is specified and there are no clickable controls, a request will be
+        returned for the form in its current, un-clicked, state.
+
+        IndexError is raised if any of name, type, id or nr is specified but no
+        matching control is found.  ValueError is raised if the HTMLForm has an
+        enctype attribute that is not recognised.
+
+        You can optionally specify a coordinate to click at, which only makes a
+        difference if you clicked on an image.
+
+        """
+        return self._click(name, type, id, label, nr, coord, "request",
+                           self._request_class)
+
+    def click_request_data(self,
+                           name=None, type=None, id=None,
+                           nr=0, coord=(1,1),
+                           request_class=urllib2.Request,
+                           label=None):
+        """As for click method, but return a tuple (url, data, headers).
+
+        You can use this data to send a request to the server.  This is useful
+        if you're using httplib or urllib rather than urllib2.  Otherwise, use
+        the click method.
+
+        # Untested.  Have to subclass to add headers, I think -- so use urllib2
+        # instead!
+        import urllib
+        url, data, hdrs = form.click_request_data()
+        r = urllib.urlopen(url, data)
+
+        # Untested.  I don't know of any reason to use httplib -- you can get
+        # just as much control with urllib2.
+        import httplib, urlparse
+        url, data, hdrs = form.click_request_data()
+        tup = urlparse.urlparse(url)
+        host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
+        conn = httplib.HTTPConnection(host)
+        if data:
+            conn.request("POST", path, data, hdrs)
+        else:
+            conn.request("GET", path, headers=hdrs)
+        r = conn.getresponse()
+
+        """
+        return self._click(name, type, id, label, nr, coord, "request_data",
+                           self._request_class)
+
+    def click_pairs(self, name=None, type=None, id=None,
+                    nr=0, coord=(1,1),
+                    label=None):
+        """As for click_request_data, but returns a list of (key, value) pairs.
+
+        You can use this list as an argument to ClientForm.urlencode.  This is
+        usually only useful if you're using httplib or urllib rather than
+        urllib2 or ClientCookie.  It may also be useful if you want to manually
+        tweak the keys and/or values, but this should not be necessary.
+        Otherwise, use the click method.
+
+        Note that this method is only useful for forms of MIME type
+        x-www-form-urlencoded.  In particular, it does not return the
+        information required for file upload.  If you need file upload and are
+        not using urllib2, use click_request_data.
+
+        Also note that Python 2.0's urllib.urlencode is slightly broken: it
+        only accepts a mapping, not a sequence of pairs, as an argument.  This
+        messes up any ordering in the argument.  Use ClientForm.urlencode
+        instead.
+
+        """
+        return self._click(name, type, id, label, nr, coord, "pairs",
+                           self._request_class)
+
+#---------------------------------------------------
+
+    def find_control(self,
+                     name=None, type=None, kind=None, id=None,
+                     predicate=None, nr=None,
+                     label=None):
+        """Locate and return some specific control within the form.
+
+        At least one of the name, type, kind, predicate and nr arguments must
+        be supplied.
If no matching control is found, ControlNotFoundError is + raised. + + If name is specified, then the control must have the indicated name. + + If type is specified then the control must have the specified type (in + addition to the types possible for <input> HTML tags: "text", + "password", "hidden", "submit", "image", "button", "radio", "checkbox", + "file" we also have "reset", "buttonbutton", "submitbutton", + "resetbutton", "textarea", "select" and "isindex"). + + If kind is specified, then the control must fall into the specified + group, each of which satisfies a particular interface. The types are + "text", "list", "multilist", "singlelist", "clickable" and "file". + + If id is specified, then the control must have the indicated id. + + If predicate is specified, then the control must match that function. + The predicate function is passed the control as its single argument, + and should return a boolean value indicating whether the control + matched. + + nr, if supplied, is the sequence number of the control (where 0 is the + first). Note that control 0 is the first control matching all the + other arguments (if supplied); it is not necessarily the first control + in the form. If no nr is supplied, AmbiguityError is raised if + multiple controls match the other arguments (unless the + .backwards-compat attribute is true). + + If label is specified, then the control must have this label. Note + that radio controls and checkboxes never have labels: their items do. + + """ + if ((name is None) and (type is None) and (kind is None) and + (id is None) and (label is None) and (predicate is None) and + (nr is None)): + raise ValueError( + "at least one argument must be supplied to specify control") + return self._find_control(name, type, kind, id, label, predicate, nr) + +#--------------------------------------------------- +# Private methods. 
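To illustrate the find_control arguments documented above (control names and label text are invented):

    form.find_control("cheeses")                   # by name
    form.find_control(type="select", nr=0)         # first SELECT in the form
    form.find_control(kind="clickable", label="Search")  # label substring match
    # without nr, AmbiguityError is raised if several controls match
    # (unless backwards_compat is true)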
+ + def _find_list_control(self, + name=None, type=None, kind=None, id=None, + label=None, nr=None): + if ((name is None) and (type is None) and (kind is None) and + (id is None) and (label is None) and (nr is None)): + raise ValueError( + "at least one argument must be supplied to specify control") + + return self._find_control(name, type, kind, id, label, + is_listcontrol, nr) + + def _find_control(self, name, type, kind, id, label, predicate, nr): + if ((name is not None) and (name is not Missing) and + not isstringlike(name)): + raise TypeError("control name must be string-like") + if (type is not None) and not isstringlike(type): + raise TypeError("control type must be string-like") + if (kind is not None) and not isstringlike(kind): + raise TypeError("control kind must be string-like") + if (id is not None) and not isstringlike(id): + raise TypeError("control id must be string-like") + if (label is not None) and not isstringlike(label): + raise TypeError("control label must be string-like") + if (predicate is not None) and not callable(predicate): + raise TypeError("control predicate must be callable") + if (nr is not None) and nr < 0: + raise ValueError("control number must be a positive integer") + + orig_nr = nr + found = None + ambiguous = False + if nr is None and self.backwards_compat: + nr = 0 + + for control in self.controls: + if ((name is not None and name != control.name) and + (name is not Missing or control.name is not None)): + continue + if type is not None and type != control.type: + continue + if kind is not None and not control.is_of_kind(kind): + continue + if id is not None and id != control.id: + continue + if predicate and not predicate(control): + continue + if label: + for l in control.get_labels(): + if l.text.find(label) > -1: + break + else: + continue + if nr is not None: + if nr == 0: + return control # early exit: unambiguous due to nr + nr -= 1 + continue + if found: + ambiguous = True + break + found = control + + if found and not ambiguous: + return found + + description = [] + if name is not None: description.append("name %s" % repr(name)) + if type is not None: description.append("type '%s'" % type) + if kind is not None: description.append("kind '%s'" % kind) + if id is not None: description.append("id '%s'" % id) + if label is not None: description.append("label '%s'" % label) + if predicate is not None: + description.append("predicate %s" % predicate) + if orig_nr: description.append("nr %d" % orig_nr) + description = ", ".join(description) + + if ambiguous: + raise AmbiguityError("more than one control matching "+description) + elif not found: + raise ControlNotFoundError("no control matching "+description) + assert False + + def _click(self, name, type, id, label, nr, coord, return_type, + request_class=urllib2.Request): + try: + control = self._find_control( + name, type, "clickable", id, label, None, nr) + except ControlNotFoundError: + if ((name is not None) or (type is not None) or (id is not None) or + (nr != 0)): + raise + # no clickable controls, but no control was explicitly requested, + # so return state without clicking any control + return self._switch_click(return_type, request_class) + else: + return control._click(self, coord, return_type, request_class) + + def _pairs(self): + """Return sequence of (key, value) pairs suitable for urlencoding.""" + return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()] + + + def _pairs_and_controls(self): + """Return sequence of (index, key, value, control_index) + of totally ordered 
pairs suitable for urlencoding. + + control_index is the index of the control in self.controls + """ + pairs = [] + for control_index in range(len(self.controls)): + control = self.controls[control_index] + for ii, key, val in control._totally_ordered_pairs(): + pairs.append((ii, key, val, control_index)) + + # stable sort by ONLY first item in tuple + pairs.sort() + + return pairs + + def _request_data(self): + """Return a tuple (url, data, headers).""" + method = self.method.upper() + #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action) + parts = self._urlparse(self.action) + rest, (query, frag) = parts[:-2], parts[-2:] + + if method == "GET": + if self.enctype != "application/x-www-form-urlencoded": + raise ValueError( + "unknown GET form encoding type '%s'" % self.enctype) + parts = rest + (urlencode(self._pairs()), None) + uri = self._urlunparse(parts) + return uri, None, [] + elif method == "POST": + parts = rest + (query, None) + uri = self._urlunparse(parts) + if self.enctype == "application/x-www-form-urlencoded": + return (uri, urlencode(self._pairs()), + [("Content-Type", self.enctype)]) + elif self.enctype == "multipart/form-data": + data = StringIO() + http_hdrs = [] + mw = MimeWriter(data, http_hdrs) + f = mw.startmultipartbody("form-data", add_to_http_hdrs=True, + prefix=0) + for ii, k, v, control_index in self._pairs_and_controls(): + self.controls[control_index]._write_mime_data(mw, k, v) + mw.lastpart() + return uri, data.getvalue(), http_hdrs + else: + raise ValueError( + "unknown POST form encoding type '%s'" % self.enctype) + else: + raise ValueError("Unknown method '%s'" % method) + + def _switch_click(self, return_type, request_class=urllib2.Request): + # This is called by HTMLForm and clickable Controls to hide switching + # on return_type. + if return_type == "pairs": + return self._pairs() + elif return_type == "request_data": + return self._request_data() + else: + req_data = self._request_data() + req = request_class(req_data[0], req_data[1]) + for key, val in req_data[2]: + add_hdr = req.add_header + if key.lower() == "content-type": + try: + add_hdr = req.add_unredirected_header + except AttributeError: + # pre-2.4 and not using ClientCookie + pass + add_hdr(key, val) + return req diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py new file mode 100644 index 0000000..c1e4c6d --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/clientform/__init__.py @@ -0,0 +1 @@ +# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url new file mode 100644 index 0000000..f34e243 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.ircbot.py.url @@ -0,0 +1 @@ +http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url new file mode 100644 index 0000000..f34e243 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/.irclib.py.url @@ -0,0 +1 @@ +http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py new file mode 100644 index 0000000..c1e4c6d --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/__init__.py @@ -0,0 +1 @@ +# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py new file mode 100644 index 0000000..6f29a65 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/ircbot.py @@ -0,0 +1,438 @@ +# Copyright (C) 1999--2002 Joel Rosdahl +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Joel Rosdahl <joel@rosdahl.net> +# +# $Id: ircbot.py,v 1.23 2008/09/11 07:38:30 keltus Exp $ + +"""ircbot -- Simple IRC bot library. + +This module contains a single-server IRC bot class that can be used to +write simpler bots. +""" + +import sys +from UserDict import UserDict + +from irclib import SimpleIRCClient +from irclib import nm_to_n, irc_lower, all_events +from irclib import parse_channel_modes, is_channel +from irclib import ServerConnectionError + +class SingleServerIRCBot(SimpleIRCClient): + """A single-server IRC bot class. + + The bot tries to reconnect if it is disconnected. + + The bot keeps track of the channels it has joined, the other + clients that are present in the channels and which of those that + have operator or voice modes. The "database" is kept in the + self.channels attribute, which is an IRCDict of Channels. + """ + def __init__(self, server_list, nickname, realname, reconnection_interval=60): + """Constructor for SingleServerIRCBot objects. + + Arguments: + + server_list -- A list of tuples (server, port) that + defines which servers the bot should try to + connect to. + + nickname -- The bot's nickname. + + realname -- The bot's realname. + + reconnection_interval -- How long the bot should wait + before trying to reconnect. + + dcc_connections -- A list of initiated/accepted DCC + connections. 
+ """ + + SimpleIRCClient.__init__(self) + self.channels = IRCDict() + self.server_list = server_list + if not reconnection_interval or reconnection_interval < 0: + reconnection_interval = 2**31 + self.reconnection_interval = reconnection_interval + + self._nickname = nickname + self._realname = realname + for i in ["disconnect", "join", "kick", "mode", + "namreply", "nick", "part", "quit"]: + self.connection.add_global_handler(i, + getattr(self, "_on_" + i), + -10) + def _connected_checker(self): + """[Internal]""" + if not self.connection.is_connected(): + self.connection.execute_delayed(self.reconnection_interval, + self._connected_checker) + self.jump_server() + + def _connect(self): + """[Internal]""" + password = None + if len(self.server_list[0]) > 2: + password = self.server_list[0][2] + try: + self.connect(self.server_list[0][0], + self.server_list[0][1], + self._nickname, + password, + ircname=self._realname) + except ServerConnectionError: + pass + + def _on_disconnect(self, c, e): + """[Internal]""" + self.channels = IRCDict() + self.connection.execute_delayed(self.reconnection_interval, + self._connected_checker) + + def _on_join(self, c, e): + """[Internal]""" + ch = e.target() + nick = nm_to_n(e.source()) + if nick == c.get_nickname(): + self.channels[ch] = Channel() + self.channels[ch].add_user(nick) + + def _on_kick(self, c, e): + """[Internal]""" + nick = e.arguments()[0] + channel = e.target() + + if nick == c.get_nickname(): + del self.channels[channel] + else: + self.channels[channel].remove_user(nick) + + def _on_mode(self, c, e): + """[Internal]""" + modes = parse_channel_modes(" ".join(e.arguments())) + t = e.target() + if is_channel(t): + ch = self.channels[t] + for mode in modes: + if mode[0] == "+": + f = ch.set_mode + else: + f = ch.clear_mode + f(mode[1], mode[2]) + else: + # Mode on self... XXX + pass + + def _on_namreply(self, c, e): + """[Internal]""" + + # e.arguments()[0] == "@" for secret channels, + # "*" for private channels, + # "=" for others (public channels) + # e.arguments()[1] == channel + # e.arguments()[2] == nick list + + ch = e.arguments()[1] + for nick in e.arguments()[2].split(): + if nick[0] == "@": + nick = nick[1:] + self.channels[ch].set_mode("o", nick) + elif nick[0] == "+": + nick = nick[1:] + self.channels[ch].set_mode("v", nick) + self.channels[ch].add_user(nick) + + def _on_nick(self, c, e): + """[Internal]""" + before = nm_to_n(e.source()) + after = e.target() + for ch in self.channels.values(): + if ch.has_user(before): + ch.change_nick(before, after) + + def _on_part(self, c, e): + """[Internal]""" + nick = nm_to_n(e.source()) + channel = e.target() + + if nick == c.get_nickname(): + del self.channels[channel] + else: + self.channels[channel].remove_user(nick) + + def _on_quit(self, c, e): + """[Internal]""" + nick = nm_to_n(e.source()) + for ch in self.channels.values(): + if ch.has_user(nick): + ch.remove_user(nick) + + def die(self, msg="Bye, cruel world!"): + """Let the bot die. + + Arguments: + + msg -- Quit message. + """ + + self.connection.disconnect(msg) + sys.exit(0) + + def disconnect(self, msg="I'll be back!"): + """Disconnect the bot. + + The bot will try to reconnect after a while. + + Arguments: + + msg -- Quit message. + """ + self.connection.disconnect(msg) + + def get_version(self): + """Returns the bot version. + + Used when answering a CTCP VERSION request. 
+ """ + return "ircbot.py by Joel Rosdahl <joel@rosdahl.net>" + + def jump_server(self, msg="Changing servers"): + """Connect to a new server, possibly disconnecting from the current. + + The bot will skip to next server in the server_list each time + jump_server is called. + """ + if self.connection.is_connected(): + self.connection.disconnect(msg) + + self.server_list.append(self.server_list.pop(0)) + self._connect() + + def on_ctcp(self, c, e): + """Default handler for ctcp events. + + Replies to VERSION and PING requests and relays DCC requests + to the on_dccchat method. + """ + if e.arguments()[0] == "VERSION": + c.ctcp_reply(nm_to_n(e.source()), + "VERSION " + self.get_version()) + elif e.arguments()[0] == "PING": + if len(e.arguments()) > 1: + c.ctcp_reply(nm_to_n(e.source()), + "PING " + e.arguments()[1]) + elif e.arguments()[0] == "DCC" and e.arguments()[1].split(" ", 1)[0] == "CHAT": + self.on_dccchat(c, e) + + def on_dccchat(self, c, e): + pass + + def start(self): + """Start the bot.""" + self._connect() + SimpleIRCClient.start(self) + + +class IRCDict: + """A dictionary suitable for storing IRC-related things. + + Dictionary keys a and b are considered equal if and only if + irc_lower(a) == irc_lower(b) + + Otherwise, it should behave exactly as a normal dictionary. + """ + + def __init__(self, dict=None): + self.data = {} + self.canon_keys = {} # Canonical keys + if dict is not None: + self.update(dict) + def __repr__(self): + return repr(self.data) + def __cmp__(self, dict): + if isinstance(dict, IRCDict): + return cmp(self.data, dict.data) + else: + return cmp(self.data, dict) + def __len__(self): + return len(self.data) + def __getitem__(self, key): + return self.data[self.canon_keys[irc_lower(key)]] + def __setitem__(self, key, item): + if key in self: + del self[key] + self.data[key] = item + self.canon_keys[irc_lower(key)] = key + def __delitem__(self, key): + ck = irc_lower(key) + del self.data[self.canon_keys[ck]] + del self.canon_keys[ck] + def __iter__(self): + return iter(self.data) + def __contains__(self, key): + return self.has_key(key) + def clear(self): + self.data.clear() + self.canon_keys.clear() + def copy(self): + if self.__class__ is UserDict: + return UserDict(self.data) + import copy + return copy.copy(self) + def keys(self): + return self.data.keys() + def items(self): + return self.data.items() + def values(self): + return self.data.values() + def has_key(self, key): + return irc_lower(key) in self.canon_keys + def update(self, dict): + for k, v in dict.items(): + self.data[k] = v + def get(self, key, failobj=None): + return self.data.get(key, failobj) + + +class Channel: + """A class for keeping information about an IRC channel. + + This class can be improved a lot. 
+ """ + + def __init__(self): + self.userdict = IRCDict() + self.operdict = IRCDict() + self.voiceddict = IRCDict() + self.modes = {} + + def users(self): + """Returns an unsorted list of the channel's users.""" + return self.userdict.keys() + + def opers(self): + """Returns an unsorted list of the channel's operators.""" + return self.operdict.keys() + + def voiced(self): + """Returns an unsorted list of the persons that have voice + mode set in the channel.""" + return self.voiceddict.keys() + + def has_user(self, nick): + """Check whether the channel has a user.""" + return nick in self.userdict + + def is_oper(self, nick): + """Check whether a user has operator status in the channel.""" + return nick in self.operdict + + def is_voiced(self, nick): + """Check whether a user has voice mode set in the channel.""" + return nick in self.voiceddict + + def add_user(self, nick): + self.userdict[nick] = 1 + + def remove_user(self, nick): + for d in self.userdict, self.operdict, self.voiceddict: + if nick in d: + del d[nick] + + def change_nick(self, before, after): + self.userdict[after] = 1 + del self.userdict[before] + if before in self.operdict: + self.operdict[after] = 1 + del self.operdict[before] + if before in self.voiceddict: + self.voiceddict[after] = 1 + del self.voiceddict[before] + + def set_mode(self, mode, value=None): + """Set mode on the channel. + + Arguments: + + mode -- The mode (a single-character string). + + value -- Value + """ + if mode == "o": + self.operdict[value] = 1 + elif mode == "v": + self.voiceddict[value] = 1 + else: + self.modes[mode] = value + + def clear_mode(self, mode, value=None): + """Clear mode on the channel. + + Arguments: + + mode -- The mode (a single-character string). + + value -- Value + """ + try: + if mode == "o": + del self.operdict[value] + elif mode == "v": + del self.voiceddict[value] + else: + del self.modes[mode] + except KeyError: + pass + + def has_mode(self, mode): + return mode in self.modes + + def is_moderated(self): + return self.has_mode("m") + + def is_secret(self): + return self.has_mode("s") + + def is_protected(self): + return self.has_mode("p") + + def has_topic_lock(self): + return self.has_mode("t") + + def is_invite_only(self): + return self.has_mode("i") + + def has_allow_external_messages(self): + return self.has_mode("n") + + def has_limit(self): + return self.has_mode("l") + + def limit(self): + if self.has_limit(): + return self.modes[l] + else: + return None + + def has_key(self): + return self.has_mode("k") + + def key(self): + if self.has_key(): + return self.modes["k"] + else: + return None diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py new file mode 100644 index 0000000..5f7141c --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/irc/irclib.py @@ -0,0 +1,1560 @@ +# Copyright (C) 1999--2002 Joel Rosdahl +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# keltus <keltus@users.sourceforge.net>
+#
+# $Id: irclib.py,v 1.47 2008/09/25 22:00:59 keltus Exp $
+
+"""irclib -- Internet Relay Chat (IRC) protocol client library.
+
+This library is intended to encapsulate the IRC protocol at a quite
+low level.  It provides an event-driven IRC client framework.  It has
+fairly thorough support for the basic IRC protocol, CTCP and DCC chat,
+but DCC file transfers are not yet supported.
+
+In order to understand how to make an IRC client, I'm afraid you more
+or less must understand the IRC specifications.  They are available
+here: [IRC specifications].
+
+The main features of the IRC client framework are:
+
+  * Abstraction of the IRC protocol.
+  * Handles multiple simultaneous IRC server connections.
+  * Handles server PONGing transparently.
+  * Messages to the IRC server are done by calling methods on an IRC
+    connection object.
+  * Messages from an IRC server trigger events, which can be caught
+    by event handlers.
+  * Reading from and writing to IRC server sockets are normally done
+    by an internal select() loop, but the select()ing may be done by
+    an external main loop.
+  * Functions can be registered to execute at specified times by the
+    event-loop.
+  * Decodes CTCP tagging correctly (hopefully); I haven't seen any
+    other IRC client implementation that handles the CTCP
+    specification subtleties.
+  * A kind of simple, single-server, object-oriented IRC client class
+    that dispatches events to instance methods is included.
+
+Current limitations:
+
+  * The IRC protocol shines through the abstraction a bit too much.
+  * Data is not written asynchronously to the server, i.e. the write()
+    may block if the TCP buffers are stuffed.
+  * There is no support for DCC file transfers.
+  * The author hasn't even read RFC 2810, 2811, 2812 and 2813.
+  * Like most projects, documentation is lacking...
+
+.. [IRC specifications] http://www.irchelp.org/irchelp/rfc/
+"""
+
+import bisect
+import re
+import select
+import socket
+import string
+import sys
+import time
+import types
+
+VERSION = 0, 4, 8
+DEBUG = 0
+
+# TODO
+# ----
+# (maybe) thread safety
+# (maybe) color parser convenience functions
+# documentation (including all event types)
+# (maybe) add awareness of different types of ircds
+# send data asynchronously to the server (and DCC connections)
+# (maybe) automatically close unused, passive DCC connections after a while
+
+# NOTES
+# -----
+# connection.quit() only sends QUIT to the server.
+# ERROR from the server triggers the error event and the disconnect event.
+# dropping of the connection triggers the disconnect event.
+
+class IRCError(Exception):
+    """Represents an IRC exception."""
+    pass
+
+
+class IRC:
+    """Class that handles one or several IRC server connections.
+
+    When an IRC object has been instantiated, it can be used to create
+    Connection objects that represent the IRC connections.  The
+    responsibility of the IRC object is to provide an event-driven
+    framework for the connections and to keep the connections alive.
+    It runs a select loop to poll each connection's TCP socket and
+    hands over the sockets with incoming data for processing by the
+    corresponding connection.
+ + The methods of most interest for an IRC client writer are server, + add_global_handler, remove_global_handler, execute_at, + execute_delayed, process_once and process_forever. + + Here is an example: + + irc = irclib.IRC() + server = irc.server() + server.connect(\"irc.some.where\", 6667, \"my_nickname\") + server.privmsg(\"a_nickname\", \"Hi there!\") + irc.process_forever() + + This will connect to the IRC server irc.some.where on port 6667 + using the nickname my_nickname and send the message \"Hi there!\" + to the nickname a_nickname. + """ + + def __init__(self, fn_to_add_socket=None, + fn_to_remove_socket=None, + fn_to_add_timeout=None): + """Constructor for IRC objects. + + Optional arguments are fn_to_add_socket, fn_to_remove_socket + and fn_to_add_timeout. The first two specify functions that + will be called with a socket object as argument when the IRC + object wants to be notified (or stop being notified) of data + coming on a new socket. When new data arrives, the method + process_data should be called. Similarly, fn_to_add_timeout + is called with a number of seconds (a floating point number) + as first argument when the IRC object wants to receive a + notification (by calling the process_timeout method). So, if + e.g. the argument is 42.17, the object wants the + process_timeout method to be called after 42 seconds and 170 + milliseconds. + + The three arguments mainly exist to be able to use an external + main loop (for example Tkinter's or PyGTK's main app loop) + instead of calling the process_forever method. + + An alternative is to just call ServerConnection.process_once() + once in a while. + """ + + if fn_to_add_socket and fn_to_remove_socket: + self.fn_to_add_socket = fn_to_add_socket + self.fn_to_remove_socket = fn_to_remove_socket + else: + self.fn_to_add_socket = None + self.fn_to_remove_socket = None + + self.fn_to_add_timeout = fn_to_add_timeout + self.connections = [] + self.handlers = {} + self.delayed_commands = [] # list of tuples in the format (time, function, arguments) + + self.add_global_handler("ping", _ping_ponger, -42) + + def server(self): + """Creates and returns a ServerConnection object.""" + + c = ServerConnection(self) + self.connections.append(c) + return c + + def process_data(self, sockets): + """Called when there is more data to read on connection sockets. + + Arguments: + + sockets -- A list of socket objects. + + See documentation for IRC.__init__. + """ + for s in sockets: + for c in self.connections: + if s == c._get_socket(): + c.process_data() + + def process_timeout(self): + """Called when a timeout notification is due. + + See documentation for IRC.__init__. + """ + t = time.time() + while self.delayed_commands: + if t >= self.delayed_commands[0][0]: + self.delayed_commands[0][1](*self.delayed_commands[0][2]) + del self.delayed_commands[0] + else: + break + + def process_once(self, timeout=0): + """Process data from connections once. + + Arguments: + + timeout -- How long the select() call should wait if no + data is available. + + This method should be called periodically to check and process + incoming data, if there are any. If that seems boring, look + at the process_forever method. 
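A hedged sketch of driving the library from your own loop instead of calling process_forever; the server details are invented:

    irc = irclib.IRC()
    server = irc.server()
    server.connect("irc.example.com", 6667, "my_nickname")
    while 1:
        irc.process_once(timeout=0.2)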
+ """ + sockets = map(lambda x: x._get_socket(), self.connections) + sockets = filter(lambda x: x != None, sockets) + if sockets: + (i, o, e) = select.select(sockets, [], [], timeout) + self.process_data(i) + else: + time.sleep(timeout) + self.process_timeout() + + def process_forever(self, timeout=0.2): + """Run an infinite loop, processing data from connections. + + This method repeatedly calls process_once. + + Arguments: + + timeout -- Parameter to pass to process_once. + """ + while 1: + self.process_once(timeout) + + def disconnect_all(self, message=""): + """Disconnects all connections.""" + for c in self.connections: + c.disconnect(message) + + def add_global_handler(self, event, handler, priority=0): + """Adds a global handler function for a specific event type. + + Arguments: + + event -- Event type (a string). Check the values of the + numeric_events dictionary in irclib.py for possible event + types. + + handler -- Callback function. + + priority -- A number (the lower number, the higher priority). + + The handler function is called whenever the specified event is + triggered in any of the connections. See documentation for + the Event class. + + The handler functions are called in priority order (lowest + number is highest priority). If a handler function returns + \"NO MORE\", no more handlers will be called. + """ + if not event in self.handlers: + self.handlers[event] = [] + bisect.insort(self.handlers[event], ((priority, handler))) + + def remove_global_handler(self, event, handler): + """Removes a global handler function. + + Arguments: + + event -- Event type (a string). + + handler -- Callback function. + + Returns 1 on success, otherwise 0. + """ + if not event in self.handlers: + return 0 + for h in self.handlers[event]: + if handler == h[1]: + self.handlers[event].remove(h) + return 1 + + def execute_at(self, at, function, arguments=()): + """Execute a function at a specified time. + + Arguments: + + at -- Execute at this time (standard \"time_t\" time). + + function -- Function to call. + + arguments -- Arguments to give the function. + """ + self.execute_delayed(at-time.time(), function, arguments) + + def execute_delayed(self, delay, function, arguments=()): + """Execute a function after a specified time. + + Arguments: + + delay -- How many seconds to wait. + + function -- Function to call. + + arguments -- Arguments to give the function. + """ + bisect.insort(self.delayed_commands, (delay+time.time(), function, arguments)) + if self.fn_to_add_timeout: + self.fn_to_add_timeout(delay) + + def dcc(self, dcctype="chat"): + """Creates and returns a DCCConnection object. + + Arguments: + + dcctype -- "chat" for DCC CHAT connections or "raw" for + DCC SEND (or other DCC types). If "chat", + incoming data will be split in newline-separated + chunks. If "raw", incoming data is not touched. + """ + c = DCCConnection(self, dcctype) + self.connections.append(c) + return c + + def _handle_event(self, connection, event): + """[Internal]""" + h = self.handlers + for handler in h.get("all_events", []) + h.get(event.eventtype(), []): + if handler[1](connection, event) == "NO MORE": + return + + def _remove_connection(self, connection): + """[Internal]""" + self.connections.remove(connection) + if self.fn_to_remove_socket: + self.fn_to_remove_socket(connection._get_socket()) + +_rfc_1459_command_regexp = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?") + +class Connection: + """Base class for IRC connections. + + Must be overridden. 
+ """ + def __init__(self, irclibobj): + self.irclibobj = irclibobj + + def _get_socket(): + raise IRCError, "Not overridden" + + ############################## + ### Convenience wrappers. + + def execute_at(self, at, function, arguments=()): + self.irclibobj.execute_at(at, function, arguments) + + def execute_delayed(self, delay, function, arguments=()): + self.irclibobj.execute_delayed(delay, function, arguments) + + +class ServerConnectionError(IRCError): + pass + +class ServerNotConnectedError(ServerConnectionError): + pass + + +# Huh!? Crrrrazy EFNet doesn't follow the RFC: their ircd seems to +# use \n as message separator! :P +_linesep_regexp = re.compile("\r?\n") + +class ServerConnection(Connection): + """This class represents an IRC server connection. + + ServerConnection objects are instantiated by calling the server + method on an IRC object. + """ + + def __init__(self, irclibobj): + Connection.__init__(self, irclibobj) + self.connected = 0 # Not connected yet. + self.socket = None + self.ssl = None + + def connect(self, server, port, nickname, password=None, username=None, + ircname=None, localaddress="", localport=0, ssl=False, ipv6=False): + """Connect/reconnect to a server. + + Arguments: + + server -- Server name. + + port -- Port number. + + nickname -- The nickname. + + password -- Password (if any). + + username -- The username. + + ircname -- The IRC name ("realname"). + + localaddress -- Bind the connection to a specific local IP address. + + localport -- Bind the connection to a specific local port. + + ssl -- Enable support for ssl. + + ipv6 -- Enable support for ipv6. + + This function can be called to reconnect a closed connection. + + Returns the ServerConnection object. + """ + if self.connected: + self.disconnect("Changing servers") + + self.previous_buffer = "" + self.handlers = {} + self.real_server_name = "" + self.real_nickname = nickname + self.server = server + self.port = port + self.nickname = nickname + self.username = username or nickname + self.ircname = ircname or nickname + self.password = password + self.localaddress = localaddress + self.localport = localport + self.localhost = socket.gethostname() + if ipv6: + self.socket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + else: + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + self.socket.bind((self.localaddress, self.localport)) + self.socket.connect((self.server, self.port)) + if ssl: + self.ssl = socket.ssl(self.socket) + except socket.error, x: + self.socket.close() + self.socket = None + raise ServerConnectionError, "Couldn't connect to socket: %s" % x + self.connected = 1 + if self.irclibobj.fn_to_add_socket: + self.irclibobj.fn_to_add_socket(self.socket) + + # Log on... + if self.password: + self.pass_(self.password) + self.nick(self.nickname) + self.user(self.username, self.ircname) + return self + + def close(self): + """Close the connection. + + This method closes the connection permanently; after it has + been called, the object is unusable. + """ + + self.disconnect("Closing object") + self.irclibobj._remove_connection(self) + + def _get_socket(self): + """[Internal]""" + return self.socket + + def get_server_name(self): + """Get the (real) server name. + + This method returns the (real) server name, or, more + specifically, what the server calls itself. + """ + + if self.real_server_name: + return self.real_server_name + else: + return "" + + def get_nickname(self): + """Get the (real) nick name. + + This method returns the (real) nickname. 
The library keeps + track of nick changes, so it might not be the nick name that + was passed to the connect() method. """ + + return self.real_nickname + + def process_data(self): + """[Internal]""" + + try: + if self.ssl: + new_data = self.ssl.read(2**14) + else: + new_data = self.socket.recv(2**14) + except socket.error, x: + # The server hung up. + self.disconnect("Connection reset by peer") + return + if not new_data: + # Read nothing: connection must be down. + self.disconnect("Connection reset by peer") + return + + lines = _linesep_regexp.split(self.previous_buffer + new_data) + + # Save the last, unfinished line. + self.previous_buffer = lines.pop() + + for line in lines: + if DEBUG: + print "FROM SERVER:", line + + if not line: + continue + + prefix = None + command = None + arguments = None + self._handle_event(Event("all_raw_messages", + self.get_server_name(), + None, + [line])) + + m = _rfc_1459_command_regexp.match(line) + if m.group("prefix"): + prefix = m.group("prefix") + if not self.real_server_name: + self.real_server_name = prefix + + if m.group("command"): + command = m.group("command").lower() + + if m.group("argument"): + a = m.group("argument").split(" :", 1) + arguments = a[0].split() + if len(a) == 2: + arguments.append(a[1]) + + # Translate numerics into more readable strings. + if command in numeric_events: + command = numeric_events[command] + + if command == "nick": + if nm_to_n(prefix) == self.real_nickname: + self.real_nickname = arguments[0] + elif command == "welcome": + # Record the nickname in case the client changed nick + # in a nicknameinuse callback. + self.real_nickname = arguments[0] + + if command in ["privmsg", "notice"]: + target, message = arguments[0], arguments[1] + messages = _ctcp_dequote(message) + + if command == "privmsg": + if is_channel(target): + command = "pubmsg" + else: + if is_channel(target): + command = "pubnotice" + else: + command = "privnotice" + + for m in messages: + if type(m) is types.TupleType: + if command in ["privmsg", "pubmsg"]: + command = "ctcp" + else: + command = "ctcpreply" + + m = list(m) + if DEBUG: + print "command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, m) + self._handle_event(Event(command, prefix, target, m)) + if command == "ctcp" and m[0] == "ACTION": + self._handle_event(Event("action", prefix, target, m[1:])) + else: + if DEBUG: + print "command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, [m]) + self._handle_event(Event(command, prefix, target, [m])) + else: + target = None + + if command == "quit": + arguments = [arguments[0]] + elif command == "ping": + target = arguments[0] + else: + target = arguments[0] + arguments = arguments[1:] + + if command == "mode": + if not is_channel(target): + command = "umode" + + if DEBUG: + print "command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, arguments) + self._handle_event(Event(command, prefix, target, arguments)) + + def _handle_event(self, event): + """[Internal]""" + self.irclibobj._handle_event(self, event) + if event.eventtype() in self.handlers: + for fn in self.handlers[event.eventtype()]: + fn(self, event) + + def is_connected(self): + """Return connection status. + + Returns true if connected, otherwise false. + """ + return self.connected + + def add_global_handler(self, *args): + """Add global handler. + + See documentation for IRC.add_global_handler. 
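+
+ A minimal sketch of a handler; the function name and channel
+ here are illustrative, not part of the library:
+
+ def on_welcome(connection, event):
+ connection.join("#example")
+
+ connection.add_global_handler("welcome", on_welcome)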
+ """ + self.irclibobj.add_global_handler(*args) + + def remove_global_handler(self, *args): + """Remove global handler. + + See documentation for IRC.remove_global_handler. + """ + self.irclibobj.remove_global_handler(*args) + + def action(self, target, action): + """Send a CTCP ACTION command.""" + self.ctcp("ACTION", target, action) + + def admin(self, server=""): + """Send an ADMIN command.""" + self.send_raw(" ".join(["ADMIN", server]).strip()) + + def ctcp(self, ctcptype, target, parameter=""): + """Send a CTCP command.""" + ctcptype = ctcptype.upper() + self.privmsg(target, "\001%s%s\001" % (ctcptype, parameter and (" " + parameter) or "")) + + def ctcp_reply(self, target, parameter): + """Send a CTCP REPLY command.""" + self.notice(target, "\001%s\001" % parameter) + + def disconnect(self, message=""): + """Hang up the connection. + + Arguments: + + message -- Quit message. + """ + if not self.connected: + return + + self.connected = 0 + + self.quit(message) + + try: + self.socket.close() + except socket.error, x: + pass + self.socket = None + self._handle_event(Event("disconnect", self.server, "", [message])) + + def globops(self, text): + """Send a GLOBOPS command.""" + self.send_raw("GLOBOPS :" + text) + + def info(self, server=""): + """Send an INFO command.""" + self.send_raw(" ".join(["INFO", server]).strip()) + + def invite(self, nick, channel): + """Send an INVITE command.""" + self.send_raw(" ".join(["INVITE", nick, channel]).strip()) + + def ison(self, nicks): + """Send an ISON command. + + Arguments: + + nicks -- List of nicks. + """ + self.send_raw("ISON " + " ".join(nicks)) + + def join(self, channel, key=""): + """Send a JOIN command.""" + self.send_raw("JOIN %s%s" % (channel, (key and (" " + key)))) + + def kick(self, channel, nick, comment=""): + """Send a KICK command.""" + self.send_raw("KICK %s %s%s" % (channel, nick, (comment and (" :" + comment)))) + + def links(self, remote_server="", server_mask=""): + """Send a LINKS command.""" + command = "LINKS" + if remote_server: + command = command + " " + remote_server + if server_mask: + command = command + " " + server_mask + self.send_raw(command) + + def list(self, channels=None, server=""): + """Send a LIST command.""" + command = "LIST" + if channels: + command = command + " " + ",".join(channels) + if server: + command = command + " " + server + self.send_raw(command) + + def lusers(self, server=""): + """Send a LUSERS command.""" + self.send_raw("LUSERS" + (server and (" " + server))) + + def mode(self, target, command): + """Send a MODE command.""" + self.send_raw("MODE %s %s" % (target, command)) + + def motd(self, server=""): + """Send an MOTD command.""" + self.send_raw("MOTD" + (server and (" " + server))) + + def names(self, channels=None): + """Send a NAMES command.""" + self.send_raw("NAMES" + (channels and (" " + ",".join(channels)) or "")) + + def nick(self, newnick): + """Send a NICK command.""" + self.send_raw("NICK " + newnick) + + def notice(self, target, text): + """Send a NOTICE command.""" + # Should limit len(text) here! 
+ self.send_raw("NOTICE %s :%s" % (target, text)) + + def oper(self, nick, password): + """Send an OPER command.""" + self.send_raw("OPER %s %s" % (nick, password)) + + def part(self, channels, message=""): + """Send a PART command.""" + if type(channels) == types.StringType: + self.send_raw("PART " + channels + (message and (" " + message))) + else: + self.send_raw("PART " + ",".join(channels) + (message and (" " + message))) + + def pass_(self, password): + """Send a PASS command.""" + self.send_raw("PASS " + password) + + def ping(self, target, target2=""): + """Send a PING command.""" + self.send_raw("PING %s%s" % (target, target2 and (" " + target2))) + + def pong(self, target, target2=""): + """Send a PONG command.""" + self.send_raw("PONG %s%s" % (target, target2 and (" " + target2))) + + def privmsg(self, target, text): + """Send a PRIVMSG command.""" + # Should limit len(text) here! + self.send_raw("PRIVMSG %s :%s" % (target, text)) + + def privmsg_many(self, targets, text): + """Send a PRIVMSG command to multiple targets.""" + # Should limit len(text) here! + self.send_raw("PRIVMSG %s :%s" % (",".join(targets), text)) + + def quit(self, message=""): + """Send a QUIT command.""" + # Note that many IRC servers don't use your QUIT message + # unless you've been connected for at least 5 minutes! + self.send_raw("QUIT" + (message and (" :" + message))) + + def send_raw(self, string): + """Send raw string to the server. + + The string will be padded with appropriate CR LF. + """ + if self.socket is None: + raise ServerNotConnectedError, "Not connected." + try: + if self.ssl: + self.ssl.write(string + "\r\n") + else: + self.socket.send(string + "\r\n") + if DEBUG: + print "TO SERVER:", string + except socket.error, x: + # Ouch! + self.disconnect("Connection reset by peer.") + + def squit(self, server, comment=""): + """Send an SQUIT command.""" + self.send_raw("SQUIT %s%s" % (server, comment and (" :" + comment))) + + def stats(self, statstype, server=""): + """Send a STATS command.""" + self.send_raw("STATS %s%s" % (statstype, server and (" " + server))) + + def time(self, server=""): + """Send a TIME command.""" + self.send_raw("TIME" + (server and (" " + server))) + + def topic(self, channel, new_topic=None): + """Send a TOPIC command.""" + if new_topic is None: + self.send_raw("TOPIC " + channel) + else: + self.send_raw("TOPIC %s :%s" % (channel, new_topic)) + + def trace(self, target=""): + """Send a TRACE command.""" + self.send_raw("TRACE" + (target and (" " + target))) + + def user(self, username, realname): + """Send a USER command.""" + self.send_raw("USER %s 0 * :%s" % (username, realname)) + + def userhost(self, nicks): + """Send a USERHOST command.""" + self.send_raw("USERHOST " + ",".join(nicks)) + + def users(self, server=""): + """Send a USERS command.""" + self.send_raw("USERS" + (server and (" " + server))) + + def version(self, server=""): + """Send a VERSION command.""" + self.send_raw("VERSION" + (server and (" " + server))) + + def wallops(self, text): + """Send a WALLOPS command.""" + self.send_raw("WALLOPS :" + text) + + def who(self, target="", op=""): + """Send a WHO command.""" + self.send_raw("WHO%s%s" % (target and (" " + target), op and (" o"))) + + def whois(self, targets): + """Send a WHOIS command.""" + self.send_raw("WHOIS " + ",".join(targets)) + + def whowas(self, nick, max="", server=""): + """Send a WHOWAS command.""" + self.send_raw("WHOWAS %s%s%s" % (nick, + max and (" " + max), + server and (" " + server))) + +class DCCConnectionError(IRCError): 
+ pass + + +class DCCConnection(Connection): + """This class represents a DCC connection. + + DCCConnection objects are instantiated by calling the dcc + method on an IRC object. + """ + def __init__(self, irclibobj, dcctype): + Connection.__init__(self, irclibobj) + self.connected = 0 + self.passive = 0 + self.dcctype = dcctype + self.peeraddress = None + self.peerport = None + + def connect(self, address, port): + """Connect/reconnect to a DCC peer. + + Arguments: + address -- Host/IP address of the peer. + + port -- The port number to connect to. + + Returns the DCCConnection object. + """ + self.peeraddress = socket.gethostbyname(address) + self.peerport = port + self.socket = None + self.previous_buffer = "" + self.handlers = {} + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.passive = 0 + try: + self.socket.connect((self.peeraddress, self.peerport)) + except socket.error, x: + raise DCCConnectionError, "Couldn't connect to socket: %s" % x + self.connected = 1 + if self.irclibobj.fn_to_add_socket: + self.irclibobj.fn_to_add_socket(self.socket) + return self + + def listen(self): + """Wait for a connection/reconnection from a DCC peer. + + Returns the DCCConnection object. + + The local IP address and port are available as + self.localaddress and self.localport. After connection from a + peer, the peer address and port are available as + self.peeraddress and self.peerport. + """ + self.previous_buffer = "" + self.handlers = {} + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.passive = 1 + try: + self.socket.bind((socket.gethostbyname(socket.gethostname()), 0)) + self.localaddress, self.localport = self.socket.getsockname() + self.socket.listen(10) + except socket.error, x: + raise DCCConnectionError, "Couldn't bind socket: %s" % x + return self + + def disconnect(self, message=""): + """Hang up the connection and close the object. + + Arguments: + + message -- Quit message. + """ + if not self.connected: + return + + self.connected = 0 + try: + self.socket.close() + except socket.error, x: + pass + self.socket = None + self.irclibobj._handle_event( + self, + Event("dcc_disconnect", self.peeraddress, "", [message])) + self.irclibobj._remove_connection(self) + + def process_data(self): + """[Internal]""" + + if self.passive and not self.connected: + conn, (self.peeraddress, self.peerport) = self.socket.accept() + self.socket.close() + self.socket = conn + self.connected = 1 + if DEBUG: + print "DCC connection from %s:%d" % ( + self.peeraddress, self.peerport) + self.irclibobj._handle_event( + self, + Event("dcc_connect", self.peeraddress, None, None)) + return + + try: + new_data = self.socket.recv(2**14) + except socket.error, x: + # The server hung up. + self.disconnect("Connection reset by peer") + return + if not new_data: + # Read nothing: connection must be down. + self.disconnect("Connection reset by peer") + return + + if self.dcctype == "chat": + # The specification says lines are terminated with LF, but + # it seems safer to handle CR LF terminations too. + chunks = _linesep_regexp.split(self.previous_buffer + new_data) + + # Save the last, unfinished line. + self.previous_buffer = chunks[-1] + if len(self.previous_buffer) > 2**14: + # Bad peer! Naughty peer! 
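+ # (That is, a peer that sends more than 16 KB without a line
+ # separator is assumed to be misbehaving, so we hang up.)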
+ self.disconnect()
+ return
+ chunks = chunks[:-1]
+ else:
+ chunks = [new_data]
+
+ command = "dccmsg"
+ prefix = self.peeraddress
+ target = None
+ for chunk in chunks:
+ if DEBUG:
+ print "FROM PEER:", chunk
+ arguments = [chunk]
+ if DEBUG:
+ print "command: %s, source: %s, target: %s, arguments: %s" % (
+ command, prefix, target, arguments)
+ self.irclibobj._handle_event(
+ self,
+ Event(command, prefix, target, arguments))
+
+ def _get_socket(self):
+ """[Internal]"""
+ return self.socket
+
+ def privmsg(self, string):
+ """Send data to DCC peer.
+
+ The string will be padded with appropriate LF if it's a DCC
+ CHAT session.
+ """
+ try:
+ self.socket.send(string)
+ if self.dcctype == "chat":
+ self.socket.send("\n")
+ if DEBUG:
+ print "TO PEER: %s\n" % string
+ except socket.error, x:
+ # Ouch!
+ self.disconnect("Connection reset by peer.")
+
+class SimpleIRCClient:
+ """A simple single-server IRC client class.
+
+ This is an example of an object-oriented wrapper of the IRC
+ framework. A real IRC client can be made by subclassing this
+ class and adding appropriate methods.
+
+ The method on_join will be called when a "join" event is created
+ (which is done when the server sends a JOIN message/command),
+ on_privmsg will be called for "privmsg" events, and so on. The
+ handler methods get two arguments: the connection object (same as
+ self.connection) and the event object.
+
+ Instance attributes that can be used by subclasses:
+
+ ircobj -- The IRC instance.
+
+ connection -- The ServerConnection instance.
+
+ dcc_connections -- A list of DCCConnection instances.
+ """
+ def __init__(self):
+ self.ircobj = IRC()
+ self.connection = self.ircobj.server()
+ self.dcc_connections = []
+ self.ircobj.add_global_handler("all_events", self._dispatcher, -10)
+ self.ircobj.add_global_handler("dcc_disconnect", self._dcc_disconnect, -10)
+
+ def _dispatcher(self, c, e):
+ """[Internal]"""
+ m = "on_" + e.eventtype()
+ if hasattr(self, m):
+ getattr(self, m)(c, e)
+
+ def _dcc_disconnect(self, c, e):
+ self.dcc_connections.remove(c)
+
+ def connect(self, server, port, nickname, password=None, username=None,
+ ircname=None, localaddress="", localport=0, ssl=False, ipv6=False):
+ """Connect/reconnect to a server.
+
+ Arguments:
+
+ server -- Server name.
+
+ port -- Port number.
+
+ nickname -- The nickname.
+
+ password -- Password (if any).
+
+ username -- The username.
+
+ ircname -- The IRC name.
+
+ localaddress -- Bind the connection to a specific local IP address.
+
+ localport -- Bind the connection to a specific local port.
+
+ ssl -- Enable support for ssl.
+
+ ipv6 -- Enable support for ipv6.
+
+ This function can be called to reconnect a closed connection.
+ """
+ self.connection.connect(server, port, nickname,
+ password, username, ircname,
+ localaddress, localport, ssl, ipv6)
+
+ def dcc_connect(self, address, port, dcctype="chat"):
+ """Connect to a DCC peer.
+
+ Arguments:
+
+ address -- IP address of the peer.
+
+ port -- Port to connect to.
+
+ Returns a DCCConnection instance.
+ """
+ dcc = self.ircobj.dcc(dcctype)
+ self.dcc_connections.append(dcc)
+ dcc.connect(address, port)
+ return dcc
+
+ def dcc_listen(self, dcctype="chat"):
+ """Listen for connections from a DCC peer.
+
+ Returns a DCCConnection instance.
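+
+ A sketch of offering a DCC CHAT session, given a SimpleIRCClient
+ instance "client" (the peer nick is hypothetical;
+ ip_quad_to_numstr is a helper defined later in this module):
+
+ dcc = client.dcc_listen()
+ client.connection.ctcp("DCC", "somenick",
+ "CHAT chat %s %d" % (ip_quad_to_numstr(dcc.localaddress),
+ dcc.localport))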
+ """ + dcc = self.ircobj.dcc(dcctype) + self.dcc_connections.append(dcc) + dcc.listen() + return dcc + + def start(self): + """Start the IRC client.""" + self.ircobj.process_forever() + + +class Event: + """Class representing an IRC event.""" + def __init__(self, eventtype, source, target, arguments=None): + """Constructor of Event objects. + + Arguments: + + eventtype -- A string describing the event. + + source -- The originator of the event (a nick mask or a server). + + target -- The target of the event (a nick or a channel). + + arguments -- Any event specific arguments. + """ + self._eventtype = eventtype + self._source = source + self._target = target + if arguments: + self._arguments = arguments + else: + self._arguments = [] + + def eventtype(self): + """Get the event type.""" + return self._eventtype + + def source(self): + """Get the event source.""" + return self._source + + def target(self): + """Get the event target.""" + return self._target + + def arguments(self): + """Get the event arguments.""" + return self._arguments + +_LOW_LEVEL_QUOTE = "\020" +_CTCP_LEVEL_QUOTE = "\134" +_CTCP_DELIMITER = "\001" + +_low_level_mapping = { + "0": "\000", + "n": "\n", + "r": "\r", + _LOW_LEVEL_QUOTE: _LOW_LEVEL_QUOTE +} + +_low_level_regexp = re.compile(_LOW_LEVEL_QUOTE + "(.)") + +def mask_matches(nick, mask): + """Check if a nick matches a mask. + + Returns true if the nick matches, otherwise false. + """ + nick = irc_lower(nick) + mask = irc_lower(mask) + mask = mask.replace("\\", "\\\\") + for ch in ".$|[](){}+": + mask = mask.replace(ch, "\\" + ch) + mask = mask.replace("?", ".") + mask = mask.replace("*", ".*") + r = re.compile(mask, re.IGNORECASE) + return r.match(nick) + +_special = "-[]\\`^{}" +nick_characters = string.ascii_letters + string.digits + _special +_ircstring_translation = string.maketrans(string.ascii_uppercase + "[]\\^", + string.ascii_lowercase + "{}|~") + +def irc_lower(s): + """Returns a lowercased string. + + The definition of lowercased comes from the IRC specification (RFC + 1459). + """ + return s.translate(_ircstring_translation) + +def _ctcp_dequote(message): + """[Internal] Dequote a message according to CTCP specifications. + + The function returns a list where each element can be either a + string (normal message) or a tuple of one or two strings (tagged + messages). If a tuple has only one element (ie is a singleton), + that element is the tag; otherwise the tuple has two elements: the + tag and the data. + + Arguments: + + message -- The message to be decoded. + """ + + def _low_level_replace(match_obj): + ch = match_obj.group(1) + + # If low_level_mapping doesn't have the character as key, we + # should just return the character. + return _low_level_mapping.get(ch, ch) + + if _LOW_LEVEL_QUOTE in message: + # Yup, there was a quote. Release the dequoter, man! + message = _low_level_regexp.sub(_low_level_replace, message) + + if _CTCP_DELIMITER not in message: + return [message] + else: + # Split it into parts. (Does any IRC client actually *use* + # CTCP stacking like this?) + chunks = message.split(_CTCP_DELIMITER) + + messages = [] + i = 0 + while i < len(chunks)-1: + # Add message if it's non-empty. + if len(chunks[i]) > 0: + messages.append(chunks[i]) + + if i < len(chunks)-2: + # Aye! CTCP tagged data ahead! + messages.append(tuple(chunks[i+1].split(" ", 1))) + + i = i + 2 + + if len(chunks) % 2 == 0: + # Hey, a lonely _CTCP_DELIMITER at the end! This means + # that the last chunk, including the delimiter, is a + # normal message! 
(This is according to the CTCP + # specification.) + messages.append(_CTCP_DELIMITER + chunks[-1]) + + return messages + +def is_channel(string): + """Check if a string is a channel name. + + Returns true if the argument is a channel name, otherwise false. + """ + return string and string[0] in "#&+!" + +def ip_numstr_to_quad(num): + """Convert an IP number as an integer given in ASCII + representation (e.g. '3232235521') to an IP address string + (e.g. '192.168.0.1').""" + n = long(num) + p = map(str, map(int, [n >> 24 & 0xFF, n >> 16 & 0xFF, + n >> 8 & 0xFF, n & 0xFF])) + return ".".join(p) + +def ip_quad_to_numstr(quad): + """Convert an IP address string (e.g. '192.168.0.1') to an IP + number as an integer given in ASCII representation + (e.g. '3232235521').""" + p = map(long, quad.split(".")) + s = str((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) + if s[-1] == "L": + s = s[:-1] + return s + +def nm_to_n(s): + """Get the nick part of a nickmask. + + (The source of an Event is a nickmask.) + """ + return s.split("!")[0] + +def nm_to_uh(s): + """Get the userhost part of a nickmask. + + (The source of an Event is a nickmask.) + """ + return s.split("!")[1] + +def nm_to_h(s): + """Get the host part of a nickmask. + + (The source of an Event is a nickmask.) + """ + return s.split("@")[1] + +def nm_to_u(s): + """Get the user part of a nickmask. + + (The source of an Event is a nickmask.) + """ + s = s.split("!")[1] + return s.split("@")[0] + +def parse_nick_modes(mode_string): + """Parse a nick mode string. + + The function returns a list of lists with three members: sign, + mode and argument. The sign is \"+\" or \"-\". The argument is + always None. + + Example: + + >>> irclib.parse_nick_modes(\"+ab-c\") + [['+', 'a', None], ['+', 'b', None], ['-', 'c', None]] + """ + + return _parse_modes(mode_string, "") + +def parse_channel_modes(mode_string): + """Parse a channel mode string. + + The function returns a list of lists with three members: sign, + mode and argument. The sign is \"+\" or \"-\". The argument is + None if mode isn't one of \"b\", \"k\", \"l\", \"v\" or \"o\". + + Example: + + >>> irclib.parse_channel_modes(\"+ab-c foo\") + [['+', 'a', None], ['+', 'b', 'foo'], ['-', 'c', None]] + """ + + return _parse_modes(mode_string, "bklvo") + +def _parse_modes(mode_string, unary_modes=""): + """[Internal]""" + modes = [] + arg_count = 0 + + # State variable. + sign = "" + + a = mode_string.split() + if len(a) == 0: + return [] + else: + mode_part, args = a[0], a[1:] + + if mode_part[0] not in "+-": + return [] + for ch in mode_part: + if ch in "+-": + sign = ch + elif ch == " ": + collecting_arguments = 1 + elif ch in unary_modes: + if len(args) >= arg_count + 1: + modes.append([sign, ch, args[arg_count]]) + arg_count = arg_count + 1 + else: + modes.append([sign, ch, None]) + else: + modes.append([sign, ch, None]) + return modes + +def _ping_ponger(connection, event): + """[Internal]""" + connection.pong(event.target()) + +# Numeric table mostly stolen from the Perl IRC module (Net::IRC). 
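+# The table below maps RFC 1459/2812 numeric reply codes to the event
+# names used by this library. For example, a server "001" reply is
+# dispatched to handlers registered for "welcome", e.g. (handler name
+# hypothetical):
+#
+# irc.add_global_handler("welcome", on_welcome)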
+numeric_events = { + "001": "welcome", + "002": "yourhost", + "003": "created", + "004": "myinfo", + "005": "featurelist", # XXX + "200": "tracelink", + "201": "traceconnecting", + "202": "tracehandshake", + "203": "traceunknown", + "204": "traceoperator", + "205": "traceuser", + "206": "traceserver", + "207": "traceservice", + "208": "tracenewtype", + "209": "traceclass", + "210": "tracereconnect", + "211": "statslinkinfo", + "212": "statscommands", + "213": "statscline", + "214": "statsnline", + "215": "statsiline", + "216": "statskline", + "217": "statsqline", + "218": "statsyline", + "219": "endofstats", + "221": "umodeis", + "231": "serviceinfo", + "232": "endofservices", + "233": "service", + "234": "servlist", + "235": "servlistend", + "241": "statslline", + "242": "statsuptime", + "243": "statsoline", + "244": "statshline", + "250": "luserconns", + "251": "luserclient", + "252": "luserop", + "253": "luserunknown", + "254": "luserchannels", + "255": "luserme", + "256": "adminme", + "257": "adminloc1", + "258": "adminloc2", + "259": "adminemail", + "261": "tracelog", + "262": "endoftrace", + "263": "tryagain", + "265": "n_local", + "266": "n_global", + "300": "none", + "301": "away", + "302": "userhost", + "303": "ison", + "305": "unaway", + "306": "nowaway", + "311": "whoisuser", + "312": "whoisserver", + "313": "whoisoperator", + "314": "whowasuser", + "315": "endofwho", + "316": "whoischanop", + "317": "whoisidle", + "318": "endofwhois", + "319": "whoischannels", + "321": "liststart", + "322": "list", + "323": "listend", + "324": "channelmodeis", + "329": "channelcreate", + "331": "notopic", + "332": "currenttopic", + "333": "topicinfo", + "341": "inviting", + "342": "summoning", + "346": "invitelist", + "347": "endofinvitelist", + "348": "exceptlist", + "349": "endofexceptlist", + "351": "version", + "352": "whoreply", + "353": "namreply", + "361": "killdone", + "362": "closing", + "363": "closeend", + "364": "links", + "365": "endoflinks", + "366": "endofnames", + "367": "banlist", + "368": "endofbanlist", + "369": "endofwhowas", + "371": "info", + "372": "motd", + "373": "infostart", + "374": "endofinfo", + "375": "motdstart", + "376": "endofmotd", + "377": "motd2", # 1997-10-16 -- tkil + "381": "youreoper", + "382": "rehashing", + "384": "myportis", + "391": "time", + "392": "usersstart", + "393": "users", + "394": "endofusers", + "395": "nousers", + "401": "nosuchnick", + "402": "nosuchserver", + "403": "nosuchchannel", + "404": "cannotsendtochan", + "405": "toomanychannels", + "406": "wasnosuchnick", + "407": "toomanytargets", + "409": "noorigin", + "411": "norecipient", + "412": "notexttosend", + "413": "notoplevel", + "414": "wildtoplevel", + "421": "unknowncommand", + "422": "nomotd", + "423": "noadmininfo", + "424": "fileerror", + "431": "nonicknamegiven", + "432": "erroneusnickname", # Thiss iz how its speld in thee RFC. + "433": "nicknameinuse", + "436": "nickcollision", + "437": "unavailresource", # "Nick temporally unavailable" + "441": "usernotinchannel", + "442": "notonchannel", + "443": "useronchannel", + "444": "nologin", + "445": "summondisabled", + "446": "usersdisabled", + "451": "notregistered", + "461": "needmoreparams", + "462": "alreadyregistered", + "463": "nopermforhost", + "464": "passwdmismatch", + "465": "yourebannedcreep", # I love this one... 
+ "466": "youwillbebanned", + "467": "keyset", + "471": "channelisfull", + "472": "unknownmode", + "473": "inviteonlychan", + "474": "bannedfromchan", + "475": "badchannelkey", + "476": "badchanmask", + "477": "nochanmodes", # "Channel doesn't support modes" + "478": "banlistfull", + "481": "noprivileges", + "482": "chanoprivsneeded", + "483": "cantkillserver", + "484": "restricted", # Connection is restricted + "485": "uniqopprivsneeded", + "491": "nooperhost", + "492": "noservicehost", + "501": "umodeunknownflag", + "502": "usersdontmatch", +} + +generated_events = [ + # Generated events + "dcc_connect", + "dcc_disconnect", + "dccmsg", + "disconnect", + "ctcp", + "ctcpreply", +] + +protocol_events = [ + # IRC protocol events + "error", + "join", + "kick", + "mode", + "part", + "ping", + "privmsg", + "privnotice", + "pubmsg", + "pubnotice", + "quit", + "invite", + "pong", +] + +all_events = generated_events + protocol_events + numeric_events.values() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py new file mode 100644 index 0000000..4bb20aa --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/__init__.py @@ -0,0 +1,140 @@ +__all__ = [ + 'AbstractBasicAuthHandler', + 'AbstractDigestAuthHandler', + 'BaseHandler', + 'Browser', + 'BrowserStateError', + 'CacheFTPHandler', + 'ContentTooShortError', + 'Cookie', + 'CookieJar', + 'CookiePolicy', + 'DefaultCookiePolicy', + 'DefaultFactory', + 'FTPHandler', + 'Factory', + 'FileCookieJar', + 'FileHandler', + 'FormNotFoundError', + 'FormsFactory', + 'HTTPBasicAuthHandler', + 'HTTPCookieProcessor', + 'HTTPDefaultErrorHandler', + 'HTTPDigestAuthHandler', + 'HTTPEquivProcessor', + 'HTTPError', + 'HTTPErrorProcessor', + 'HTTPHandler', + 'HTTPPasswordMgr', + 'HTTPPasswordMgrWithDefaultRealm', + 'HTTPProxyPasswordMgr', + 'HTTPRedirectDebugProcessor', + 'HTTPRedirectHandler', + 'HTTPRefererProcessor', + 'HTTPRefreshProcessor', + 'HTTPRequestUpgradeProcessor', + 'HTTPResponseDebugProcessor', + 'HTTPRobotRulesProcessor', + 'HTTPSClientCertMgr', + 'HTTPSHandler', + 'HeadParser', + 'History', + 'LWPCookieJar', + 'Link', + 'LinkNotFoundError', + 'LinksFactory', + 'LoadError', + 'MSIECookieJar', + 'MozillaCookieJar', + 'OpenerDirector', + 'OpenerFactory', + 'ParseError', + 'ProxyBasicAuthHandler', + 'ProxyDigestAuthHandler', + 'ProxyHandler', + 'Request', + 'ResponseUpgradeProcessor', + 'RobotExclusionError', + 'RobustFactory', + 'RobustFormsFactory', + 'RobustLinksFactory', + 'RobustTitleFactory', + 'SeekableProcessor', + 'SeekableResponseOpener', + 'TitleFactory', + 'URLError', + 'USE_BARE_EXCEPT', + 'UnknownHandler', + 'UserAgent', + 'UserAgentBase', + 'XHTMLCompatibleHeadParser', + '__version__', + 'build_opener', + 'install_opener', + 'lwp_cookie_str', + 'make_response', + 'request_host', + 'response_seek_wrapper', # XXX deprecate in public interface? 
+ 'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper()
+ 'str2time',
+ 'urlopen',
+ 'urlretrieve']
+
+import logging
+import sys
+
+from _mechanize import __version__
+
+# high-level stateful browser-style interface
+from _mechanize import \
+ Browser, History, \
+ BrowserStateError, LinkNotFoundError, FormNotFoundError
+
+# configurable URL-opener interface
+from _useragent import UserAgentBase, UserAgent
+from _html import \
+ ParseError, \
+ Link, \
+ Factory, DefaultFactory, RobustFactory, \
+ FormsFactory, LinksFactory, TitleFactory, \
+ RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
+
+# urllib2 work-alike interface (part from mechanize, part from urllib2)
+# This is a superset of the urllib2 interface.
+from _urllib2 import *
+
+# misc
+from _opener import ContentTooShortError, OpenerFactory, urlretrieve
+from _util import http2time as str2time
+from _response import \
+ response_seek_wrapper, seek_wrapped_response, make_response
+from _http import HeadParser
+try:
+ from _http import XHTMLCompatibleHeadParser
+except ImportError:
+ pass
+
+# cookies
+from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
+ CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \
+ effective_request_host
+from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
+# 2.4 raises SyntaxError due to generator / try/finally use
+if sys.version_info[:2] > (2,4):
+ try:
+ import sqlite3
+ except ImportError:
+ pass
+ else:
+ from _firefox3cookiejar import Firefox3CookieJar
+from _mozillacookiejar import MozillaCookieJar
+from _msiecookiejar import MSIECookieJar
+
+# If you hate the idea of turning bugs into warnings, do:
+# import mechanize; mechanize.USE_BARE_EXCEPT = False
+USE_BARE_EXCEPT = True
+
+logger = logging.getLogger("mechanize")
+if logger.level is logging.NOTSET:
+ logger.setLevel(logging.CRITICAL)
+del logger
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py
new file mode 100644
index 0000000..232f7d8
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_auth.py
@@ -0,0 +1,522 @@
+"""HTTP Authentication and Proxy support.
+
+All but HTTPProxyPasswordMgr come from Python 2.5.
+
+
+Copyright 2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import base64
+import copy
+import os
+import posixpath
+import random
+import re
+import time
+import urlparse
+
+try:
+ import hashlib
+except ImportError:
+ import md5
+ import sha
+ def sha1_digest(bytes):
+ return sha.new(bytes).hexdigest()
+ def md5_digest(bytes):
+ return md5.new(bytes).hexdigest()
+else:
+ def sha1_digest(bytes):
+ return hashlib.sha1(bytes).hexdigest()
+ def md5_digest(bytes):
+ return hashlib.md5(bytes).hexdigest()
+
+from urllib2 import BaseHandler, HTTPError, parse_keqv_list, parse_http_list
+from urllib import getproxies, unquote, splittype, splituser, splitpasswd, \
+ splitport
+
+
+def _parse_proxy(proxy):
+ """Return (scheme, user, password, host/port) given a URL or an authority.
+
+ If a URL is supplied, it must have an authority (host:port) component.
+ According to RFC 3986, having an authority component means the URL must + have two slashes after the scheme: + + >>> _parse_proxy('file:/ftp.example.com/') + Traceback (most recent call last): + ValueError: proxy URL with no authority: 'file:/ftp.example.com/' + + The first three items of the returned tuple may be None. + + Examples of authority parsing: + + >>> _parse_proxy('proxy.example.com') + (None, None, None, 'proxy.example.com') + >>> _parse_proxy('proxy.example.com:3128') + (None, None, None, 'proxy.example.com:3128') + + The authority component may optionally include userinfo (assumed to be + username:password): + + >>> _parse_proxy('joe:password@proxy.example.com') + (None, 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('joe:password@proxy.example.com:3128') + (None, 'joe', 'password', 'proxy.example.com:3128') + + Same examples, but with URLs instead: + + >>> _parse_proxy('http://proxy.example.com/') + ('http', None, None, 'proxy.example.com') + >>> _parse_proxy('http://proxy.example.com:3128/') + ('http', None, None, 'proxy.example.com:3128') + >>> _parse_proxy('http://joe:password@proxy.example.com/') + ('http', 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('http://joe:password@proxy.example.com:3128') + ('http', 'joe', 'password', 'proxy.example.com:3128') + + Everything after the authority is ignored: + + >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') + ('ftp', 'joe', 'password', 'proxy.example.com') + + Test for no trailing '/' case: + + >>> _parse_proxy('http://joe:password@proxy.example.com') + ('http', 'joe', 'password', 'proxy.example.com') + + """ + scheme, r_scheme = splittype(proxy) + if not r_scheme.startswith("/"): + # authority + scheme = None + authority = proxy + else: + # URL + if not r_scheme.startswith("//"): + raise ValueError("proxy URL with no authority: %r" % proxy) + # We have an authority, so for RFC 3986-compliant URLs (by ss 3. + # and 3.3.), path is empty or starts with '/' + end = r_scheme.find("/", 2) + if end == -1: + end = None + authority = r_scheme[2:end] + userinfo, hostport = splituser(authority) + if userinfo is not None: + user, password = splitpasswd(userinfo) + else: + user = password = None + return scheme, user, password, hostport + +class ProxyHandler(BaseHandler): + # Proxies must be in front + handler_order = 100 + + def __init__(self, proxies=None): + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'has_key'), "proxies must be a mapping" + self.proxies = proxies + for type, url in proxies.items(): + setattr(self, '%s_open' % type, + lambda r, proxy=url, type=type, meth=self.proxy_open: \ + meth(r, proxy, type)) + + def proxy_open(self, req, proxy, type): + orig_type = req.get_type() + proxy_type, user, password, hostport = _parse_proxy(proxy) + if proxy_type is None: + proxy_type = orig_type + if user and password: + user_pass = '%s:%s' % (unquote(user), unquote(password)) + creds = base64.encodestring(user_pass).strip() + req.add_header('Proxy-authorization', 'Basic ' + creds) + hostport = unquote(hostport) + req.set_proxy(hostport, proxy_type) + if orig_type == proxy_type: + # let other handlers take care of it + return None + else: + # need to start over, because the other handlers don't + # grok the proxy's URL type + # e.g. 
if we have a constructor arg proxies like so: + # {'http': 'ftp://proxy.example.com'}, we may end up turning + # a request for http://acme.example.com/a into one for + # ftp://proxy.example.com/a + return self.parent.open(req) + +class HTTPPasswordMgr: + + def __init__(self): + self.passwd = {} + + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if isinstance(uri, basestring): + uri = [uri] + if not realm in self.passwd: + self.passwd[realm] = {} + for default_port in True, False: + reduced_uri = tuple( + [self.reduce_uri(u, default_port) for u in uri]) + self.passwd[realm][reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + domains = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uris, authinfo in domains.iteritems(): + for uri in uris: + if self.is_suburi(uri, reduced_authuri): + return authinfo + return None, None + + def reduce_uri(self, uri, default_port=True): + """Accept authority or URI and extract only the authority and path.""" + # note HTTP URLs do not have a userinfo component + parts = urlparse.urlsplit(uri) + if parts[1]: + # URI + scheme = parts[0] + authority = parts[1] + path = parts[2] or '/' + else: + # host or host:port + scheme = None + authority = uri + path = '/' + host, port = splitport(authority) + if default_port and port is None and scheme is not None: + dport = {"http": 80, + "https": 443, + }.get(scheme) + if dport is not None: + authority = "%s:%d" % (host, dport) + return authority, path + + def is_suburi(self, base, test): + """Check if test is below base in a URI tree + + Both args must be URIs in reduced form. + """ + if base == test: + return True + if base[0] != test[0]: + return False + common = posixpath.commonprefix((base[1], test[1])) + if len(common) == len(base[1]): + return True + return False + + +class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): + + def find_user_password(self, realm, authuri): + user, password = HTTPPasswordMgr.find_user_password(self, realm, + authuri) + if user is not None: + return user, password + return HTTPPasswordMgr.find_user_password(self, None, authuri) + + +class AbstractBasicAuthHandler: + + rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re.I) + + # XXX there can actually be multiple auth-schemes in a + # www-authenticate header. 
should probably be a lot more careful
+ # in parsing them to extract multiple alternatives
+
+ def __init__(self, password_mgr=None):
+ if password_mgr is None:
+ password_mgr = HTTPPasswordMgr()
+ self.passwd = password_mgr
+ self.add_password = self.passwd.add_password
+
+ def http_error_auth_reqed(self, authreq, host, req, headers):
+ # host may be an authority (without userinfo) or a URL with an
+ # authority
+ # XXX could be multiple headers
+ authreq = headers.get(authreq, None)
+ if authreq:
+ mo = AbstractBasicAuthHandler.rx.search(authreq)
+ if mo:
+ scheme, realm = mo.groups()
+ if scheme.lower() == 'basic':
+ return self.retry_http_basic_auth(host, req, realm)
+
+ def retry_http_basic_auth(self, host, req, realm):
+ user, pw = self.passwd.find_user_password(realm, host)
+ if pw is not None:
+ raw = "%s:%s" % (user, pw)
+ auth = 'Basic %s' % base64.encodestring(raw).strip()
+ if req.headers.get(self.auth_header, None) == auth:
+ return None
+ newreq = copy.copy(req)
+ newreq.add_header(self.auth_header, auth)
+ newreq.visit = False
+ return self.parent.open(newreq)
+ else:
+ return None
+
+
+class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Authorization'
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ url = req.get_full_url()
+ return self.http_error_auth_reqed('www-authenticate',
+ url, req, headers)
+
+
+class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Proxy-authorization'
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ # http_error_auth_reqed requires that there is no userinfo component in
+ # authority. Assume there isn't one, since urllib2 does not (and
+ # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+ # userinfo.
+ authority = req.get_host()
+ return self.http_error_auth_reqed('proxy-authenticate',
+ authority, req, headers)
+
+
+def randombytes(n):
+ """Return n random bytes."""
+ # Use /dev/urandom if it is available. Fall back to random module
+ # if not. It might be worthwhile to extend this function to use
+ # other platform-specific mechanisms for getting random bytes.
+ if os.path.exists("/dev/urandom"):
+ f = open("/dev/urandom")
+ s = f.read(n)
+ f.close()
+ return s
+ else:
+ L = [chr(random.randrange(0, 256)) for i in range(n)]
+ return "".join(L)
+
+class AbstractDigestAuthHandler:
+ # Digest authentication is specified in RFC 2617.
+
+ # XXX The client does not inspect the Authentication-Info header
+ # in a successful response.
+
+ # XXX It should be possible to test this implementation against
+ # a mock server that just generates a static set of challenges.
+
+ # XXX qop="auth-int" support is shaky
+
+ def __init__(self, passwd=None):
+ if passwd is None:
+ passwd = HTTPPasswordMgr()
+ self.passwd = passwd
+ self.add_password = self.passwd.add_password
+ self.retried = 0
+ self.nonce_count = 0
+
+ def reset_retry_count(self):
+ self.retried = 0
+
+ def http_error_auth_reqed(self, auth_header, host, req, headers):
+ authreq = headers.get(auth_header, None)
+ if self.retried > 5:
+ # Don't fail endlessly - if we failed once, we'll probably
+ # fail a second time. Hm. Unless the Password Manager is
+ # prompting for the information. Crap.
This isn't great + # but it's better than the current 'repeat until recursion + # depth exceeded' approach <wink> + raise HTTPError(req.get_full_url(), 401, "digest auth failed", + headers, None) + else: + self.retried += 1 + if authreq: + scheme = authreq.split()[0] + if scheme.lower() == 'digest': + return self.retry_http_digest_auth(req, authreq) + + def retry_http_digest_auth(self, req, auth): + token, challenge = auth.split(' ', 1) + chal = parse_keqv_list(parse_http_list(challenge)) + auth = self.get_authorization(req, chal) + if auth: + auth_val = 'Digest %s' % auth + if req.headers.get(self.auth_header, None) == auth_val: + return None + newreq = copy.copy(req) + newreq.add_unredirected_header(self.auth_header, auth_val) + newreq.visit = False + return self.parent.open(newreq) + + def get_cnonce(self, nonce): + # The cnonce-value is an opaque + # quoted string value provided by the client and used by both client + # and server to avoid chosen plaintext attacks, to provide mutual + # authentication, and to provide some message integrity protection. + # This isn't a fabulous effort, but it's probably Good Enough. + dig = sha1_digest("%s:%s:%s:%s" % (self.nonce_count, nonce, + time.ctime(), randombytes(8))) + return dig[:16] + + def get_authorization(self, req, chal): + try: + realm = chal['realm'] + nonce = chal['nonce'] + qop = chal.get('qop') + algorithm = chal.get('algorithm', 'MD5') + # mod_digest doesn't send an opaque, even though it isn't + # supposed to be optional + opaque = chal.get('opaque', None) + except KeyError: + return None + + H, KD = self.get_algorithm_impls(algorithm) + if H is None: + return None + + user, pw = self.passwd.find_user_password(realm, req.get_full_url()) + if user is None: + return None + + # XXX not implemented yet + if req.has_data(): + entdig = self.get_entity_digest(req.get_data(), chal) + else: + entdig = None + + A1 = "%s:%s:%s" % (user, realm, pw) + A2 = "%s:%s" % (req.get_method(), + # XXX selector: what about proxies and full urls + req.get_selector()) + if qop == 'auth': + self.nonce_count += 1 + ncvalue = '%08x' % self.nonce_count + cnonce = self.get_cnonce(nonce) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) + respdig = KD(H(A1), noncebit) + elif qop is None: + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + else: + # XXX handle auth-int. + pass + + # XXX should the partial digests be encoded too? + + base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ + 'response="%s"' % (user, realm, nonce, req.get_selector(), + respdig) + if opaque: + base += ', opaque="%s"' % opaque + if entdig: + base += ', digest="%s"' % entdig + base += ', algorithm="%s"' % algorithm + if qop: + base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + return base + + def get_algorithm_impls(self, algorithm): + # lambdas assume digest modules are imported at the top level + if algorithm == 'MD5': + H = md5_digest + elif algorithm == 'SHA': + H = sha1_digest + # XXX MD5-sess + KD = lambda s, d: H("%s:%s" % (s, d)) + return H, KD + + def get_entity_digest(self, data, chal): + # XXX not implemented yet + return None + + +class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + """An authentication protocol defined by RFC 2069 + + Digest authentication improves on basic authentication because it + does not transmit passwords in the clear. 
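+
+ A sketch of typical use; the URL, realm and credentials are
+ hypothetical:
+
+ import mechanize
+ handler = mechanize.HTTPDigestAuthHandler()
+ handler.add_password("realm", "http://example.com/", "joe", "secret")
+ opener = mechanize.build_opener(handler)
+ opener.open("http://example.com/protected")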
+ """ + + auth_header = 'Authorization' + handler_order = 490 + + def http_error_401(self, req, fp, code, msg, headers): + host = urlparse.urlparse(req.get_full_url())[1] + retry = self.http_error_auth_reqed('www-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + + +class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + + auth_header = 'Proxy-Authorization' + handler_order = 490 + + def http_error_407(self, req, fp, code, msg, headers): + host = req.get_host() + retry = self.http_error_auth_reqed('proxy-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + + +# XXX ugly implementation, should probably not bother deriving +class HTTPProxyPasswordMgr(HTTPPasswordMgr): + # has default realm and host/port + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if uri is None or isinstance(uri, basestring): + uris = [uri] + else: + uris = uri + passwd_by_domain = self.passwd.setdefault(realm, {}) + for uri in uris: + for default_port in True, False: + reduced_uri = self.reduce_uri(uri, default_port) + passwd_by_domain[reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + attempts = [(realm, authuri), (None, authuri)] + # bleh, want default realm to take precedence over default + # URI/authority, hence this outer loop + for default_uri in False, True: + for realm, authuri in attempts: + authinfo_by_domain = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uri, authinfo in authinfo_by_domain.iteritems(): + if uri is None and not default_uri: + continue + if self.is_suburi(uri, reduced_authuri): + return authinfo + user, password = None, None + + if user is not None: + break + return user, password + + def reduce_uri(self, uri, default_port=True): + if uri is None: + return None + return HTTPPasswordMgr.reduce_uri(self, uri, default_port) + + def is_suburi(self, base, test): + if base is None: + # default to the proxy's host/port + hostport, path = test + base = (hostport, "/") + return HTTPPasswordMgr.is_suburi(self, base, test) + + +class HTTPSClientCertMgr(HTTPPasswordMgr): + # implementation inheritance: this is not a proper subclass + def add_key_cert(self, uri, key_file, cert_file): + self.add_password(None, uri, key_file, cert_file) + def find_key_cert(self, authuri): + return HTTPPasswordMgr.find_user_password(self, None, authuri) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py new file mode 100644 index 0000000..268b305 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_beautifulsoup.py @@ -0,0 +1,1080 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +v2.1.1 +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance +into a tree representation. It provides methods and Pythonic idioms +that make it easy to search and modify the tree. + +A well-formed XML/HTML document will yield a well-formed data +structure. An ill-formed XML/HTML document will yield a +correspondingly ill-formed data structure. If your document is only +locally well-formed, you can use this library to find and process the +well-formed part of it. The BeautifulSoup class has heuristics for +obtaining a sensible parse tree in the face of common HTML errors. 
+ +Beautiful Soup has no external dependencies. It works with Python 2.2 +and up. + +Beautiful Soup defines classes for four different parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. + + * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML + that trips up BeautifulSoup. + + * BeautifulSOAP, for making it easier to parse XML documents that use + lots of subelements containing a single string, where you'd prefer + they put that string into an attribute (such as SOAP messages). + +You can subclass BeautifulStoneSoup or BeautifulSoup to create a +parsing strategy specific to an XML schema or a particular bizarre +HTML document. Typically your subclass would just override +SELF_CLOSING_TAGS and/or NESTABLE_TAGS. +""" #" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "2.1.1" +__date__ = "$Date: 2004/10/18 00:14:20 $" +__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" +__license__ = "PSF" + +from sgmllib import SGMLParser, SGMLParseError +import types +import re +import sgmllib + +#This code makes Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') + +class NullType(object): + + """Similar to NoneType with a corresponding singleton instance + 'Null' that, unlike None, accepts any message and returns itself. + + Examples: + >>> Null("send", "a", "message")("and one more", + ... "and what you get still") is Null + True + """ + + def __new__(cls): return Null + def __call__(self, *args, **kwargs): return Null +## def __getstate__(self, *args): return Null + def __getattr__(self, attr): return Null + def __getitem__(self, item): return Null + def __setattr__(self, attr, value): pass + def __setitem__(self, item, value): pass + def __len__(self): return 0 + # FIXME: is this a python bug? otherwise ``for x in Null: pass`` + # never terminates... 
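+ # (Without an explicit __iter__, iteration falls back to
+ # __getitem__, which returns Null for every index and never
+ # raises IndexError, so a for-loop would spin forever.)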
+ def __iter__(self): return iter([])
+ def __contains__(self, item): return False
+ def __repr__(self): return "Null"
+Null = object.__new__(NullType)
+
+class PageElement:
+ """Contains the navigational information for some part of the page
+ (either a tag or a piece of text)"""
+
+ def setup(self, parent=Null, previous=Null):
+ """Sets up the initial relations between this element and
+ other elements."""
+ self.parent = parent
+ self.previous = previous
+ self.next = Null
+ self.previousSibling = Null
+ self.nextSibling = Null
+ if self.parent and self.parent.contents:
+ self.previousSibling = self.parent.contents[-1]
+ self.previousSibling.nextSibling = self
+
+ def findNext(self, name=None, attrs={}, text=None):
+ """Returns the first item that matches the given criteria and
+ appears after this Tag in the document."""
+ return self._first(self.fetchNext, name, attrs, text)
+ firstNext = findNext
+
+ def fetchNext(self, name=None, attrs={}, text=None, limit=None):
+ """Returns all items that match the given criteria and appear
+ after this Tag in the document."""
+ return self._fetch(name, attrs, text, limit, self.nextGenerator)
+
+ def findNextSibling(self, name=None, attrs={}, text=None):
+ """Returns the closest sibling to this Tag that matches the
+ given criteria and appears after this Tag in the document."""
+ return self._first(self.fetchNextSiblings, name, attrs, text)
+ firstNextSibling = findNextSibling
+
+ def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None):
+ """Returns the siblings of this Tag that match the given
+ criteria and appear after this Tag in the document."""
+ return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator)
+
+ def findPrevious(self, name=None, attrs={}, text=None):
+ """Returns the first item that matches the given criteria and
+ appears before this Tag in the document."""
+ return self._first(self.fetchPrevious, name, attrs, text)
+
+ def fetchPrevious(self, name=None, attrs={}, text=None, limit=None):
+ """Returns all items that match the given criteria and appear
+ before this Tag in the document."""
+ return self._fetch(name, attrs, text, limit, self.previousGenerator)
+ firstPrevious = findPrevious
+
+ def findPreviousSibling(self, name=None, attrs={}, text=None):
+ """Returns the closest sibling to this Tag that matches the
+ given criteria and appears before this Tag in the document."""
+ return self._first(self.fetchPreviousSiblings, name, attrs, text)
+ firstPreviousSibling = findPreviousSibling
+
+ def fetchPreviousSiblings(self, name=None, attrs={}, text=None,
+ limit=None):
+ """Returns the siblings of this Tag that match the given
+ criteria and appear before this Tag in the document."""
+ return self._fetch(name, attrs, text, limit,
+ self.previousSiblingGenerator)
+
+ def findParent(self, name=None, attrs={}):
+ """Returns the closest parent of this Tag that matches the given
+ criteria."""
+ r = Null
+ l = self.fetchParents(name, attrs, 1)
+ if l:
+ r = l[0]
+ return r
+ firstParent = findParent
+
+ def fetchParents(self, name=None, attrs={}, limit=None):
+ """Returns the parents of this Tag that match the given
+ criteria."""
+ return self._fetch(name, attrs, None, limit, self.parentGenerator)
+
+ #These methods do the real heavy lifting.
+
+ def _first(self, method, name, attrs, text):
+ r = Null
+ l = method(name, attrs, text, 1)
+ if l:
+ r = l[0]
+ return r
+
+ def _fetch(self, name, attrs, text, limit, generator):
+ "Iterates over a generator looking for things that match."
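+ # (Illustrative call: tag._fetch("a", {"href": re.compile("^http")},
+ # None, None, tag.nextGenerator) gathers every later <a> tag whose
+ # href value starts with "http".)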
+ if not hasattr(attrs, 'items'): + attrs = {'class' : attrs} + + results = [] + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + found = None + if isinstance(i, Tag): + if not text: + if not name or self._matches(i, name): + match = True + for attr, matchAgainst in attrs.items(): + check = i.get(attr) + if not self._matches(check, matchAgainst): + match = False + break + if match: + found = i + elif text: + if self._matches(i, text): + found = i + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #Generators that can be used to navigate starting from both + #NavigableTexts and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + def _matches(self, chunk, howToMatch): + #print 'looking for %s in %s' % (howToMatch, chunk) + # + # If given a list of items, return true if the list contains a + # text element that matches. + if isList(chunk) and not isinstance(chunk, Tag): + for tag in chunk: + if isinstance(tag, NavigableText) and self._matches(tag, howToMatch): + return True + return False + if callable(howToMatch): + return howToMatch(chunk) + if isinstance(chunk, Tag): + #Custom match methods take the tag as an argument, but all other + #ways of matching match the tag name as a string + chunk = chunk.name + #Now we know that chunk is a string + if not isinstance(chunk, basestring): + chunk = str(chunk) + if hasattr(howToMatch, 'match'): + # It's a regexp object. + return howToMatch.search(chunk) + if isList(howToMatch): + return chunk in howToMatch + if hasattr(howToMatch, 'items'): + return howToMatch.has_key(chunk) + #It's just a string + return str(howToMatch) == chunk + +class NavigableText(PageElement): + + def __getattr__(self, attr): + "For backwards compatibility, text.string gives you text" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + +class NavigableString(str, NavigableText): + pass + +class NavigableUnicodeString(unicode, NavigableText): + pass + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def __init__(self, name, attrs=None, parent=Null, previous=Null): + "Basic constructor." + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + fetch() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.fetch, args, kwargs) + + def __getattr__(self, tag): + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.first(tag[:-3]) + elif tag.find('__') != 0: + return self.first(tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self): + """Renders this tag as a string.""" + return str(self) + + def __unicode__(self): + return self.__str__(1) + + def __str__(self, needUnicode=None, showStructureIndent=None): + """Returns a string or Unicode representation of this tag and + its contents. 
+ + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + attrs = [] + if self.attrs: + for key, val in self.attrs: + attrs.append('%s="%s"' % (key, val)) + close = '' + closeTag = '' + if self.isSelfClosing(): + close = ' /' + else: + closeTag = '</%s>' % self.name + indentIncrement = None + if showStructureIndent != None: + indentIncrement = showStructureIndent + if not self.hidden: + indentIncrement += 1 + contents = self.renderContents(indentIncrement, needUnicode=needUnicode) + if showStructureIndent: + space = '\n%s' % (' ' * showStructureIndent) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if showStructureIndent: + s.append(space) + s.append('<%s%s%s>' % (self.name, attributeString, close)) + s.append(contents) + if closeTag and showStructureIndent != None: + s.append(space) + s.append(closeTag) + s = ''.join(s) + isUnicode = type(s) == types.UnicodeType + if needUnicode and not isUnicode: + s = unicode(s) + elif isUnicode and needUnicode==False: + s = str(s) + return s + + def prettify(self, needUnicode=None): + return self.__str__(needUnicode, showStructureIndent=True) + + def renderContents(self, showStructureIndent=None, needUnicode=None): + """Renders the contents of this tag as a (possibly Unicode) + string.""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType: + text = unicode(c) + elif isinstance(c, Tag): + s.append(c.__str__(needUnicode, showStructureIndent)) + elif needUnicode: + text = unicode(c) + else: + text = str(c) + if text: + if showStructureIndent != None: + if text[-1] == '\n': + text = text[:-1] + s.append(text) + return ''.join(s) + + #Soup methods + + def firstText(self, text, recursive=True): + """Convenience method to retrieve the first piece of text matching the + given criteria. 'text' can be a string, a regular expression object, + a callable that takes a string and returns whether or not the + string 'matches', etc.""" + return self.first(recursive=recursive, text=text) + + def fetchText(self, text, recursive=True, limit=None): + """Convenience method to retrieve all pieces of text matching the + given criteria. 'text' can be a string, a regular expression object, + a callable that takes a string and returns whether or not the + string 'matches', etc.""" + return self.fetch(recursive=recursive, text=text, limit=limit) + + def first(self, name=None, attrs={}, recursive=True, text=None): + """Return only the first child of this + Tag matching the given criteria.""" + r = Null + l = self.fetch(name, attrs, recursive, text, 1) + if l: + r = l[0] + return r + findChild = first + + def fetch(self, name=None, attrs={}, recursive=True, text=None, + limit=None): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._fetch(name, attrs, text, limit, generator) + fetchChildren = fetch + + #Utility methods + + def isSelfClosing(self): + """Returns true iff this is a self-closing tag as defined in the HTML + standard. + + TODO: This is specific to BeautifulSoup and its subclasses, but it's + used by __str__""" + return self.name in BeautifulSoup.SELF_CLOSING_TAGS + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.contents.append(tag) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + for i in range(0, len(self.contents)): + yield self.contents[i] + raise StopIteration + + def recursiveChildGenerator(self): + stack = [(self, 0)] + while stack: + tag, start = stack.pop() + if isinstance(tag, Tag): + for i in range(start, len(tag.contents)): + a = tag.contents[i] + yield a + if isinstance(a, Tag) and tag.contents: + if i < len(tag.contents) - 1: + stack.append((tag, i+1)) + stack.append((a, 0)) + break + raise StopIteration + + +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return hasattr(l, '__iter__') \ + or (type(l) in (types.ListType, types.TupleType)) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out + of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and fetch code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "<foo><bar></foo>" actually means + "<foo><bar></bar></foo>". + + [Another possible explanation is "<foo><bar /></foo>", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + + #As a public service we will by default silently replace MS smart quotes + #and similar characters with their HTML or ASCII equivalents. 
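+    #The table below maps the cp1252-style 0x80-0x9f range to HTML
+    #entities or rough ASCII stand-ins; the third PARSER_MASSAGE rule
+    #applies it with a regular expression before text reaches sgmllib.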
+ MS_CHARS = { '\x80' : '€', + '\x81' : ' ', + '\x82' : '‚', + '\x83' : 'ƒ', + '\x84' : '„', + '\x85' : '…', + '\x86' : '†', + '\x87' : '‡', + '\x88' : '⁁', + '\x89' : '%', + '\x8A' : 'Š', + '\x8B' : '<', + '\x8C' : 'Œ', + '\x8D' : '?', + '\x8E' : 'Z', + '\x8F' : '?', + '\x90' : '?', + '\x91' : '‘', + '\x92' : '’', + '\x93' : '“', + '\x94' : '”', + '\x95' : '•', + '\x96' : '–', + '\x97' : '—', + '\x98' : '˜', + '\x99' : '™', + '\x9a' : 'š', + '\x9b' : '>', + '\x9c' : 'œ', + '\x9d' : '?', + '\x9e' : 'z', + '\x9f' : 'Ÿ',} + + PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda(x):x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda(x):'<!' + x.group(1) + '>'), + (re.compile("([\x80-\x9f])"), + lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1))) + ] + + ROOT_TAG_NAME = '[document]' + + def __init__(self, text=None, avoidParserProblems=True, + initialTextIsEverything=True): + """Initialize this as the 'root tag' and feed in any text to + the parser. + + NOTE about avoidParserProblems: sgmllib will process most bad + HTML, and BeautifulSoup has tricks for dealing with some HTML + that kills sgmllib, but Beautiful Soup can nonetheless choke + or lose data if your data uses self-closing tags or + declarations incorrectly. By default, Beautiful Soup sanitizes + its input to avoid the vast majority of these problems. The + problems are relatively rare, even in bad HTML, so feel free + to pass in False to avoidParserProblems if they don't apply to + you, and you'll get better performance. The only reason I have + this turned on by default is so I don't get so many tech + support questions. + + The two most common instances of invalid HTML that will choke + sgmllib are fixed by the default parser massage techniques: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + Tag.__init__(self, self.ROOT_TAG_NAME) + if avoidParserProblems \ + and not isList(avoidParserProblems): + avoidParserProblems = self.PARSER_MASSAGE + self.avoidParserProblems = avoidParserProblems + SGMLParser.__init__(self) + self.quoteStack = [] + self.hidden = 1 + self.reset() + if hasattr(text, 'read'): + #It's a file-type object. 
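+            #(an open file, StringIO, and so on), so slurp the whole
+            #thing into memory before feeding it to the parser.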
+            text = text.read()
+        if text:
+            self.feed(text)
+        if initialTextIsEverything:
+            self.done()
+
+    def __getattr__(self, methodName):
+        """This method routes method call requests to either the SGMLParser
+        superclass or the Tag superclass, depending on the method name."""
+        if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
+               or methodName.find('do_') == 0:
+            return SGMLParser.__getattr__(self, methodName)
+        elif methodName.find('__') != 0:
+            return Tag.__getattr__(self, methodName)
+        else:
+            raise AttributeError
+
+    def feed(self, text):
+        if self.avoidParserProblems:
+            for fix, m in self.avoidParserProblems:
+                text = fix.sub(m, text)
+        SGMLParser.feed(self, text)
+
+    def done(self):
+        """Called when you're done parsing, so that the unclosed tags can be
+        correctly processed."""
+        self.endData() #NEW
+        while self.currentTag.name != self.ROOT_TAG_NAME:
+            self.popTag()
+
+    def reset(self):
+        SGMLParser.reset(self)
+        self.currentData = []
+        self.currentTag = None
+        self.tagStack = []
+        self.pushTag(self)
+
+    def popTag(self):
+        tag = self.tagStack.pop()
+        # Tags with just one string-owning child get the child as a
+        # 'string' property, so that soup.tag.string is shorthand for
+        # soup.tag.contents[0]
+        if len(self.currentTag.contents) == 1 and \
+           isinstance(self.currentTag.contents[0], NavigableText):
+            self.currentTag.string = self.currentTag.contents[0]
+
+        #print "Pop", tag.name
+        if self.tagStack:
+            self.currentTag = self.tagStack[-1]
+        return self.currentTag
+
+    def pushTag(self, tag):
+        #print "Push", tag.name
+        if self.currentTag:
+            self.currentTag.append(tag)
+        self.tagStack.append(tag)
+        self.currentTag = self.tagStack[-1]
+
+    def endData(self):
+        currentData = ''.join(self.currentData)
+        if currentData:
+            if not currentData.strip():
+                if '\n' in currentData:
+                    currentData = '\n'
+                else:
+                    currentData = ' '
+            c = NavigableString
+            if type(currentData) == types.UnicodeType:
+                c = NavigableUnicodeString
+            o = c(currentData)
+            o.setup(self.currentTag, self.previous)
+            if self.previous:
+                self.previous.next = o
+            self.previous = o
+            self.currentTag.contents.append(o)
+        self.currentData = []
+
+    def _popToTag(self, name, inclusivePop=True):
+        """Pops the tag stack up to and including the most recent
+        instance of the given tag. If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+         <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
+         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers != None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers == None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s" % name
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+        if not name in self.SELF_CLOSING_TAGS and not selfClosing:
+            self._smartPop(name)
+        tag = Tag(name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or name in self.SELF_CLOSING_TAGS:
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+
+    def unknown_endtag(self, name):
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def handle_pi(self, text):
+        "Propagate processing instructions right through."
+        self.handle_data("<?%s>" % text)
+
+    def handle_comment(self, text):
+        "Propagate comments right through."
+        self.handle_data("<!--%s-->" % text)
+
+    def handle_charref(self, ref):
+        "Propagate char refs right through."
+        self.handle_data('&#%s;' % ref)
+
+    def handle_entityref(self, ref):
+        "Propagate entity refs right through."
+        self.handle_data('&%s;' % ref)
+
+    def handle_decl(self, data):
+        "Propagate DOCTYPEs and the like right through."
+        self.handle_data('<!%s>' % data)
+
+    def parse_declaration(self, i):
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as regular data."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            self.handle_data(self.rawdata[i+9:k])
+            j = k+3
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (ie. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurrence of
+      a <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurrence
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+       but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table></tr><tr>Blah
+
+      Differing assumptions about tag nesting rules are a major source
+      of problems with the BeautifulSoup class. If BeautifulSoup is not
+      treating as nestable a tag your page author treats as nestable,
+      try ICantBelieveItsBeautifulSoup before writing your own
+      subclass."""
+
+    SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta',
+                                           'spacer', 'link', 'frame', 'base'])
+
+    QUOTE_TAGS = {'script': None}
+
+    #According to the HTML standard, each of these inline tags can
+    #contain another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
+                            'center']
+
+    #According to the HTML standard, these block tags can contain
+    #another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']
+
+    #Lists can contain other lists, but there are restrictions.
+    NESTABLE_LIST_TAGS = { 'ol' : [],
+                           'ul' : [],
+                           'li' : ['ul', 'ol'],
+                           'dl' : [],
+                           'dd' : ['dl'],
+                           'dt' : ['dl'] }
+
+    #Tables can contain other tables, but there are restrictions.
+    NESTABLE_TABLE_TAGS = {'table' : [],
+                           'tr' : ['table', 'tbody', 'tfoot', 'thead'],
+                           'td' : ['tr'],
+                           'th' : ['tr'],
+                           }
+
+    NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']
+
+    #If one of these tags is encountered, all tags up to the next tag of
+    #this type are popped.
+    RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
+                                     NON_NESTABLE_BLOCK_TAGS,
+                                     NESTABLE_LIST_TAGS,
+                                     NESTABLE_TABLE_TAGS)
+
+    NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
+                                NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
+
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+    """The BeautifulSoup class is oriented towards skipping over
+    common HTML errors like unclosed tags. However, sometimes it makes
+    errors of its own. For instance, consider this fragment:
+
+     <b>Foo<b>Bar</b></b>
+
+    This is perfectly valid (if bizarre) HTML. However, the
+    BeautifulSoup class will implicitly close the first b tag when it
+    encounters the second 'b'. It will think the author wrote
+    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+    there's no real-world reason to bold something that's already
+    bold. When it encounters '</b></b>' it will close two more 'b'
+    tags, for a grand total of three tags closed instead of two. This
+    can throw off the rest of your document structure. The same is
+    true of a number of other tags, listed below.
+
+    It's much more common for someone to forget to close (eg.) a 'b'
+    tag than to actually use nested 'b' tags, and the BeautifulSoup
+    class handles the common case. This class handles the
+    not-so-common case: where you can't believe someone wrote what
+    they did, but it's valid HTML and BeautifulSoup screwed up by
+    assuming it wouldn't be.
+
+    If this doesn't do what you need, try subclassing this class or
+    BeautifulSoup, and providing your own list of NESTABLE_TAGS."""
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+      'big']
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
+
+    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class BeautifulSOAP(BeautifulStoneSoup):
+    """This class will push a tag with only a single string child into
+    the tag's parent as an attribute. The attribute's name is the tag
+    name, and the value is the string child. An example should give
+    the flavor of the change:
+
+    <foo><bar>baz</bar></foo>
+     =>
+    <foo bar="baz"><bar>baz</bar></foo>
+
+    You can then access fooTag['bar'] instead of fooTag.barTag.string.
+
+    This is, of course, useful for scraping structures that tend to
+    use subelements instead of attributes, such as SOAP messages. Note
+    that it modifies its input, so don't print the modified version
+    out.
+
+    I'm not sure how many people really want to use this class; let me
+    know if you do. Mainly I like the name."""
+
+    def popTag(self):
+        if len(self.tagStack) > 1:
+            tag = self.tagStack[-1]
+            parent = self.tagStack[-2]
+            parent._getAttrMap()
+            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
+                isinstance(tag.contents[0], NavigableText) and
+                not parent.attrMap.has_key(tag.name)):
+                parent[tag.name] = tag.contents[0]
+        BeautifulStoneSoup.popTag(self)
+
+#Enterprise class names! It has come to our attention that some people
+#think the names of the Beautiful Soup parser classes are too silly
+#and "unprofessional" for use in enterprise screen-scraping. We feel
+#your pain! For such-minded folk, the Beautiful Soup Consortium And
+#All-Night Kosher Bakery recommends renaming this file to
+#"RobustParser.py" (or, in cases of extreme enterprisitude,
+#"RobustParserBeanInterface.class") and using the following
+#enterprise-friendly class aliases:
+class RobustXMLParser(BeautifulStoneSoup):
+    pass
+class RobustHTMLParser(BeautifulSoup):
+    pass
+class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
+    pass
+class SimplifyingSOAPParser(BeautifulSOAP):
+    pass
+
+###
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+    import sys
+    soup = BeautifulStoneSoup(sys.stdin.read())
+    print soup.prettify()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py
new file mode 100644
index 0000000..caeb82b
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_clientcookie.py
@@ -0,0 +1,1707 @@
+"""HTTP cookie handling for web clients.
+ +This module originally developed from my port of Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +Comments to John J Lee <jjl@pobox.com>. + + +Copyright 2002-2006 John J Lee <jjl@pobox.com> +Copyright 1997-1999 Gisle Aas (original libwww-perl code) +Copyright 2002-2003 Johnny Lee (original MSIE Perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import sys, re, copy, time, urllib, types, logging +try: + import threading + _threading = threading; del threading +except ImportError: + import dummy_threading + _threading = dummy_threading; del dummy_threading + +MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " + "instance initialised with one)") +DEFAULT_HTTP_PORT = "80" + +from _headersutil import split_header_words, parse_ns_headers +from _util import isstringlike +import _rfc3986 + +debug = logging.getLogger("mechanize.cookies").debug + + +def reraise_unmasked_exceptions(unmasked=()): + # There are a few catch-all except: statements in this module, for + # catching input that's bad in unexpected ways. + # This function re-raises some exceptions we don't want to trap. + import mechanize, warnings + if not mechanize.USE_BARE_EXCEPT: + raise + unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError) + etype = sys.exc_info()[0] + if issubclass(etype, unmasked): + raise + # swallowed an exception + import traceback, StringIO + f = StringIO.StringIO() + traceback.print_exc(None, f) + msg = f.getvalue() + warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2) + + +IPV4_RE = re.compile(r"\.\d+$") +def is_HDN(text): + """Return True if text is a host domain name.""" + # XXX + # This may well be wrong. Which RFC is HDN defined in, if any (for + # the purposes of RFC 2965)? + # For the current implementation, what about IPv6? Remember to look + # at other uses of IPV4_RE also, if change this. + return not (IPV4_RE.search(text) or + text == "" or + text[0] == "." or text[-1] == ".") + +def domain_match(A, B): + """Return True if domain A domain-matches domain B, according to RFC 2965. + + A and B may be host domain names or IP addresses. + + RFC 2965, section 1: + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons SHALL + be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + """ + # Note that, if A or B are IP addresses, the only relevant part of the + # definition of the domain-match algorithm is the direct string-compare. 
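+    # Illustrative results (added for clarity; not doctests from the
+    # original source):
+    #   domain_match("x.y.com", ".y.com")   -> True
+    #   domain_match("y.com", ".y.com")     -> False
+    #   domain_match("192.168.1.2", ".1.2") -> False (IPs must compare equal)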
+ A = A.lower() + B = B.lower() + if A == B: + return True + if not is_HDN(A): + return False + i = A.rfind(B) + has_form_nb = not (i == -1 or i == 0) + return ( + has_form_nb and + B.startswith(".") and + is_HDN(B[1:]) + ) + +def liberal_is_HDN(text): + """Return True if text is a sort-of-like a host domain name. + + For accepting/blocking domains. + + """ + return not IPV4_RE.search(text) + +def user_domain_match(A, B): + """For blocking/accepting domains. + + A and B may be host domain names or IP addresses. + + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$") +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.get_full_url() + host = _rfc3986.urlsplit(url)[1] + if host is None: + host = request.get_header("Host", "") + # remove port, if present + return cut_port_re.sub("", host, 1) + +def request_host_lc(request): + return request_host(request).lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name).""" + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def eff_request_host_lc(request): + req_host, erhn = eff_request_host(request) + return req_host.lower(), erhn.lower() + +def effective_request_host(request): + """Return the effective request-host, as defined by RFC 2965.""" + return eff_request_host(request)[1] + +def request_path(request): + """request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + path, query, frag = _rfc3986.urlsplit(url)[2:] + path = escape_path(path) + req_path = _rfc3986.urlunsplit((None, None, path, query, frag)) + if not req_path.startswith("/"): + req_path = "/"+req_path + return req_path + +def request_port(request): + host = request.get_host() + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +def request_is_unverifiable(request): + try: + return request.is_unverifiable() + except AttributeError: + if hasattr(request, "unverifiable"): + return request.unverifiable + else: + raise + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). 
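+# For illustration (added, not an original doctest):
+#   escape_path("/a b/%7e") -> "/a%20b/%7E"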
+HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + if isinstance(path, types.UnicodeType): + path = path.encode("utf-8") + path = urllib.quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. + + >>> reach("www.acme.com") + '.acme.com' + >>> reach("acme.com") + 'acme.com' + >>> reach("acme.local") + '.local' + + """ + i = h.find(".") + if i >= 0: + #a = h[:i] # this line is only here to show what a is + b = h[i+1:] + i = b.find(".") + if is_HDN(h) and (i >= 0 or b == "local"): + return "."+b + return h + +def is_third_party(request): + """ + + RFC 2965, section 3.3.6: + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. + + """ + req_host = request_host_lc(request) + # the origin request's request-host was stuffed into request by + # _urllib2_support.AbstractHTTPHandler + return not domain_match(req_host, reach(request.origin_req_host)) + + +class Cookie: + """HTTP Cookie. + + This class represents both Netscape and RFC 2965 cookies. + + This is deliberately a very simple class. It just holds attributes. It's + possible to construct Cookie instances that don't comply with the cookie + standards. CookieJar.make_cookies is the factory function for Cookie + objects -- it deals with cookie parsing, supplying defaults, and + normalising to the representation used in this class. CookiePolicy is + responsible for checking them to see whether they should be accepted from + and returned to the server. + + version: integer; + name: string; + value: string (may be None); + port: string; None indicates no attribute was supplied (eg. "Port", rather + than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list + string (eg. "80,8080") + port_specified: boolean; true if a value was supplied with the Port + cookie-attribute + domain: string; + domain_specified: boolean; true if Domain was explicitly set + domain_initial_dot: boolean; true if Domain as set in HTTP header by server + started with a dot (yes, this really is necessary!) 
+    path: string;
+    path_specified: boolean; true if Path was explicitly set
+    secure: boolean; true if should only be returned over secure connection
+    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
+     this value from the Max-Age attribute)
+    discard: boolean, true if this is a session cookie; (if no expires value,
+     this should be true)
+    comment: string;
+    comment_url: string;
+    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
+     Set-Cookie2:) header, but had a version cookie-attribute of 1
+    rest: mapping of other cookie-attributes
+
+    Note that the port may be present in the headers, but unspecified ("Port"
+    rather than "Port=80", for example); if this is the case, port is None.
+
+    """
+
+    def __init__(self, version, name, value,
+                 port, port_specified,
+                 domain, domain_specified, domain_initial_dot,
+                 path, path_specified,
+                 secure,
+                 expires,
+                 discard,
+                 comment,
+                 comment_url,
+                 rest,
+                 rfc2109=False,
+                 ):
+
+        if version is not None: version = int(version)
+        if expires is not None: expires = int(expires)
+        if port is None and port_specified is True:
+            raise ValueError("if port is None, port_specified must be false")
+
+        self.version = version
+        self.name = name
+        self.value = value
+        self.port = port
+        self.port_specified = port_specified
+        # normalise case, as per RFC 2965 section 3.3.3
+        self.domain = domain.lower()
+        self.domain_specified = domain_specified
+        # Sigh. We need to know whether the domain given in the
+        # cookie-attribute had an initial dot, in order to follow RFC 2965
+        # (as clarified in draft errata). Needed for the returned $Domain
+        # value.
+        self.domain_initial_dot = domain_initial_dot
+        self.path = path
+        self.path_specified = path_specified
+        self.secure = secure
+        self.expires = expires
+        self.discard = discard
+        self.comment = comment
+        self.comment_url = comment_url
+        self.rfc2109 = rfc2109
+
+        self._rest = copy.copy(rest)
+
+    def has_nonstandard_attr(self, name):
+        return self._rest.has_key(name)
+    def get_nonstandard_attr(self, name, default=None):
+        return self._rest.get(name, default)
+    def set_nonstandard_attr(self, name, value):
+        self._rest[name] = value
+    def nonstandard_attr_keys(self):
+        return self._rest.keys()
+
+    def is_expired(self, now=None):
+        if now is None: now = time.time()
+        return (self.expires is not None) and (self.expires <= now)
+
+    def __str__(self):
+        if self.port is None: p = ""
+        else: p = ":"+self.port
+        limit = self.domain + p + self.path
+        if self.value is not None:
+            namevalue = "%s=%s" % (self.name, self.value)
+        else:
+            namevalue = self.name
+        return "<Cookie %s for %s>" % (namevalue, limit)
+
+    def __repr__(self):
+        args = []
+        for name in ["version", "name", "value",
+                     "port", "port_specified",
+                     "domain", "domain_specified", "domain_initial_dot",
+                     "path", "path_specified",
+                     "secure", "expires", "discard", "comment", "comment_url",
+                     ]:
+            attr = getattr(self, name)
+            args.append("%s=%s" % (name, repr(attr)))
+        args.append("rest=%s" % repr(self._rest))
+        args.append("rfc2109=%s" % repr(self.rfc2109))
+        return "Cookie(%s)" % ", ".join(args)
+
+
+class CookiePolicy:
+    """Defines which cookies get accepted from and returned to server.
+
+    May also modify cookies.
+
+    The subclass DefaultCookiePolicy defines the standard rules for Netscape
+    and RFC 2965 cookies -- override that if you want a customised policy.
+
+    As well as implementing set_ok and return_ok, implementations of this
+    interface must also supply the following attributes, indicating which
+    protocols should be used, and how. These can be read and set at any time,
+    though whether that makes complete sense from the protocol point of view
+    is doubtful.
+
+    Public attributes:
+
+    netscape: implement netscape protocol
+    rfc2965: implement RFC 2965 protocol
+    rfc2109_as_netscape:
+       WARNING: This argument will change or go away if it is not accepted
+       into the Python standard library in this form!
+       If true, treat RFC 2109 cookies as though they were Netscape cookies.
+       The default is for this attribute to be None, which means treat 2109
+       cookies as RFC 2965 cookies unless RFC 2965 handling is switched off
+       (which it is, by default), and as Netscape cookies otherwise.
+    hide_cookie2: don't add Cookie2 header to requests (the presence of
+     this header indicates to the server that we understand RFC 2965
+     cookies)
+
+    """
+    def set_ok(self, cookie, request):
+        """Return true if (and only if) cookie should be accepted from server.
+
+        Currently, pre-expired cookies never get this far -- the CookieJar
+        class deletes such cookies itself.
+
+        cookie: mechanize.Cookie object
+        request: object implementing the interface defined by
+         CookieJar.extract_cookies.__doc__
+
+        """
+        raise NotImplementedError()
+
+    def return_ok(self, cookie, request):
+        """Return true if (and only if) cookie should be returned to server.
+
+        cookie: mechanize.Cookie object
+        request: object implementing the interface defined by
+         CookieJar.add_cookie_header.__doc__
+
+        """
+        raise NotImplementedError()
+
+    def domain_return_ok(self, domain, request):
+        """Return false if cookies should not be returned, given cookie domain.
+
+        This is here as an optimization, to remove the need for checking every
+        cookie with a particular domain (which may involve reading many files).
+        The default implementations of domain_return_ok and path_return_ok
+        (return True) leave all the work to return_ok.
+
+        If domain_return_ok returns true for the cookie domain, path_return_ok
+        is called for the cookie path. Otherwise, path_return_ok and return_ok
+        are never called for that cookie domain. If path_return_ok returns
+        true, return_ok is called with the Cookie object itself for a full
+        check. Otherwise, return_ok is never called for that cookie path.
+
+        Note that domain_return_ok is called for every *cookie* domain, not
+        just for the *request* domain. For example, the function might be
+        called with both ".acme.com" and "www.acme.com" if the request domain
+        is "www.acme.com". The same goes for path_return_ok.
+
+        For argument documentation, see the docstring for return_ok.
+
+        """
+        return True
+
+    def path_return_ok(self, path, request):
+        """Return false if cookies should not be returned, given cookie path.
+
+        See the docstring for domain_return_ok.
+
+        """
+        return True
+
+
+class DefaultCookiePolicy(CookiePolicy):
+    """Implements the standard rules for accepting and returning cookies.
+
+    Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
+    switched off by default.
+
+    The easiest way to provide your own policy is to override this class and
+    call its methods in your overridden implementations before adding your own
+    additional checks.
+
+    import mechanize
+    class MyCookiePolicy(mechanize.DefaultCookiePolicy):
+        def set_ok(self, cookie, request):
+            if not mechanize.DefaultCookiePolicy.set_ok(
+                self, cookie, request):
+                return False
+            if i_dont_want_to_store_this_cookie():
+                return False
+            return True
+
+    In addition to the features required to implement the CookiePolicy
+    interface, this class allows you to block and allow domains from setting
+    and receiving cookies. There are also some strictness switches that allow
+    you to tighten up the rather loose Netscape protocol rules a little bit
+    (at the cost of blocking some benign cookies).
+
+    A domain blacklist and whitelist are provided (both off by default). Only
+    domains not in the blacklist and present in the whitelist (if the whitelist
+    is active) participate in cookie setting and returning. Use the
+    blocked_domains constructor argument, and blocked_domains and
+    set_blocked_domains methods (and the corresponding argument and methods for
+    allowed_domains). If you set a whitelist, you can turn it off again by
+    setting it to None.
+
+    Domains in block or allow lists that do not start with a dot must
+    string-compare equal. For example, "acme.com" matches a blacklist entry of
+    "acme.com", but "www.acme.com" does not. Domains that do start with a dot
+    are matched by more specific domains too. For example, both "www.acme.com"
+    and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
+    not). IP addresses are an exception, and must match exactly. For example,
+    if blocked_domains contains "192.168.1.2" and ".168.1.2", 192.168.1.2 is
+    blocked, but 193.168.1.2 is not.
+
+    Additional Public Attributes:
+
+    General strictness switches
+
+    strict_domain: don't allow sites to set two-component domains with
+     country-code top-level domains like .co.uk, .gov.uk, .co.nz, etc.
+     This is far from perfect and isn't guaranteed to work!
+
+    RFC 2965 protocol strictness switches
+
+    strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
+     transactions (usually, an unverifiable transaction is one resulting from
+     a redirect or an image hosted on another site); if this is false, cookies
+     are NEVER blocked on the basis of verifiability
+
+    Netscape protocol strictness switches
+
+    strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
+     even to Netscape cookies
+    strict_ns_domain: flags indicating how strict to be with domain-matching
+     rules for Netscape cookies:
+     DomainStrictNoDots: when setting cookies, host prefix must not contain a
+      dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
+      www.foo contains a dot)
+     DomainStrictNonDomain: cookies that did not explicitly specify a Domain
+      cookie-attribute can only be returned to a domain that string-compares
+      equal to the domain that set the cookie (eg. rockets.acme.com won't
+      be returned cookies from acme.com that had no Domain cookie-attribute)
+     DomainRFC2965Match: when setting cookies, require a full RFC 2965
+      domain-match
+     DomainLiberal and DomainStrict are the most useful combinations of the
+      above flags, for convenience
+    strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
+     have names starting with '$'
+    strict_ns_set_path: don't allow setting cookies whose path doesn't
+     path-match request URI
+
+    """
+
+    DomainStrictNoDots = 1
+    DomainStrictNonDomain = 2
+    DomainRFC2965Match = 4
+
+    DomainLiberal = 0
+    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
+
+    def __init__(self,
+                 blocked_domains=None, allowed_domains=None,
+                 netscape=True, rfc2965=False,
+                 # WARNING: this argument will change or go away if it is not
+                 # accepted into the Python standard library in this form!
+                 # default, ie. treat 2109 as netscape iff not rfc2965
+                 rfc2109_as_netscape=None,
+                 hide_cookie2=False,
+                 strict_domain=False,
+                 strict_rfc2965_unverifiable=True,
+                 strict_ns_unverifiable=False,
+                 strict_ns_domain=DomainLiberal,
+                 strict_ns_set_initial_dollar=False,
+                 strict_ns_set_path=False,
+                 ):
+        """
+        Constructor arguments should be used as keyword arguments only.
+
+        blocked_domains: sequence of domain names that we never accept cookies
+         from, nor return cookies to
+        allowed_domains: if not None, this is a sequence of the only domains
+         for which we accept and return cookies
+
+        For other arguments, see CookiePolicy.__doc__ and
+        DefaultCookiePolicy.__doc__.
+
+        """
+        self.netscape = netscape
+        self.rfc2965 = rfc2965
+        self.rfc2109_as_netscape = rfc2109_as_netscape
+        self.hide_cookie2 = hide_cookie2
+        self.strict_domain = strict_domain
+        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
+        self.strict_ns_unverifiable = strict_ns_unverifiable
+        self.strict_ns_domain = strict_ns_domain
+        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
+        self.strict_ns_set_path = strict_ns_set_path
+
+        if blocked_domains is not None:
+            self._blocked_domains = tuple(blocked_domains)
+        else:
+            self._blocked_domains = ()
+
+        if allowed_domains is not None:
+            allowed_domains = tuple(allowed_domains)
+        self._allowed_domains = allowed_domains
+
+    def blocked_domains(self):
+        """Return the sequence of blocked domains (as a tuple)."""
+        return self._blocked_domains
+    def set_blocked_domains(self, blocked_domains):
+        """Set the sequence of blocked domains."""
+        self._blocked_domains = tuple(blocked_domains)
+
+    def is_blocked(self, domain):
+        for blocked_domain in self._blocked_domains:
+            if user_domain_match(domain, blocked_domain):
+                return True
+        return False
+
+    def allowed_domains(self):
+        """Return None, or the sequence of allowed domains (as a tuple)."""
+        return self._allowed_domains
+    def set_allowed_domains(self, allowed_domains):
+        """Set the sequence of allowed domains, or None."""
+        if allowed_domains is not None:
+            allowed_domains = tuple(allowed_domains)
+        self._allowed_domains = allowed_domains
+
+    def is_not_allowed(self, domain):
+        if self._allowed_domains is None:
+            return False
+        for allowed_domain in self._allowed_domains:
+            if user_domain_match(domain, allowed_domain):
+                return False
+        return True
+
+    def set_ok(self, cookie, request):
+        """
+        If you override set_ok, be sure to call this method. If it returns
+        false, so should your subclass (assuming your subclass wants to be more
+        strict about which cookies to accept).
+ + """ + debug(" - checking cookie %s", cookie) + + assert cookie.name is not None + + for n in "version", "verifiability", "name", "path", "domain", "port": + fn_name = "set_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + + return True + + def set_ok_version(self, cookie, request): + if cookie.version is None: + # Version is always set to 0 by parse_ns_headers if it's a Netscape + # cookie, so this must be an invalid RFC 2965 cookie. + debug(" Set-Cookie2 without version attribute (%s)", cookie) + return False + if cookie.version > 0 and not self.rfc2965: + debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + debug(" Netscape cookies are switched off") + return False + return True + + def set_ok_verifiability(self, cookie, request): + if request_is_unverifiable(request) and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + debug(" third-party RFC 2965 cookie during " + "unverifiable transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + debug(" third-party Netscape cookie during " + "unverifiable transaction") + return False + return True + + def set_ok_name(self, cookie, request): + # Try and stop servers setting V0 cookies designed to hack other + # servers that know both V0 and V1 protocols. + if (cookie.version == 0 and self.strict_ns_set_initial_dollar and + cookie.name.startswith("$")): + debug(" illegal name (starts with '$'): '%s'", cookie.name) + return False + return True + + def set_ok_path(self, cookie, request): + if cookie.path_specified: + req_path = request_path(request) + if ((cookie.version > 0 or + (cookie.version == 0 and self.strict_ns_set_path)) and + not req_path.startswith(cookie.path)): + debug(" path attribute %s is not a prefix of request " + "path %s", cookie.path, req_path) + return False + return True + + def set_ok_countrycode_domain(self, cookie, request): + """Return False if explicit cookie domain is not acceptable. + + Called by set_ok_domain, for convenience of overriding by + subclasses. 
+ + """ + if cookie.domain_specified and self.strict_domain: + domain = cookie.domain + # since domain was specified, we know that: + assert domain.startswith(".") + if domain.count(".") == 2: + # domain like .foo.bar + i = domain.rfind(".") + tld = domain[i+1:] + sld = domain[1:i] + if (sld.lower() in [ + "co", "ac", + "com", "edu", "org", "net", "gov", "mil", "int", + "aero", "biz", "cat", "coop", "info", "jobs", "mobi", + "museum", "name", "pro", "travel", + ] and + len(tld) == 2): + # domain like .co.uk + return False + return True + + def set_ok_domain(self, cookie, request): + if self.is_blocked(cookie.domain): + debug(" domain %s is in user block-list", cookie.domain) + return False + if self.is_not_allowed(cookie.domain): + debug(" domain %s is not in user allow-list", cookie.domain) + return False + if not self.set_ok_countrycode_domain(cookie, request): + debug(" country-code second level domain %s", cookie.domain) + return False + if cookie.domain_specified: + req_host, erhn = eff_request_host_lc(request) + domain = cookie.domain + if domain.startswith("."): + undotted_domain = domain[1:] + else: + undotted_domain = domain + embedded_dots = (undotted_domain.find(".") >= 0) + if not embedded_dots and domain != ".local": + debug(" non-local domain %s contains no embedded dot", + domain) + return False + if cookie.version == 0: + if (not erhn.endswith(domain) and + (not erhn.startswith(".") and + not ("."+erhn).endswith(domain))): + debug(" effective request-host %s (even with added " + "initial dot) does not end end with %s", + erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainRFC2965Match)): + if not domain_match(erhn, domain): + debug(" effective request-host %s does not domain-match " + "%s", erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainStrictNoDots)): + host_prefix = req_host[:-len(domain)] + if (host_prefix.find(".") >= 0 and + not IPV4_RE.search(req_host)): + debug(" host prefix %s for domain %s contains a dot", + host_prefix, domain) + return False + return True + + def set_ok_port(self, cookie, request): + if cookie.port_specified: + req_port = request_port(request) + if req_port is None: + req_port = "80" + else: + req_port = str(req_port) + for p in cookie.port.split(","): + try: + int(p) + except ValueError: + debug(" bad port %s (not numeric)", p) + return False + if p == req_port: + break + else: + debug(" request port (%s) not found in %s", + req_port, cookie.port) + return False + return True + + def return_ok(self, cookie, request): + """ + If you override return_ok, be sure to call this method. If it returns + false, so should your subclass (assuming your subclass wants to be more + strict about which cookies to return). + + """ + # Path has already been checked by path_return_ok, and domain blocking + # done by domain_return_ok. 
+ debug(" - checking cookie %s", cookie) + + for n in ("version", "verifiability", "secure", "expires", "port", + "domain"): + fn_name = "return_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + return True + + def return_ok_version(self, cookie, request): + if cookie.version > 0 and not self.rfc2965: + debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + debug(" Netscape cookies are switched off") + return False + return True + + def return_ok_verifiability(self, cookie, request): + if request_is_unverifiable(request) and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + debug(" third-party RFC 2965 cookie during unverifiable " + "transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + debug(" third-party Netscape cookie during unverifiable " + "transaction") + return False + return True + + def return_ok_secure(self, cookie, request): + if cookie.secure and request.get_type() != "https": + debug(" secure cookie with non-secure request") + return False + return True + + def return_ok_expires(self, cookie, request): + if cookie.is_expired(self._now): + debug(" cookie expired") + return False + return True + + def return_ok_port(self, cookie, request): + if cookie.port: + req_port = request_port(request) + if req_port is None: + req_port = "80" + for p in cookie.port.split(","): + if p == req_port: + break + else: + debug(" request port %s does not match cookie port %s", + req_port, cookie.port) + return False + return True + + def return_ok_domain(self, cookie, request): + req_host, erhn = eff_request_host_lc(request) + domain = cookie.domain + + # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't + if (cookie.version == 0 and + (self.strict_ns_domain & self.DomainStrictNonDomain) and + not cookie.domain_specified and domain != erhn): + debug(" cookie with unspecified domain does not string-compare " + "equal to request domain") + return False + + if cookie.version > 0 and not domain_match(erhn, domain): + debug(" effective request-host name %s does not domain-match " + "RFC 2965 cookie domain %s", erhn, domain) + return False + if cookie.version == 0 and not ("."+erhn).endswith(domain): + debug(" request-host %s does not match Netscape cookie domain " + "%s", req_host, domain) + return False + return True + + def domain_return_ok(self, domain, request): + # Liberal check of domain. This is here as an optimization to avoid + # having to load lots of MSIE cookie files unless necessary. + + # Munge req_host and erhn to always start with a dot, so as to err on + # the side of letting cookies through. 
+ dotted_req_host, dotted_erhn = eff_request_host_lc(request) + if not dotted_req_host.startswith("."): + dotted_req_host = "."+dotted_req_host + if not dotted_erhn.startswith("."): + dotted_erhn = "."+dotted_erhn + if not (dotted_req_host.endswith(domain) or + dotted_erhn.endswith(domain)): + #debug(" request domain %s does not match cookie domain %s", + # req_host, domain) + return False + + if self.is_blocked(domain): + debug(" domain %s is in user block-list", domain) + return False + if self.is_not_allowed(domain): + debug(" domain %s is not in user allow-list", domain) + return False + + return True + + def path_return_ok(self, path, request): + debug("- checking cookie path=%s", path) + req_path = request_path(request) + if not req_path.startswith(path): + debug(" %s does not path-match %s", req_path, path) + return False + return True + + +def vals_sorted_by_key(adict): + keys = adict.keys() + keys.sort() + return map(adict.get, keys) + +class MappingIterator: + """Iterates over nested mapping, depth-first, in sorted order by key.""" + def __init__(self, mapping): + self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack + + def __iter__(self): return self + + def next(self): + # this is hairy because of lack of generators + while 1: + try: + vals, i, prev_item = self._s.pop() + except IndexError: + raise StopIteration() + if i < len(vals): + item = vals[i] + i = i + 1 + self._s.append((vals, i, prev_item)) + try: + item.items + except AttributeError: + # non-mapping + break + else: + # mapping + self._s.append((vals_sorted_by_key(item), 0, item)) + continue + return item + + +# Used as second parameter to dict.get method, to distinguish absent +# dict key from one with a None value. +class Absent: pass + +class CookieJar: + """Collection of HTTP cookies. + + You may not need to know about this class: try mechanize.urlopen(). + + The major methods are extract_cookies and add_cookie_header; these are all + you are likely to need. + + CookieJar supports the iterator protocol: + + for cookie in cookiejar: + # do something with cookie + + Methods: + + add_cookie_header(request) + extract_cookies(response, request) + get_policy() + set_policy(policy) + cookies_for_request(request) + make_cookies(response, request) + set_cookie_if_ok(cookie, request) + set_cookie(cookie) + clear_session_cookies() + clear_expired_cookies() + clear(domain=None, path=None, name=None) + + Public attributes + + policy: CookiePolicy object + + """ + + non_word_re = re.compile(r"\W") + quote_re = re.compile(r"([\"\\])") + strict_domain_re = re.compile(r"\.?[^.]*") + domain_re = re.compile(r"[^.]*") + dots_re = re.compile(r"^\.+") + + def __init__(self, policy=None): + """ + See CookieJar.__doc__ for argument documentation. 
+ + """ + if policy is None: + policy = DefaultCookiePolicy() + self._policy = policy + + self._cookies = {} + + # for __getitem__ iteration in pre-2.2 Pythons + self._prev_getitem_index = 0 + + def get_policy(self): + return self._policy + + def set_policy(self, policy): + self._policy = policy + + def _cookies_for_domain(self, domain, request): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + debug("Checking %s for cookies to return", domain) + cookies_by_path = self._cookies[domain] + for path in cookies_by_path.keys(): + if not self._policy.path_return_ok(path, request): + continue + cookies_by_name = cookies_by_path[path] + for cookie in cookies_by_name.values(): + if not self._policy.return_ok(cookie, request): + debug(" not returning cookie") + continue + debug(" it's a match") + cookies.append(cookie) + return cookies + + def cookies_for_request(self, request): + """Return a list of cookies to be returned to server. + + The returned list of cookie instances is sorted in the order they + should appear in the Cookie: header for return to the server. + + See add_cookie_header.__doc__ for the interface required of the + request argument. + + New in version 0.1.10 + + """ + self._policy._now = self._now = int(time.time()) + cookies = self._cookies_for_request(request) + # add cookies in order of most specific (i.e. longest) path first + def decreasing_size(a, b): return cmp(len(b.path), len(a.path)) + cookies.sort(decreasing_size) + return cookies + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + # this method still exists (alongside cookies_for_request) because it + # is part of an implied protected interface for subclasses of cookiejar + # XXX document that implied interface, or provide another way of + # implementing cookiejars than subclassing + cookies = [] + for domain in self._cookies.keys(): + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookie_attrs(self, cookies): + """Return a list of cookie-attributes to be returned to server. + + The $Version attribute is also added when appropriate (currently only + once per request). + + >>> jar = CookieJar() + >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False, + ... "example.com", False, False, + ... "/", False, False, None, True, + ... None, None, {}) + >>> jar._cookie_attrs([ns_cookie]) + ['foo="bar"'] + >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False, + ... ".example.com", True, False, + ... "/", False, False, None, True, + ... None, None, {}) + >>> jar._cookie_attrs([rfc2965_cookie]) + ['$Version=1', 'foo=bar', '$Domain="example.com"'] + + """ + version_set = False + + attrs = [] + for cookie in cookies: + # set version of Cookie header + # XXX + # What should it be if multiple matching Set-Cookie headers have + # different versions themselves? + # Answer: there is no answer; was supposed to be settled by + # RFC 2965 errata, but that may never appear... 
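+ # As implemented, the loop below advertises only the version of the
+ # first cookie in the (longest-path-first) sorted list.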
+ version = cookie.version + if not version_set: + version_set = True + if version > 0: + attrs.append("$Version=%s" % version) + + # quote cookie value if necessary + # (not for Netscape protocol, which already has any quotes + # intact, due to the poorly-specified Netscape Cookie: syntax) + if ((cookie.value is not None) and + self.non_word_re.search(cookie.value) and version > 0): + value = self.quote_re.sub(r"\\\1", cookie.value) + else: + value = cookie.value + + # add cookie-attributes to be returned in Cookie header + if cookie.value is None: + attrs.append(cookie.name) + else: + attrs.append("%s=%s" % (cookie.name, value)) + if version > 0: + if cookie.path_specified: + attrs.append('$Path="%s"' % cookie.path) + if cookie.domain.startswith("."): + domain = cookie.domain + if (not cookie.domain_initial_dot and + domain.startswith(".")): + domain = domain[1:] + attrs.append('$Domain="%s"' % domain) + if cookie.port is not None: + p = "$Port" + if cookie.port_specified: + p = p + ('="%s"' % cookie.port) + attrs.append(p) + + return attrs + + def add_cookie_header(self, request): + """Add correct Cookie: header to request (urllib2.Request object). + + The Cookie2 header is also added unless policy.hide_cookie2 is true. + + The request object (usually a urllib2.Request instance) must support + the methods get_full_url, get_host, is_unverifiable, get_type, + has_header, get_header, header_items and add_unredirected_header, as + documented by urllib2, and the port attribute (the port number). + Actually, RequestUpgradeProcessor will automatically upgrade your + Request object to one with has_header, get_header, header_items and + add_unredirected_header, if it lacks those methods, for compatibility + with pre-2.4 versions of urllib2. + + """ + debug("add_cookie_header") + cookies = self.cookies_for_request(request) + + attrs = self._cookie_attrs(cookies) + if attrs: + if not request.has_header("Cookie"): + request.add_unredirected_header("Cookie", "; ".join(attrs)) + + # if necessary, advertise that we know RFC 2965 + if self._policy.rfc2965 and not self._policy.hide_cookie2: + for cookie in cookies: + if cookie.version != 1 and not request.has_header("Cookie2"): + request.add_unredirected_header("Cookie2", '$Version="1"') + break + + self.clear_expired_cookies() + + def _normalized_cookie_tuples(self, attrs_set): + """Return list of tuples containing normalised cookie information. + + attrs_set is the list of lists of key,value pairs extracted from + the Set-Cookie or Set-Cookie2 headers. + + Tuples are name, value, standard, rest, where name and value are the + cookie name and value, standard is a dictionary containing the standard + cookie-attributes (discard, secure, version, expires or max-age, + domain, path and port) and rest is a dictionary containing the rest of + the cookie-attributes. + + """ + cookie_tuples = [] + + boolean_attrs = "discard", "secure" + value_attrs = ("version", + "expires", "max-age", + "domain", "path", "port", + "comment", "commenturl") + + for cookie_attrs in attrs_set: + name, value = cookie_attrs[0] + + # Build dictionary of standard cookie-attributes (standard) and + # dictionary of other cookie-attributes (rest). + + # Note: expiry time is normalised to seconds since epoch. V0 + # cookies should have the Expires cookie-attribute, and V1 cookies + # should have Max-Age, but since V1 includes RFC 2109 cookies (and + # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we + # accept either (but prefer Max-Age). 
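+ # Illustrative example: the attribute list
+ # [("foo", "bar"), ("Max-Age", "3600"), ("Domain", ".Example.COM")]
+ # normalises to the tuple
+ # ("foo", "bar",
+ # {"expires": self._now + 3600, "domain": ".example.com"}, {})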
+ max_age_set = False
+
+ bad_cookie = False
+
+ standard = {}
+ rest = {}
+ for k, v in cookie_attrs[1:]:
+ lc = k.lower()
+ # don't lose case distinction for unknown fields
+ if lc in value_attrs or lc in boolean_attrs:
+ k = lc
+ if k in boolean_attrs and v is None:
+ # boolean cookie-attribute is present, but has no value
+ # (like "discard", rather than "port=80")
+ v = True
+ if standard.has_key(k):
+ # only first value is significant
+ continue
+ if k == "domain":
+ if v is None:
+ debug(" missing value for domain attribute")
+ bad_cookie = True
+ break
+ # RFC 2965 section 3.3.3
+ v = v.lower()
+ if k == "expires":
+ if max_age_set:
+ # Prefer max-age to expires (like Mozilla)
+ continue
+ if v is None:
+ debug(" missing or invalid value for expires "
+ "attribute: treating as session cookie")
+ continue
+ if k == "max-age":
+ max_age_set = True
+ if v is None:
+ debug(" missing value for max-age attribute")
+ bad_cookie = True
+ break
+ try:
+ v = int(v)
+ except ValueError:
+ debug(" missing or invalid (non-numeric) value for "
+ "max-age attribute")
+ bad_cookie = True
+ break
+ # convert RFC 2965 Max-Age to seconds since epoch
+ # XXX Strictly you're supposed to follow RFC 2616
+ # age-calculation rules. Remember that a zero Max-Age is a
+ # request to discard the (old and new) cookie, though.
+ k = "expires"
+ v = self._now + v
+ if (k in value_attrs) or (k in boolean_attrs):
+ if (v is None and
+ k not in ["port", "comment", "commenturl"]):
+ debug(" missing value for %s attribute" % k)
+ bad_cookie = True
+ break
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ if bad_cookie:
+ continue
+
+ cookie_tuples.append((name, value, standard, rest))
+
+ return cookie_tuples
+
+ def _cookie_from_cookie_tuple(self, tup, request):
+ # standard is dict of standard cookie-attributes, rest is dict of the
+ # rest of them
+ name, value, standard, rest = tup
+
+ domain = standard.get("domain", Absent)
+ path = standard.get("path", Absent)
+ port = standard.get("port", Absent)
+ expires = standard.get("expires", Absent)
+
+ # set the easy defaults
+ version = standard.get("version", None)
+ if version is not None:
+ try:
+ version = int(version)
+ except ValueError:
+ return None # invalid version, ignore cookie
+ secure = standard.get("secure", False)
+ # (discard is also set if expires is Absent)
+ discard = standard.get("discard", False)
+ comment = standard.get("comment", None)
+ comment_url = standard.get("commenturl", None)
+
+ # set default path
+ if path is not Absent and path != "":
+ path_specified = True
+ path = escape_path(path)
+ else:
+ path_specified = False
+ path = request_path(request)
+ i = path.rfind("/")
+ if i != -1:
+ if version == 0:
+ # Netscape spec parts company from reality here
+ path = path[:i]
+ else:
+ path = path[:i+1]
+ if len(path) == 0: path = "/"
+
+ # set default domain
+ domain_specified = domain is not Absent
+ # but first we have to remember whether it starts with a dot
+ domain_initial_dot = False
+ if domain_specified:
+ domain_initial_dot = bool(domain.startswith("."))
+ if domain is Absent:
+ req_host, erhn = eff_request_host_lc(request)
+ domain = erhn
+ elif not domain.startswith("."):
+ domain = "."+domain
+
+ # set default port
+ port_specified = False
+ if port is not Absent:
+ if port is None:
+ # Port attr present, but has no value: default to request port.
+ # Cookie should then only be sent back on that port.
+ port = request_port(request)
+ else:
+ port_specified = True
+ port = re.sub(r"\s+", "", port)
+ else:
+ # No port attr present. 
Cookie can be sent back on any port. + port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + if self._policy.rfc2109_as_netscape is None: + rfc2109_as_netscape = not self._policy.rfc2965 + else: + rfc2109_as_netscape = self._policy.rfc2109_as_netscape + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_netscape: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def _make_cookies(self, response, request): + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.getheaders("Set-Cookie2") + ns_hdrs = headers.getheaders("Set-Cookie") + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except: + reraise_unmasked_exceptions() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except: + reraise_unmasked_exceptions() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. + if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return not lookup.has_key(key) + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object. + + See extract_cookies.__doc__ for the interface required of the + response and request arguments. + + """ + self._policy._now = self._now = int(time.time()) + return [cookie for cookie in self._make_cookies(response, request) + if cookie.expires is None or not cookie.expires <= self._now] + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so. + + cookie: mechanize.Cookie instance + request: see extract_cookies.__doc__ for the required interface + + """ + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set. 
+
+ cookie: mechanize.Cookie instance
+ """
+ c = self._cookies
+ if not c.has_key(cookie.domain): c[cookie.domain] = {}
+ c2 = c[cookie.domain]
+ if not c2.has_key(cookie.path): c2[cookie.path] = {}
+ c3 = c2[cookie.path]
+ c3[cookie.name] = cookie
+
+ def extract_cookies(self, response, request):
+ """Extract cookies from response, where allowable given the request.
+
+ Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
+ object passed as argument. Any of these headers that are found are
+ used to update the state of the object (subject to the policy.set_ok
+ method's approval).
+
+ The response object (usually the result of a call to
+ mechanize.urlopen, or similar) should support an info method, which
+ returns a mimetools.Message object (in fact, the 'mimetools.Message
+ object' may be any object that provides a getheaders method).
+
+ The request object (usually a urllib2.Request instance) must support
+ the methods get_full_url, get_type, get_host, and is_unverifiable, as
+ documented by urllib2, and the port attribute (the port number). The
+ request is used to set default values for cookie-attributes as well as
+ for checking that the cookie is OK to be set.
+
+ """
+ debug("extract_cookies: %s", response.info())
+ self._policy._now = self._now = int(time.time())
+
+ for cookie in self._make_cookies(response, request):
+ if cookie.expires is not None and cookie.expires <= self._now:
+ # An expiry date in the past is a request to delete the cookie.
+ # This can't be done in DefaultCookiePolicy, because the policy
+ # has no way to delete cookies.
+ try:
+ self.clear(cookie.domain, cookie.path, cookie.name)
+ except KeyError:
+ pass
+ debug("Expiring cookie, domain='%s', path='%s', name='%s'",
+ cookie.domain, cookie.path, cookie.name)
+ elif self._policy.set_ok(cookie, request):
+ debug(" setting cookie: %s", cookie)
+ self.set_cookie(cookie)
+
+ def clear(self, domain=None, path=None, name=None):
+ """Clear some cookies.
+
+ Invoking this method without arguments will clear all cookies. If
+ given a single argument, only cookies belonging to that domain will be
+ removed. If given two arguments, cookies belonging to the specified
+ path within that domain are removed. If given three arguments, then
+ the cookie with the specified name, path and domain is removed.
+
+ Raises KeyError if no matching cookie exists.
+
+ """
+ if name is not None:
+ if (domain is None) or (path is None):
+ raise ValueError(
+ "domain and path must be given to remove a cookie by name")
+ del self._cookies[domain][path][name]
+ elif path is not None:
+ if domain is None:
+ raise ValueError(
+ "domain must be given to remove cookies by path")
+ del self._cookies[domain][path]
+ elif domain is not None:
+ del self._cookies[domain]
+ else:
+ self._cookies = {}
+
+ def clear_session_cookies(self):
+ """Discard all session cookies.
+
+ Discards all cookies held by the object which had either no Max-Age or
+ Expires cookie-attribute or an explicit Discard cookie-attribute, or
+ which otherwise have ended up with a true discard attribute. For
+ interactive browsers, the end of a session usually corresponds to
+ closing the browser window.
+
+ Note that the save method won't save session cookies anyway, unless you
+ ask otherwise by passing a true ignore_discard argument.
+
+ """
+ for cookie in self:
+ if cookie.discard:
+ self.clear(cookie.domain, cookie.path, cookie.name)
+
+ def clear_expired_cookies(self):
+ """Discard all expired cookies. 
+ + You probably don't need to call this method: expired cookies are never + sent back to the server (provided you're using DefaultCookiePolicy), + this method is called by CookieJar itself every so often, and the save + method won't save expired cookies anyway (unless you ask otherwise by + passing a true ignore_expires argument). + + """ + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + + def __getitem__(self, i): + if i == 0: + self._getitem_iterator = self.__iter__() + elif self._prev_getitem_index != i-1: raise IndexError( + "CookieJar.__getitem__ only supports sequential iteration") + self._prev_getitem_index = i + try: + return self._getitem_iterator.next() + except StopIteration: + raise IndexError() + + def __iter__(self): + return MappingIterator(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + +class LoadError(Exception): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file. + + Additional methods + + save(filename=None, ignore_discard=False, ignore_expires=False) + load(filename=None, ignore_discard=False, ignore_expires=False) + revert(filename=None, ignore_discard=False, ignore_expires=False) + + Additional public attributes + + filename: filename for loading and saving cookies + + Additional public readable attributes + + delayload: request that cookies are lazily loaded from disk; this is only + a hint since this only affects performance, not behaviour (unless the + cookies on disk are changing); a CookieJar object may ignore it (in fact, + only MSIECookieJar lazily loads cookies at the moment) + + """ + + def __init__(self, filename=None, delayload=False, policy=None): + """ + See FileCookieJar.__doc__ for argument documentation. + + Cookies are NOT loaded from the named file until either the load or + revert method is called. + + """ + CookieJar.__init__(self, policy) + if filename is not None and not isstringlike(filename): + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file. + + filename: name of file in which to save cookies + ignore_discard: save even cookies set to be discarded + ignore_expires: save even cookies that have expired + + The file is overwritten if it already exists, thus wiping all its + cookies. Saved cookies can be restored later using the load or revert + methods. If filename is not specified, self.filename is used; if + self.filename is None, ValueError is raised. + + """ + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file. + + Old cookies are kept unless overwritten by newly loaded ones. + + Arguments are as for .save(). + + If filename is not specified, self.filename is used; if self.filename + is None, ValueError is raised. The named file must be in the format + understood by the class, or LoadError will be raised. 
This format will + be identical to that written by the save method, unless the load format + is not sufficiently well understood (as is the case for MSIECookieJar). + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename) + try: + self._really_load(f, filename, ignore_discard, ignore_expires) + finally: + f.close() + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or IOError) if reversion is not successful; the + object's state will not be altered if this happens. + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except (LoadError, IOError): + self._cookies = old_state + raise diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py new file mode 100644 index 0000000..596b114 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_debug.py @@ -0,0 +1,28 @@ +import logging + +from urllib2 import BaseHandler +from _response import response_seek_wrapper + + +class HTTPResponseDebugProcessor(BaseHandler): + handler_order = 900 # before redirections, after everything else + + def http_response(self, request, response): + if not hasattr(response, "seek"): + response = response_seek_wrapper(response) + info = logging.getLogger("mechanize.http_responses").info + try: + info(response.read()) + finally: + response.seek(0) + info("*****************************************************") + return response + + https_response = http_response + +class HTTPRedirectDebugProcessor(BaseHandler): + def http_request(self, request): + if hasattr(request, "redirect_dict"): + info = logging.getLogger("mechanize.http_redirects").info + info("redirecting to %s", request.get_full_url()) + return request diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py new file mode 100644 index 0000000..db662a8 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_file.py @@ -0,0 +1,60 @@ +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO +import mimetools +import os +import socket +import urllib +from urllib2 import BaseHandler, URLError + + +class FileHandler(BaseHandler): + # Use local file or FTP depending on form of URL + def file_open(self, req): + url = req.get_selector() + if url[:2] == '//' and url[2:3] != '/': + req.type = 'ftp' + return self.parent.open(req) + else: + return self.open_local_file(req) + + # names for the localhost + names = None + def get_names(self): + if FileHandler.names is None: + try: + FileHandler.names = (socket.gethostbyname('localhost'), + socket.gethostbyname(socket.gethostname())) + except socket.gaierror: + FileHandler.names = (socket.gethostbyname('localhost'),) + return FileHandler.names + + # not entirely sure what the rules are here + def open_local_file(self, req): + try: + import email.utils as emailutils + except ImportError: + import email.Utils as emailutils + import mimetypes + host = req.get_host() + file = req.get_selector() + localfile = 
urllib.url2pathname(file) + try: + stats = os.stat(localfile) + size = stats.st_size + modified = emailutils.formatdate(stats.st_mtime, usegmt=True) + mtype = mimetypes.guess_type(file)[0] + headers = mimetools.Message(StringIO( + 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified))) + if host: + host, port = urllib.splitport(host) + if not host or \ + (not port and socket.gethostbyname(host) in self.get_names()): + return urllib.addinfourl(open(localfile, 'rb'), + headers, 'file:'+file) + except OSError, msg: + # urllib2 users shouldn't expect OSErrors coming from urlopen() + raise URLError(msg) + raise URLError('file not on local host') diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py new file mode 100644 index 0000000..34fe979 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_firefox3cookiejar.py @@ -0,0 +1,249 @@ +"""Firefox 3 "cookies.sqlite" cookie persistence. + +Copyright 2008 John J Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import logging +import time +import sqlite3 + +from _clientcookie import CookieJar, Cookie, MappingIterator +from _util import isstringlike, experimental +debug = logging.getLogger("mechanize.cookies").debug + + +class Firefox3CookieJar(CookieJar): + + """Firefox 3 cookie jar. + + The cookies are stored in Firefox 3's "cookies.sqlite" format. + + Constructor arguments: + + filename: filename of cookies.sqlite (typically found at the top level + of a firefox profile directory) + autoconnect: as a convenience, connect to the SQLite cookies database at + Firefox3CookieJar construction time (default True) + policy: an object satisfying the mechanize.CookiePolicy interface + + Note that this is NOT a FileCookieJar, and there are no .load(), + .save() or .restore() methods. The database is in sync with the + cookiejar object's state after each public method call. + + Following Firefox's own behaviour, session cookies are never saved to + the database. + + The file is created, and an sqlite database written to it, if it does + not already exist. The moz_cookies database table is created if it does + not already exist. + """ + + # XXX + # handle DatabaseError exceptions + # add a FileCookieJar (explicit .save() / .revert() / .load() methods) + + def __init__(self, filename, autoconnect=True, policy=None): + experimental("Firefox3CookieJar is experimental code") + CookieJar.__init__(self, policy) + if filename is not None and not isstringlike(filename): + raise ValueError("filename must be string-like") + self.filename = filename + self._conn = None + if autoconnect: + self.connect() + + def connect(self): + self._conn = sqlite3.connect(self.filename) + self._conn.isolation_level = "DEFERRED" + self._create_table_if_necessary() + + def close(self): + self._conn.close() + + def _transaction(self, func): + try: + cur = self._conn.cursor() + try: + result = func(cur) + finally: + cur.close() + except: + self._conn.rollback() + raise + else: + self._conn.commit() + return result + + def _execute(self, query, params=()): + return self._transaction(lambda cur: cur.execute(query, params)) + + def _query(self, query, params=()): + # XXX should we bother with a transaction? 
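+ # Illustrative usage (see __iter__ below for a real caller):
+ # for row in self._query("SELECT * FROM moz_cookies"): ...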
+ cur = self._conn.cursor() + try: + cur.execute(query, params) + for row in cur.fetchall(): + yield row + finally: + cur.close() + + def _create_table_if_necessary(self): + self._execute("""\ +CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT, + value TEXT, host TEXT, path TEXT,expiry INTEGER, + lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""") + + def _cookie_from_row(self, row): + (pk, name, value, domain, path, expires, + last_accessed, secure, http_only) = row + + version = 0 + domain = domain.encode("ascii", "ignore") + path = path.encode("ascii", "ignore") + name = name.encode("ascii", "ignore") + value = value.encode("ascii", "ignore") + secure = bool(secure) + + # last_accessed isn't a cookie attribute, so isn't added to rest + rest = {} + if http_only: + rest["HttpOnly"] = None + + if name == "": + name = value + value = None + + initial_dot = domain.startswith(".") + domain_specified = initial_dot + + discard = False + if expires == "": + expires = None + discard = True + + return Cookie(version, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + rest) + + def clear(self, domain=None, path=None, name=None): + CookieJar.clear(self, domain, path, name) + where_parts = [] + sql_params = [] + if domain is not None: + where_parts.append("host = ?") + sql_params.append(domain) + if path is not None: + where_parts.append("path = ?") + sql_params.append(path) + if name is not None: + where_parts.append("name = ?") + sql_params.append(name) + where = " AND ".join(where_parts) + if where: + where = " WHERE " + where + def clear(cur): + cur.execute("DELETE FROM moz_cookies%s" % where, + tuple(sql_params)) + self._transaction(clear) + + def _row_from_cookie(self, cookie, cur): + expires = cookie.expires + if cookie.discard: + expires = "" + + domain = unicode(cookie.domain) + path = unicode(cookie.path) + name = unicode(cookie.name) + value = unicode(cookie.value) + secure = bool(int(cookie.secure)) + + if value is None: + value = name + name = "" + + last_accessed = int(time.time()) + http_only = cookie.has_nonstandard_attr("HttpOnly") + + query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""") + pk = query.fetchone()[0] + if pk is None: + pk = 1 + + return (pk, name, value, domain, path, expires, + last_accessed, secure, http_only) + + def set_cookie(self, cookie): + if cookie.discard: + CookieJar.set_cookie(self, cookie) + return + + def set_cookie(cur): + # XXX + # is this RFC 2965-correct? + # could this do an UPDATE instead? + row = self._row_from_cookie(cookie, cur) + name, unused, domain, path = row[1:5] + cur.execute("""\ +DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""", + (domain, path, name)) + cur.execute("""\ +INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+""", row) + self._transaction(set_cookie) + + def __iter__(self): + # session (non-persistent) cookies + for cookie in MappingIterator(self._cookies): + yield cookie + # persistent cookies + for row in self._query("""\ +SELECT * FROM moz_cookies ORDER BY name, path, host"""): + yield self._cookie_from_row(row) + + def _cookies_for_request(self, request): + session_cookies = CookieJar._cookies_for_request(self, request) + def get_cookies(cur): + query = cur.execute("SELECT host from moz_cookies") + domains = [row[0] for row in query.fetchmany()] + cookies = [] + for domain in domains: + cookies += self._persistent_cookies_for_domain(domain, + request, cur) + return cookies + persistent_coookies = self._transaction(get_cookies) + return session_cookies + persistent_coookies + + def _persistent_cookies_for_domain(self, domain, request, cur): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + debug("Checking %s for cookies to return", domain) + query = cur.execute("""\ +SELECT * from moz_cookies WHERE host = ? ORDER BY path""", + (domain,)) + cookies = [self._cookie_from_row(row) for row in query.fetchmany()] + last_path = None + r = [] + for cookie in cookies: + if (cookie.path != last_path and + not self._policy.path_return_ok(cookie.path, request)): + last_path = cookie.path + continue + if not self._policy.return_ok(cookie, request): + debug(" not returning cookie") + continue + debug(" it's a match") + r.append(cookie) + return r diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py new file mode 100644 index 0000000..26c2743 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_gzip.py @@ -0,0 +1,103 @@ +import urllib2 +from cStringIO import StringIO +import _response + +# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library +class GzipConsumer: + + def __init__(self, consumer): + self.__consumer = consumer + self.__decoder = None + self.__data = "" + + def __getattr__(self, key): + return getattr(self.__consumer, key) + + def feed(self, data): + if self.__decoder is None: + # check if we have a full gzip header + data = self.__data + data + try: + i = 10 + flag = ord(data[3]) + if flag & 4: # extra + x = ord(data[i]) + 256*ord(data[i+1]) + i = i + 2 + x + if flag & 8: # filename + while ord(data[i]): + i = i + 1 + i = i + 1 + if flag & 16: # comment + while ord(data[i]): + i = i + 1 + i = i + 1 + if flag & 2: # crc + i = i + 2 + if len(data) < i: + raise IndexError("not enough data") + if data[:3] != "\x1f\x8b\x08": + raise IOError("invalid gzip data") + data = data[i:] + except IndexError: + self.__data = data + return # need more data + import zlib + self.__data = "" + self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS) + data = self.__decoder.decompress(data) + if data: + self.__consumer.feed(data) + + def close(self): + if self.__decoder: + data = self.__decoder.flush() + if data: + self.__consumer.feed(data) + self.__consumer.close() + + +# -------------------------------------------------------------------- + +# the rest of this module is John Lee's stupid code, not +# Fredrik's nice code :-) + +class stupid_gzip_consumer: + def __init__(self): self.data = [] + def feed(self, data): self.data.append(data) + +class stupid_gzip_wrapper(_response.closeable_response): + def __init__(self, response): + self._response = response + + c = stupid_gzip_consumer() + gzc = GzipConsumer(c) + 
gzc.feed(response.read())
+ self.__data = StringIO("".join(c.data))
+
+ def read(self, size=-1):
+ return self.__data.read(size)
+ def readline(self, size=-1):
+ return self.__data.readline(size)
+ def readlines(self, sizehint=-1):
+ return self.__data.readlines(sizehint)
+
+ def __getattr__(self, name):
+ # delegate unknown methods/attributes
+ return getattr(self._response, name)
+
+class HTTPGzipProcessor(urllib2.BaseHandler):
+ handler_order = 200 # response processing before HTTPEquivProcessor
+
+ def http_request(self, request):
+ request.add_header("Accept-Encoding", "gzip")
+ return request
+
+ def http_response(self, request, response):
+ # post-process response
+ enc_hdrs = response.info().getheaders("Content-encoding")
+ for enc_hdr in enc_hdrs:
+ if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
+ return stupid_gzip_wrapper(response)
+ return response
+
+ https_response = http_response
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py
new file mode 100644
index 0000000..49ba5de
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_headersutil.py
@@ -0,0 +1,232 @@
+"""Utility functions for HTTP header value parsing and construction.
+
+Copyright 1997-1998, Gisle Aas
+Copyright 2002-2006, John J. Lee
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import os, re
+from types import StringType
+from types import UnicodeType
+STRING_TYPES = StringType, UnicodeType
+
+from _util import http2time
+import _rfc3986
+
+def is_html(ct_headers, url, allow_xhtml=False):
+ """
+ ct_headers: Sequence of Content-Type headers
+ url: Response URL
+
+ """
+ if not ct_headers:
+ # guess
+ ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
+ html_exts = [".htm", ".html"]
+ if allow_xhtml:
+ html_exts += [".xhtml"]
+ return ext in html_exts
+ # use first header
+ ct = split_header_words(ct_headers)[0][0][0]
+ html_types = ["text/html"]
+ if allow_xhtml:
+ html_types += [
+ "text/xhtml", "text/xml",
+ "application/xml", "application/xhtml+xml",
+ ]
+ return ct in html_types
+
+def unmatched(match):
+ """Return unmatched part of re.Match object."""
+ start, end = match.span(0)
+ return match.string[:start]+match.string[end:]
+
+token_re = re.compile(r"^\s*([^=\s;,]+)")
+quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
+value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
+escape_re = re.compile(r"\\(.)")
+def split_header_words(header_values):
+ r"""Parse header values into a list of lists containing key,value pairs.
+
+ The function knows how to deal with ",", ";" and "=" as well as quoted
+ values after "=". A list of space separated tokens is parsed as if it
+ were separated by ";".
+
+ If the header_values passed as argument contains multiple values, then
+ they are treated as if they were a single value separated by comma ",".
+
+ This means that this function is useful for parsing header fields that
+ follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+ the requirement for tokens).
+
+ headers = #header
+ header = (token | parameter) *( [";"] (token | parameter))
+
+ token = 1*<any CHAR except CTLs or separators>
+ separators = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" 
| "=" + | "{" | "}" | SP | HT + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = <any TEXT except <">> + quoted-pair = "\" CHAR + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + Each header is represented by a list of key/value pairs. The value for a + simple token (not part of a parameter) is None. Syntactically incorrect + headers will not necessarily be parsed as you would want. + + This is easier to describe with some examples: + + >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) + [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] + >>> split_header_words(['text/html; charset="iso-8859-1"']) + [[('text/html', None), ('charset', 'iso-8859-1')]] + >>> split_header_words([r'Basic realm="\"foo\bar\""']) + [[('Basic', None), ('realm', '"foobar"')]] + + """ + assert type(header_values) not in STRING_TYPES + result = [] + for text in header_values: + orig_text = text + pairs = [] + while text: + m = token_re.search(text) + if m: + text = unmatched(m) + name = m.group(1) + m = quoted_value_re.search(text) + if m: # quoted value + text = unmatched(m) + value = m.group(1) + value = escape_re.sub(r"\1", value) + else: + m = value_re.search(text) + if m: # unquoted value + text = unmatched(m) + value = m.group(1) + value = value.rstrip() + else: + # no value, a lone token + value = None + pairs.append((name, value)) + elif text.lstrip().startswith(","): + # concatenated headers, as per RFC 2616 section 4.2 + text = text.lstrip()[1:] + if pairs: result.append(pairs) + pairs = [] + else: + # skip junk + non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) + assert nr_junk_chars > 0, ( + "split_header_words bug: '%s', '%s', %s" % + (orig_text, text, pairs)) + text = non_junk + if pairs: result.append(pairs) + return result + +join_escape_re = re.compile(r"([\"\\])") +def join_header_words(lists): + """Do the inverse of the conversion done by split_header_words. + + Takes a list of lists of (key, value) pairs and produces a single header + value. Attribute values are quoted if needed. + + >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) + 'text/plain; charset="iso-8859/1"' + >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) + 'text/plain, charset="iso-8859/1"' + + """ + headers = [] + for pairs in lists: + attr = [] + for k, v in pairs: + if v is not None: + if not re.search(r"^\w+$", v): + v = join_escape_re.sub(r"\\\1", v) # escape " and \ + v = '"%s"' % v + if k is None: # Netscape cookies may have no name + k = v + else: + k = "%s=%s" % (k, v) + attr.append(k) + if attr: headers.append("; ".join(attr)) + return ", ".join(headers) + +def strip_quotes(text): + if text.startswith('"'): + text = text[1:] + if text.endswith('"'): + text = text[:-1] + return text + +def parse_ns_headers(ns_headers): + """Ad-hoc parser for Netscape protocol cookie-attributes. + + The old Netscape cookie format for Set-Cookie can for instance contain + an unquoted "," in the expires field, so we have to use this ad-hoc + parser instead of split_header_words. + + XXX This may not make the best possible effort to parse all the crap + that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient + parser is probably better, so could do worse than following that if + this ever gives any trouble. + + Currently, this is also used for parsing RFC 2109 cookies. 
+
+ """
+ known_attrs = ("expires", "domain", "path", "secure",
+ # RFC 2109 attrs (may turn up in Netscape cookies, too)
+ "version", "port", "max-age")
+
+ result = []
+ for ns_header in ns_headers:
+ pairs = []
+ version_set = False
+ params = re.split(r";\s*", ns_header)
+ for ii in range(len(params)):
+ param = params[ii]
+ param = param.rstrip()
+ if param == "": continue
+ if "=" not in param:
+ k, v = param, None
+ else:
+ k, v = re.split(r"\s*=\s*", param, 1)
+ k = k.lstrip()
+ if ii != 0:
+ lc = k.lower()
+ if lc in known_attrs:
+ k = lc
+ if k == "version":
+ # This is an RFC 2109 cookie.
+ v = strip_quotes(v)
+ version_set = True
+ if k == "expires":
+ # convert expires date to seconds since epoch
+ v = http2time(strip_quotes(v)) # None if invalid
+ pairs.append((k, v))
+
+ if pairs:
+ if not version_set:
+ pairs.append(("version", "0"))
+ result.append(pairs)
+
+ return result
+
+
+def _test():
+ import doctest, _headersutil
+ return doctest.testmod(_headersutil)
+
+if __name__ == "__main__":
+ _test()
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py
new file mode 100644
index 0000000..5da0815
--- /dev/null
+++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_html.py
@@ -0,0 +1,631 @@
+"""HTML handling.
+
+Copyright 2003-2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+import re, copy, htmlentitydefs
+import sgmllib, ClientForm
+
+import _request
+from _headersutil import split_header_words, is_html as _is_html
+import _rfc3986
+
+DEFAULT_ENCODING = "latin-1"
+
+COMPRESS_RE = re.compile(r"\s+")
+
+
+# the base class is purely for backwards compatibility
+class ParseError(ClientForm.ParseError): pass
+
+
+class CachingGeneratorFunction(object):
+ """Caching wrapper around a no-arguments iterable."""
+
+ def __init__(self, iterable):
+ self._cache = []
+ # wrap iterable to make it non-restartable (otherwise, repeated
+ # __call__ would give incorrect results)
+ self._iterator = iter(iterable)
+
+ def __call__(self):
+ cache = self._cache
+ for item in cache:
+ yield item
+ for item in self._iterator:
+ cache.append(item)
+ yield item
+
+
+class EncodingFinder:
+ def __init__(self, default_encoding):
+ self._default_encoding = default_encoding
+ def encoding(self, response):
+ # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
+ # headers may be in the response. HTTP-EQUIV headers come last,
+ # so try in order from first to last. 
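+ # e.g. a Content-Type header "text/html; charset=utf-8" yields
+ # "utf-8" here.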
+ for ct in response.info().getheaders("content-type"): + for k, v in split_header_words([ct])[0]: + if k == "charset": + return v + return self._default_encoding + +class ResponseTypeFinder: + def __init__(self, allow_xhtml): + self._allow_xhtml = allow_xhtml + def is_html(self, response, encoding): + ct_hdrs = response.info().getheaders("content-type") + url = response.geturl() + # XXX encoding + return _is_html(ct_hdrs, url, self._allow_xhtml) + + +# idea for this argument-processing trick is from Peter Otten +class Args: + def __init__(self, args_map): + self.dictionary = dict(args_map) + def __getattr__(self, key): + try: + return self.dictionary[key] + except KeyError: + return getattr(self.__class__, key) + +def form_parser_args( + select_default=False, + form_parser_class=None, + request_class=None, + backwards_compat=False, + ): + return Args(locals()) + + +class Link: + def __init__(self, base_url, url, text, tag, attrs): + assert None not in [url, tag, attrs] + self.base_url = base_url + self.absolute_url = _rfc3986.urljoin(base_url, url) + self.url, self.text, self.tag, self.attrs = url, text, tag, attrs + def __cmp__(self, other): + try: + for name in "url", "text", "tag", "attrs": + if getattr(self, name) != getattr(other, name): + return -1 + except AttributeError: + return -1 + return 0 + def __repr__(self): + return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % ( + self.base_url, self.url, self.text, self.tag, self.attrs) + + +class LinksFactory: + + def __init__(self, + link_parser_class=None, + link_class=Link, + urltags=None, + ): + import _pullparser + if link_parser_class is None: + link_parser_class = _pullparser.TolerantPullParser + self.link_parser_class = link_parser_class + self.link_class = link_class + if urltags is None: + urltags = { + "a": "href", + "area": "href", + "frame": "src", + "iframe": "src", + } + self.urltags = urltags + self._response = None + self._encoding = None + + def set_response(self, response, base_url, encoding): + self._response = response + self._encoding = encoding + self._base_url = base_url + + def links(self): + """Return an iterator that provides links of the document.""" + response = self._response + encoding = self._encoding + base_url = self._base_url + p = self.link_parser_class(response, encoding=encoding) + + try: + for token in p.tags(*(self.urltags.keys()+["base"])): + if token.type == "endtag": + continue + if token.data == "base": + base_href = dict(token.attrs).get("href") + if base_href is not None: + base_url = base_href + continue + attrs = dict(token.attrs) + tag = token.data + name = attrs.get("name") + text = None + # XXX use attr_encoding for ref'd doc if that doc does not + # provide one by other means + #attr_encoding = attrs.get("charset") + url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL? + if not url: + # Probably an <A NAME="blah"> link or <AREA NOHREF...>. + # For our purposes a link is something with a URL, so + # ignore this. + continue + + url = _rfc3986.clean_url(url, encoding) + if tag == "a": + if token.type != "startendtag": + # hmm, this'd break if end tag is missing + text = p.get_compressed_text(("endtag", tag)) + # but this doesn't work for eg. + # <a href="blah"><b>Andy</b></a> + #text = p.get_compressed_text() + + yield Link(base_url, url, text, tag, token.attrs) + except sgmllib.SGMLParseError, exc: + raise ParseError(exc) + +class FormsFactory: + + """Makes a sequence of objects satisfying ClientForm.HTMLForm interface. 
+ + After calling .forms(), the .global_form attribute is a form object + containing all controls not a descendant of any FORM element. + + For constructor argument docs, see ClientForm.ParseResponse + argument docs. + + """ + + def __init__(self, + select_default=False, + form_parser_class=None, + request_class=None, + backwards_compat=False, + ): + import ClientForm + self.select_default = select_default + if form_parser_class is None: + form_parser_class = ClientForm.FormParser + self.form_parser_class = form_parser_class + if request_class is None: + request_class = _request.Request + self.request_class = request_class + self.backwards_compat = backwards_compat + self._response = None + self.encoding = None + self.global_form = None + + def set_response(self, response, encoding): + self._response = response + self.encoding = encoding + self.global_form = None + + def forms(self): + import ClientForm + encoding = self.encoding + try: + forms = ClientForm.ParseResponseEx( + self._response, + select_default=self.select_default, + form_parser_class=self.form_parser_class, + request_class=self.request_class, + encoding=encoding, + _urljoin=_rfc3986.urljoin, + _urlparse=_rfc3986.urlsplit, + _urlunparse=_rfc3986.urlunsplit, + ) + except ClientForm.ParseError, exc: + raise ParseError(exc) + self.global_form = forms[0] + return forms[1:] + +class TitleFactory: + def __init__(self): + self._response = self._encoding = None + + def set_response(self, response, encoding): + self._response = response + self._encoding = encoding + + def _get_title_text(self, parser): + import _pullparser + text = [] + tok = None + while 1: + try: + tok = parser.get_token() + except _pullparser.NoMoreTokensError: + break + if tok.type == "data": + text.append(str(tok)) + elif tok.type == "entityref": + t = unescape("&%s;" % tok.data, + parser._entitydefs, parser.encoding) + text.append(t) + elif tok.type == "charref": + t = unescape_charref(tok.data, parser.encoding) + text.append(t) + elif tok.type in ["starttag", "endtag", "startendtag"]: + tag_name = tok.data + if tok.type == "endtag" and tag_name == "title": + break + text.append(str(tok)) + return COMPRESS_RE.sub(" ", "".join(text).strip()) + + def title(self): + import _pullparser + p = _pullparser.TolerantPullParser( + self._response, encoding=self._encoding) + try: + try: + p.get_tag("title") + except _pullparser.NoMoreTokensError: + return None + else: + return self._get_title_text(p) + except sgmllib.SGMLParseError, exc: + raise ParseError(exc) + + +def unescape(data, entities, encoding): + if data is None or "&" not in data: + return data + + def replace_entities(match): + ent = match.group() + if ent[1] == "#": + return unescape_charref(ent[2:-1], encoding) + + repl = entities.get(ent[1:-1]) + if repl is not None: + repl = unichr(repl) + if type(repl) != type(""): + try: + repl = repl.encode(encoding) + except UnicodeError: + repl = ent + else: + repl = ent + return repl + + return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data) + +def unescape_charref(data, encoding): + name, base = data, 10 + if name.startswith("x"): + name, base= name[1:], 16 + uc = unichr(int(name, base)) + if encoding is None: + return uc + else: + try: + repl = uc.encode(encoding) + except UnicodeError: + repl = "&#%s;" % data + return repl + + +# bizarre import gymnastics for bundled BeautifulSoup +import _beautifulsoup +import ClientForm +RobustFormParser, NestingRobustFormParser = ClientForm._create_bs_classes( + _beautifulsoup.BeautifulSoup, 
_beautifulsoup.ICantBelieveItsBeautifulSoup + ) +# monkeypatch sgmllib to fix http://www.python.org/sf/803422 :-( +sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") + +class MechanizeBs(_beautifulsoup.BeautifulSoup): + _entitydefs = htmlentitydefs.name2codepoint + # don't want the magic Microsoft-char workaround + PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda(x):x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda(x):'<!' + x.group(1) + '>') + ] + + def __init__(self, encoding, text=None, avoidParserProblems=True, + initialTextIsEverything=True): + self._encoding = encoding + _beautifulsoup.BeautifulSoup.__init__( + self, text, avoidParserProblems, initialTextIsEverything) + + def handle_charref(self, ref): + t = unescape("&#%s;"%ref, self._entitydefs, self._encoding) + self.handle_data(t) + def handle_entityref(self, ref): + t = unescape("&%s;"%ref, self._entitydefs, self._encoding) + self.handle_data(t) + def unescape_attrs(self, attrs): + escaped_attrs = [] + for key, val in attrs: + val = unescape(val, self._entitydefs, self._encoding) + escaped_attrs.append((key, val)) + return escaped_attrs + +class RobustLinksFactory: + + compress_re = COMPRESS_RE + + def __init__(self, + link_parser_class=None, + link_class=Link, + urltags=None, + ): + if link_parser_class is None: + link_parser_class = MechanizeBs + self.link_parser_class = link_parser_class + self.link_class = link_class + if urltags is None: + urltags = { + "a": "href", + "area": "href", + "frame": "src", + "iframe": "src", + } + self.urltags = urltags + self._bs = None + self._encoding = None + self._base_url = None + + def set_soup(self, soup, base_url, encoding): + self._bs = soup + self._base_url = base_url + self._encoding = encoding + + def links(self): + import _beautifulsoup + bs = self._bs + base_url = self._base_url + encoding = self._encoding + gen = bs.recursiveChildGenerator() + for ch in bs.recursiveChildGenerator(): + if (isinstance(ch, _beautifulsoup.Tag) and + ch.name in self.urltags.keys()+["base"]): + link = ch + attrs = bs.unescape_attrs(link.attrs) + attrs_dict = dict(attrs) + if link.name == "base": + base_href = attrs_dict.get("href") + if base_href is not None: + base_url = base_href + continue + url_attr = self.urltags[link.name] + url = attrs_dict.get(url_attr) + if not url: + continue + url = _rfc3986.clean_url(url, encoding) + text = link.fetchText(lambda t: True) + if not text: + # follow _pullparser's weird behaviour rigidly + if link.name == "a": + text = "" + else: + text = None + else: + text = self.compress_re.sub(" ", " ".join(text).strip()) + yield Link(base_url, url, text, link.name, attrs) + + +class RobustFormsFactory(FormsFactory): + def __init__(self, *args, **kwds): + args = form_parser_args(*args, **kwds) + if args.form_parser_class is None: + args.form_parser_class = RobustFormParser + FormsFactory.__init__(self, **args.dictionary) + + def set_response(self, response, encoding): + self._response = response + self.encoding = encoding + + +class RobustTitleFactory: + def __init__(self): + self._bs = self._encoding = None + + def set_soup(self, soup, encoding): + self._bs = soup + self._encoding = encoding + + def title(self): + import _beautifulsoup + title = self._bs.first("title") + if title == _beautifulsoup.Null: + return None + else: + inner_html = "".join([str(node) for node in title.contents]) + return COMPRESS_RE.sub(" ", inner_html.strip()) + + +class Factory: + """Factory for forms, links, etc. + + This interface may expand in future. 
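+
+ A typical use, as a sketch (DefaultFactory, defined below, is one
+ concrete implementation):
+
+ factory = DefaultFactory()
+ factory.set_response(response)
+ forms = list(factory.forms())
+ links = list(factory.links())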
+ + Public methods: + + set_request_class(request_class) + set_response(response) + forms() + links() + + Public attributes: + + Note that accessing these attributes may raise ParseError. + + encoding: string specifying the encoding of response if it contains a text + document (this value is left unspecified for documents that do not have + an encoding, e.g. an image file) + is_html: true if response contains an HTML document (XHTML may be + regarded as HTML too) + title: page title, or None if no title or not HTML + global_form: form object containing all controls that are not descendants + of any FORM element, or None if the forms_factory does not support + supplying a global form + + """ + + LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"] + + def __init__(self, forms_factory, links_factory, title_factory, + encoding_finder=EncodingFinder(DEFAULT_ENCODING), + response_type_finder=ResponseTypeFinder(allow_xhtml=False), + ): + """ + + Pass keyword arguments only. + + default_encoding: character encoding to use if encoding cannot be + determined (or guessed) from the response. You should turn on + HTTP-EQUIV handling if you want the best chance of getting this right + without resorting to this default. The default value of this + parameter (currently latin-1) may change in future. + + """ + self._forms_factory = forms_factory + self._links_factory = links_factory + self._title_factory = title_factory + self._encoding_finder = encoding_finder + self._response_type_finder = response_type_finder + + self.set_response(None) + + def set_request_class(self, request_class): + """Set urllib2.Request class. + + ClientForm.HTMLForm instances returned by .forms() will return + instances of this class when .click()ed. + + """ + self._forms_factory.request_class = request_class + + def set_response(self, response): + """Set response. + + The response must either be None or implement the same interface as + objects returned by urllib2.urlopen(). + + """ + self._response = response + self._forms_genf = self._links_genf = None + self._get_title = None + for name in self.LAZY_ATTRS: + try: + delattr(self, name) + except AttributeError: + pass + + def __getattr__(self, name): + if name not in self.LAZY_ATTRS: + return getattr(self.__class__, name) + + if name == "encoding": + self.encoding = self._encoding_finder.encoding( + copy.copy(self._response)) + return self.encoding + elif name == "is_html": + self.is_html = self._response_type_finder.is_html( + copy.copy(self._response), self.encoding) + return self.is_html + elif name == "title": + if self.is_html: + self.title = self._title_factory.title() + else: + self.title = None + return self.title + elif name == "global_form": + self.forms() + return self.global_form + + def forms(self): + """Return iterable over ClientForm.HTMLForm-like objects. + + Raises mechanize.ParseError on failure. + """ + # this implementation sets .global_form as a side-effect, for benefit + # of __getattr__ impl + if self._forms_genf is None: + try: + self._forms_genf = CachingGeneratorFunction( + self._forms_factory.forms()) + except: # XXXX define exception! + self.set_response(self._response) + raise + self.global_form = getattr( + self._forms_factory, "global_form", None) + return self._forms_genf() + + def links(self): + """Return iterable over mechanize.Link-like objects. + + Raises mechanize.ParseError on failure. 
+ """ + if self._links_genf is None: + try: + self._links_genf = CachingGeneratorFunction( + self._links_factory.links()) + except: # XXXX define exception! + self.set_response(self._response) + raise + return self._links_genf() + +class DefaultFactory(Factory): + """Based on sgmllib.""" + def __init__(self, i_want_broken_xhtml_support=False): + Factory.__init__( + self, + forms_factory=FormsFactory(), + links_factory=LinksFactory(), + title_factory=TitleFactory(), + response_type_finder=ResponseTypeFinder( + allow_xhtml=i_want_broken_xhtml_support), + ) + + def set_response(self, response): + Factory.set_response(self, response) + if response is not None: + self._forms_factory.set_response( + copy.copy(response), self.encoding) + self._links_factory.set_response( + copy.copy(response), response.geturl(), self.encoding) + self._title_factory.set_response( + copy.copy(response), self.encoding) + +class RobustFactory(Factory): + """Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is + DefaultFactory. + + """ + def __init__(self, i_want_broken_xhtml_support=False, + soup_class=None): + Factory.__init__( + self, + forms_factory=RobustFormsFactory(), + links_factory=RobustLinksFactory(), + title_factory=RobustTitleFactory(), + response_type_finder=ResponseTypeFinder( + allow_xhtml=i_want_broken_xhtml_support), + ) + if soup_class is None: + soup_class = MechanizeBs + self._soup_class = soup_class + + def set_response(self, response): + Factory.set_response(self, response) + if response is not None: + data = response.read() + soup = self._soup_class(self.encoding, data) + self._forms_factory.set_response( + copy.copy(response), self.encoding) + self._links_factory.set_soup( + soup, response.geturl(), self.encoding) + self._title_factory.set_soup(soup, self.encoding) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py new file mode 100644 index 0000000..1b80e2b --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_http.py @@ -0,0 +1,758 @@ +"""HTTP related handlers. + +Note that some other HTTP handlers live in more specific modules: _auth.py, +_gzip.py, etc. + + +Copyright 2002-2006 John J Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). 
+ +""" + +import time, htmlentitydefs, logging, socket, \ + urllib2, urllib, httplib, sgmllib +from urllib2 import URLError, HTTPError, BaseHandler +from cStringIO import StringIO + +from _clientcookie import CookieJar +from _headersutil import is_html +from _html import unescape, unescape_charref +from _request import Request +from _response import closeable_response, response_seek_wrapper +import _rfc3986 +import _sockettimeout + +debug = logging.getLogger("mechanize").debug +debug_robots = logging.getLogger("mechanize.robots").debug + +# monkeypatch urllib2.HTTPError to show URL +## def urllib2_str(self): +## return 'HTTP Error %s: %s (%s)' % ( +## self.code, self.msg, self.geturl()) +## urllib2.HTTPError.__str__ = urllib2_str + + +CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes +DEFAULT_ENCODING = 'latin-1' + + +try: + socket._fileobject("fake socket", close=True) +except TypeError: + # python <= 2.4 + create_readline_wrapper = socket._fileobject +else: + def create_readline_wrapper(fh): + return socket._fileobject(fh, close=True) + + +# This adds "refresh" to the list of redirectables and provides a redirection +# algorithm that doesn't go into a loop in the presence of cookies +# (Python 2.4 has this new algorithm, 2.3 doesn't). +class HTTPRedirectHandler(BaseHandler): + # maximum number of redirections to any single URL + # this is needed because of the state that cookies introduce + max_repeats = 4 + # maximum total number of redirections (regardless of URL) before + # assuming we're in a loop + max_redirections = 10 + + # Implementation notes: + + # To avoid the server sending us into an infinite loop, the request + # object needs to track what URLs we have already seen. Do this by + # adding a handler-specific attribute to the Request object. The value + # of the dict is used to count the number of times the same URL has + # been visited. This is needed because visiting the same URL twice + # does not necessarily imply a loop, thanks to state introduced by + # cookies. + + # Always unhandled redirection codes: + # 300 Multiple Choices: should not handle this here. + # 304 Not Modified: no need to handle here: only of interest to caches + # that do conditional GETs + # 305 Use Proxy: probably not worth dealing with here + # 306 Unused: what was this for in the previous versions of protocol?? + + def redirect_request(self, newurl, req, fp, code, msg, headers): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a redirection + response is received. If a redirection should take place, return a + new Request to allow http_error_30x to perform the redirect; + otherwise, return None to indicate that an HTTPError should be + raised. + + """ + if code in (301, 302, 303, "refresh") or \ + (code == 307 and not req.has_data()): + # Strictly (according to RFC 2616), 301 or 302 in response to + # a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib2, in this case). In practice, + # essentially all clients do redirect in this case, so we do + # the same. 
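+            # (A 307, by contrast, requires the method and body to be
+            # preserved, which is why the test above only follows 307 for
+            # requests that carry no data.)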
+ # XXX really refresh redirections should be visiting; tricky to + # fix, so this will wait until post-stable release + new = Request(newurl, + headers=req.headers, + origin_req_host=req.get_origin_req_host(), + unverifiable=True, + visit=False, + ) + new._origin_req = getattr(req, "_origin_req", req) + return new + else: + raise HTTPError(req.get_full_url(), code, msg, headers, fp) + + def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + if headers.has_key('location'): + newurl = headers.getheaders('location')[0] + elif headers.has_key('uri'): + newurl = headers.getheaders('uri')[0] + else: + return + newurl = _rfc3986.clean_url(newurl, "latin-1") + newurl = _rfc3986.urljoin(req.get_full_url(), newurl) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + new = self.redirect_request(newurl, req, fp, code, msg, headers) + if new is None: + return + + # loop detection + # .redirect_dict has a key url if url was previously visited. + if hasattr(req, 'redirect_dict'): + visited = new.redirect_dict = req.redirect_dict + if (visited.get(newurl, 0) >= self.max_repeats or + len(visited) >= self.max_redirections): + raise HTTPError(req.get_full_url(), code, + self.inf_msg + msg, headers, fp) + else: + visited = new.redirect_dict = req.redirect_dict = {} + visited[newurl] = visited.get(newurl, 0) + 1 + + # Don't close the fp until we are sure that we won't use it + # with HTTPError. + fp.read() + fp.close() + + return self.parent.open(new) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + http_error_refresh = http_error_302 + + inf_msg = "The HTTP server returned a redirect error that would " \ + "lead to an infinite loop.\n" \ + "The last 30x error message was:\n" + + +# XXX would self.reset() work, instead of raising this exception? 
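+# EndOfHeadError exists purely for control flow: the HEAD parsers below
+# raise it as soon as they meet a tag that cannot appear in or before <head>,
+# which lets parse_head() stop feeding data to the parser early.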
+class EndOfHeadError(Exception): pass +class AbstractHeadParser: + # only these elements are allowed in or before HEAD of document + head_elems = ("html", "head", + "title", "base", + "script", "style", "meta", "link", "object") + _entitydefs = htmlentitydefs.name2codepoint + _encoding = DEFAULT_ENCODING + + def __init__(self): + self.http_equiv = [] + + def start_meta(self, attrs): + http_equiv = content = None + for key, value in attrs: + if key == "http-equiv": + http_equiv = self.unescape_attr_if_required(value) + elif key == "content": + content = self.unescape_attr_if_required(value) + if http_equiv is not None and content is not None: + self.http_equiv.append((http_equiv, content)) + + def end_head(self): + raise EndOfHeadError() + + def handle_entityref(self, name): + #debug("%s", name) + self.handle_data(unescape( + '&%s;' % name, self._entitydefs, self._encoding)) + + def handle_charref(self, name): + #debug("%s", name) + self.handle_data(unescape_charref(name, self._encoding)) + + def unescape_attr(self, name): + #debug("%s", name) + return unescape(name, self._entitydefs, self._encoding) + + def unescape_attrs(self, attrs): + #debug("%s", attrs) + escaped_attrs = {} + for key, val in attrs.items(): + escaped_attrs[key] = self.unescape_attr(val) + return escaped_attrs + + def unknown_entityref(self, ref): + self.handle_data("&%s;" % ref) + + def unknown_charref(self, ref): + self.handle_data("&#%s;" % ref) + + +try: + import HTMLParser +except ImportError: + pass +else: + class XHTMLCompatibleHeadParser(AbstractHeadParser, + HTMLParser.HTMLParser): + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + AbstractHeadParser.__init__(self) + + def handle_starttag(self, tag, attrs): + if tag not in self.head_elems: + raise EndOfHeadError() + try: + method = getattr(self, 'start_' + tag) + except AttributeError: + try: + method = getattr(self, 'do_' + tag) + except AttributeError: + pass # unknown tag + else: + method(attrs) + else: + method(attrs) + + def handle_endtag(self, tag): + if tag not in self.head_elems: + raise EndOfHeadError() + try: + method = getattr(self, 'end_' + tag) + except AttributeError: + pass # unknown tag + else: + method() + + def unescape(self, name): + # Use the entitydefs passed into constructor, not + # HTMLParser.HTMLParser's entitydefs. 
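+            # (That table, htmlentitydefs.name2codepoint, covers the full
+            # set of named entities, and the replacement respects
+            # self._encoding.)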
+ return self.unescape_attr(name) + + def unescape_attr_if_required(self, name): + return name # HTMLParser.HTMLParser already did it + +class HeadParser(AbstractHeadParser, sgmllib.SGMLParser): + + def _not_called(self): + assert False + + def __init__(self): + sgmllib.SGMLParser.__init__(self) + AbstractHeadParser.__init__(self) + + def handle_starttag(self, tag, method, attrs): + if tag not in self.head_elems: + raise EndOfHeadError() + if tag == "meta": + method(attrs) + + def unknown_starttag(self, tag, attrs): + self.handle_starttag(tag, self._not_called, attrs) + + def handle_endtag(self, tag, method): + if tag in self.head_elems: + method() + else: + raise EndOfHeadError() + + def unescape_attr_if_required(self, name): + return self.unescape_attr(name) + +def parse_head(fileobj, parser): + """Return a list of key, value pairs.""" + while 1: + data = fileobj.read(CHUNK) + try: + parser.feed(data) + except EndOfHeadError: + break + if len(data) != CHUNK: + # this should only happen if there is no HTML body, or if + # CHUNK is big + break + return parser.http_equiv + +class HTTPEquivProcessor(BaseHandler): + """Append META HTTP-EQUIV headers to regular HTTP headers.""" + + handler_order = 300 # before handlers that look at HTTP headers + + def __init__(self, head_parser_class=HeadParser, + i_want_broken_xhtml_support=False, + ): + self.head_parser_class = head_parser_class + self._allow_xhtml = i_want_broken_xhtml_support + + def http_response(self, request, response): + if not hasattr(response, "seek"): + response = response_seek_wrapper(response) + http_message = response.info() + url = response.geturl() + ct_hdrs = http_message.getheaders("content-type") + if is_html(ct_hdrs, url, self._allow_xhtml): + try: + try: + html_headers = parse_head(response, + self.head_parser_class()) + finally: + response.seek(0) + except (HTMLParser.HTMLParseError, + sgmllib.SGMLParseError): + pass + else: + for hdr, val in html_headers: + # add a header + http_message.dict[hdr.lower()] = val + text = hdr + ": " + val + for line in text.split("\n"): + http_message.headers.append(line + "\n") + return response + + https_response = http_response + +class HTTPCookieProcessor(BaseHandler): + """Handle HTTP cookies. 
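+
+    An illustrative (hypothetical) standalone sketch, assuming the opener
+    is assembled with mechanize.build_opener:
+
+        cj = CookieJar()
+        opener = mechanize.build_opener(HTTPCookieProcessor(cj))
+        opener.open("http://example.com/")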
+ + Public attributes: + + cookiejar: CookieJar instance + + """ + def __init__(self, cookiejar=None): + if cookiejar is None: + cookiejar = CookieJar() + self.cookiejar = cookiejar + + def http_request(self, request): + self.cookiejar.add_cookie_header(request) + return request + + def http_response(self, request, response): + self.cookiejar.extract_cookies(response, request) + return response + + https_request = http_request + https_response = http_response + +try: + import robotparser +except ImportError: + pass +else: + class MechanizeRobotFileParser(robotparser.RobotFileParser): + + def __init__(self, url='', opener=None): + robotparser.RobotFileParser.__init__(self, url) + self._opener = opener + self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT + + def set_opener(self, opener=None): + import _opener + if opener is None: + opener = _opener.OpenerDirector() + self._opener = opener + + def set_timeout(self, timeout): + self._timeout = timeout + + def read(self): + """Reads the robots.txt URL and feeds it to the parser.""" + if self._opener is None: + self.set_opener() + req = Request(self.url, unverifiable=True, visit=False, + timeout=self._timeout) + try: + f = self._opener.open(req) + except HTTPError, f: + pass + except (IOError, socket.error, OSError), exc: + debug_robots("ignoring error opening %r: %s" % + (self.url, exc)) + return + lines = [] + line = f.readline() + while line: + lines.append(line.strip()) + line = f.readline() + status = f.code + if status == 401 or status == 403: + self.disallow_all = True + debug_robots("disallow all") + elif status >= 400: + self.allow_all = True + debug_robots("allow all") + elif status == 200 and lines: + debug_robots("parse lines") + self.parse(lines) + + class RobotExclusionError(urllib2.HTTPError): + def __init__(self, request, *args): + apply(urllib2.HTTPError.__init__, (self,)+args) + self.request = request + + class HTTPRobotRulesProcessor(BaseHandler): + # before redirections, after everything else + handler_order = 800 + + try: + from httplib import HTTPMessage + except: + from mimetools import Message + http_response_class = Message + else: + http_response_class = HTTPMessage + + def __init__(self, rfp_class=MechanizeRobotFileParser): + self.rfp_class = rfp_class + self.rfp = None + self._host = None + + def http_request(self, request): + scheme = request.get_type() + if scheme not in ["http", "https"]: + # robots exclusion only applies to HTTP + return request + + if request.get_selector() == "/robots.txt": + # /robots.txt is always OK to fetch + return request + + host = request.get_host() + + # robots.txt requests don't need to be allowed by robots.txt :-) + origin_req = getattr(request, "_origin_req", None) + if (origin_req is not None and + origin_req.get_selector() == "/robots.txt" and + origin_req.get_host() == host + ): + return request + + if host != self._host: + self.rfp = self.rfp_class() + try: + self.rfp.set_opener(self.parent) + except AttributeError: + debug("%r instance does not support set_opener" % + self.rfp.__class__) + self.rfp.set_url(scheme+"://"+host+"/robots.txt") + self.rfp.set_timeout(request.timeout) + self.rfp.read() + self._host = host + + ua = request.get_header("User-agent", "") + if self.rfp.can_fetch(ua, request.get_full_url()): + return request + else: + # XXX This should really have raised URLError. Too late now... 
+ msg = "request disallowed by robots.txt" + raise RobotExclusionError( + request, + request.get_full_url(), + 403, msg, + self.http_response_class(StringIO()), StringIO(msg)) + + https_request = http_request + +class HTTPRefererProcessor(BaseHandler): + """Add Referer header to requests. + + This only makes sense if you use each RefererProcessor for a single + chain of requests only (so, for example, if you use a single + HTTPRefererProcessor to fetch a series of URLs extracted from a single + page, this will break). + + There's a proper implementation of this in mechanize.Browser. + + """ + def __init__(self): + self.referer = None + + def http_request(self, request): + if ((self.referer is not None) and + not request.has_header("Referer")): + request.add_unredirected_header("Referer", self.referer) + return request + + def http_response(self, request, response): + self.referer = response.geturl() + return response + + https_request = http_request + https_response = http_response + + +def clean_refresh_url(url): + # e.g. Firefox 1.5 does (something like) this + if ((url.startswith('"') and url.endswith('"')) or + (url.startswith("'") and url.endswith("'"))): + url = url[1:-1] + return _rfc3986.clean_url(url, "latin-1") # XXX encoding + +def parse_refresh_header(refresh): + """ + >>> parse_refresh_header("1; url=http://example.com/") + (1.0, 'http://example.com/') + >>> parse_refresh_header("1; url='http://example.com/'") + (1.0, 'http://example.com/') + >>> parse_refresh_header("1") + (1.0, None) + >>> parse_refresh_header("blah") + Traceback (most recent call last): + ValueError: invalid literal for float(): blah + + """ + + ii = refresh.find(";") + if ii != -1: + pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:] + jj = newurl_spec.find("=") + key = None + if jj != -1: + key, newurl = newurl_spec[:jj], newurl_spec[jj+1:] + newurl = clean_refresh_url(newurl) + if key is None or key.strip().lower() != "url": + raise ValueError() + else: + pause, newurl = float(refresh), None + return pause, newurl + +class HTTPRefreshProcessor(BaseHandler): + """Perform HTTP Refresh redirections. + + Note that if a non-200 HTTP code has occurred (for example, a 30x + redirect), this processor will do nothing. + + By default, only zero-time Refresh headers are redirected. Use the + max_time attribute / constructor argument to allow Refresh with longer + pauses. Use the honor_time attribute / constructor argument to control + whether the requested pause is honoured (with a time.sleep()) or + skipped in favour of immediate redirection. 
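+
+    For example (hypothetical values), given "Refresh: 5;
+    url=http://example.com/next": the default max_time of 0 ignores the
+    header; max_time=10 with honor_time=True sleeps five seconds and then
+    redirects; max_time=10 with honor_time=False redirects immediately.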
+
+    Public attributes:
+
+    max_time: see above
+    honor_time: see above
+
+    """
+    handler_order = 1000
+
+    def __init__(self, max_time=0, honor_time=True):
+        self.max_time = max_time
+        self.honor_time = honor_time
+        self._sleep = time.sleep
+
+    def http_response(self, request, response):
+        code, msg, hdrs = response.code, response.msg, response.info()
+
+        if code == 200 and hdrs.has_key("refresh"):
+            refresh = hdrs.getheaders("refresh")[0]
+            try:
+                pause, newurl = parse_refresh_header(refresh)
+            except ValueError:
+                debug("bad Refresh header: %r" % refresh)
+                return response
+
+            if newurl is None:
+                newurl = response.geturl()
+            if (self.max_time is None) or (pause <= self.max_time):
+                if pause > 1E-3 and self.honor_time:
+                    self._sleep(pause)
+                hdrs["location"] = newurl
+                # hardcoded http is NOT a bug
+                response = self.parent.error(
+                    "http", request, response,
+                    "refresh", msg, hdrs)
+            else:
+                debug("Refresh header ignored: %r" % refresh)
+
+        return response
+
+    https_response = http_response
+
+class HTTPErrorProcessor(BaseHandler):
+    """Process HTTP error responses.
+
+    The purpose of this handler is to allow other response processors a
+    look-in by removing the call to parent.error() from
+    AbstractHTTPHandler.
+
+    For non-200 error codes, this just passes the job on to the
+    Handler.<proto>_error_<code> methods, via the OpenerDirector.error
+    method.  Eventually, urllib2.HTTPDefaultErrorHandler will raise an
+    HTTPError if no other handler handles the error.
+
+    """
+    handler_order = 1000  # after all other processors
+
+    def http_response(self, request, response):
+        code, msg, hdrs = response.code, response.msg, response.info()
+
+        if code != 200:
+            # hardcoded http is NOT a bug
+            response = self.parent.error(
+                "http", request, response, code, msg, hdrs)
+
+        return response
+
+    https_response = http_response
+
+
+class HTTPDefaultErrorHandler(BaseHandler):
+    def http_error_default(self, req, fp, code, msg, hdrs):
+        # why these error methods took the code, msg, headers args in the first
+        # place rather than a response object, I don't know, but to avoid
+        # multiple wrapping, we're discarding them
+
+        if isinstance(fp, urllib2.HTTPError):
+            response = fp
+        else:
+            response = urllib2.HTTPError(
+                req.get_full_url(), code, msg, hdrs, fp)
+        assert code == response.code
+        assert msg == response.msg
+        assert hdrs == response.hdrs
+        raise response
+
+
+class AbstractHTTPHandler(BaseHandler):
+
+    def __init__(self, debuglevel=0):
+        self._debuglevel = debuglevel
+
+    def set_http_debuglevel(self, level):
+        self._debuglevel = level
+
+    def do_request_(self, request):
+        host = request.get_host()
+        if not host:
+            raise URLError('no host given')
+
+        if request.has_data():  # POST
+            data = request.get_data()
+            if not request.has_header('Content-type'):
+                request.add_unredirected_header(
+                    'Content-type',
+                    'application/x-www-form-urlencoded')
+            if not request.has_header('Content-length'):
+                request.add_unredirected_header(
+                    'Content-length', '%d' % len(data))
+
+        scheme, sel = urllib.splittype(request.get_selector())
+        sel_host, sel_path = urllib.splithost(sel)
+        if not request.has_header('Host'):
+            request.add_unredirected_header('Host', sel_host or host)
+        for name, value in self.parent.addheaders:
+            name = name.capitalize()
+            if not request.has_header(name):
+                request.add_unredirected_header(name, value)
+
+        return request
+
+    def do_open(self, http_class, req):
+        """Return an addinfourl object for the request, using http_class.
+
+        http_class must implement the HTTPConnection API from httplib.
+ The addinfourl return value is a file-like object. It also + has methods and attributes including: + - info(): return a mimetools.Message object for the headers + - geturl(): return the original request URL + - code: HTTP status code + """ + host_port = req.get_host() + if not host_port: + raise URLError('no host given') + + try: + h = http_class(host_port, timeout=req.timeout) + except TypeError: + # Python < 2.6, no per-connection timeout support + h = http_class(host_port) + h.set_debuglevel(self._debuglevel) + + headers = dict(req.headers) + headers.update(req.unredirected_hdrs) + # We want to make an HTTP/1.1 request, but the addinfourl + # class isn't prepared to deal with a persistent connection. + # It will try to read all remaining data from the socket, + # which will block while the server waits for the next request. + # So make sure the connection gets closed after the (only) + # request. + headers["Connection"] = "close" + headers = dict( + [(name.title(), val) for name, val in headers.items()]) + try: + h.request(req.get_method(), req.get_selector(), req.data, headers) + r = h.getresponse() + except socket.error, err: # XXX what error? + raise URLError(err) + + # Pick apart the HTTPResponse object to get the addinfourl + # object initialized properly. + + # Wrap the HTTPResponse object in socket's file object adapter + # for Windows. That adapter calls recv(), so delegate recv() + # to read(). This weird wrapping allows the returned object to + # have readline() and readlines() methods. + + # XXX It might be better to extract the read buffering code + # out of socket._fileobject() and into a base class. + + r.recv = r.read + fp = create_readline_wrapper(r) + + resp = closeable_response(fp, r.msg, req.get_full_url(), + r.status, r.reason) + return resp + + +class HTTPHandler(AbstractHTTPHandler): + def http_open(self, req): + return self.do_open(httplib.HTTPConnection, req) + + http_request = AbstractHTTPHandler.do_request_ + +if hasattr(httplib, 'HTTPS'): + + class HTTPSConnectionFactory: + def __init__(self, key_file, cert_file): + self._key_file = key_file + self._cert_file = cert_file + def __call__(self, hostport): + return httplib.HTTPSConnection( + hostport, + key_file=self._key_file, cert_file=self._cert_file) + + class HTTPSHandler(AbstractHTTPHandler): + def __init__(self, client_cert_manager=None): + AbstractHTTPHandler.__init__(self) + self.client_cert_manager = client_cert_manager + + def https_open(self, req): + if self.client_cert_manager is not None: + key_file, cert_file = self.client_cert_manager.find_key_cert( + req.get_full_url()) + conn_factory = HTTPSConnectionFactory(key_file, cert_file) + else: + conn_factory = httplib.HTTPSConnection + return self.do_open(conn_factory, req) + + https_request = AbstractHTTPHandler.do_request_ diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py new file mode 100644 index 0000000..f8d49cf --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_lwpcookiejar.py @@ -0,0 +1,185 @@ +"""Load / save to libwww-perl (LWP) format files. + +Actually, the format is slightly extended from that used by LWP's +(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information +not recorded by LWP. + +It uses the version string "2.0", though really there isn't an LWP Cookies +2.0 format. 
This indicates that there is extra information in here
+(domain_dot and port_spec) while still being compatible with libwww-perl,
+I hope.
+
+Copyright 2002-2006 John J Lee <jjl@pobox.com>
+Copyright 1997-1999 Gisle Aas (original libwww-perl code)
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD or ZPL 2.1 licenses (see the file
+COPYING.txt included with the distribution).
+
+"""
+
+import time, re, logging
+
+from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
+     MISSING_FILENAME_TEXT, LoadError
+from _headersutil import join_header_words, split_header_words
+from _util import iso2time, time2isoz
+
+debug = logging.getLogger("mechanize").debug
+
+
+def lwp_cookie_str(cookie):
+    """Return string representation of Cookie in the LWP cookie file format.
+
+    Actually, the format is extended a bit -- see module docstring.
+
+    """
+    h = [(cookie.name, cookie.value),
+         ("path", cookie.path),
+         ("domain", cookie.domain)]
+    if cookie.port is not None: h.append(("port", cookie.port))
+    if cookie.path_specified: h.append(("path_spec", None))
+    if cookie.port_specified: h.append(("port_spec", None))
+    if cookie.domain_initial_dot: h.append(("domain_dot", None))
+    if cookie.secure: h.append(("secure", None))
+    if cookie.expires: h.append(("expires",
+                               time2isoz(float(cookie.expires))))
+    if cookie.discard: h.append(("discard", None))
+    if cookie.comment: h.append(("comment", cookie.comment))
+    if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+    if cookie.rfc2109: h.append(("rfc2109", None))
+
+    keys = cookie.nonstandard_attr_keys()
+    keys.sort()
+    for k in keys:
+        h.append((k, str(cookie.get_nonstandard_attr(k))))
+
+    h.append(("version", str(cookie.version)))
+
+    return join_header_words([h])
+
+class LWPCookieJar(FileCookieJar):
+    """
+    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
+    "Set-Cookie3" is the format used by the libwww-perl library, not known
+    to be compatible with any browser, but which is easy to read and
+    doesn't lose information about RFC 2965 cookies.
+
+    Additional methods
+
+    as_lwp_str(ignore_discard=True, ignore_expires=True)
+
+    """
+
+    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
+
+    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+        """Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
+
+        ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+        """
+        now = time.time()
+        r = []
+        for cookie in self:
+            if not ignore_discard and cookie.discard:
+                debug("   Not saving %s: marked for discard", cookie.name)
+                continue
+            if not ignore_expires and cookie.is_expired(now):
+                debug("   Not saving %s: expired", cookie.name)
+                continue
+            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+        return "\n".join(r+[""])
+
+    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+        if filename is None:
+            if self.filename is not None: filename = self.filename
+            else: raise ValueError(MISSING_FILENAME_TEXT)
+
+        f = open(filename, "w")
+        try:
+            debug("Saving LWP cookies file")
+            # There really isn't an LWP Cookies 2.0 format, but this indicates
+            # that there is extra information in here (domain_dot and
+            # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n") + f.write(self.as_lwp_str(ignore_discard, ignore_expires)) + finally: + f.close() + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + magic = f.readline() + if not re.search(self.magic_re, magic): + msg = "%s does not seem to contain cookies" % filename + raise LoadError(msg) + + now = time.time() + + header = "Set-Cookie3:" + boolean_attrs = ("port_spec", "path_spec", "domain_dot", + "secure", "discard", "rfc2109") + value_attrs = ("version", + "port", "path", "domain", + "expires", + "comment", "commenturl") + + try: + while 1: + line = f.readline() + if line == "": break + if not line.startswith(header): + continue + line = line[len(header):].strip() + + for data in split_header_words([line]): + name, value = data[0] + standard = {} + rest = {} + for k in boolean_attrs: + standard[k] = False + for k, v in data[1:]: + if k is not None: + lc = k.lower() + else: + lc = None + # don't lose case distinction for unknown fields + if (lc in value_attrs) or (lc in boolean_attrs): + k = lc + if k in boolean_attrs: + if v is None: v = True + standard[k] = v + elif k in value_attrs: + standard[k] = v + else: + rest[k] = v + + h = standard.get + expires = h("expires") + discard = h("discard") + if expires is not None: + expires = iso2time(expires) + if expires is None: + discard = True + domain = h("domain") + domain_specified = domain.startswith(".") + c = Cookie(h("version"), name, value, + h("port"), h("port_spec"), + domain, domain_specified, h("domain_dot"), + h("path"), h("path_spec"), + h("secure"), + expires, + discard, + h("comment"), + h("commenturl"), + rest, + h("rfc2109"), + ) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + except: + reraise_unmasked_exceptions((IOError,)) + raise LoadError("invalid Set-Cookie3 format file %s" % filename) + diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py new file mode 100644 index 0000000..ad729c9 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mechanize.py @@ -0,0 +1,676 @@ +"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize. + +Copyright 2003-2006 John J. Lee <jjl@pobox.com> +Copyright 2003 Andy Lester (original Perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). + +""" + +import urllib2, copy, re, os, urllib + + +from _html import DefaultFactory +import _response +import _request +import _rfc3986 +import _sockettimeout +from _useragent import UserAgentBase + +__version__ = (0, 1, 11, None, None) # 0.1.11 + +class BrowserStateError(Exception): pass +class LinkNotFoundError(Exception): pass +class FormNotFoundError(Exception): pass + + +def sanepathname2url(path): + urlpath = urllib.pathname2url(path) + if os.name == "nt" and urlpath.startswith("///"): + urlpath = urlpath[2:] + # XXX don't ask me about the mac... + return urlpath + + +class History: + """ + + Though this will become public, the implied interface is not yet stable. + + """ + def __init__(self): + self._history = [] # LIFO + def add(self, request, response): + self._history.append((request, response)) + def back(self, n, _response): + response = _response # XXX move Browser._response into this class? 
+ while n > 0 or response is None: + try: + request, response = self._history.pop() + except IndexError: + raise BrowserStateError("already at start of history") + n -= 1 + return request, response + def clear(self): + del self._history[:] + def close(self): + for request, response in self._history: + if response is not None: + response.close() + del self._history[:] + + +class HTTPRefererProcessor(urllib2.BaseHandler): + def http_request(self, request): + # See RFC 2616 14.36. The only times we know the source of the + # request URI has a URI associated with it are redirect, and + # Browser.click() / Browser.submit() / Browser.follow_link(). + # Otherwise, it's the user's job to add any Referer header before + # .open()ing. + if hasattr(request, "redirect_dict"): + request = self.parent._add_referer_header( + request, origin_request=False) + return request + + https_request = http_request + + +class Browser(UserAgentBase): + """Browser-like class with support for history, forms and links. + + BrowserStateError is raised whenever the browser is in the wrong state to + complete the requested operation - eg., when .back() is called when the + browser history is empty, or when .follow_link() is called when the current + response does not contain HTML data. + + Public attributes: + + request: current request (mechanize.Request or urllib2.Request) + form: currently selected form (see .select_form()) + + """ + + handler_classes = copy.copy(UserAgentBase.handler_classes) + handler_classes["_referer"] = HTTPRefererProcessor + default_features = copy.copy(UserAgentBase.default_features) + default_features.append("_referer") + + def __init__(self, + factory=None, + history=None, + request_class=None, + ): + """ + + Only named arguments should be passed to this constructor. + + factory: object implementing the mechanize.Factory interface. + history: object implementing the mechanize.History interface. Note + this interface is still experimental and may change in future. + request_class: Request class to use. Defaults to mechanize.Request + by default for Pythons older than 2.4, urllib2.Request otherwise. + + The Factory and History objects passed in are 'owned' by the Browser, + so they should not be shared across Browsers. In particular, + factory.set_response() should not be called except by the owning + Browser itself. + + Note that the supplied factory's request_class is overridden by this + constructor, to ensure only one Request class is used. 
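+
+        A minimal (hypothetical) construction:
+
+            br = Browser()
+            response = br.open("http://example.com/")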
+ + """ + self._handle_referer = True + + if history is None: + history = History() + self._history = history + + if request_class is None: + if not hasattr(urllib2.Request, "add_unredirected_header"): + request_class = _request.Request + else: + request_class = urllib2.Request # Python >= 2.4 + + if factory is None: + factory = DefaultFactory() + factory.set_request_class(request_class) + self._factory = factory + self.request_class = request_class + + self.request = None + self._set_response(None, False) + + # do this last to avoid __getattr__ problems + UserAgentBase.__init__(self) + + def close(self): + UserAgentBase.close(self) + if self._response is not None: + self._response.close() + if self._history is not None: + self._history.close() + self._history = None + + # make use after .close easy to spot + self.form = None + self.request = self._response = None + self.request = self.response = self.set_response = None + self.geturl = self.reload = self.back = None + self.clear_history = self.set_cookie = self.links = self.forms = None + self.viewing_html = self.encoding = self.title = None + self.select_form = self.click = self.submit = self.click_link = None + self.follow_link = self.find_link = None + + def set_handle_referer(self, handle): + """Set whether to add Referer header to each request.""" + self._set_handler("_referer", handle) + self._handle_referer = bool(handle) + + def _add_referer_header(self, request, origin_request=True): + if self.request is None: + return request + scheme = request.get_type() + original_scheme = self.request.get_type() + if scheme not in ["http", "https"]: + return request + if not origin_request and not self.request.has_header("Referer"): + return request + + if (self._handle_referer and + original_scheme in ["http", "https"] and + not (original_scheme == "https" and scheme != "https")): + # strip URL fragment (RFC 2616 14.36) + parts = _rfc3986.urlsplit(self.request.get_full_url()) + parts = parts[:-1]+(None,) + referer = _rfc3986.urlunsplit(parts) + request.add_unredirected_header("Referer", referer) + return request + + def open_novisit(self, url, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + """Open a URL without visiting it. + + Browser state (including request, response, history, forms and links) + is left unchanged by calling this function. + + The interface is the same as for .open(). + + This is useful for things like fetching images. + + See also .retrieve(). 
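+
+        For example (hypothetical URL):
+
+            image_data = br.open_novisit("http://example.com/logo.png").read()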
+ + """ + return self._mech_open(url, data, visit=False, timeout=timeout) + + def open(self, url, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + return self._mech_open(url, data, timeout=timeout) + + def _mech_open(self, url, data=None, update_history=True, visit=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + try: + url.get_full_url + except AttributeError: + # string URL -- convert to absolute URL if required + scheme, authority = _rfc3986.urlsplit(url)[:2] + if scheme is None: + # relative URL + if self._response is None: + raise BrowserStateError( + "can't fetch relative reference: " + "not viewing any document") + url = _rfc3986.urljoin(self._response.geturl(), url) + + request = self._request(url, data, visit, timeout) + visit = request.visit + if visit is None: + visit = True + + if visit: + self._visit_request(request, update_history) + + success = True + try: + response = UserAgentBase.open(self, request, data) + except urllib2.HTTPError, error: + success = False + if error.fp is None: # not a response + raise + response = error +## except (IOError, socket.error, OSError), error: +## # Yes, urllib2 really does raise all these :-(( +## # See test_urllib2.py for examples of socket.gaierror and OSError, +## # plus note that FTPHandler raises IOError. +## # XXX I don't seem to have an example of exactly socket.error being +## # raised, only socket.gaierror... +## # I don't want to start fixing these here, though, since this is a +## # subclass of OpenerDirector, and it would break old code. Even in +## # Python core, a fix would need some backwards-compat. hack to be +## # acceptable. +## raise + + if visit: + self._set_response(response, False) + response = copy.copy(self._response) + elif response is not None: + response = _response.upgrade_response(response) + + if not success: + raise response + return response + + def __str__(self): + text = [] + text.append("<%s " % self.__class__.__name__) + if self._response: + text.append("visiting %s" % self._response.geturl()) + else: + text.append("(not visiting a URL)") + if self.form: + text.append("\n selected form:\n %s\n" % str(self.form)) + text.append(">") + return "".join(text) + + def response(self): + """Return a copy of the current response. + + The returned object has the same interface as the object returned by + .open() (or urllib2.urlopen()). + + """ + return copy.copy(self._response) + + def open_local_file(self, filename): + path = sanepathname2url(os.path.abspath(filename)) + url = 'file://'+path + return self.open(url) + + def set_response(self, response): + """Replace current response with (a copy of) response. + + response may be None. + + This is intended mostly for HTML-preprocessing. + """ + self._set_response(response, True) + + def _set_response(self, response, close_current): + # sanity check, necessary but far from sufficient + if not (response is None or + (hasattr(response, "info") and hasattr(response, "geturl") and + hasattr(response, "read") + ) + ): + raise ValueError("not a response object") + + self.form = None + if response is not None: + response = _response.upgrade_response(response) + if close_current and self._response is not None: + self._response.close() + self._response = response + self._factory.set_response(response) + + def visit_response(self, response, request=None): + """Visit the response, as if it had been .open()ed. + + Unlike .set_response(), this updates history rather than replacing the + current response. 
+ """ + if request is None: + request = _request.Request(response.geturl()) + self._visit_request(request, True) + self._set_response(response, False) + + def _visit_request(self, request, update_history): + if self._response is not None: + self._response.close() + if self.request is not None and update_history: + self._history.add(self.request, self._response) + self._response = None + # we want self.request to be assigned even if UserAgentBase.open + # fails + self.request = request + + def geturl(self): + """Get URL of current document.""" + if self._response is None: + raise BrowserStateError("not viewing any document") + return self._response.geturl() + + def reload(self): + """Reload current document, and return response object.""" + if self.request is None: + raise BrowserStateError("no URL has yet been .open()ed") + if self._response is not None: + self._response.close() + return self._mech_open(self.request, update_history=False) + + def back(self, n=1): + """Go back n steps in history, and return response object. + + n: go back this number of steps (default 1 step) + + """ + if self._response is not None: + self._response.close() + self.request, response = self._history.back(n, self._response) + self.set_response(response) + if not response.read_complete: + return self.reload() + return copy.copy(response) + + def clear_history(self): + self._history.clear() + + def set_cookie(self, cookie_string): + """Request to set a cookie. + + Note that it is NOT necessary to call this method under ordinary + circumstances: cookie handling is normally entirely automatic. The + intended use case is rather to simulate the setting of a cookie by + client script in a web page (e.g. JavaScript). In that case, use of + this method is necessary because mechanize currently does not support + JavaScript, VBScript, etc. + + The cookie is added in the same way as if it had arrived with the + current response, as a result of the current request. This means that, + for example, if it is not appropriate to set the cookie based on the + current request, no cookie will be set. + + The cookie will be returned automatically with subsequent responses + made by the Browser instance whenever that's appropriate. + + cookie_string should be a valid value of the Set-Cookie header. + + For example: + + browser.set_cookie( + "sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT") + + Currently, this method does not allow for adding RFC 2986 cookies. + This limitation will be lifted if anybody requests it. + + """ + if self._response is None: + raise BrowserStateError("not viewing any document") + if self.request.get_type() not in ["http", "https"]: + raise BrowserStateError("can't set cookie for non-HTTP/HTTPS " + "transactions") + cookiejar = self._ua_handlers["_cookies"].cookiejar + response = self.response() # copy + headers = response.info() + headers["Set-cookie"] = cookie_string + cookiejar.extract_cookies(response, self.request) + + def links(self, **kwds): + """Return iterable over links (mechanize.Link objects).""" + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + links = self._factory.links() + if kwds: + return self._filter_links(links, **kwds) + else: + return links + + def forms(self): + """Return iterable over forms. + + The returned form objects implement the ClientForm.HTMLForm interface. 
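+
+        For example (hypothetical page):
+
+            for form in br.forms():
+                print form.name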
+ + """ + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + return self._factory.forms() + + def global_form(self): + """Return the global form object, or None if the factory implementation + did not supply one. + + The "global" form object contains all controls that are not descendants + of any FORM element. + + The returned form object implements the ClientForm.HTMLForm interface. + + This is a separate method since the global form is not regarded as part + of the sequence of forms in the document -- mostly for + backwards-compatibility. + + """ + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + return self._factory.global_form + + def viewing_html(self): + """Return whether the current response contains HTML data.""" + if self._response is None: + raise BrowserStateError("not viewing any document") + return self._factory.is_html + + def encoding(self): + if self._response is None: + raise BrowserStateError("not viewing any document") + return self._factory.encoding + + def title(self): + r"""Return title, or None if there is no title element in the document. + + Treatment of any tag children of attempts to follow Firefox and IE + (currently, tags are preserved). + + """ + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + return self._factory.title + + def select_form(self, name=None, predicate=None, nr=None): + """Select an HTML form for input. + + This is a bit like giving a form the "input focus" in a browser. + + If a form is selected, the Browser object supports the HTMLForm + interface, so you can call methods like .set_value(), .set(), and + .click(). + + Another way to select a form is to assign to the .form attribute. The + form assigned should be one of the objects returned by the .forms() + method. + + At least one of the name, predicate and nr arguments must be supplied. + If no matching form is found, mechanize.FormNotFoundError is raised. + + If name is specified, then the form must have the indicated name. + + If predicate is specified, then the form must match that function. The + predicate function is passed the HTMLForm as its single argument, and + should return a boolean value indicating whether the form matched. + + nr, if supplied, is the sequence number of the form (where 0 is the + first). Note that control 0 is the first form matching all the other + arguments (if supplied); it is not necessarily the first control in the + form. The "global form" (consisting of all form controls not contained + in any FORM element) is considered not to be part of this sequence and + to have no name, so will not be matched unless both name and nr are + None. 
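+
+        A typical (hypothetical) session; item assignment is delegated to
+        the selected HTMLForm:
+
+            br.select_form(name="login")
+            br["user"] = "jjl"
+            response = br.submit()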
+ + """ + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + if (name is None) and (predicate is None) and (nr is None): + raise ValueError( + "at least one argument must be supplied to specify form") + + global_form = self._factory.global_form + if nr is None and name is None and \ + predicate is not None and predicate(global_form): + self.form = global_form + return + + orig_nr = nr + for form in self.forms(): + if name is not None and name != form.name: + continue + if predicate is not None and not predicate(form): + continue + if nr: + nr -= 1 + continue + self.form = form + break # success + else: + # failure + description = [] + if name is not None: description.append("name '%s'" % name) + if predicate is not None: + description.append("predicate %s" % predicate) + if orig_nr is not None: description.append("nr %d" % orig_nr) + description = ", ".join(description) + raise FormNotFoundError("no form matching "+description) + + def click(self, *args, **kwds): + """See ClientForm.HTMLForm.click for documentation.""" + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + request = self.form.click(*args, **kwds) + return self._add_referer_header(request) + + def submit(self, *args, **kwds): + """Submit current form. + + Arguments are as for ClientForm.HTMLForm.click(). + + Return value is same as for Browser.open(). + + """ + return self.open(self.click(*args, **kwds)) + + def click_link(self, link=None, **kwds): + """Find a link and return a Request object for it. + + Arguments are as for .find_link(), except that a link may be supplied + as the first argument. + + """ + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + if not link: + link = self.find_link(**kwds) + else: + if kwds: + raise ValueError( + "either pass a Link, or keyword arguments, not both") + request = self.request_class(link.absolute_url) + return self._add_referer_header(request) + + def follow_link(self, link=None, **kwds): + """Find a link and .open() it. + + Arguments are as for .click_link(). + + Return value is same as for Browser.open(). + + """ + return self.open(self.click_link(link, **kwds)) + + def find_link(self, **kwds): + """Find a link in current page. + + Links are returned as mechanize.Link objects. + + # Return third link that .search()-matches the regexp "python" + # (by ".search()-matches", I mean that the regular expression method + # .search() is used, rather than .match()). + find_link(text_regex=re.compile("python"), nr=2) + + # Return first http link in the current page that points to somewhere + # on python.org whose link text (after tags have been removed) is + # exactly "monty python". + find_link(text="monty python", + url_regex=re.compile("http.*python.org")) + + # Return first link with exactly three HTML attributes. + find_link(predicate=lambda link: len(link.attrs) == 3) + + Links include anchors (<a>), image maps (<area>), and frames (<frame>, + <iframe>). + + All arguments must be passed by keyword, not position. Zero or more + arguments may be supplied. In order to find a link, all arguments + supplied must match. + + If a matching link is not found, mechanize.LinkNotFoundError is raised. + + text: link text between link tags: eg. <a href="blah">this bit</a> (as + returned by pullparser.get_compressed_text(), ie. 
without tags but + with opening tags "textified" as per the pullparser docs) must compare + equal to this argument, if supplied + text_regex: link text between tag (as defined above) must match the + regular expression object or regular expression string passed as this + argument, if supplied + name, name_regex: as for text and text_regex, but matched against the + name HTML attribute of the link tag + url, url_regex: as for text and text_regex, but matched against the + URL of the link tag (note this matches against Link.url, which is a + relative or absolute URL according to how it was written in the HTML) + tag: element name of opening tag, eg. "a" + predicate: a function taking a Link object as its single argument, + returning a boolean result, indicating whether the links + nr: matches the nth link that matches all other criteria (default 0) + + """ + try: + return self._filter_links(self._factory.links(), **kwds).next() + except StopIteration: + raise LinkNotFoundError() + + def __getattr__(self, name): + # pass through ClientForm / DOMForm methods and attributes + form = self.__dict__.get("form") + if form is None: + raise AttributeError( + "%s instance has no attribute %s (perhaps you forgot to " + ".select_form()?)" % (self.__class__, name)) + return getattr(form, name) + + def _filter_links(self, links, + text=None, text_regex=None, + name=None, name_regex=None, + url=None, url_regex=None, + tag=None, + predicate=None, + nr=0 + ): + if not self.viewing_html(): + raise BrowserStateError("not viewing HTML") + + found_links = [] + orig_nr = nr + + for link in links: + if url is not None and url != link.url: + continue + if url_regex is not None and not re.search(url_regex, link.url): + continue + if (text is not None and + (link.text is None or text != link.text)): + continue + if (text_regex is not None and + (link.text is None or not re.search(text_regex, link.text))): + continue + if name is not None and name != dict(link.attrs).get("name"): + continue + if name_regex is not None: + link_name = dict(link.attrs).get("name") + if link_name is None or not re.search(name_regex, link_name): + continue + if tag is not None and tag != link.tag: + continue + if predicate is not None and not predicate(link): + continue + if nr: + nr -= 1 + continue + yield link + nr = orig_nr diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py new file mode 100644 index 0000000..51e81bb --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_mozillacookiejar.py @@ -0,0 +1,161 @@ +"""Mozilla / Netscape cookie loading / saving. + +Copyright 2002-2006 John J Lee <jjl@pobox.com> +Copyright 1997-1999 Gisle Aas (original libwww-perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import re, time, logging + +from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \ + MISSING_FILENAME_TEXT, LoadError +debug = logging.getLogger("ClientCookie").debug + + +class MozillaCookieJar(FileCookieJar): + """ + + WARNING: you may want to backup your browser's cookies file if you use + this class to save cookies. I *think* it works, but there have been + bugs in the past! + + This class differs from CookieJar only in the format it uses to save and + load cookies to and from a file. 
This class uses the Mozilla/Netscape + `cookies.txt' format. lynx uses this file format, too. + + Don't expect cookies saved while the browser is running to be noticed by + the browser (in fact, Mozilla on unix will overwrite your saved cookies if + you change them on disk while it's running; on Windows, you probably can't + save at all while the browser is running). + + Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to + Netscape cookies on saving. + + In particular, the cookie version and port number information is lost, + together with information about whether or not Path, Port and Discard were + specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the + domain as set in the HTTP header started with a dot (yes, I'm aware some + domains in Netscape files start with a dot and some don't -- trust me, you + really don't want to know any more about this). + + Note that though Mozilla and Netscape use the same format, they use + slightly different headers. The class saves cookies using the Netscape + header by default (Mozilla can cope with that). + + """ + magic_re = "#( Netscape)? HTTP Cookie File" + header = """\ + # Netscape HTTP Cookie File + # http://www.netscape.com/newsref/std/cookie_spec.html + # This is a generated file! Do not edit. + +""" + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + now = time.time() + + magic = f.readline() + if not re.search(self.magic_re, magic): + f.close() + raise LoadError( + "%s does not look like a Netscape format cookies file" % + filename) + + try: + while 1: + line = f.readline() + if line == "": break + + # last field may be absent, so keep any trailing tab + if line.endswith("\n"): line = line[:-1] + + # skip comments and blank lines XXX what is $ for? 
+ if (line.strip().startswith("#") or + line.strip().startswith("$") or + line.strip() == ""): + continue + + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t", 6) + secure = (secure == "TRUE") + domain_specified = (domain_specified == "TRUE") + if name == "": + name = value + value = None + + initial_dot = domain.startswith(".") + if domain_specified != initial_dot: + raise LoadError("domain and domain specified flag don't " + "match in %s: %s" % (filename, line)) + + discard = False + if expires == "": + expires = None + discard = True + + # assume path_specified is false + c = Cookie(0, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + {}) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except: + reraise_unmasked_exceptions((IOError, LoadError)) + raise LoadError("invalid Netscape format file %s: %s" % + (filename, line)) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename, "w") + try: + debug("Saving Netscape cookies.txt file") + f.write(self.header) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + debug(" Not saving %s: marked for discard", cookie.name) + continue + if not ignore_expires and cookie.is_expired(now): + debug(" Not saving %s: expired", cookie.name) + continue + if cookie.secure: secure = "TRUE" + else: secure = "FALSE" + if cookie.domain.startswith("."): initial_dot = "TRUE" + else: initial_dot = "FALSE" + if cookie.expires is not None: + expires = str(cookie.expires) + else: + expires = "" + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas cookielib regards it as a + # cookie with no value. + name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + "\t".join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value])+ + "\n") + finally: + f.close() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py new file mode 100644 index 0000000..1057811 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_msiecookiejar.py @@ -0,0 +1,388 @@ +"""Microsoft Internet Explorer cookie loading on Windows. + +Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code) +Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). 
+ +""" + +# XXX names and comments are not great here + +import os, re, time, struct, logging +if os.name == "nt": + import _winreg + +from _clientcookie import FileCookieJar, CookieJar, Cookie, \ + MISSING_FILENAME_TEXT, LoadError + +debug = logging.getLogger("mechanize").debug + + +def regload(path, leaf): + key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0, + _winreg.KEY_ALL_ACCESS) + try: + value = _winreg.QueryValueEx(key, leaf)[0] + except WindowsError: + value = None + return value + +WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME + +def epoch_time_offset_from_win32_filetime(filetime): + """Convert from win32 filetime to seconds-since-epoch value. + + MSIE stores create and expire times as Win32 FILETIME, which is 64 + bits of 100 nanosecond intervals since Jan 01 1601. + + mechanize expects time in 32-bit value expressed in seconds since the + epoch (Jan 01 1970). + + """ + if filetime < WIN32_EPOCH: + raise ValueError("filetime (%d) is before epoch (%d)" % + (filetime, WIN32_EPOCH)) + + return divmod((filetime - WIN32_EPOCH), 10000000L)[0] + +def binary_to_char(c): return "%02X" % ord(c) +def binary_to_str(d): return "".join(map(binary_to_char, list(d))) + +class MSIEBase: + magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*") + padding = "\x0d\xf0\xad\x0b" + + msie_domain_re = re.compile(r"^([^/]+)(/.*)$") + cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?" + "(.+\@[\x21-\xFF]+\.txt)") + + # path under HKEY_CURRENT_USER from which to get location of index.dat + reg_path = r"software\microsoft\windows" \ + r"\currentversion\explorer\shell folders" + reg_key = "Cookies" + + def __init__(self): + self._delayload_domains = {} + + def _delayload_domain(self, domain): + # if necessary, lazily load cookies for this domain + delayload_info = self._delayload_domains.get(domain) + if delayload_info is not None: + cookie_file, ignore_discard, ignore_expires = delayload_info + try: + self.load_cookie_data(cookie_file, + ignore_discard, ignore_expires) + except (LoadError, IOError): + debug("error reading cookie file, skipping: %s", cookie_file) + else: + del self._delayload_domains[domain] + + def _load_cookies_from_file(self, filename): + debug("Loading MSIE cookies file: %s", filename) + cookies = [] + + cookies_fh = open(filename) + + try: + while 1: + key = cookies_fh.readline() + if key == "": break + + rl = cookies_fh.readline + def getlong(rl=rl): return long(rl().rstrip()) + def getstr(rl=rl): return rl().rstrip() + + key = key.rstrip() + value = getstr() + domain_path = getstr() + flags = getlong() # 0x2000 bit is for secure I think + lo_expire = getlong() + hi_expire = getlong() + lo_create = getlong() + hi_create = getlong() + sep = getstr() + + if "" in (key, value, domain_path, flags, hi_expire, lo_expire, + hi_create, lo_create, sep) or (sep != "*"): + break + + m = self.msie_domain_re.search(domain_path) + if m: + domain = m.group(1) + path = m.group(2) + + cookies.append({"KEY": key, "VALUE": value, + "DOMAIN": domain, "PATH": path, + "FLAGS": flags, "HIXP": hi_expire, + "LOXP": lo_expire, "HICREATE": hi_create, + "LOCREATE": lo_create}) + finally: + cookies_fh.close() + + return cookies + + def load_cookie_data(self, filename, + ignore_discard=False, ignore_expires=False): + """Load cookies from file containing actual cookie data. + + Old cookies are kept unless overwritten by newly loaded ones. + + You should not call this method if the delayload attribute is set. 
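A quick arithmetic check of the conversion above (not from the original file): one hour past the Unix epoch, written as a Win32 FILETIME in 100-nanosecond units since 1601, should come back as 3600 seconds.

    WIN32_EPOCH = 0x019db1ded53e8000L  # 1970 Jan 01 00:00:00, as above

    # one hour = 3600 s = 3600 * 10**7 hundred-nanosecond intervals
    filetime = WIN32_EPOCH + 3600 * 10000000L
    assert divmod(filetime - WIN32_EPOCH, 10000000L)[0] == 3600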
+ + I think each of these files contain all cookies for one user, domain, + and path. + + filename: file containing cookies -- usually found in a file like + C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt + + """ + now = int(time.time()) + + cookie_data = self._load_cookies_from_file(filename) + + for cookie in cookie_data: + flags = cookie["FLAGS"] + secure = ((flags & 0x2000) != 0) + filetime = (cookie["HIXP"] << 32) + cookie["LOXP"] + expires = epoch_time_offset_from_win32_filetime(filetime) + if expires < now: + discard = True + else: + discard = False + domain = cookie["DOMAIN"] + initial_dot = domain.startswith(".") + if initial_dot: + domain_specified = True + else: + # MSIE 5 does not record whether the domain cookie-attribute + # was specified. + # Assuming it wasn't is conservative, because with strict + # domain matching this will match less frequently; with regular + # Netscape tail-matching, this will match at exactly the same + # times that domain_specified = True would. It also means we + # don't have to prepend a dot to achieve consistency with our + # own & Mozilla's domain-munging scheme. + domain_specified = False + + # assume path_specified is false + # XXX is there other stuff in here? -- eg. comment, commentURL? + c = Cookie(0, + cookie["KEY"], cookie["VALUE"], + None, False, + domain, domain_specified, initial_dot, + cookie["PATH"], False, + secure, + expires, + discard, + None, + None, + {"flags": flags}) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + CookieJar.set_cookie(self, c) + + def load_from_registry(self, ignore_discard=False, ignore_expires=False, + username=None): + """ + username: only required on win9x + + """ + cookies_dir = regload(self.reg_path, self.reg_key) + filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT")) + self.load(filename, ignore_discard, ignore_expires, username) + + def _really_load(self, index, filename, ignore_discard, ignore_expires, + username): + now = int(time.time()) + + if username is None: + username = os.environ['USERNAME'].lower() + + cookie_dir = os.path.dirname(filename) + + data = index.read(256) + if len(data) != 256: + raise LoadError("%s file is too short" % filename) + + # Cookies' index.dat file starts with 32 bytes of signature + # followed by an offset to the first record, stored as a little- + # endian DWORD. + sig, size, data = data[:32], data[32:36], data[36:] + size = struct.unpack("<L", size)[0] + + # check that sig is valid + if not self.magic_re.match(sig) or size != 0x4000: + raise LoadError("%s ['%s' %s] does not seem to contain cookies" % + (str(filename), sig, size)) + + # skip to start of first record + index.seek(size, 0) + + sector = 128 # size of sector in bytes + + while 1: + data = "" + + # Cookies are usually in two contiguous sectors, so read in two + # sectors and adjust if not a Cookie. + to_read = 2 * sector + d = index.read(to_read) + if len(d) != to_read: + break + data = data + d + + # Each record starts with a 4-byte signature and a count + # (little-endian DWORD) of sectors for the record. 
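To make that record layout concrete, a small sketch with fabricated bytes: a 4-byte signature followed by a little-endian DWORD sector count, decoded the same way as the lines that follow.

    import struct

    record = "URL " + struct.pack("<L", 2)   # hypothetical record header
    sig, size = record[:4], struct.unpack("<L", record[4:8])[0]
    assert sig == "URL " and size == 2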
+ sig, size, data = data[:4], data[4:8], data[8:] + size = struct.unpack("<L", size)[0] + + to_read = (size - 2) * sector + +## from urllib import quote +## print "data", quote(data) +## print "sig", quote(sig) +## print "size in sectors", size +## print "size in bytes", size*sector +## print "size in units of 16 bytes", (size*sector) / 16 +## print "size to read in bytes", to_read +## print + + if sig != "URL ": + assert sig in ("HASH", "LEAK", \ + self.padding, "\x00\x00\x00\x00"), \ + "unrecognized MSIE index.dat record: %s" % \ + binary_to_str(sig) + if sig == "\x00\x00\x00\x00": + # assume we've got all the cookies, and stop + break + if sig == self.padding: + continue + # skip the rest of this record + assert to_read >= 0 + if size != 2: + assert to_read != 0 + index.seek(to_read, 1) + continue + + # read in rest of record if necessary + if size > 2: + more_data = index.read(to_read) + if len(more_data) != to_read: break + data = data + more_data + + cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username + + "(%s\@[\x21-\xFF]+\.txt)" % username) + m = re.search(cookie_re, data, re.I) + if m: + cookie_file = os.path.join(cookie_dir, m.group(2)) + if not self.delayload: + try: + self.load_cookie_data(cookie_file, + ignore_discard, ignore_expires) + except (LoadError, IOError): + debug("error reading cookie file, skipping: %s", + cookie_file) + else: + domain = m.group(1) + i = domain.find("/") + if i != -1: + domain = domain[:i] + + self._delayload_domains[domain] = ( + cookie_file, ignore_discard, ignore_expires) + + +class MSIECookieJar(MSIEBase, FileCookieJar): + """FileCookieJar that reads from the Windows MSIE cookies database. + + MSIECookieJar can read the cookie files of Microsoft Internet Explorer + (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and + Windows 98. Other configurations may also work, but are untested. Saving + cookies in MSIE format is NOT supported. If you save cookies, they'll be + in the usual Set-Cookie3 format, which you can read back in using an + instance of the plain old CookieJar class. Don't save using the same + filename that you loaded cookies from, because you may succeed in + clobbering your MSIE cookies index file! + + You should be able to have LWP share Internet Explorer's cookies like + this (note you need to supply a username to load_from_registry if you're on + Windows 9x or Windows ME): + + cj = MSIECookieJar(delayload=1) + # find cookies index file in registry and load cookies from it + cj.load_from_registry() + opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj)) + response = opener.open("http://example.com/") + + Iterating over a delayloaded MSIECookieJar instance will not cause any + cookies to be read from disk. To force reading of all cookies from disk, + call read_all_cookies. Note that the following methods iterate over self: + clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__ + and as_string. 
+ + Additional methods: + + load_from_registry(ignore_discard=False, ignore_expires=False, + username=None) + load_cookie_data(filename, ignore_discard=False, ignore_expires=False) + read_all_cookies() + + """ + def __init__(self, filename=None, delayload=False, policy=None): + MSIEBase.__init__(self) + FileCookieJar.__init__(self, filename, delayload, policy) + + def set_cookie(self, cookie): + if self.delayload: + self._delayload_domain(cookie.domain) + CookieJar.set_cookie(self, cookie) + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + domains = self._cookies.copy() + domains.update(self._delayload_domains) + domains = domains.keys() + + cookies = [] + for domain in domains: + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookies_for_domain(self, domain, request): + if not self._policy.domain_return_ok(domain, request): + return [] + debug("Checking %s for cookies to return", domain) + if self.delayload: + self._delayload_domain(domain) + return CookieJar._cookies_for_domain(self, domain, request) + + def read_all_cookies(self): + """Eagerly read in all cookies.""" + if self.delayload: + for domain in self._delayload_domains.keys(): + self._delayload_domain(domain) + + def load(self, filename, ignore_discard=False, ignore_expires=False, + username=None): + """Load cookies from an MSIE 'index.dat' cookies index file. + + filename: full path to cookie index file + username: only required on win9x + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + index = open(filename, "rb") + + try: + self._really_load(index, filename, ignore_discard, ignore_expires, + username) + finally: + index.close() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py new file mode 100644 index 0000000..d94eacf --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_opener.py @@ -0,0 +1,436 @@ +"""Integration with Python standard library module urllib2: OpenerDirector +class. + +Copyright 2004-2006 John J Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). 
+ +""" + +import os, urllib2, bisect, httplib, types, tempfile +try: + import threading as _threading +except ImportError: + import dummy_threading as _threading +try: + set +except NameError: + import sets + set = sets.Set + +import _file +import _http +from _request import Request +import _response +import _rfc3986 +import _sockettimeout +import _upgrade +from _util import isstringlike + + +class ContentTooShortError(urllib2.URLError): + def __init__(self, reason, result): + urllib2.URLError.__init__(self, reason) + self.result = result + + +def set_request_attr(req, name, value, default): + try: + getattr(req, name) + except AttributeError: + setattr(req, name, default) + if value is not default: + setattr(req, name, value) + + +class OpenerDirector(urllib2.OpenerDirector): + def __init__(self): + urllib2.OpenerDirector.__init__(self) + # really none of these are (sanely) public -- the lack of initial + # underscore on some is just due to following urllib2 + self.process_response = {} + self.process_request = {} + self._any_request = {} + self._any_response = {} + self._handler_index_valid = True + self._tempfiles = [] + + def add_handler(self, handler): + if handler in self.handlers: + return + # XXX why does self.handlers need to be sorted? + bisect.insort(self.handlers, handler) + handler.add_parent(self) + self._handler_index_valid = False + + def _maybe_reindex_handlers(self): + if self._handler_index_valid: + return + + handle_error = {} + handle_open = {} + process_request = {} + process_response = {} + any_request = set() + any_response = set() + unwanted = [] + + for handler in self.handlers: + added = False + for meth in dir(handler): + if meth in ["redirect_request", "do_open", "proxy_open"]: + # oops, coincidental match + continue + + if meth == "any_request": + any_request.add(handler) + added = True + continue + elif meth == "any_response": + any_response.add(handler) + added = True + continue + + ii = meth.find("_") + scheme = meth[:ii] + condition = meth[ii+1:] + + if condition.startswith("error"): + jj = meth[ii+1:].find("_") + ii + 1 + kind = meth[jj+1:] + try: + kind = int(kind) + except ValueError: + pass + lookup = handle_error.setdefault(scheme, {}) + elif condition == "open": + kind = scheme + lookup = handle_open + elif condition == "request": + kind = scheme + lookup = process_request + elif condition == "response": + kind = scheme + lookup = process_response + else: + continue + + lookup.setdefault(kind, set()).add(handler) + added = True + + if not added: + unwanted.append(handler) + + for handler in unwanted: + self.handlers.remove(handler) + + # sort indexed methods + # XXX could be cleaned up + for lookup in [process_request, process_response]: + for scheme, handlers in lookup.iteritems(): + lookup[scheme] = handlers + for scheme, lookup in handle_error.iteritems(): + for code, handlers in lookup.iteritems(): + handlers = list(handlers) + handlers.sort() + lookup[code] = handlers + for scheme, handlers in handle_open.iteritems(): + handlers = list(handlers) + handlers.sort() + handle_open[scheme] = handlers + + # cache the indexes + self.handle_error = handle_error + self.handle_open = handle_open + self.process_request = process_request + self.process_response = process_response + self._any_request = any_request + self._any_response = any_response + + def _request(self, url_or_req, data, visit, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + if isstringlike(url_or_req): + req = Request(url_or_req, data, visit=visit, timeout=timeout) + else: + # 
already a urllib2.Request or mechanize.Request instance + req = url_or_req + if data is not None: + req.add_data(data) + # XXX yuck + set_request_attr(req, "visit", visit, None) + set_request_attr(req, "timeout", timeout, + _sockettimeout._GLOBAL_DEFAULT_TIMEOUT) + return req + + def open(self, fullurl, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + req = self._request(fullurl, data, None, timeout) + req_scheme = req.get_type() + + self._maybe_reindex_handlers() + + # pre-process request + # XXX should we allow a Processor to change the URL scheme + # of the request? + request_processors = set(self.process_request.get(req_scheme, [])) + request_processors.update(self._any_request) + request_processors = list(request_processors) + request_processors.sort() + for processor in request_processors: + for meth_name in ["any_request", req_scheme+"_request"]: + meth = getattr(processor, meth_name, None) + if meth: + req = meth(req) + + # In Python >= 2.4, .open() supports processors already, so we must + # call ._open() instead. + urlopen = getattr(urllib2.OpenerDirector, "_open", + urllib2.OpenerDirector.open) + response = urlopen(self, req, data) + + # post-process response + response_processors = set(self.process_response.get(req_scheme, [])) + response_processors.update(self._any_response) + response_processors = list(response_processors) + response_processors.sort() + for processor in response_processors: + for meth_name in ["any_response", req_scheme+"_response"]: + meth = getattr(processor, meth_name, None) + if meth: + response = meth(req, response) + + return response + + def error(self, proto, *args): + if proto in ['http', 'https']: + # XXX http[s] protocols are special-cased + dict = self.handle_error['http'] # https is not different than http + proto = args[2] # YUCK! + meth_name = 'http_error_%s' % proto + http_err = 1 + orig_args = args + else: + dict = self.handle_error + meth_name = proto + '_error' + http_err = 0 + args = (dict, proto, meth_name) + args + result = apply(self._call_chain, args) + if result: + return result + + if http_err: + args = (dict, 'default', 'http_error_default') + orig_args + return apply(self._call_chain, args) + + BLOCK_SIZE = 1024*8 + def retrieve(self, fullurl, filename=None, reporthook=None, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + """Returns (filename, headers). + + For remote objects, the default filename will refer to a temporary + file. Temporary files are removed when the OpenerDirector.close() + method is called. + + For file: URLs, at present the returned filename is None. This may + change in future. + + If the actual number of bytes read is less than indicated by the + Content-Length header, raises ContentTooShortError (a URLError + subclass). The exception's .result attribute contains the (filename, + headers) that would have been returned. 
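A usage sketch of retrieve() along the lines described above; the URL is hypothetical, and this assumes build_opener and ContentTooShortError are exported at the mechanize package level.

    import mechanize

    opener = mechanize.build_opener()
    try:
        filename, headers = opener.retrieve("http://example.com/big.bin")
    except mechanize.ContentTooShortError, exc:
        # .result is the (filename, headers) pair that would have been
        # returned had the download completed
        filename, headers = exc.result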
+ + """ + req = self._request(fullurl, data, False, timeout) + scheme = req.get_type() + fp = self.open(req) + headers = fp.info() + if filename is None and scheme == 'file': + # XXX req.get_selector() seems broken here, return None, + # pending sanity :-/ + return None, headers + #return urllib.url2pathname(req.get_selector()), headers + if filename: + tfp = open(filename, 'wb') + else: + path = _rfc3986.urlsplit(req.get_full_url())[2] + suffix = os.path.splitext(path)[1] + fd, filename = tempfile.mkstemp(suffix) + self._tempfiles.append(filename) + tfp = os.fdopen(fd, 'wb') + + result = filename, headers + bs = self.BLOCK_SIZE + size = -1 + read = 0 + blocknum = 0 + if reporthook: + if "content-length" in headers: + size = int(headers["Content-Length"]) + reporthook(blocknum, bs, size) + while 1: + block = fp.read(bs) + if block == "": + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, bs, size) + fp.close() + tfp.close() + del fp + del tfp + + # raise exception if actual size does not match content-length header + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: " + "got only %i out of %i bytes" % (read, size), + result + ) + + return result + + def close(self): + urllib2.OpenerDirector.close(self) + + # make it very obvious this object is no longer supposed to be used + self.open = self.error = self.retrieve = self.add_handler = None + + if self._tempfiles: + for filename in self._tempfiles: + try: + os.unlink(filename) + except OSError: + pass + del self._tempfiles[:] + + +def wrapped_open(urlopen, process_response_object, fullurl, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + success = True + try: + response = urlopen(fullurl, data, timeout) + except urllib2.HTTPError, error: + success = False + if error.fp is None: # not a response + raise + response = error + + if response is not None: + response = process_response_object(response) + + if not success: + raise response + return response + +class ResponseProcessingOpener(OpenerDirector): + + def open(self, fullurl, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + def bound_open(fullurl, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + return OpenerDirector.open(self, fullurl, data, timeout) + return wrapped_open( + bound_open, self.process_response_object, fullurl, data, timeout) + + def process_response_object(self, response): + return response + + +class SeekableResponseOpener(ResponseProcessingOpener): + def process_response_object(self, response): + return _response.seek_wrapped_response(response) + + +class OpenerFactory: + """This class's interface is quite likely to change.""" + + default_classes = [ + # handlers + urllib2.ProxyHandler, + urllib2.UnknownHandler, + _http.HTTPHandler, # derived from new AbstractHTTPHandler + _http.HTTPDefaultErrorHandler, + _http.HTTPRedirectHandler, # bugfixed + urllib2.FTPHandler, + _file.FileHandler, + # processors + _upgrade.HTTPRequestUpgradeProcessor, + _http.HTTPCookieProcessor, + _http.HTTPErrorProcessor, + ] + if hasattr(httplib, 'HTTPS'): + default_classes.append(_http.HTTPSHandler) + handlers = [] + replacement_handlers = [] + + def __init__(self, klass=OpenerDirector): + self.klass = klass + + def build_opener(self, *handlers): + """Create an opener object from a list of handlers and processors. + + The opener will use several default handlers and processors, including + support for HTTP and FTP. 
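A minimal sketch of the factory in use (hypothetical URL); passing a handler instance, here the package's own HTTPCookieProcessor, replaces the matching default handler rather than duplicating it.

    import mechanize

    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor())
    response = opener.open("http://example.com/")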
+ + If any of the handlers passed as arguments are subclasses of the + default handlers, the default handlers will not be used. + + """ + opener = self.klass() + default_classes = list(self.default_classes) + skip = [] + for klass in default_classes: + for check in handlers: + if type(check) == types.ClassType: + if issubclass(check, klass): + skip.append(klass) + elif type(check) == types.InstanceType: + if isinstance(check, klass): + skip.append(klass) + for klass in skip: + default_classes.remove(klass) + + for klass in default_classes: + opener.add_handler(klass()) + for h in handlers: + if type(h) == types.ClassType: + h = h() + opener.add_handler(h) + + return opener + + +build_opener = OpenerFactory().build_opener + +_opener = None +urlopen_lock = _threading.Lock() +def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + global _opener + if _opener is None: + urlopen_lock.acquire() + try: + if _opener is None: + _opener = build_opener() + finally: + urlopen_lock.release() + return _opener.open(url, data, timeout) + +def urlretrieve(url, filename=None, reporthook=None, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + global _opener + if _opener is None: + urlopen_lock.acquire() + try: + if _opener is None: + _opener = build_opener() + finally: + urlopen_lock.release() + return _opener.retrieve(url, filename, reporthook, data, timeout) + +def install_opener(opener): + global _opener + _opener = opener diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py new file mode 100644 index 0000000..4d8d9d3 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_pullparser.py @@ -0,0 +1,390 @@ +"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser. + +Examples + +This program extracts all links from a document. It will print one +line for each link, containing the URL and the textual description +between the <A>...</A> tags: + +import pullparser, sys +f = file(sys.argv[1]) +p = pullparser.PullParser(f) +for token in p.tags("a"): + if token.type == "endtag": continue + url = dict(token.attrs).get("href", "-") + text = p.get_compressed_text(endat=("endtag", "a")) + print "%s\t%s" % (url, text) + +This program extracts the <TITLE> from the document: + +import pullparser, sys +f = file(sys.argv[1]) +p = pullparser.PullParser(f) +if p.get_tag("title"): + title = p.get_compressed_text() + print "Title: %s" % title + + +Copyright 2003-2006 John J. Lee <jjl@pobox.com> +Copyright 1998-2001 Gisle Aas (original libwww-perl code) + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses. + +""" + +import re, htmlentitydefs +import sgmllib, HTMLParser +from xml.sax import saxutils + +from _html import unescape, unescape_charref + + +class NoMoreTokensError(Exception): pass + +class Token: + """Represents an HTML tag, declaration, processing instruction etc. + + Behaves as both a tuple-like object (ie. iterable) and has attributes + .type, .data and .attrs. 
+ + >>> t = Token("starttag", "a", [("href", "http://www.python.org/")]) + >>> t == ("starttag", "a", [("href", "http://www.python.org/")]) + True + >>> (t.type, t.data) == ("starttag", "a") + True + >>> t.attrs == [("href", "http://www.python.org/")] + True + + Public attributes + + type: one of "starttag", "endtag", "startendtag", "charref", "entityref", + "data", "comment", "decl", "pi", after the corresponding methods of + HTMLParser.HTMLParser + data: For a tag, the tag name; otherwise, the relevant data carried by the + tag, as a string + attrs: list of (name, value) pairs representing HTML attributes + (or None if token does not represent an opening tag) + + """ + def __init__(self, type, data, attrs=None): + self.type = type + self.data = data + self.attrs = attrs + def __iter__(self): + return iter((self.type, self.data, self.attrs)) + def __eq__(self, other): + type, data, attrs = other + if (self.type == type and + self.data == data and + self.attrs == attrs): + return True + else: + return False + def __ne__(self, other): return not self.__eq__(other) + def __repr__(self): + args = ", ".join(map(repr, [self.type, self.data, self.attrs])) + return self.__class__.__name__+"(%s)" % args + + def __str__(self): + """ + >>> print Token("starttag", "br") + <br> + >>> print Token("starttag", "a", + ... [("href", "http://www.python.org/"), ("alt", '"foo"')]) + <a href="http://www.python.org/" alt='"foo"'> + >>> print Token("startendtag", "br") + <br /> + >>> print Token("startendtag", "br", [("spam", "eggs")]) + <br spam="eggs" /> + >>> print Token("endtag", "p") + </p> + >>> print Token("charref", "38") + &#38; + >>> print Token("entityref", "amp") + &amp; + >>> print Token("data", "foo\\nbar") + foo + bar + >>> print Token("comment", "Life is a bowl\\nof cherries.") + <!--Life is a bowl + of cherries.--> + >>> print Token("decl", "decl") + <!decl> + >>> print Token("pi", "pi") + <?pi> + """ + if self.attrs is not None: + attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for + k, v in self.attrs]) + else: + attrs = "" + if self.type == "starttag": + return "<%s%s>" % (self.data, attrs) + elif self.type == "startendtag": + return "<%s%s />" % (self.data, attrs) + elif self.type == "endtag": + return "</%s>" % self.data + elif self.type == "charref": + return "&#%s;" % self.data + elif self.type == "entityref": + return "&%s;" % self.data + elif self.type == "data": + return self.data + elif self.type == "comment": + return "<!--%s-->" % self.data + elif self.type == "decl": + return "<!%s>" % self.data + elif self.type == "pi": + return "<?%s>" % self.data + assert False + + +def iter_until_exception(fn, exception, *args, **kwds): + while 1: + try: + yield fn(*args, **kwds) + except exception: + raise StopIteration + + +class _AbstractParser: + chunk = 1024 + compress_re = re.compile(r"\s+") + def __init__(self, fh, textify={"img": "alt", "applet": "alt"}, + encoding="ascii", entitydefs=None): + """ + fh: file-like object (only a .read() method is required) from which to + read HTML to be parsed + textify: mapping used by .get_text() and .get_compressed_text() methods + to represent opening tags as text + encoding: encoding used to encode numeric character references by + .get_text() and .get_compressed_text() ("ascii" by default) + + entitydefs: mapping like {"amp": "&", ...} containing HTML entity + definitions (a sensible default is used). This is used to unescape + entities in .get_text() (and .get_compressed_text()) and attribute + values.
If the encoding can not represent the character, the entity + reference is left unescaped. Note that entity references (both + numeric - e.g. &#123; or &#xabc; - and non-numeric - e.g. &amp;) are + unescaped in attribute values and the return value of .get_text(), but + not in data outside of tags. Instead, entity references outside of + tags are represented as tokens. This is a bit odd, it's true :-/ + + If the element name of an opening tag matches a key in the textify + mapping then that tag is converted to text. The corresponding value is + used to specify which tag attribute to obtain the text from. textify + maps from element names to either: + + - an HTML attribute name, in which case the HTML attribute value is + used as its text value along with the element name in square + brackets (eg."alt text goes here[IMG]", or, if the alt attribute + were missing, just "[IMG]") + - a callable object (eg. a function) which takes a Token and returns + the string to be used as its text value + + If textify has no key for an element name, nothing is substituted for + the opening tag. + + Public attributes: + + encoding and textify: see above + + """ + self._fh = fh + self._tokenstack = [] # FIFO + self.textify = textify + self.encoding = encoding + if entitydefs is None: + entitydefs = htmlentitydefs.name2codepoint + self._entitydefs = entitydefs + + def __iter__(self): return self + + def tags(self, *names): + return iter_until_exception(self.get_tag, NoMoreTokensError, *names) + + def tokens(self, *tokentypes): + return iter_until_exception(self.get_token, NoMoreTokensError, + *tokentypes) + + def next(self): + try: + return self.get_token() + except NoMoreTokensError: + raise StopIteration() + + def get_token(self, *tokentypes): + """Pop the next Token object from the stack of parsed tokens. + + If arguments are given, they are taken to be token types in which the + caller is interested: tokens representing other elements will be + skipped. Element names must be given in lower case. + + Raises NoMoreTokensError. + + """ + while 1: + while self._tokenstack: + token = self._tokenstack.pop(0) + if tokentypes: + if token.type in tokentypes: + return token + else: + return token + data = self._fh.read(self.chunk) + if not data: + raise NoMoreTokensError() + self.feed(data) + + def unget_token(self, token): + """Push a Token back onto the stack.""" + self._tokenstack.insert(0, token) + + def get_tag(self, *names): + """Return the next Token that represents an opening or closing tag. + + If arguments are given, they are taken to be element names in which the + caller is interested: tags representing other elements will be skipped. + Element names must be given in lower case. + + Raises NoMoreTokensError. + + """ + while 1: + tok = self.get_token() + if tok.type not in ["starttag", "endtag", "startendtag"]: + continue + if names: + if tok.data in names: + return tok + else: + return tok + + def get_text(self, endat=None): + """Get some text. + + endat: stop reading text at this tag (the tag is included in the + returned text); endtag is a tuple (type, name) where type is + "starttag", "endtag" or "startendtag", and name is the element name of + the tag (element names must be given in lower case) + + If endat is not given, .get_text() will stop at the next opening or + closing tag, or when there are no more tokens (no exception is raised). + Note that .get_text() includes the text representation (if any) of the + opening tag, but pushes the opening tag back onto the stack.
As a + result, if you want to call .get_text() again, you need to call + .get_tag() first (unless you want an empty string returned when you + next call .get_text()). + + Entity references are translated using the value of the entitydefs + constructor argument (a mapping from names to characters like that + provided by the standard module htmlentitydefs). Named entity + references that are not in this mapping are left unchanged. + + The textify attribute is used to translate opening tags into text: see + the class docstring. + + """ + text = [] + tok = None + while 1: + try: + tok = self.get_token() + except NoMoreTokensError: + # unget last token (not the one we just failed to get) + if tok: self.unget_token(tok) + break + if tok.type == "data": + text.append(tok.data) + elif tok.type == "entityref": + t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding) + text.append(t) + elif tok.type == "charref": + t = unescape_charref(tok.data, self.encoding) + text.append(t) + elif tok.type in ["starttag", "endtag", "startendtag"]: + tag_name = tok.data + if tok.type in ["starttag", "startendtag"]: + alt = self.textify.get(tag_name) + if alt is not None: + if callable(alt): + text.append(alt(tok)) + elif tok.attrs is not None: + for k, v in tok.attrs: + if k == alt: + text.append(v) + text.append("[%s]" % tag_name.upper()) + if endat is None or endat == (tok.type, tag_name): + self.unget_token(tok) + break + return "".join(text) + + def get_compressed_text(self, *args, **kwds): + """ + As .get_text(), but collapses each group of contiguous whitespace to a + single space character, and removes all initial and trailing + whitespace. + + """ + text = self.get_text(*args, **kwds) + text = text.strip() + return self.compress_re.sub(" ", text) + + def handle_startendtag(self, tag, attrs): + self._tokenstack.append(Token("startendtag", tag, attrs)) + def handle_starttag(self, tag, attrs): + self._tokenstack.append(Token("starttag", tag, attrs)) + def handle_endtag(self, tag): + self._tokenstack.append(Token("endtag", tag)) + def handle_charref(self, name): + self._tokenstack.append(Token("charref", name)) + def handle_entityref(self, name): + self._tokenstack.append(Token("entityref", name)) + def handle_data(self, data): + self._tokenstack.append(Token("data", data)) + def handle_comment(self, data): + self._tokenstack.append(Token("comment", data)) + def handle_decl(self, decl): + self._tokenstack.append(Token("decl", decl)) + def unknown_decl(self, data): + # XXX should this call self.error instead? + #self.error("unknown declaration: " + `data`) + self._tokenstack.append(Token("decl", data)) + def handle_pi(self, data): + self._tokenstack.append(Token("pi", data)) + + def unescape_attr(self, name): + return unescape(name, self._entitydefs, self.encoding) + def unescape_attrs(self, attrs): + escaped_attrs = [] + for key, val in attrs: + escaped_attrs.append((key, self.unescape_attr(val))) + return escaped_attrs + +class PullParser(_AbstractParser, HTMLParser.HTMLParser): + def __init__(self, *args, **kwds): + HTMLParser.HTMLParser.__init__(self) + _AbstractParser.__init__(self, *args, **kwds) + def unescape(self, name): + # Use the entitydefs passed into constructor, not + # HTMLParser.HTMLParser's entitydefs. 
+ return self.unescape_attr(name) + +class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser): + def __init__(self, *args, **kwds): + sgmllib.SGMLParser.__init__(self) + _AbstractParser.__init__(self, *args, **kwds) + def unknown_starttag(self, tag, attrs): + attrs = self.unescape_attrs(attrs) + self._tokenstack.append(Token("starttag", tag, attrs)) + def unknown_endtag(self, tag): + self._tokenstack.append(Token("endtag", tag)) + + +def _test(): + import doctest, _pullparser + return doctest.testmod(_pullparser) + +if __name__ == "__main__": + _test() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py new file mode 100644 index 0000000..7824441 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_request.py @@ -0,0 +1,87 @@ +"""Integration with Python standard library module urllib2: Request class. + +Copyright 2004-2006 John J Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import urllib2, urllib, logging + +from _clientcookie import request_host_lc +import _rfc3986 +import _sockettimeout + +warn = logging.getLogger("mechanize").warning + + +class Request(urllib2.Request): + def __init__(self, url, data=None, headers={}, + origin_req_host=None, unverifiable=False, visit=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + # In mechanize 0.2, the interpretation of a unicode url argument will + # change: A unicode url argument will be interpreted as an IRI, and a + # bytestring as a URI. For now, we accept unicode or bytestring. We + # don't insist that the value is always a URI (specifically, must only + # contain characters which are legal), because that might break working + # code (who knows what bytes some servers want to see, especially with + # browser plugins for internationalised URIs). + if not _rfc3986.is_clean_uri(url): + warn("url argument is not a URI " + "(contains illegal characters) %r" % url) + urllib2.Request.__init__(self, url, data, headers) + self.selector = None + self.unredirected_hdrs = {} + self.visit = visit + self.timeout = timeout + + # All the terminology below comes from RFC 2965. + self.unverifiable = unverifiable + # Set request-host of origin transaction. + # The origin request-host is needed in order to decide whether + # unverifiable sub-requests (automatic redirects, images embedded + # in HTML, etc.) are to third-party hosts. If they are, the + # resulting transactions might need to be conducted with cookies + # turned off. 
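A short sketch of this Request class in use (hypothetical URL and header name): supplying data makes the request a POST, and an unredirected header is visible through the accessors defined below but would not survive a redirect.

    import mechanize

    req = mechanize.Request("http://example.com/", data="a=1")
    req.add_unredirected_header("X-hypothetical", "1")
    assert req.get_method() == "POST"
    assert req.has_header("X-hypothetical")
    assert req.get_header("X-hypothetical") == "1"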
+ if origin_req_host is None: + origin_req_host = request_host_lc(self) + self.origin_req_host = origin_req_host + + def get_selector(self): + return urllib.splittag(self.__r_host)[0] + + def get_origin_req_host(self): + return self.origin_req_host + + def is_unverifiable(self): + return self.unverifiable + + def add_unredirected_header(self, key, val): + """Add a header that will not be added to a redirected request.""" + self.unredirected_hdrs[key.capitalize()] = val + + def has_header(self, header_name): + """True iff request has named header (regular or unredirected).""" + return (header_name in self.headers or + header_name in self.unredirected_hdrs) + + def get_header(self, header_name, default=None): + return self.headers.get( + header_name, + self.unredirected_hdrs.get(header_name, default)) + + def header_items(self): + hdrs = self.unredirected_hdrs.copy() + hdrs.update(self.headers) + return hdrs.items() + + def __str__(self): + return "<Request for %s>" % self.get_full_url() + + def get_method(self): + if self.has_data(): + return "POST" + else: + return "GET" diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py new file mode 100644 index 0000000..fad9b57 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_response.py @@ -0,0 +1,527 @@ +"""Response classes. + +The seek_wrapper code is not used if you're using UserAgent with +.set_seekable_responses(False), or if you're using the urllib2-level interface +without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is +instantiated by some handlers (AbstractHTTPHandler), but the closeable_response +interface is only depended upon by Browser-level code. Function +upgrade_response is only used if you're using Browser or +ResponseUpgradeProcessor. + + +Copyright 2006 John J. Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). + +""" + +import copy, mimetools +from cStringIO import StringIO +import urllib2 + + +def len_of_seekable(file_): + # this function exists because evaluation of len(file_.getvalue()) on every + # .read() from seek_wrapper would be O(N**2) in number of .read()s + pos = file_.tell() + file_.seek(0, 2) # to end + try: + return file_.tell() + finally: + file_.seek(pos) + + +# XXX Andrew Dalke kindly sent me a similar class in response to my request on +# comp.lang.python, which I then proceeded to lose. I wrote this class +# instead, but I think he's released his code publicly since, could pinch the +# tests from it, at least... + +# For testing seek_wrapper invariant (note that +# test_urllib2.HandlerTest.test_seekable is expected to fail when this +# invariant checking is turned on). The invariant checking is done by module +# ipdc, which is available here: +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834 +## from ipdbc import ContractBase +## class seek_wrapper(ContractBase): +class seek_wrapper: + """Adds a seek method to a file object. + + This is only designed for seeking on readonly file-like objects. + + Wrapped file-like object must have a read method. The readline method is + only supported if that method is present on the wrapped object. The + readlines method is always supported. xreadlines and iteration are + supported only for Python 2.2 and above. 
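A minimal sketch of the wrapper over an in-memory file object, exercising the read/seek/re-read behaviour described here.

    from cStringIO import StringIO

    f = seek_wrapper(StringIO("hello world"))
    assert f.read(5) == "hello"
    f.seek(0)                      # rewind: now served from the cache
    assert f.read() == "hello world"
    f.seek(6)
    assert f.read() == "world"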
+ + Public attributes: + + wrapped: the wrapped file object + is_closed: true iff .close() has been called + + WARNING: All other attributes of the wrapped object (ie. those that are not + one of wrapped, read, readline, readlines, xreadlines, __iter__ and next) + are passed through unaltered, which may or may not make sense for your + particular file object. + + """ + # General strategy is to check that cache is full enough, then delegate to + # the cache (self.__cache, which is a cStringIO.StringIO instance). A seek + # position (self.__pos) is maintained independently of the cache, in order + # that a single cache may be shared between multiple seek_wrapper objects. + # Copying using module copy shares the cache in this way. + + def __init__(self, wrapped): + self.wrapped = wrapped + self.__read_complete_state = [False] + self.__is_closed_state = [False] + self.__have_readline = hasattr(self.wrapped, "readline") + self.__cache = StringIO() + self.__pos = 0 # seek position + + def invariant(self): + # The end of the cache is always at the same place as the end of the + # wrapped file (though the .tell() method is not required to be present + # on wrapped file). + return self.wrapped.tell() == len(self.__cache.getvalue()) + + def close(self): + self.wrapped.close() + self.is_closed = True + + def __getattr__(self, name): + if name == "is_closed": + return self.__is_closed_state[0] + elif name == "read_complete": + return self.__read_complete_state[0] + + wrapped = self.__dict__.get("wrapped") + if wrapped: + return getattr(wrapped, name) + + return getattr(self.__class__, name) + + def __setattr__(self, name, value): + if name == "is_closed": + self.__is_closed_state[0] = bool(value) + elif name == "read_complete": + if not self.is_closed: + self.__read_complete_state[0] = bool(value) + else: + self.__dict__[name] = value + + def seek(self, offset, whence=0): + assert whence in [0,1,2] + + # how much data, if any, do we need to read? + if whence == 2: # 2: relative to end of *wrapped* file + if offset < 0: raise ValueError("negative seek offset") + # since we don't know yet where the end of that file is, we must + # read everything + to_read = None + else: + if whence == 0: # 0: absolute + if offset < 0: raise ValueError("negative seek offset") + dest = offset + else: # 1: relative to current position + pos = self.__pos + if pos < offset: + raise ValueError("seek to before start of file") + dest = pos + offset + end = len_of_seekable(self.__cache) + to_read = dest - end + if to_read < 0: + to_read = 0 + + if to_read != 0: + self.__cache.seek(0, 2) + if to_read is None: + assert whence == 2 + self.__cache.write(self.wrapped.read()) + self.read_complete = True + self.__pos = self.__cache.tell() - offset + else: + data = self.wrapped.read(to_read) + if not data: + self.read_complete = True + else: + self.__cache.write(data) + # Don't raise an exception even if we've seek()ed past the end + # of .wrapped, since fseek() doesn't complain in that case. + # Also like fseek(), pretend we have seek()ed past the end, + # i.e. 
not: + #self.__pos = self.__cache.tell() + # but rather: + self.__pos = dest + else: + self.__pos = dest + + def tell(self): + return self.__pos + + def __copy__(self): + cpy = self.__class__(self.wrapped) + cpy.__cache = self.__cache + cpy.__read_complete_state = self.__read_complete_state + cpy.__is_closed_state = self.__is_closed_state + return cpy + + def get_data(self): + pos = self.__pos + try: + self.seek(0) + return self.read(-1) + finally: + self.__pos = pos + + def read(self, size=-1): + pos = self.__pos + end = len_of_seekable(self.__cache) + available = end - pos + + # enough data already cached? + if size <= available and size != -1: + self.__cache.seek(pos) + self.__pos = pos+size + return self.__cache.read(size) + + # no, so read sufficient data from wrapped file and cache it + self.__cache.seek(0, 2) + if size == -1: + self.__cache.write(self.wrapped.read()) + self.read_complete = True + else: + to_read = size - available + assert to_read > 0 + data = self.wrapped.read(to_read) + if not data: + self.read_complete = True + else: + self.__cache.write(data) + self.__cache.seek(pos) + + data = self.__cache.read(size) + self.__pos = self.__cache.tell() + assert self.__pos == pos + len(data) + return data + + def readline(self, size=-1): + if not self.__have_readline: + raise NotImplementedError("no readline method on wrapped object") + + # line we're about to read might not be complete in the cache, so + # read another line first + pos = self.__pos + self.__cache.seek(0, 2) + data = self.wrapped.readline() + if not data: + self.read_complete = True + else: + self.__cache.write(data) + self.__cache.seek(pos) + + data = self.__cache.readline() + if size != -1: + r = data[:size] + self.__pos = pos+size + else: + r = data + self.__pos = pos+len(data) + return r + + def readlines(self, sizehint=-1): + pos = self.__pos + self.__cache.seek(0, 2) + self.__cache.write(self.wrapped.read()) + self.read_complete = True + self.__cache.seek(pos) + data = self.__cache.readlines(sizehint) + self.__pos = self.__cache.tell() + return data + + def __iter__(self): return self + def next(self): + line = self.readline() + if line == "": raise StopIteration + return line + + xreadlines = __iter__ + + def __repr__(self): + return ("<%s at %s whose wrapped object = %r>" % + (self.__class__.__name__, hex(abs(id(self))), self.wrapped)) + + +class response_seek_wrapper(seek_wrapper): + + """ + Supports copying response objects and setting response body data. + + """ + + def __init__(self, wrapped): + seek_wrapper.__init__(self, wrapped) + self._headers = self.wrapped.info() + + def __copy__(self): + cpy = seek_wrapper.__copy__(self) + # copy headers from delegate + cpy._headers = copy.copy(self.info()) + return cpy + + # Note that .info() and .geturl() (the only two urllib2 response methods + # that are not implemented by seek_wrapper) must be here explicitly rather + # than by seek_wrapper's __getattr__ delegation) so that the nasty + # dynamically-created HTTPError classes in get_seek_wrapper_class() get the + # wrapped object's implementation, and not HTTPError's. + + def info(self): + return self._headers + + def geturl(self): + return self.wrapped.geturl() + + def set_data(self, data): + self.seek(0) + self.read() + self.close() + cache = self._seek_wrapper__cache = StringIO() + cache.write(data) + self.seek(0) + + +class eoffile: + # file-like object that always claims to be at end-of-file... 
+ def read(self, size=-1): return "" + def readline(self, size=-1): return "" + def __iter__(self): return self + def next(self): return "" + def close(self): pass + +class eofresponse(eoffile): + def __init__(self, url, headers, code, msg): + self._url = url + self._headers = headers + self.code = code + self.msg = msg + def geturl(self): return self._url + def info(self): return self._headers + + +class closeable_response: + """Avoids unnecessarily clobbering urllib.addinfourl methods on .close(). + + Only supports responses returned by mechanize.HTTPHandler. + + After .close(), the following methods are supported: + + .read() + .readline() + .info() + .geturl() + .__iter__() + .next() + .close() + + and the following attributes are supported: + + .code + .msg + + Also supports pickling (but the stdlib currently does something to prevent + it: http://python.org/sf/1144636). + + """ + # presence of this attr indicates is useable after .close() + closeable_response = None + + def __init__(self, fp, headers, url, code, msg): + self._set_fp(fp) + self._headers = headers + self._url = url + self.code = code + self.msg = msg + + def _set_fp(self, fp): + self.fp = fp + self.read = self.fp.read + self.readline = self.fp.readline + if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines + if hasattr(self.fp, "fileno"): + self.fileno = self.fp.fileno + else: + self.fileno = lambda: None + self.__iter__ = self.fp.__iter__ + self.next = self.fp.next + + def __repr__(self): + return '<%s at %s whose fp = %r>' % ( + self.__class__.__name__, hex(abs(id(self))), self.fp) + + def info(self): + return self._headers + + def geturl(self): + return self._url + + def close(self): + wrapped = self.fp + wrapped.close() + new_wrapped = eofresponse( + self._url, self._headers, self.code, self.msg) + self._set_fp(new_wrapped) + + def __getstate__(self): + # There are three obvious options here: + # 1. truncate + # 2. read to end + # 3. close socket, pickle state including read position, then open + # again on unpickle and use Range header + # XXXX um, 4. refuse to pickle unless .close()d. This is better, + # actually ("errors should never pass silently"). Pickling doesn't + # work anyway ATM, because of http://python.org/sf/1144636 so fix + # this later + + # 2 breaks pickle protocol, because one expects the original object + # to be left unscathed by pickling. 3 is too complicated and + # surprising (and too much work ;-) to happen in a sane __getstate__. + # So we do 1. + + state = self.__dict__.copy() + new_wrapped = eofresponse( + self._url, self._headers, self.code, self.msg) + state["wrapped"] = new_wrapped + return state + +def test_response(data='test data', headers=[], + url="http://example.com/", code=200, msg="OK"): + return make_response(data, headers, url, code, msg) + +def test_html_response(data='test data', headers=[], + url="http://example.com/", code=200, msg="OK"): + headers += [("Content-type", "text/html")] + return make_response(data, headers, url, code, msg) + +def make_response(data, headers, url, code, msg): + """Convenient factory for objects implementing response interface. + + data: string containing response body data + headers: sequence of (name, value) pairs + url: URL of response + code: integer response code (e.g. 200) + msg: string response code message (e.g. 
"OK") + + """ + mime_headers = make_headers(headers) + r = closeable_response(StringIO(data), mime_headers, url, code, msg) + return response_seek_wrapper(r) + + +def make_headers(headers): + """ + headers: sequence of (name, value) pairs + """ + hdr_text = [] + for name_value in headers: + hdr_text.append("%s: %s" % name_value) + return mimetools.Message(StringIO("\n".join(hdr_text))) + + +# Rest of this module is especially horrible, but needed, at least until fork +# urllib2. Even then, may want to preseve urllib2 compatibility. + +def get_seek_wrapper_class(response): + # in order to wrap response objects that are also exceptions, we must + # dynamically subclass the exception :-((( + if (isinstance(response, urllib2.HTTPError) and + not hasattr(response, "seek")): + if response.__class__.__module__ == "__builtin__": + exc_class_name = response.__class__.__name__ + else: + exc_class_name = "%s.%s" % ( + response.__class__.__module__, response.__class__.__name__) + + class httperror_seek_wrapper(response_seek_wrapper, response.__class__): + # this only derives from HTTPError in order to be a subclass -- + # the HTTPError behaviour comes from delegation + + _exc_class_name = exc_class_name + + def __init__(self, wrapped): + response_seek_wrapper.__init__(self, wrapped) + # be compatible with undocumented HTTPError attributes :-( + self.hdrs = wrapped.info() + self.filename = wrapped.geturl() + + def __repr__(self): + return ( + "<%s (%s instance) at %s " + "whose wrapped object = %r>" % ( + self.__class__.__name__, self._exc_class_name, + hex(abs(id(self))), self.wrapped) + ) + wrapper_class = httperror_seek_wrapper + else: + wrapper_class = response_seek_wrapper + return wrapper_class + +def seek_wrapped_response(response): + """Return a copy of response that supports seekable response interface. + + Accepts responses from both mechanize and urllib2 handlers. + + Copes with both oridinary response instances and HTTPError instances (which + can't be simply wrapped due to the requirement of preserving the exception + base class). + """ + if not hasattr(response, "seek"): + wrapper_class = get_seek_wrapper_class(response) + response = wrapper_class(response) + assert hasattr(response, "get_data") + return response + +def upgrade_response(response): + """Return a copy of response that supports Browser response interface. + + Browser response interface is that of "seekable responses" + (response_seek_wrapper), plus the requirement that responses must be + useable after .close() (closeable_response). + + Accepts responses from both mechanize and urllib2 handlers. + + Copes with both ordinary response instances and HTTPError instances (which + can't be simply wrapped due to the requirement of preserving the exception + base class). + """ + wrapper_class = get_seek_wrapper_class(response) + if hasattr(response, "closeable_response"): + if not hasattr(response, "seek"): + response = wrapper_class(response) + assert hasattr(response, "get_data") + return copy.copy(response) + + # a urllib2 handler constructed the response, i.e. the response is an + # urllib.addinfourl or a urllib2.HTTPError, instead of a + # _Util.closeable_response as returned by e.g. 
mechanize.HTTPHandler + try: + code = response.code + except AttributeError: + code = None + try: + msg = response.msg + except AttributeError: + msg = None + + # may have already-.read() data from .seek() cache + data = None + get_data = getattr(response, "get_data", None) + if get_data: + data = get_data() + + response = closeable_response( + response.fp, response.info(), response.geturl(), code, msg) + response = wrapper_class(response) + if data: + response.set_data(data) + return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py new file mode 100644 index 0000000..1bb5021 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_rfc3986.py @@ -0,0 +1,241 @@ +"""RFC 3986 URI parsing and relative reference resolution / absolutization. + +(aka splitting and joining) + +Copyright 2006 John J. Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it under +the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). + +""" + +# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM. + +import re, urllib + +## def chr_range(a, b): +## return "".join(map(chr, range(ord(a), ord(b)+1))) + +## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +## "abcdefghijklmnopqrstuvwxyz" +## "0123456789" +## "-_.~") +## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]" +## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%' +# this re matches any character that's not in URI_CHARS +BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]") + + +def clean_url(url, encoding): + # percent-encode illegal URI characters + # Trying to come up with test cases for this gave me a headache, revisit + # when do switch to unicode. + # Somebody else's comments (lost the attribution): +## - IE will return you the url in the encoding you send it +## - Mozilla/Firefox will send you latin-1 if there's no non latin-1 +## characters in your link. It will send you utf-8 however if there are... 
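Two concrete cases for clean_url(), whose body follows (the example URLs are made up): an illegal character is percent-encoded, and an already-clean URL passes through unchanged.

    assert (clean_url(u"http://example.com/a b", "latin-1") ==
            "http://example.com/a%20b")
    assert (clean_url("http://example.com/0", "latin-1") ==
            "http://example.com/0")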
+ if type(url) == type(""): + url = url.decode(encoding, "replace") + url = url.strip() + # for second param to urllib.quote(), we want URI_CHARS, minus the + # 'always_safe' characters that urllib.quote() never percent-encodes + return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~") + +def is_clean_uri(uri): + """ + >>> is_clean_uri("ABC!") + True + >>> is_clean_uri(u"ABC!") + True + >>> is_clean_uri("ABC|") + False + >>> is_clean_uri(u"ABC|") + False + >>> is_clean_uri("http://example.com/0") + True + >>> is_clean_uri(u"http://example.com/0") + True + """ + # note module re treats bytestrings as through they were decoded as latin-1 + # so this function accepts both unicode and bytestrings + return not bool(BAD_URI_CHARS_RE.search(uri)) + + +SPLIT_MATCH = re.compile( + r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match +def urlsplit(absolute_uri): + """Return scheme, authority, path, query, fragment.""" + match = SPLIT_MATCH(absolute_uri) + if match: + g = match.groups() + return g[1], g[3], g[4], g[6], g[8] + +def urlunsplit(parts): + scheme, authority, path, query, fragment = parts + r = [] + append = r.append + if scheme is not None: + append(scheme) + append(":") + if authority is not None: + append("//") + append(authority) + append(path) + if query is not None: + append("?") + append(query) + if fragment is not None: + append("#") + append(fragment) + return "".join(r) + +def urljoin(base_uri, uri_reference): + return urlunsplit(urljoin_parts(urlsplit(base_uri), + urlsplit(uri_reference))) + +# oops, this doesn't do the same thing as the literal translation +# from the RFC below +## import posixpath +## def urljoin_parts(base_parts, reference_parts): +## scheme, authority, path, query, fragment = base_parts +## rscheme, rauthority, rpath, rquery, rfragment = reference_parts + +## # compute target URI path +## if rpath == "": +## tpath = path +## else: +## tpath = rpath +## if not tpath.startswith("/"): +## tpath = merge(authority, path, tpath) +## tpath = posixpath.normpath(tpath) + +## if rscheme is not None: +## return (rscheme, rauthority, tpath, rquery, rfragment) +## elif rauthority is not None: +## return (scheme, rauthority, tpath, rquery, rfragment) +## elif rpath == "": +## if rquery is not None: +## tquery = rquery +## else: +## tquery = query +## return (scheme, authority, tpath, tquery, rfragment) +## else: +## return (scheme, authority, tpath, rquery, rfragment) + +def urljoin_parts(base_parts, reference_parts): + scheme, authority, path, query, fragment = base_parts + rscheme, rauthority, rpath, rquery, rfragment = reference_parts + + if rscheme == scheme: + rscheme = None + + if rscheme is not None: + tscheme, tauthority, tpath, tquery = ( + rscheme, rauthority, remove_dot_segments(rpath), rquery) + else: + if rauthority is not None: + tauthority, tpath, tquery = ( + rauthority, remove_dot_segments(rpath), rquery) + else: + if rpath == "": + tpath = path + if rquery is not None: + tquery = rquery + else: + tquery = query + else: + if rpath.startswith("/"): + tpath = remove_dot_segments(rpath) + else: + tpath = merge(authority, path, rpath) + tpath = remove_dot_segments(tpath) + tquery = rquery + tauthority = authority + tscheme = scheme + tfragment = rfragment + return (tscheme, tauthority, tpath, tquery, tfragment) + +# um, something *vaguely* like this is what I want, but I have to generate +# lots of test cases first, if only to understand what it is that +# remove_dot_segments really does... 
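In that spirit, here are the standard resolution examples from RFC 3986 section 5.4 that urljoin() and remove_dot_segments() in this module are expected to satisfy.

    base = "http://a/b/c/d;p?q"
    assert urljoin(base, "g") == "http://a/b/c/g"
    assert urljoin(base, "../..") == "http://a/"
    assert urljoin(base, "./g/.") == "http://a/b/c/g/"
    assert remove_dot_segments("/a/b/c/./../../g") == "/a/g"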
+## def remove_dot_segments(path): +## if path == '': +## return '' +## comps = path.split('/') +## new_comps = [] +## for comp in comps: +## if comp in ['.', '']: +## if not new_comps or new_comps[-1]: +## new_comps.append('') +## continue +## if comp != '..': +## new_comps.append(comp) +## elif new_comps: +## new_comps.pop() +## return '/'.join(new_comps) + + +def remove_dot_segments(path): + r = [] + while path: + # A + if path.startswith("../"): + path = path[3:] + continue + if path.startswith("./"): + path = path[2:] + continue + # B + if path.startswith("/./"): + path = path[2:] + continue + if path == "/.": + path = "/" + continue + # C + if path.startswith("/../"): + path = path[3:] + if r: + r.pop() + continue + if path == "/..": + path = "/" + if r: + r.pop() + continue + # D + if path == ".": + path = path[1:] + continue + if path == "..": + path = path[2:] + continue + # E + start = 0 + if path.startswith("/"): + start = 1 + ii = path.find("/", start) + if ii < 0: + ii = None + r.append(path[:ii]) + if ii is None: + break + path = path[ii:] + return "".join(r) + +def merge(base_authority, base_path, ref_path): + # XXXX Oddly, the sample Perl implementation of this by Roy Fielding + # doesn't even take base_authority as a parameter, despite the wording in + # the RFC suggesting otherwise. Perhaps I'm missing some obvious identity. + #if base_authority is not None and base_path == "": + if base_path == "": + return "/" + ref_path + ii = base_path.rfind("/") + if ii >= 0: + return base_path[:ii+1] + ref_path + return ref_path + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py new file mode 100644 index 0000000..4086d52 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_seek.py @@ -0,0 +1,16 @@ +from urllib2 import BaseHandler +from _util import deprecation +from _response import response_seek_wrapper + + +class SeekableProcessor(BaseHandler): + """Deprecated: Make responses seekable.""" + + def __init__(self): + deprecation( + "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable") + + def any_response(self, request, response): + if not hasattr(response, "seek"): + return response_seek_wrapper(response) + return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py new file mode 100644 index 0000000..c22b734 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_sockettimeout.py @@ -0,0 +1,6 @@ +import socket + +try: + _GLOBAL_DEFAULT_TIMEOUT = socket._GLOBAL_DEFAULT_TIMEOUT +except AttributeError: + _GLOBAL_DEFAULT_TIMEOUT = object() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py new file mode 100644 index 0000000..a13cca3 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_testcase.py @@ -0,0 +1,73 @@ +import shutil +import tempfile +import unittest + + +class SetupStack(object): + + def __init__(self): + self._on_teardown = [] + + def add_teardown(self, teardown): + self._on_teardown.append(teardown) + + def tear_down(self): + for func in reversed(self._on_teardown): + func() + + +class TearDownConvenience(object): + + def __init__(self, 
setup_stack=None): + self._own_setup_stack = setup_stack is None + if setup_stack is None: + setup_stack = SetupStack() + self._setup_stack = setup_stack + + # only call this convenience method if no setup_stack was supplied to c'tor + def tear_down(self): + assert self._own_setup_stack + self._setup_stack.tear_down() + + +class TempDirMaker(TearDownConvenience): + + def make_temp_dir(self): + temp_dir = tempfile.mkdtemp(prefix="tmp-%s-" % self.__class__.__name__) + def tear_down(): + shutil.rmtree(temp_dir) + self._setup_stack.add_teardown(tear_down) + return temp_dir + + +class MonkeyPatcher(TearDownConvenience): + + def monkey_patch(self, obj, name, value): + orig_value = getattr(obj, name) + setattr(obj, name, value) + def reverse_patch(): + setattr(obj, name, orig_value) + self._setup_stack.add_teardown(reverse_patch) + + +class TestCase(unittest.TestCase): + + def setUp(self): + self._setup_stack = SetupStack() + + def tearDown(self): + self._setup_stack.tear_down() + + def make_temp_dir(self, *args, **kwds): + return TempDirMaker(self._setup_stack).make_temp_dir(*args, **kwds) + + def monkey_patch(self, *args, **kwds): + return MonkeyPatcher(self._setup_stack).monkey_patch(*args, **kwds) + + def assert_contains(self, container, containee): + self.assertTrue(containee in container, "%r not in %r" % + (containee, container)) + + def assert_less_than(self, got, expected): + self.assertTrue(got < expected, "%r >= %r" % + (got, expected)) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py new file mode 100644 index 0000000..df59c01 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_upgrade.py @@ -0,0 +1,40 @@ +from urllib2 import BaseHandler + +from _request import Request +from _response import upgrade_response +from _util import deprecation + + +class HTTPRequestUpgradeProcessor(BaseHandler): + # upgrade urllib2.Request to this module's Request + # yuck! + handler_order = 0 # before anything else + + def http_request(self, request): + if not hasattr(request, "add_unredirected_header"): + newrequest = Request(request.get_full_url(), request.data, + request.headers) + try: newrequest.origin_req_host = request.origin_req_host + except AttributeError: pass + try: newrequest.unverifiable = request.unverifiable + except AttributeError: pass + try: newrequest.visit = request.visit + except AttributeError: pass + request = newrequest + return request + + https_request = http_request + + +class ResponseUpgradeProcessor(BaseHandler): + # upgrade responses to be .close()able without becoming unusable + handler_order = 0 # before anything else + + def __init__(self): + deprecation( + "See http://wwwsearch.sourceforge.net/mechanize/doc.html#seekable") + + def any_response(self, request, response): + if not hasattr(response, 'closeable_response'): + response = upgrade_response(response) + return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py new file mode 100644 index 0000000..cbb761b --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_urllib2.py @@ -0,0 +1,55 @@ +# urllib2 work-alike interface +# ...from urllib2... 
+from urllib2 import \ + URLError, \ + HTTPError, \ + BaseHandler, \ + UnknownHandler, \ + FTPHandler, \ + CacheFTPHandler +# ...and from mechanize +from _auth import \ + HTTPPasswordMgr, \ + HTTPPasswordMgrWithDefaultRealm, \ + AbstractBasicAuthHandler, \ + AbstractDigestAuthHandler, \ + HTTPProxyPasswordMgr, \ + ProxyHandler, \ + ProxyBasicAuthHandler, \ + ProxyDigestAuthHandler, \ + HTTPBasicAuthHandler, \ + HTTPDigestAuthHandler, \ + HTTPSClientCertMgr +from _debug import \ + HTTPResponseDebugProcessor, \ + HTTPRedirectDebugProcessor +from _file import \ + FileHandler +# crap ATM +## from _gzip import \ +## HTTPGzipProcessor +from _http import \ + HTTPHandler, \ + HTTPDefaultErrorHandler, \ + HTTPRedirectHandler, \ + HTTPEquivProcessor, \ + HTTPCookieProcessor, \ + HTTPRefererProcessor, \ + HTTPRefreshProcessor, \ + HTTPErrorProcessor, \ + HTTPRobotRulesProcessor, \ + RobotExclusionError +import httplib +if hasattr(httplib, 'HTTPS'): + from _http import HTTPSHandler +del httplib +from _opener import OpenerDirector, \ + SeekableResponseOpener, \ + build_opener, install_opener, urlopen +from _request import \ + Request +from _seek import \ + SeekableProcessor +from _upgrade import \ + HTTPRequestUpgradeProcessor, \ + ResponseUpgradeProcessor diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py new file mode 100644 index 0000000..723f87c --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_useragent.py @@ -0,0 +1,352 @@ +"""Convenient HTTP UserAgent class. + +This is a subclass of urllib2.OpenerDirector. + + +Copyright 2003-2006 John J. Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it under +the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt +included with the distribution). + +""" + +import warnings + +import _auth +import _gzip +import _opener +import _response +import _sockettimeout +import _urllib2 + + +class UserAgentBase(_opener.OpenerDirector): + """Convenient user-agent class. + + Do not use .add_handler() to add a handler for something already dealt with + by this code. + + The only reason at present for the distinction between UserAgent and + UserAgentBase is so that classes that depend on .seek()able responses + (e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass + UserAgent exposes a .set_seekable_responses() method that allows switching + off the adding of a .seek() method to responses. + + Public attributes: + + addheaders: list of (name, value) pairs specifying headers to send with + every request, unless they are overridden in the Request instance. + + >>> ua = UserAgentBase() + >>> ua.addheaders = [ + ... ("User-agent", "Mozilla/5.0 (compatible)"), + ... 
("From", "responsible.person@example.com")] + + """ + + handler_classes = { + # scheme handlers + "http": _urllib2.HTTPHandler, + # CacheFTPHandler is buggy, at least in 2.3, so we don't use it + "ftp": _urllib2.FTPHandler, + "file": _urllib2.FileHandler, + + # other handlers + "_unknown": _urllib2.UnknownHandler, + # HTTP{S,}Handler depend on HTTPErrorProcessor too + "_http_error": _urllib2.HTTPErrorProcessor, + "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor, + "_http_default_error": _urllib2.HTTPDefaultErrorHandler, + + # feature handlers + "_basicauth": _urllib2.HTTPBasicAuthHandler, + "_digestauth": _urllib2.HTTPDigestAuthHandler, + "_redirect": _urllib2.HTTPRedirectHandler, + "_cookies": _urllib2.HTTPCookieProcessor, + "_refresh": _urllib2.HTTPRefreshProcessor, + "_equiv": _urllib2.HTTPEquivProcessor, + "_proxy": _urllib2.ProxyHandler, + "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler, + "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler, + "_robots": _urllib2.HTTPRobotRulesProcessor, + "_gzip": _gzip.HTTPGzipProcessor, # experimental! + + # debug handlers + "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor, + "_debug_response_body": _urllib2.HTTPResponseDebugProcessor, + } + + default_schemes = ["http", "ftp", "file"] + default_others = ["_unknown", "_http_error", "_http_request_upgrade", + "_http_default_error", + ] + default_features = ["_redirect", "_cookies", + "_refresh", "_equiv", + "_basicauth", "_digestauth", + "_proxy", "_proxy_basicauth", "_proxy_digestauth", + "_robots", + ] + if hasattr(_urllib2, 'HTTPSHandler'): + handler_classes["https"] = _urllib2.HTTPSHandler + default_schemes.append("https") + + def __init__(self): + _opener.OpenerDirector.__init__(self) + + ua_handlers = self._ua_handlers = {} + for scheme in (self.default_schemes+ + self.default_others+ + self.default_features): + klass = self.handler_classes[scheme] + ua_handlers[scheme] = klass() + for handler in ua_handlers.itervalues(): + self.add_handler(handler) + + # Yuck. + # Ensure correct default constructor args were passed to + # HTTPRefreshProcessor and HTTPEquivProcessor. + if "_refresh" in ua_handlers: + self.set_handle_refresh(True) + if "_equiv" in ua_handlers: + self.set_handle_equiv(True) + # Ensure default password managers are installed. + pm = ppm = None + if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers: + pm = _urllib2.HTTPPasswordMgrWithDefaultRealm() + if ("_proxy_basicauth" in ua_handlers or + "_proxy_digestauth" in ua_handlers): + ppm = _auth.HTTPProxyPasswordMgr() + self.set_password_manager(pm) + self.set_proxy_password_manager(ppm) + # set default certificate manager + if "https" in ua_handlers: + cm = _urllib2.HTTPSClientCertMgr() + self.set_client_cert_manager(cm) + + def close(self): + _opener.OpenerDirector.close(self) + self._ua_handlers = None + + # XXX +## def set_timeout(self, timeout): +## self._timeout = timeout +## def set_http_connection_cache(self, conn_cache): +## self._http_conn_cache = conn_cache +## def set_ftp_connection_cache(self, conn_cache): +## # XXX ATM, FTP has cache as part of handler; should it be separate? +## self._ftp_conn_cache = conn_cache + + def set_handled_schemes(self, schemes): + """Set sequence of URL scheme (protocol) strings. + + For example: ua.set_handled_schemes(["http", "ftp"]) + + If this fails (with ValueError) because you've passed an unknown + scheme, the set of handled schemes will not be changed. 
+ + """ + want = {} + for scheme in schemes: + if scheme.startswith("_"): + raise ValueError("not a scheme '%s'" % scheme) + if scheme not in self.handler_classes: + raise ValueError("unknown scheme '%s'") + want[scheme] = None + + # get rid of scheme handlers we don't want + for scheme, oldhandler in self._ua_handlers.items(): + if scheme.startswith("_"): continue # not a scheme handler + if scheme not in want: + self._replace_handler(scheme, None) + else: + del want[scheme] # already got it + # add the scheme handlers that are missing + for scheme in want.keys(): + self._set_handler(scheme, True) + + def set_cookiejar(self, cookiejar): + """Set a mechanize.CookieJar, or None.""" + self._set_handler("_cookies", obj=cookiejar) + + # XXX could use Greg Stein's httpx for some of this instead? + # or httplib2?? + def set_proxies(self, proxies): + """Set a dictionary mapping URL scheme to proxy specification, or None. + + e.g. {"http": "joe:password@myproxy.example.com:3128", + "ftp": "proxy.example.com"} + + """ + self._set_handler("_proxy", obj=proxies) + + def add_password(self, url, user, password, realm=None): + self._password_manager.add_password(realm, url, user, password) + def add_proxy_password(self, user, password, hostport=None, realm=None): + self._proxy_password_manager.add_password( + realm, hostport, user, password) + + def add_client_certificate(self, url, key_file, cert_file): + """Add an SSL client certificate, for HTTPS client auth. + + key_file and cert_file must be filenames of the key and certificate + files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS + 12) file to PEM format: + + openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem + openssl pkcs12 -nocerts -in cert.p12 -out key.pem + + + Note that client certificate password input is very inflexible ATM. At + the moment this seems to be console only, which is presumably the + default behaviour of libopenssl. In future mechanize may support + third-party libraries that (I assume) allow more options here. 
+ + """ + self._client_cert_manager.add_key_cert(url, key_file, cert_file) + + # the following are rarely useful -- use add_password / add_proxy_password + # instead + def set_password_manager(self, password_manager): + """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None.""" + self._password_manager = password_manager + self._set_handler("_basicauth", obj=password_manager) + self._set_handler("_digestauth", obj=password_manager) + def set_proxy_password_manager(self, password_manager): + """Set a mechanize.HTTPProxyPasswordMgr, or None.""" + self._proxy_password_manager = password_manager + self._set_handler("_proxy_basicauth", obj=password_manager) + self._set_handler("_proxy_digestauth", obj=password_manager) + def set_client_cert_manager(self, cert_manager): + """Set a mechanize.HTTPClientCertMgr, or None.""" + self._client_cert_manager = cert_manager + handler = self._ua_handlers["https"] + handler.client_cert_manager = cert_manager + + # these methods all take a boolean parameter + def set_handle_robots(self, handle): + """Set whether to observe rules from robots.txt.""" + self._set_handler("_robots", handle) + def set_handle_redirect(self, handle): + """Set whether to handle HTTP 30x redirections.""" + self._set_handler("_redirect", handle) + def set_handle_refresh(self, handle, max_time=None, honor_time=True): + """Set whether to handle HTTP Refresh headers.""" + self._set_handler("_refresh", handle, constructor_kwds= + {"max_time": max_time, "honor_time": honor_time}) + def set_handle_equiv(self, handle, head_parser_class=None): + """Set whether to treat HTML http-equiv headers like HTTP headers. + + Response objects may be .seek()able if this is set (currently returned + responses are, raised HTTPError exception responses are not). + + """ + if head_parser_class is not None: + constructor_kwds = {"head_parser_class": head_parser_class} + else: + constructor_kwds={} + self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds) + def set_handle_gzip(self, handle): + """Handle gzip transfer encoding. + + """ + if handle: + warnings.warn( + "gzip transfer encoding is experimental!", stacklevel=2) + self._set_handler("_gzip", handle) + def set_debug_redirects(self, handle): + """Log information about HTTP redirects (including refreshes). + + Logging is performed using module logging. The logger name is + "mechanize.http_redirects". To actually print some debug output, + eg: + + import sys, logging + logger = logging.getLogger("mechanize.http_redirects") + logger.addHandler(logging.StreamHandler(sys.stdout)) + logger.setLevel(logging.INFO) + + Other logger names relevant to this module: + + "mechanize.http_responses" + "mechanize.cookies" (or "cookielib" if running Python 2.4) + + To turn on everything: + + import sys, logging + logger = logging.getLogger("mechanize") + logger.addHandler(logging.StreamHandler(sys.stdout)) + logger.setLevel(logging.INFO) + + """ + self._set_handler("_debug_redirect", handle) + def set_debug_responses(self, handle): + """Log HTTP response bodies. + + See docstring for .set_debug_redirects() for details of logging. + + Response objects may be .seek()able if this is set (currently returned + responses are, raised HTTPError exception responses are not). 
+ + """ + self._set_handler("_debug_response_body", handle) + def set_debug_http(self, handle): + """Print HTTP headers to sys.stdout.""" + level = int(bool(handle)) + for scheme in "http", "https": + h = self._ua_handlers.get(scheme) + if h is not None: + h.set_http_debuglevel(level) + + def _set_handler(self, name, handle=None, obj=None, + constructor_args=(), constructor_kwds={}): + if handle is None: + handle = obj is not None + if handle: + handler_class = self.handler_classes[name] + if obj is not None: + newhandler = handler_class(obj) + else: + newhandler = handler_class( + *constructor_args, **constructor_kwds) + else: + newhandler = None + self._replace_handler(name, newhandler) + + def _replace_handler(self, name, newhandler=None): + # first, if handler was previously added, remove it + if name is not None: + handler = self._ua_handlers.get(name) + if handler: + try: + self.handlers.remove(handler) + except ValueError: + pass + # then add the replacement, if any + if newhandler is not None: + self.add_handler(newhandler) + self._ua_handlers[name] = newhandler + + +class UserAgent(UserAgentBase): + + def __init__(self): + UserAgentBase.__init__(self) + self._seekable = False + + def set_seekable_responses(self, handle): + """Make response objects .seek()able.""" + self._seekable = bool(handle) + + def open(self, fullurl, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + if self._seekable: + def bound_open(fullurl, data=None, + timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): + return UserAgentBase.open(self, fullurl, data, timeout) + response = _opener.wrapped_open( + bound_open, _response.seek_wrapped_response, fullurl, data, + timeout) + else: + response = UserAgentBase.open(self, fullurl, data) + return response diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py new file mode 100644 index 0000000..dcdefa9 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/mechanize/_util.py @@ -0,0 +1,291 @@ +"""Utility functions and date/time routines. + + Copyright 2002-2006 John J Lee <jjl@pobox.com> + +This code is free software; you can redistribute it and/or modify it +under the terms of the BSD or ZPL 2.1 licenses (see the file +COPYING.txt included with the distribution). + +""" + +import re, time, warnings + + +class ExperimentalWarning(UserWarning): + pass + +def experimental(message): + warnings.warn(message, ExperimentalWarning, stacklevel=3) +def hide_experimental_warnings(): + warnings.filterwarnings("ignore", category=ExperimentalWarning) +def reset_experimental_warnings(): + warnings.filterwarnings("default", category=ExperimentalWarning) + +def deprecation(message): + warnings.warn(message, DeprecationWarning, stacklevel=3) +def hide_deprecations(): + warnings.filterwarnings("ignore", category=DeprecationWarning) +def reset_deprecations(): + warnings.filterwarnings("default", category=DeprecationWarning) + + +def isstringlike(x): + try: x+"" + except: return False + else: return True + +## def caller(): +## try: +## raise SyntaxError +## except: +## import sys +## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name + + +from calendar import timegm + +# Date/time conversion routines for formats used by the HTTP protocol. 
+ +EPOCH = 1970 +def my_timegm(tt): + year, month, mday, hour, min, sec = tt[:6] + if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and + (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): + return timegm(tt) + else: + return None + +days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +months_lower = [] +for month in months: months_lower.append(month.lower()) + + +def time2isoz(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", + representing Universal Time (UTC, aka GMT). An example of this format is: + + 1994-11-24 08:49:37Z + + """ + if t is None: t = time.time() + year, mon, mday, hour, min, sec = time.gmtime(t)[:6] + return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( + year, mon, mday, hour, min, sec) + +def time2netscape(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like this: + + Wed, DD-Mon-YYYY HH:MM:SS GMT + + """ + if t is None: t = time.time() + year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] + return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( + days[wday], mday, months[mon-1], year, hour, min, sec) + + +UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} + +timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") +def offset_from_tz_string(tz): + offset = None + if UTC_ZONES.has_key(tz): + offset = 0 + else: + m = timezone_re.search(tz) + if m: + offset = 3600 * int(m.group(2)) + if m.group(3): + offset = offset + 60 * int(m.group(3)) + if m.group(1) == '-': + offset = -offset + return offset + +def _str2time(day, mon, yr, hr, min, sec, tz): + # translate month name to number + # month numbers start with 1 (January) + try: + mon = months_lower.index(mon.lower())+1 + except ValueError: + # maybe it's already a number + try: + imon = int(mon) + except ValueError: + return None + if 1 <= imon <= 12: + mon = imon + else: + return None + + # make sure clock elements are defined + if hr is None: hr = 0 + if min is None: min = 0 + if sec is None: sec = 0 + + yr = int(yr) + day = int(day) + hr = int(hr) + min = int(min) + sec = int(sec) + + if yr < 1000: + # find "obvious" year + cur_yr = time.localtime(time.time())[0] + m = cur_yr % 100 + tmp = yr + yr = yr + cur_yr - m + m = m - tmp + if abs(m) > 50: + if m > 0: yr = yr + 100 + else: yr = yr - 100 + + # convert UTC time tuple to seconds since epoch (not timezone-adjusted) + t = my_timegm((yr, mon, day, hr, min, sec, tz)) + + if t is not None: + # adjust time using timezone string, to get absolute time since epoch + if tz is None: + tz = "UTC" + tz = tz.upper() + offset = offset_from_tz_string(tz) + if offset is None: + return None + t = t - offset + + return t + + +strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " + r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") +wkday_re = re.compile( + r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) +loose_http_re = re.compile( + r"""^ + (\d\d?) # day + (?:\s+|[-\/]) + (\w+) # month + (?:\s+|[-\/]) + (\d+) # year + (?: + (?:\s+|:) # separator before clock + (\d\d?):(\d\d) # hour:min + (?::(\d\d))? # optional seconds + )? # optional clock + \s* + ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? 
# timezone + \s* + (?:\(\w+\))? # ASCII representation of timezone in parens. + \s*$""", re.X) +def http2time(text): + """Returns time in seconds since epoch of time represented by a string. + + Return value is an integer. + + None is returned if the format of str is unrecognized, the time is outside + the representable range, or the timezone string is not recognized. If the + string contains no timezone, UTC is assumed. + + The timezone in the string may be numerical (like "-0800" or "+0100") or a + string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the + timezone strings equivalent to UTC (zero offset) are known to the function. + + The function loosely parses the following formats: + + Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format + Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format + Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format + 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) + 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) + 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) + + The parser ignores leading and trailing whitespace. The time may be + absent. + + If the year is given with only 2 digits, the function will select the + century that makes the year closest to the current date. + + """ + # fast exit for strictly conforming string + m = strict_re.search(text) + if m: + g = m.groups() + mon = months_lower.index(g[1].lower()) + 1 + tt = (int(g[2]), mon, int(g[0]), + int(g[3]), int(g[4]), float(g[5])) + return my_timegm(tt) + + # No, we need some messy parsing... + + # clean up + text = text.lstrip() + text = wkday_re.sub("", text, 1) # Useless weekday + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = loose_http_re.search(text) + if m is not None: + day, mon, yr, hr, min, sec, tz = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + + +iso_re = re.compile( + """^ + (\d{4}) # year + [-\/]? + (\d\d?) # numerical month + [-\/]? + (\d\d?) # day + (?: + (?:\s+|[-:Tt]) # separator before clock + (\d\d?):?(\d\d) # hour:min + (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) + )? # optional clock + \s* + ([-+]?\d\d?:?(:?\d\d)? + |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) + \s*$""", re.X) +def iso2time(text): + """ + As for http2time, but parses the ISO 8601 formats: + + 1994-02-03 14:15:29 -0100 -- ISO 8601 format + 1994-02-03 14:15:29 -- zone is optional + 1994-02-03 -- only date + 1994-02-03T14:15:29 -- Use T as separator + 19940203T141529Z -- ISO 8601 compact format + 19940203 -- only date + + """ + # clean up + text = text.lstrip() + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = iso_re.search(text) + if m is not None: + # XXX there's an extra bit of the timezone I'm ignoring here: is + # this the right thing to do? + yr, mon, day, hr, min, sec, tz, _ = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py new file mode 100755 index 0000000..c319370 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/pep8.py @@ -0,0 +1,1254 @@ +#!/usr/bin/python +# pep8.py - Check Python source code formatting, according to PEP 8 +# Copyright (C) 2006 Johann C. 
Rocholl <johann@rocholl.net> +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +Check Python source code formatting, according to PEP 8: +http://www.python.org/dev/peps/pep-0008/ + +For usage and a list of options, try this: +$ python pep8.py -h + +This program and its regression test suite live here: +http://github.com/jcrocholl/pep8 + +Groups of errors and warnings: +E errors +W warnings +100 indentation +200 whitespace +300 blank lines +400 imports +500 line length +600 deprecation +700 statements + +You can add checks to this program by writing plugins. Each plugin is +a simple function that is called for each line of source code, either +physical or logical. + +Physical line: +- Raw line of text from the input file. + +Logical line: +- Multi-line statements converted to a single line. +- Stripped left and right. +- Contents of strings replaced with 'xxx' of same length. +- Comments removed. + +The check function requests physical or logical lines by the name of +the first argument: + +def maximum_line_length(physical_line) +def extraneous_whitespace(logical_line) +def blank_lines(logical_line, blank_lines, indent_level, line_number) + +The last example above demonstrates how check plugins can request +additional information with extra arguments. All attributes of the +Checker object are available. Some examples: + +lines: a list of the raw lines from the input file +tokens: the tokens that contribute to this logical line +line_number: line number in the input file +blank_lines: blank lines before this one +indent_char: first indentation character in this file (' ' or '\t') +indent_level: indentation (with tabs expanded to multiples of 8) +previous_indent_level: indentation on previous line +previous_logical: previous logical line + +The docstring of each check function shall be the relevant part of +text from PEP 8. It is printed if the user enables --show-pep8. +Several docstrings contain examples directly from the PEP 8 document. + +Okay: spam(ham[1], {eggs: 2}) +E201: spam( ham[1], {eggs: 2}) + +These examples are verified automatically when pep8.py is run with the +--doctest option. You can add examples for your own check functions. +The format is simple: "Okay" or error/warning code followed by colon +and space, the rest of the line is example source code. If you put 'r' +before the docstring, you can use \n for newline, \t for tab and \s +for space. 
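+
+A hypothetical plugin in this style (a sketch for illustration only; it is
+not one of pep8.py's own checks, and the name and code W001 are made up):
+
+def todo_marker(physical_line):
+    pos = physical_line.find('XXX')
+    if pos > -1:
+        return pos, "W001 XXX marker found"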
+ +""" + +__version__ = '0.5.0' + +import os +import sys +import re +import time +import inspect +import tokenize +from optparse import OptionParser +from keyword import iskeyword +from fnmatch import fnmatch + +DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git' +DEFAULT_IGNORE = ['E24'] + +INDENT_REGEX = re.compile(r'([ \t]*)') +RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)') +SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)') +ERRORCODE_REGEX = re.compile(r'[EW]\d{3}') +E301NOT_REGEX = re.compile(r'class |def |u?r?["\']') + +WHITESPACE = ' \t' + +BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>', + '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=', + '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<'] +UNARY_OPERATORS = ['**', '*', '+', '-'] +OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS + +options = None +args = None + + +############################################################################## +# Plugins (check functions) for physical lines +############################################################################## + + +def tabs_or_spaces(physical_line, indent_char): + r""" + Never mix tabs and spaces. + + The most popular way of indenting Python is with spaces only. The + second-most popular way is with tabs only. Code indented with a mixture + of tabs and spaces should be converted to using spaces exclusively. When + invoking the Python command line interpreter with the -t option, it issues + warnings about code that illegally mixes tabs and spaces. When using -tt + these warnings become errors. These options are highly recommended! + + Okay: if a == 0:\n a = 1\n b = 1 + E101: if a == 0:\n a = 1\n\tb = 1 + """ + indent = INDENT_REGEX.match(physical_line).group(1) + for offset, char in enumerate(indent): + if char != indent_char: + return offset, "E101 indentation contains mixed spaces and tabs" + + +def tabs_obsolete(physical_line): + r""" + For new projects, spaces-only are strongly recommended over tabs. Most + editors have features that make this easy to do. + + Okay: if True:\n return + W191: if True:\n\treturn + """ + indent = INDENT_REGEX.match(physical_line).group(1) + if indent.count('\t'): + return indent.index('\t'), "W191 indentation contains tabs" + + +def trailing_whitespace(physical_line): + """ + JCR: Trailing whitespace is superfluous. + + Okay: spam(1) + W291: spam(1)\s + """ + physical_line = physical_line.rstrip('\n') # chr(10), newline + physical_line = physical_line.rstrip('\r') # chr(13), carriage return + physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L + stripped = physical_line.rstrip() + if physical_line != stripped: + return len(stripped), "W291 trailing whitespace" + + +def trailing_blank_lines(physical_line, lines, line_number): + r""" + JCR: Trailing blank lines are superfluous. + + Okay: spam(1) + W391: spam(1)\n + """ + if physical_line.strip() == '' and line_number == len(lines): + return 0, "W391 blank line at end of file" + + +def missing_newline(physical_line): + """ + JCR: The last line should have a newline. + """ + if physical_line.rstrip() == physical_line: + return len(physical_line), "W292 no newline at end of file" + + +def maximum_line_length(physical_line): + """ + Limit all lines to a maximum of 79 characters. + + There are still many devices around that are limited to 80 character + lines; plus, limiting windows to 80 characters makes it possible to have + several windows side-by-side. The default wrapping on such devices looks + ugly. 
Therefore, please limit all lines to a maximum of 79 characters. + For flowing long blocks of text (docstrings or comments), limiting the + length to 72 characters is recommended. + """ + length = len(physical_line.rstrip()) + if length > 79: + return 79, "E501 line too long (%d characters)" % length + + +############################################################################## +# Plugins (check functions) for logical lines +############################################################################## + + +def blank_lines(logical_line, blank_lines, indent_level, line_number, + previous_logical, blank_lines_before_comment): + r""" + Separate top-level function and class definitions with two blank lines. + + Method definitions inside a class are separated by a single blank line. + + Extra blank lines may be used (sparingly) to separate groups of related + functions. Blank lines may be omitted between a bunch of related + one-liners (e.g. a set of dummy implementations). + + Use blank lines in functions, sparingly, to indicate logical sections. + + Okay: def a():\n pass\n\n\ndef b():\n pass + Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass + + E301: class Foo:\n b = 0\n def bar():\n pass + E302: def a():\n pass\n\ndef b(n):\n pass + E303: def a():\n pass\n\n\n\ndef b(n):\n pass + E303: def a():\n\n\n\n pass + E304: @decorator\n\ndef a():\n pass + """ + if line_number == 1: + return # Don't expect blank lines before the first line + max_blank_lines = max(blank_lines, blank_lines_before_comment) + if previous_logical.startswith('@'): + if max_blank_lines: + return 0, "E304 blank lines found after function decorator" + elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2): + return 0, "E303 too many blank lines (%d)" % max_blank_lines + elif (logical_line.startswith('def ') or + logical_line.startswith('class ') or + logical_line.startswith('@')): + if indent_level: + if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)): + return 0, "E301 expected 1 blank line, found 0" + elif max_blank_lines != 2: + return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines + + +def extraneous_whitespace(logical_line): + """ + Avoid extraneous whitespace in the following situations: + + - Immediately inside parentheses, brackets or braces. + + - Immediately before a comma, semicolon, or colon. + + Okay: spam(ham[1], {eggs: 2}) + E201: spam( ham[1], {eggs: 2}) + E201: spam(ham[ 1], {eggs: 2}) + E201: spam(ham[1], { eggs: 2}) + E202: spam(ham[1], {eggs: 2} ) + E202: spam(ham[1 ], {eggs: 2}) + E202: spam(ham[1], {eggs: 2 }) + + E203: if x == 4: print x, y; x, y = y , x + E203: if x == 4: print x, y ; x, y = y, x + E203: if x == 4 : print x, y; x, y = y, x + """ + line = logical_line + for char in '([{': + found = line.find(char + ' ') + if found > -1: + return found + 1, "E201 whitespace after '%s'" % char + for char in '}])': + found = line.find(' ' + char) + if found > -1 and line[found - 1] != ',': + return found, "E202 whitespace before '%s'" % char + for char in ',;:': + found = line.find(' ' + char) + if found > -1: + return found, "E203 whitespace before '%s'" % char + + +def missing_whitespace(logical_line): + """ + JCR: Each comma, semicolon or colon should be followed by whitespace. 
+ + Okay: [a, b] + Okay: (3,) + Okay: a[1:4] + Okay: a[:4] + Okay: a[1:] + Okay: a[1:4:2] + E231: ['a','b'] + E231: foo(bar,baz) + """ + line = logical_line + for index in range(len(line) - 1): + char = line[index] + if char in ',;:' and line[index + 1] not in WHITESPACE: + before = line[:index] + if char == ':' and before.count('[') > before.count(']'): + continue # Slice syntax, no space required + if char == ',' and line[index + 1] == ')': + continue # Allow tuple with only one element: (3,) + return index, "E231 missing whitespace after '%s'" % char + + +def indentation(logical_line, previous_logical, indent_char, + indent_level, previous_indent_level): + r""" + Use 4 spaces per indentation level. + + For really old code that you don't want to mess up, you can continue to + use 8-space tabs. + + Okay: a = 1 + Okay: if a == 0:\n a = 1 + E111: a = 1 + + Okay: for item in items:\n pass + E112: for item in items:\npass + + Okay: a = 1\nb = 2 + E113: a = 1\n b = 2 + """ + if indent_char == ' ' and indent_level % 4: + return 0, "E111 indentation is not a multiple of four" + indent_expect = previous_logical.endswith(':') + if indent_expect and indent_level <= previous_indent_level: + return 0, "E112 expected an indented block" + if indent_level > previous_indent_level and not indent_expect: + return 0, "E113 unexpected indentation" + + +def whitespace_before_parameters(logical_line, tokens): + """ + Avoid extraneous whitespace in the following situations: + + - Immediately before the open parenthesis that starts the argument + list of a function call. + + - Immediately before the open parenthesis that starts an indexing or + slicing. + + Okay: spam(1) + E211: spam (1) + + Okay: dict['key'] = list[index] + E211: dict ['key'] = list[index] + E211: dict['key'] = list [index] + """ + prev_type = tokens[0][0] + prev_text = tokens[0][1] + prev_end = tokens[0][3] + for index in range(1, len(tokens)): + token_type, text, start, end, line = tokens[index] + if (token_type == tokenize.OP and + text in '([' and + start != prev_end and + prev_type == tokenize.NAME and + (index < 2 or tokens[index - 2][1] != 'class') and + (not iskeyword(prev_text))): + return prev_end, "E211 whitespace before '%s'" % text + prev_type = token_type + prev_text = text + prev_end = end + + +def whitespace_around_operator(logical_line): + """ + Avoid extraneous whitespace in the following situations: + + - More than one space around an assignment (or other) operator to + align it with another. + + Okay: a = 12 + 3 + E221: a = 4  + 5 + E222: a = 4 +  5 + E223: a = 4\t+ 5 + E224: a = 4 +\t5 + """ + line = logical_line + for operator in OPERATORS: + found = line.find('  ' + operator) + if found > -1: + return found, "E221 multiple spaces before operator" + found = line.find(operator + '  ') + if found > -1: + return found, "E222 multiple spaces after operator" + found = line.find('\t' + operator) + if found > -1: + return found, "E223 tab before operator" + found = line.find(operator + '\t') + if found > -1: + return found, "E224 tab after operator" + + +def missing_whitespace_around_operator(logical_line, tokens): + r""" + - Always surround these binary operators with a single space on + either side: assignment (=), augmented assignment (+=, -= etc.), + comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), + Booleans (and, or, not). + + - Use spaces around arithmetic operators.
+ + Okay: i = i + 1 + Okay: submitted += 1 + Okay: x = x * 2 - 1 + Okay: hypot2 = x * x + y * y + Okay: c = (a + b) * (a - b) + Okay: foo(bar, key='word', *args, **kwargs) + Okay: baz(**kwargs) + Okay: negative = -1 + Okay: spam(-1) + Okay: alpha[:-i] + Okay: if not -5 < x < +5:\n pass + Okay: lambda *args, **kw: (args, kw) + + E225: i=i+1 + E225: submitted +=1 + E225: x = x*2 - 1 + E225: hypot2 = x*x + y*y + E225: c = (a+b) * (a-b) + E225: c = alpha -4 + E225: z = x **y + """ + parens = 0 + need_space = False + prev_type = tokenize.OP + prev_text = prev_end = None + for token_type, text, start, end, line in tokens: + if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): + # ERRORTOKEN is triggered by backticks in Python 3000 + continue + if text in ('(', 'lambda'): + parens += 1 + elif text == ')': + parens -= 1 + if need_space: + if start == prev_end: + return prev_end, "E225 missing whitespace around operator" + need_space = False + elif token_type == tokenize.OP: + if text == '=' and parens: + # Allow keyword args or defaults: foo(bar=None). + pass + elif text in BINARY_OPERATORS: + need_space = True + elif text in UNARY_OPERATORS: + if ((prev_type != tokenize.OP or prev_text in '}])') and not + (prev_type == tokenize.NAME and iskeyword(prev_text))): + # Allow unary operators: -123, -x, +1. + # Allow argument unpacking: foo(*args, **kwargs). + need_space = True + if need_space and start == prev_end: + return prev_end, "E225 missing whitespace around operator" + prev_type = token_type + prev_text = text + prev_end = end + + +def whitespace_around_comma(logical_line): + """ + Avoid extraneous whitespace in the following situations: + + - More than one space around an assignment (or other) operator to + align it with another. + + JCR: This should also be applied around comma etc. + Note: these checks are disabled by default + + Okay: a = (1, 2) + E241: a = (1,  2) + E242: a = (1,\t2) + """ + line = logical_line + for separator in ',;:': + found = line.find(separator + '  ') + if found > -1: + return found + 1, "E241 multiple spaces after '%s'" % separator + found = line.find(separator + '\t') + if found > -1: + return found + 1, "E242 tab after '%s'" % separator + + +def whitespace_around_named_parameter_equals(logical_line): + """ + Don't use spaces around the '=' sign when used to indicate a + keyword argument or a default parameter value. + + Okay: def complex(real, imag=0.0): + Okay: return magic(r=real, i=imag) + Okay: boolean(a == b) + Okay: boolean(a != b) + Okay: boolean(a <= b) + Okay: boolean(a >= b) + + E251: def complex(real, imag = 0.0): + E251: return magic(r = real, i = imag) + """ + parens = 0 + window = '   ' + equal_ok = ['==', '!=', '<=', '>='] + + for pos, c in enumerate(logical_line): + window = window[1:] + c + if parens: + if window[0] in WHITESPACE and window[1] == '=': + if window[1:] not in equal_ok: + issue = "E251 no spaces around keyword / parameter equals" + return pos, issue + if window[2] in WHITESPACE and window[1] == '=': + if window[:2] not in equal_ok: + issue = "E251 no spaces around keyword / parameter equals" + return pos, issue + if c == '(': + parens += 1 + elif c == ')': + parens -= 1 + + +def whitespace_before_inline_comment(logical_line, tokens): + """ + Separate inline comments by at least two spaces. + + An inline comment is a comment on the same line as a statement. Inline + comments should be separated by at least two spaces from the statement. + They should start with a # and a single space.
+ + Okay: x = x + 1  # Increment x + Okay: x = x + 1    # Increment x + E261: x = x + 1 # Increment x + E262: x = x + 1  #Increment x + E262: x = x + 1  #  Increment x + """ + prev_end = (0, 0) + for token_type, text, start, end, line in tokens: + if token_type == tokenize.NL: + continue + if token_type == tokenize.COMMENT: + if not line[:start[1]].strip(): + continue + if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: + return (prev_end, + "E261 at least two spaces before inline comment") + if (len(text) > 1 and text.startswith('#  ') + or not text.startswith('# ')): + return start, "E262 inline comment should start with '# '" + else: + prev_end = end + + +def imports_on_separate_lines(logical_line): + r""" + Imports should usually be on separate lines. + + Okay: import os\nimport sys + E401: import sys, os + + Okay: from subprocess import Popen, PIPE + Okay: from myclass import MyClass + Okay: from foo.bar.yourclass import YourClass + Okay: import myclass + Okay: import foo.bar.yourclass + """ + line = logical_line + if line.startswith('import '): + found = line.find(',') + if found > -1: + return found, "E401 multiple imports on one line" + + +def compound_statements(logical_line): + r""" + Compound statements (multiple statements on the same line) are + generally discouraged. + + While sometimes it's okay to put an if/for/while with a small body + on the same line, never do this for multi-clause statements. Also + avoid folding such long lines! + + Okay: if foo == 'blah':\n do_blah_thing() + Okay: do_one() + Okay: do_two() + Okay: do_three() + + E701: if foo == 'blah': do_blah_thing() + E701: for x in lst: total += x + E701: while t < 10: t = delay() + E701: if foo == 'blah': do_blah_thing() + E701: else: do_non_blah_thing() + E701: try: something() + E701: finally: cleanup() + E701: if foo == 'blah': one(); two(); three() + + E702: do_one(); do_two(); do_three() + """ + line = logical_line + found = line.find(':') + if -1 < found < len(line) - 1: + before = line[:found] + if (before.count('{') <= before.count('}') and # {'a': 1} (dict) + before.count('[') <= before.count(']') and # [1:2] (slice) + not re.search(r'\blambda\b', before)): # lambda x: x + return found, "E701 multiple statements on one line (colon)" + found = line.find(';') + if -1 < found: + return found, "E702 multiple statements on one line (semicolon)" + + +def python_3000_has_key(logical_line): + """ + The {}.has_key() method will be removed in the future version of + Python. Use the 'in' operation instead, like: + d = {"a": 1, "b": 2} + if "b" in d: + print d["b"] + """ + pos = logical_line.find('.has_key(') + if pos > -1: + return pos, "W601 .has_key() is deprecated, use 'in'" + + +def python_3000_raise_comma(logical_line): + """ + When raising an exception, use "raise ValueError('message')" + instead of the older form "raise ValueError, 'message'". + + The paren-using form is preferred because when the exception arguments + are long or include string formatting, you don't need to use line + continuation characters thanks to the containing parentheses. The older + form will be removed in Python 3000. + """ + match = RAISE_COMMA_REGEX.match(logical_line) + if match: + return match.start(1), "W602 deprecated form of raising exception" + + +def python_3000_not_equal(logical_line): + """ + != can also be written <>, but this is an obsolete usage kept for + backwards compatibility only. New code should always use !=. + The older syntax is removed in Python 3000.
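+
+    Two illustrative lines in the selftest convention described in the
+    module docstring (an added example, not from the original file):
+
+    Okay: x = x != 0
+    W603: x = x <> 0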
+ """ + pos = logical_line.find('<>') + if pos > -1: + return pos, "W603 '<>' is deprecated, use '!='" + + +def python_3000_backticks(logical_line): + """ + Backticks are removed in Python 3000. + Use repr() instead. + """ + pos = logical_line.find('`') + if pos > -1: + return pos, "W604 backticks are deprecated, use 'repr()'" + + +############################################################################## +# Helper functions +############################################################################## + + +def expand_indent(line): + """ + Return the amount of indentation. + Tabs are expanded to the next multiple of 8. + + >>> expand_indent(' ') + 4 + >>> expand_indent('\\t') + 8 + >>> expand_indent(' \\t') + 8 + >>> expand_indent(' \\t') + 8 + >>> expand_indent(' \\t') + 16 + """ + result = 0 + for char in line: + if char == '\t': + result = result // 8 * 8 + 8 + elif char == ' ': + result += 1 + else: + break + return result + + +def mute_string(text): + """ + Replace contents with 'xxx' to prevent syntax matching. + + >>> mute_string('"abc"') + '"xxx"' + >>> mute_string("'''abc'''") + "'''xxx'''" + >>> mute_string("r'abc'") + "r'xxx'" + """ + start = 1 + end = len(text) - 1 + # String modifiers (e.g. u or r) + if text.endswith('"'): + start += text.index('"') + elif text.endswith("'"): + start += text.index("'") + # Triple quotes + if text.endswith('"""') or text.endswith("'''"): + start += 2 + end -= 2 + return text[:start] + 'x' * (end - start) + text[end:] + + +def message(text): + """Print a message.""" + # print >> sys.stderr, options.prog + ': ' + text + # print >> sys.stderr, text + print(text) + + +############################################################################## +# Framework to run all checks +############################################################################## + + +def find_checks(argument_name): + """ + Find all globally visible functions where the first argument name + starts with argument_name. + """ + checks = [] + for name, function in globals().items(): + if not inspect.isfunction(function): + continue + args = inspect.getargspec(function)[0] + if args and args[0].startswith(argument_name): + codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '') + for code in codes or ['']: + if not code or not ignore_code(code): + checks.append((name, function, args)) + break + checks.sort() + return checks + + +class Checker(object): + """ + Load a Python source file, tokenize it, check coding style. + """ + + def __init__(self, filename): + if filename: + self.filename = filename + try: + self.lines = open(filename).readlines() + except UnicodeDecodeError: + # Errors may occur with non-UTF8 files in Python 3000 + self.lines = open(filename, errors='replace').readlines() + else: + self.filename = 'stdin' + self.lines = [] + options.counters['physical lines'] = \ + options.counters.get('physical lines', 0) + len(self.lines) + + def readline(self): + """ + Get the next line from the input buffer. + """ + self.line_number += 1 + if self.line_number > len(self.lines): + return '' + return self.lines[self.line_number - 1] + + def readline_check_physical(self): + """ + Check and return the next physical line. This method can be + used to feed tokenize.generate_tokens. + """ + line = self.readline() + if line: + self.check_physical(line) + return line + + def run_check(self, check, argument_names): + """ + Run a check plugin. 
+ """ + arguments = [] + for name in argument_names: + arguments.append(getattr(self, name)) + return check(*arguments) + + def check_physical(self, line): + """ + Run all physical checks on a raw input line. + """ + self.physical_line = line + if self.indent_char is None and len(line) and line[0] in ' \t': + self.indent_char = line[0] + for name, check, argument_names in options.physical_checks: + result = self.run_check(check, argument_names) + if result is not None: + offset, text = result + self.report_error(self.line_number, offset, text, check) + + def build_tokens_line(self): + """ + Build a logical line from tokens. + """ + self.mapping = [] + logical = [] + length = 0 + previous = None + for token in self.tokens: + token_type, text = token[0:2] + if token_type in (tokenize.COMMENT, tokenize.NL, + tokenize.INDENT, tokenize.DEDENT, + tokenize.NEWLINE): + continue + if token_type == tokenize.STRING: + text = mute_string(text) + if previous: + end_line, end = previous[3] + start_line, start = token[2] + if end_line != start_line: # different row + if self.lines[end_line - 1][end - 1] not in '{[(': + logical.append(' ') + length += 1 + elif end != start: # different column + fill = self.lines[end_line - 1][end:start] + logical.append(fill) + length += len(fill) + self.mapping.append((length, token)) + logical.append(text) + length += len(text) + previous = token + self.logical_line = ''.join(logical) + assert self.logical_line.lstrip() == self.logical_line + assert self.logical_line.rstrip() == self.logical_line + + def check_logical(self): + """ + Build a line from tokens and run all logical checks on it. + """ + options.counters['logical lines'] = \ + options.counters.get('logical lines', 0) + 1 + self.build_tokens_line() + first_line = self.lines[self.mapping[0][1][2][0] - 1] + indent = first_line[:self.mapping[0][1][2][1]] + self.previous_indent_level = self.indent_level + self.indent_level = expand_indent(indent) + if options.verbose >= 2: + print(self.logical_line[:80].rstrip()) + for name, check, argument_names in options.logical_checks: + if options.verbose >= 3: + print(' ', name) + result = self.run_check(check, argument_names) + if result is not None: + offset, text = result + if isinstance(offset, tuple): + original_number, original_offset = offset + else: + for token_offset, token in self.mapping: + if offset >= token_offset: + original_number = token[2][0] + original_offset = (token[2][1] + + offset - token_offset) + self.report_error(original_number, original_offset, + text, check) + self.previous_logical = self.logical_line + + def check_all(self): + """ + Run all checks on the input file. + """ + self.file_errors = 0 + self.line_number = 0 + self.indent_char = None + self.indent_level = 0 + self.previous_logical = '' + self.blank_lines = 0 + self.blank_lines_before_comment = 0 + self.tokens = [] + parens = 0 + for token in tokenize.generate_tokens(self.readline_check_physical): + # print(tokenize.tok_name[token[0]], repr(token)) + self.tokens.append(token) + token_type, text = token[0:2] + if token_type == tokenize.OP and text in '([{': + parens += 1 + if token_type == tokenize.OP and text in '}])': + parens -= 1 + if token_type == tokenize.NEWLINE and not parens: + self.check_logical() + self.blank_lines = 0 + self.blank_lines_before_comment = 0 + self.tokens = [] + if token_type == tokenize.NL and not parens: + if len(self.tokens) <= 1: + # The physical line contains only this token. 
+ self.blank_lines += 1 + self.tokens = [] + if token_type == tokenize.COMMENT: + source_line = token[4] + token_start = token[2][1] + if source_line[:token_start].strip() == '': + self.blank_lines_before_comment = max(self.blank_lines, + self.blank_lines_before_comment) + self.blank_lines = 0 + if text.endswith('\n') and not parens: + # The comment also ends a physical line. This works around + # Python < 2.6 behaviour, which does not generate NL after + # a comment which is on a line by itself. + self.tokens = [] + return self.file_errors + + def report_error(self, line_number, offset, text, check): + """ + Report an error, according to options. + """ + if options.quiet == 1 and not self.file_errors: + message(self.filename) + self.file_errors += 1 + code = text[:4] + options.counters[code] = options.counters.get(code, 0) + 1 + options.messages[code] = text[5:] + if options.quiet: + return + if options.testsuite: + basename = os.path.basename(self.filename) + if basename[:4] != code: + return # Don't care about other errors or warnings + if 'not' not in basename: + return # Don't print the expected error message + if ignore_code(code): + return + if options.counters[code] == 1 or options.repeat: + message("%s:%s:%d: %s" % + (self.filename, line_number, offset + 1, text)) + if options.show_source: + line = self.lines[line_number - 1] + message(line.rstrip()) + message(' ' * offset + '^') + if options.show_pep8: + message(check.__doc__.lstrip('\n').rstrip()) + + +def input_file(filename): + """ + Run all checks on a Python source file. + """ + if excluded(filename): + return {} + if options.verbose: + message('checking ' + filename) + files_counter_before = options.counters.get('files', 0) + if options.testsuite: # Keep showing errors for multiple tests + options.counters = {} + options.counters['files'] = files_counter_before + 1 + errors = Checker(filename).check_all() + if options.testsuite: # Check if the expected error was found + basename = os.path.basename(filename) + code = basename[:4] + count = options.counters.get(code, 0) + if count == 0 and 'not' not in basename: + message("%s: error %s not found" % (filename, code)) + + +def input_dir(dirname): + """ + Check all Python source files in this directory and all subdirectories. + """ + dirname = dirname.rstrip('/') + if excluded(dirname): + return + for root, dirs, files in os.walk(dirname): + if options.verbose: + message('directory ' + root) + options.counters['directories'] = \ + options.counters.get('directories', 0) + 1 + dirs.sort() + for subdir in dirs[:]: # iterate over a copy; dirs.remove() below mutates the list os.walk uses, and removing while iterating it would skip entries + if excluded(subdir): + dirs.remove(subdir) + files.sort() + for filename in files: + if filename_match(filename): + input_file(os.path.join(root, filename)) + + +def excluded(filename): + """ + Check if options.exclude contains a pattern that matches filename. + """ + basename = os.path.basename(filename) + for pattern in options.exclude: + if fnmatch(basename, pattern): + # print basename, 'excluded because it matches', pattern + return True + + +def filename_match(filename): + """ + Check if options.filename contains a pattern that matches filename. + If options.filename is unspecified, this always returns True. + """ + if not options.filename: + return True + for pattern in options.filename: + if fnmatch(filename, pattern): + return True + + +def ignore_code(code): + """ + Check if options.ignore contains a prefix of the error code. + If options.select contains a prefix of the error code, do not ignore it.
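+
+    For example (illustrative values): with options.ignore == ['E4'] and
+    options.select == ['E401'], ignore_code('E401') returns False while
+    ignore_code('E402') returns True.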
+ """ + for select in options.select: + if code.startswith(select): + return False + for ignore in options.ignore: + if code.startswith(ignore): + return True + + +def get_error_statistics(): + """Get error statistics.""" + return get_statistics("E") + + +def get_warning_statistics(): + """Get warning statistics.""" + return get_statistics("W") + + +def get_statistics(prefix=''): + """ + Get statistics for message codes that start with the prefix. + + prefix='' matches all errors and warnings + prefix='E' matches all errors + prefix='W' matches all warnings + prefix='E4' matches all errors that have to do with imports + """ + stats = [] + keys = list(options.messages.keys()) + keys.sort() + for key in keys: + if key.startswith(prefix): + stats.append('%-7s %s %s' % + (options.counters[key], key, options.messages[key])) + return stats + + +def get_count(prefix=''): + """Return the total count of errors and warnings.""" + keys = list(options.messages.keys()) + count = 0 + for key in keys: + if key.startswith(prefix): + count += options.counters[key] + return count + + +def print_statistics(prefix=''): + """Print overall statistics (number of errors and warnings).""" + for line in get_statistics(prefix): + print(line) + + +def print_benchmark(elapsed): + """ + Print benchmark numbers. + """ + print('%-7.2f %s' % (elapsed, 'seconds elapsed')) + keys = ['directories', 'files', + 'logical lines', 'physical lines'] + for key in keys: + if key in options.counters: + print('%-7d %s per second (%d total)' % ( + options.counters[key] / elapsed, key, + options.counters[key])) + + +def selftest(): + """ + Test all check functions with test cases in docstrings. + """ + count_passed = 0 + count_failed = 0 + checks = options.physical_checks + options.logical_checks + for name, check, argument_names in checks: + for line in check.__doc__.splitlines(): + line = line.lstrip() + match = SELFTEST_REGEX.match(line) + if match is None: + continue + code, source = match.groups() + checker = Checker(None) + for part in source.split(r'\n'): + part = part.replace(r'\t', '\t') + part = part.replace(r'\s', ' ') + checker.lines.append(part + '\n') + options.quiet = 2 + options.counters = {} + checker.check_all() + error = None + if code == 'Okay': + if len(options.counters) > 1: + codes = [key for key in options.counters.keys() + if key != 'logical lines'] + error = "incorrectly found %s" % ', '.join(codes) + elif options.counters.get(code, 0) == 0: + error = "failed to find %s" % code + if not error: + count_passed += 1 + else: + count_failed += 1 + if len(checker.lines) == 1: + print("pep8.py: %s: %s" % + (error, checker.lines[0].rstrip())) + else: + print("pep8.py: %s:" % error) + for line in checker.lines: + print(line.rstrip()) + if options.verbose: + print("%d passed and %d failed." % (count_passed, count_failed)) + if count_failed: + print("Test failed.") + else: + print("Test passed.") + + +def process_options(arglist=None): + """ + Process options passed either via arglist or via command line args. 
+ """ + global options, args + parser = OptionParser(version=__version__, + usage="%prog [options] input ...") + parser.add_option('-v', '--verbose', default=0, action='count', + help="print status messages, or debug with -vv") + parser.add_option('-q', '--quiet', default=0, action='count', + help="report only file names, or nothing with -qq") + parser.add_option('-r', '--repeat', action='store_true', + help="show all occurrences of the same error") + parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + parser.add_option('--filename', metavar='patterns', default='*.py', + help="when parsing directories, only check filenames " + "matching these comma separated patterns (default: " + "*.py)") + parser.add_option('--select', metavar='errors', default='', + help="select errors and warnings (e.g. E,W6)") + parser.add_option('--ignore', metavar='errors', default='', + help="skip errors and warnings (e.g. E4,W)") + parser.add_option('--show-source', action='store_true', + help="show source code for each error") + parser.add_option('--show-pep8', action='store_true', + help="show text of PEP 8 for each error") + parser.add_option('--statistics', action='store_true', + help="count errors and warnings") + parser.add_option('--count', action='store_true', + help="print total number of errors and warnings " + "to standard error and set exit code to 1 if " + "total is not null") + parser.add_option('--benchmark', action='store_true', + help="measure processing speed") + parser.add_option('--testsuite', metavar='dir', + help="run regression tests from dir") + parser.add_option('--doctest', action='store_true', + help="run doctest on myself") + options, args = parser.parse_args(arglist) + if options.testsuite: + args.append(options.testsuite) + if len(args) == 0 and not options.doctest: + parser.error('input not specified') + options.prog = os.path.basename(sys.argv[0]) + options.exclude = options.exclude.split(',') + for index in range(len(options.exclude)): + options.exclude[index] = options.exclude[index].rstrip('/') + if options.filename: + options.filename = options.filename.split(',') + if options.select: + options.select = options.select.split(',') + else: + options.select = [] + if options.ignore: + options.ignore = options.ignore.split(',') + elif options.select: + # Ignore all checks which are not explicitly selected + options.ignore = [''] + elif options.testsuite or options.doctest: + # For doctest and testsuite, all checks are required + options.ignore = [] + else: + # The default choice: ignore controversial checks + options.ignore = DEFAULT_IGNORE + options.physical_checks = find_checks('physical_line') + options.logical_checks = find_checks('logical_line') + options.counters = {} + options.messages = {} + return options, args + + +def _main(): + """ + Parse options and run checks on Python source. 
+ """ + options, args = process_options() + if options.doctest: + import doctest + doctest.testmod(verbose=options.verbose) + selftest() + start_time = time.time() + for path in args: + if os.path.isdir(path): + input_dir(path) + else: + input_file(path) + elapsed = time.time() - start_time + if options.statistics: + print_statistics() + if options.benchmark: + print_benchmark(elapsed) + if options.count: + count = get_count() + if count: + sys.stderr.write(str(count) + '\n') + sys.exit(1) + + +if __name__ == '__main__': + _main() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url new file mode 100644 index 0000000..8098dbc --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/.upload.py.url @@ -0,0 +1 @@ +http://webkit-rietveld.googlecode.com/svn/trunk/static/upload.py
\ No newline at end of file diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py new file mode 100644 index 0000000..c1e4c6d --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/__init__.py @@ -0,0 +1 @@ +# This file is required for Python to search this directory for modules. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py new file mode 100755 index 0000000..e91060f --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/autoinstalled/rietveld/upload.py @@ -0,0 +1,1702 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tool for uploading diffs from a version control system to the codereview app. + +Usage summary: upload.py [options] [-- diff_options] + +Diff options are passed to the diff command of the underlying system. + +Supported version control systems: + Git + Mercurial + Subversion + +It is important for Git/Mercurial users to specify a tree/node/branch to diff +against by using the '--rev' option. +""" +# This code is derived from appcfg.py in the App Engine SDK (open source), +# and from ASPN recipe #146306. + +import ConfigParser +import cookielib +import fnmatch +import getpass +import logging +import mimetypes +import optparse +import os +import re +import socket +import subprocess +import sys +import urllib +import urllib2 +import urlparse + +# The md5 module was deprecated in Python 2.5. +try: + from hashlib import md5 +except ImportError: + from md5 import md5 + +try: + import readline +except ImportError: + pass + +# The logging verbosity: +# 0: Errors only. +# 1: Status messages. +# 2: Info logs. +# 3: Debug logs. +verbosity = 1 + +# Max size of patch or base file. +MAX_UPLOAD_SIZE = 900 * 1024 + +# Constants for version control names. Used by GuessVCSName. +VCS_GIT = "Git" +VCS_MERCURIAL = "Mercurial" +VCS_SUBVERSION = "Subversion" +VCS_UNKNOWN = "Unknown" + +# whitelist for non-binary filetypes which do not start with "text/" +# .mm (Objective-C) shows up as application/x-freemind on my Linux box. +TEXT_MIMETYPES = ['application/javascript', 'application/x-javascript', + 'application/xml', 'application/x-freemind'] + +VCS_ABBREVIATIONS = { + VCS_MERCURIAL.lower(): VCS_MERCURIAL, + "hg": VCS_MERCURIAL, + VCS_SUBVERSION.lower(): VCS_SUBVERSION, + "svn": VCS_SUBVERSION, + VCS_GIT.lower(): VCS_GIT, +} + +# The result of parsing Subversion's [auto-props] setting. +svn_auto_props_map = None + +def GetEmail(prompt): + """Prompts the user for their email address and returns it. + + The last used email address is saved to a file and offered up as a suggestion + to the user. If the user presses enter without typing in anything the last + used email address is used. If the user enters a new address, it is saved + for next time we prompt. 
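+
+ For example (hypothetical address), with user@example.com saved from a
+ previous run, a prompt of "Email" is displayed as 'Email [user@example.com]: '
+ and pressing enter reuses the saved address.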
+ + """ + last_email_file_name = os.path.expanduser("~/.last_codereview_email_address") + last_email = "" + if os.path.exists(last_email_file_name): + try: + last_email_file = open(last_email_file_name, "r") + last_email = last_email_file.readline().strip("\n") + last_email_file.close() + prompt += " [%s]" % last_email + except IOError, e: + pass + email = raw_input(prompt + ": ").strip() + if email: + try: + last_email_file = open(last_email_file_name, "w") + last_email_file.write(email) + last_email_file.close() + except IOError, e: + pass + else: + email = last_email + return email + + +def StatusUpdate(msg): + """Print a status message to stdout. + + If 'verbosity' is greater than 0, print the message. + + Args: + msg: The string to print. + """ + if verbosity > 0: + print msg + + +def ErrorExit(msg): + """Print an error message to stderr and exit.""" + print >>sys.stderr, msg + sys.exit(1) + + +class ClientLoginError(urllib2.HTTPError): + """Raised to indicate there was an error authenticating with ClientLogin.""" + + def __init__(self, url, code, msg, headers, args): + urllib2.HTTPError.__init__(self, url, code, msg, headers, None) + self.args = args + self.reason = args["Error"] + + +class AbstractRpcServer(object): + """Provides a common interface for a simple RPC server.""" + + def __init__(self, host, auth_function, host_override=None, extra_headers={}, + save_cookies=False): + """Creates a new HttpRpcServer. + + Args: + host: The host to send requests to. + auth_function: A function that takes no arguments and returns an + (email, password) tuple when called. Will be called if authentication + is required. + host_override: The host header to send to the server (defaults to host). + extra_headers: A dict of extra headers to append to every request. + save_cookies: If True, save the authentication cookies to local disk. + If False, use an in-memory cookiejar instead. Subclasses must + implement this functionality. Defaults to False. + """ + self.host = host + self.host_override = host_override + self.auth_function = auth_function + self.authenticated = False + self.extra_headers = extra_headers + self.save_cookies = save_cookies + self.opener = self._GetOpener() + if self.host_override: + logging.info("Server: %s; Host: %s", self.host, self.host_override) + else: + logging.info("Server: %s", self.host) + + def _GetOpener(self): + """Returns an OpenerDirector for making HTTP requests. + + Returns: + A urllib2.OpenerDirector object. + """ + raise NotImplementedError() + + def _CreateRequest(self, url, data=None): + """Creates a new urllib request.""" + logging.debug("Creating request for: '%s' with payload:\n%s", url, data) + req = urllib2.Request(url, data=data) + if self.host_override: + req.add_header("Host", self.host_override) + for key, value in self.extra_headers.iteritems(): + req.add_header(key, value) + return req + + def _GetAuthToken(self, email, password): + """Uses ClientLogin to authenticate the user, returning an auth token. + + Args: + email: The user's email address + password: The user's password + + Raises: + ClientLoginError: If there was an error authenticating with ClientLogin. + HTTPError: If there was some other form of HTTP error. + + Returns: + The authentication token returned by ClientLogin. + """ + account_type = "GOOGLE" + if self.host.endswith(".google.com"): + # Needed for use inside Google. 
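+ # ("HOSTED" is ClientLogin's account type for hosted, i.e. Google
+ # Apps, domains.)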
+ account_type = "HOSTED" + req = self._CreateRequest( + url="https://www.google.com/accounts/ClientLogin", + data=urllib.urlencode({ + "Email": email, + "Passwd": password, + "service": "ah", + "source": "rietveld-codereview-upload", + "accountType": account_type, + }), + ) + try: + response = self.opener.open(req) + response_body = response.read() + response_dict = dict(x.split("=") + for x in response_body.split("\n") if x) + return response_dict["Auth"] + except urllib2.HTTPError, e: + if e.code == 403: + body = e.read() + response_dict = dict(x.split("=", 1) for x in body.split("\n") if x) + raise ClientLoginError(req.get_full_url(), e.code, e.msg, + e.headers, response_dict) + else: + raise + + def _GetAuthCookie(self, auth_token): + """Fetches authentication cookies for an authentication token. + + Args: + auth_token: The authentication token returned by ClientLogin. + + Raises: + HTTPError: If there was an error fetching the authentication cookies. + """ + # This is a dummy value to allow us to identify when we're successful. + continue_location = "http://localhost/" + args = {"continue": continue_location, "auth": auth_token} + req = self._CreateRequest("http://%s/_ah/login?%s" % + (self.host, urllib.urlencode(args))) + try: + response = self.opener.open(req) + except urllib2.HTTPError, e: + response = e + if (response.code != 302 or + response.info()["location"] != continue_location): + raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg, + response.headers, response.fp) + self.authenticated = True + + def _Authenticate(self): + """Authenticates the user. + + The authentication process works as follows: + 1) We get a username and password from the user + 2) We use ClientLogin to obtain an AUTH token for the user + (see http://code.google.com/apis/accounts/AuthForInstalledApps.html). + 3) We pass the auth token to /_ah/login on the server to obtain an + authentication cookie. If login was successful, it tries to redirect + us to the URL we provided. + + If we attempt to access the upload API without first obtaining an + authentication cookie, it returns a 401 response (or a 302) and + directs us to authenticate ourselves with ClientLogin. + """ + for i in range(3): + credentials = self.auth_function() + try: + auth_token = self._GetAuthToken(credentials[0], credentials[1]) + except ClientLoginError, e: + if e.reason == "BadAuthentication": + print >>sys.stderr, "Invalid username or password." + continue + if e.reason == "CaptchaRequired": + print >>sys.stderr, ( + "Please go to\n" + "https://www.google.com/accounts/DisplayUnlockCaptcha\n" + "and verify you are a human. Then try again.") + break + if e.reason == "NotVerified": + print >>sys.stderr, "Account not verified." + break + if e.reason == "TermsNotAgreed": + print >>sys.stderr, "User has not agreed to TOS." + break + if e.reason == "AccountDeleted": + print >>sys.stderr, "The user account has been deleted." + break + if e.reason == "AccountDisabled": + print >>sys.stderr, "The user account has been disabled." + break + if e.reason == "ServiceDisabled": + print >>sys.stderr, ("The user's access to the service has been " + "disabled.") + break + if e.reason == "ServiceUnavailable": + print >>sys.stderr, "The service is not available; try again later." + break + raise + self._GetAuthCookie(auth_token) + return + + def Send(self, request_path, payload=None, + content_type="application/octet-stream", + timeout=None, + **kwargs): + """Sends an RPC and returns the response. 
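+
+ For example, RealMain below posts a new issue with
+ Send("/upload", body, content_type=ctype).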
+ + Args: + request_path: The path to send the request to, eg /api/appversion/create. + payload: The body of the request, or None to send an empty request. + content_type: The Content-Type header to use. + timeout: timeout in seconds; default None i.e. no timeout. + (Note: for large requests on OS X, the timeout doesn't work right.) + kwargs: Any keyword arguments are converted into query string parameters. + + Returns: + The response body, as a string. + """ + # TODO: Don't require authentication. Let the server say + # whether it is necessary. + if not self.authenticated: + self._Authenticate() + + old_timeout = socket.getdefaulttimeout() + socket.setdefaulttimeout(timeout) + try: + tries = 0 + while True: + tries += 1 + args = dict(kwargs) + url = "http://%s%s" % (self.host, request_path) + if args: + url += "?" + urllib.urlencode(args) + req = self._CreateRequest(url=url, data=payload) + req.add_header("Content-Type", content_type) + try: + f = self.opener.open(req) + response = f.read() + f.close() + return response + except urllib2.HTTPError, e: + if tries > 3: + raise + elif e.code == 401 or e.code == 302: + self._Authenticate() +## elif e.code >= 500 and e.code < 600: +## # Server Error - try again. +## continue + else: + raise + finally: + socket.setdefaulttimeout(old_timeout) + + +class HttpRpcServer(AbstractRpcServer): + """Provides a simplified RPC-style interface for HTTP requests.""" + + def _Authenticate(self): + """Save the cookie jar after authentication.""" + super(HttpRpcServer, self)._Authenticate() + if self.save_cookies: + StatusUpdate("Saving authentication cookies to %s" % self.cookie_file) + self.cookie_jar.save() + + def _GetOpener(self): + """Returns an OpenerDirector that supports cookies and ignores redirects. + + Returns: + A urllib2.OpenerDirector object. + """ + opener = urllib2.OpenerDirector() + opener.add_handler(urllib2.ProxyHandler()) + opener.add_handler(urllib2.UnknownHandler()) + opener.add_handler(urllib2.HTTPHandler()) + opener.add_handler(urllib2.HTTPDefaultErrorHandler()) + opener.add_handler(urllib2.HTTPSHandler()) + opener.add_handler(urllib2.HTTPErrorProcessor()) + if self.save_cookies: + self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies") + self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file) + if os.path.exists(self.cookie_file): + try: + self.cookie_jar.load() + self.authenticated = True + StatusUpdate("Loaded authentication cookies from %s" % + self.cookie_file) + except (cookielib.LoadError, IOError): + # Failed to load cookies - just ignore them. + pass + else: + # Create an empty cookie file with mode 600 + fd = os.open(self.cookie_file, os.O_CREAT, 0600) + os.close(fd) + # Always chmod the cookie file + os.chmod(self.cookie_file, 0600) + else: + # Don't save cookies across runs of update.py. 
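+ # An in-memory cookie jar is enough for a single invocation.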
+ self.cookie_jar = cookielib.CookieJar() + opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar)) + return opener + + +parser = optparse.OptionParser(usage="%prog [options] [-- diff_options]") +parser.add_option("-y", "--assume_yes", action="store_true", + dest="assume_yes", default=False, + help="Assume that the answer to yes/no questions is 'yes'.") +# Logging +group = parser.add_option_group("Logging options") +group.add_option("-q", "--quiet", action="store_const", const=0, + dest="verbose", help="Print errors only.") +group.add_option("-v", "--verbose", action="store_const", const=2, + dest="verbose", default=1, + help="Print info level logs (default).") +group.add_option("--noisy", action="store_const", const=3, + dest="verbose", help="Print all logs.") +# Review server +group = parser.add_option_group("Review server options") +group.add_option("-s", "--server", action="store", dest="server", + default="codereview.appspot.com", + metavar="SERVER", + help=("The server to upload to. The format is host[:port]. " + "Defaults to '%default'.")) +group.add_option("-e", "--email", action="store", dest="email", + metavar="EMAIL", default=None, + help="The username to use. Will prompt if omitted.") +group.add_option("-H", "--host", action="store", dest="host", + metavar="HOST", default=None, + help="Overrides the Host header sent with all RPCs.") +group.add_option("--no_cookies", action="store_false", + dest="save_cookies", default=True, + help="Do not save authentication cookies to local disk.") +# Issue +group = parser.add_option_group("Issue options") +group.add_option("-d", "--description", action="store", dest="description", + metavar="DESCRIPTION", default=None, + help="Optional description when creating an issue.") +group.add_option("-f", "--description_file", action="store", + dest="description_file", metavar="DESCRIPTION_FILE", + default=None, + help="Optional path of a file that contains " + "the description when creating an issue.") +group.add_option("-r", "--reviewers", action="store", dest="reviewers", + metavar="REVIEWERS", default=None, + help="Add reviewers (comma separated email addresses).") +group.add_option("--cc", action="store", dest="cc", + metavar="CC", default=None, + help="Add CC (comma separated email addresses).") +group.add_option("--private", action="store_true", dest="private", + default=False, + help="Make the issue restricted to reviewers and those CCed") +# Upload options +group = parser.add_option_group("Patch options") +group.add_option("-m", "--message", action="store", dest="message", + metavar="MESSAGE", default=None, + help="A message to identify the patch. " + "Will prompt if omitted.") +group.add_option("-i", "--issue", type="int", action="store", + metavar="ISSUE", default=None, + help="Issue number to which to add. Defaults to new issue.") +group.add_option("--base_url", action="store", dest="base_url", default=None, + help="Base repository URL (listed as \"Base URL\" when " + "viewing issue). If omitted, will be guessed automatically " + "for SVN repos and left blank for others.") +group.add_option("--download_base", action="store_true", + dest="download_base", default=False, + help="Base files will be downloaded by the server " + "(side-by-side diffs may not work on files with CRs).") +group.add_option("--rev", action="store", dest="revision", + metavar="REV", default=None, + help="Base revision/branch/tree to diff against. 
Use " + "rev1:rev2 range to review already committed changeset.") +group.add_option("--send_mail", action="store_true", + dest="send_mail", default=False, + help="Send notification email to reviewers.") +group.add_option("--vcs", action="store", dest="vcs", + metavar="VCS", default=None, + help=("Version control system (optional, usually upload.py " + "already guesses the right VCS).")) +group.add_option("--emulate_svn_auto_props", action="store_true", + dest="emulate_svn_auto_props", default=False, + help=("Emulate Subversion's auto properties feature.")) + + +def GetRpcServer(server, email=None, host_override=None, save_cookies=True): + """Returns an instance of an AbstractRpcServer. + + Args: + server: String containing the review server URL. + email: String containing user's email address. + host_override: If not None, string containing an alternate hostname to use + in the host header. + save_cookies: Whether authentication cookies should be saved to disk. + + Returns: + A new AbstractRpcServer, on which RPC calls can be made. + """ + + rpc_server_class = HttpRpcServer + + def GetUserCredentials(): + """Prompts the user for a username and password.""" + if email is None: + email = GetEmail("Email (login for uploading to %s)" % server) + password = getpass.getpass("Password for %s: " % email) + return (email, password) + + # If this is the dev_appserver, use fake authentication. + host = (host_override or server).lower() + if host == "localhost" or host.startswith("localhost:"): + if email is None: + email = "test@example.com" + logging.info("Using debug user %s. Override with --email" % email) + server = rpc_server_class( + server, + lambda: (email, "password"), + host_override=host_override, + extra_headers={"Cookie": + 'dev_appserver_login="%s:False"' % email}, + save_cookies=save_cookies) + # Don't try to talk to ClientLogin. + server.authenticated = True + return server + + return rpc_server_class(server, + GetUserCredentials, + host_override=host_override, + save_cookies=save_cookies) + + +def EncodeMultipartFormData(fields, files): + """Encode form fields for multipart/form-data. + + Args: + fields: A sequence of (name, value) elements for regular form fields. + files: A sequence of (name, filename, value) elements for data to be + uploaded as files. + Returns: + (content_type, body) ready for httplib.HTTP instance. + + Source: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 + """ + BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-' + CRLF = '\r\n' + lines = [] + for (key, value) in fields: + lines.append('--' + BOUNDARY) + lines.append('Content-Disposition: form-data; name="%s"' % key) + lines.append('') + lines.append(value) + for (key, filename, value) in files: + lines.append('--' + BOUNDARY) + lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' % + (key, filename)) + lines.append('Content-Type: %s' % GetContentType(filename)) + lines.append('') + lines.append(value) + lines.append('--' + BOUNDARY + '--') + lines.append('') + body = CRLF.join(lines) + content_type = 'multipart/form-data; boundary=%s' % BOUNDARY + return content_type, body + + +def GetContentType(filename): + """Helper to guess the content-type from the filename.""" + return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + + +# Use a shell for subcommands on Windows to get a PATH search. 
+use_shell = sys.platform.startswith("win")
+
+def RunShellWithReturnCode(command, print_output=False,
+ universal_newlines=True,
+ env=os.environ):
+ """Executes a command and returns the output from stdout and the return code.
+
+ Args:
+ command: Command to execute.
+ print_output: If True, the output is printed to stdout.
+ If False, both stdout and stderr are ignored.
+ universal_newlines: Use universal_newlines flag (default: True).
+
+ Returns:
+ Tuple (output, return code)
+ """
+ logging.info("Running %s", command)
+ p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ shell=use_shell, universal_newlines=universal_newlines,
+ env=env)
+ if print_output:
+ output_array = []
+ while True:
+ line = p.stdout.readline()
+ if not line:
+ break
+ print line.strip("\n")
+ output_array.append(line)
+ output = "".join(output_array)
+ else:
+ output = p.stdout.read()
+ p.wait()
+ errout = p.stderr.read()
+ if print_output and errout:
+ print >>sys.stderr, errout
+ p.stdout.close()
+ p.stderr.close()
+ return output, p.returncode
+
+
+def RunShell(command, silent_ok=False, universal_newlines=True,
+ print_output=False, env=os.environ):
+ data, retcode = RunShellWithReturnCode(command, print_output,
+ universal_newlines, env)
+ if retcode:
+ ErrorExit("Got error status from %s:\n%s" % (command, data))
+ if not silent_ok and not data:
+ ErrorExit("No output from %s" % command)
+ return data
+
+
+class VersionControlSystem(object):
+ """Abstract base class providing an interface to the VCS."""
+
+ def __init__(self, options):
+ """Constructor.
+
+ Args:
+ options: Command line options.
+ """
+ self.options = options
+
+ def GenerateDiff(self, args):
+ """Return the current diff as a string.
+
+ Args:
+ args: Extra arguments to pass to the diff command.
+ """
+ raise NotImplementedError(
+ "abstract method -- subclass %s must override" % self.__class__)
+
+ def GetUnknownFiles(self):
+ """Return a list of files unknown to the VCS."""
+ raise NotImplementedError(
+ "abstract method -- subclass %s must override" % self.__class__)
+
+ def CheckForUnknownFiles(self):
+ """Show an "are you sure?" prompt if there are unknown files."""
+ unknown_files = self.GetUnknownFiles()
+ if unknown_files:
+ print "The following files are not added to version control:"
+ for line in unknown_files:
+ print line
+ prompt = "Are you sure you want to continue? (y/N) "
+ answer = raw_input(prompt).strip()
+ if answer != "y":
+ ErrorExit("User aborted")
+
+ def GetBaseFile(self, filename):
+ """Get the content of the upstream version of a file.
+
+ Returns:
+ A tuple (base_content, new_content, is_binary, status)
+ base_content: The contents of the base file.
+ new_content: For text files, this is empty. For binary files, this is
+ the contents of the new file, since the diff output won't contain
+ information to reconstruct the current file.
+ is_binary: True iff the file is binary.
+ status: The status of the file.
+ """
+
+ raise NotImplementedError(
+ "abstract method -- subclass %s must override" % self.__class__)
+
+
+ def GetBaseFiles(self, diff):
+ """Helper that calls GetBaseFile for each file in the patch.
+
+ Returns:
+ A dictionary that maps from filename to GetBaseFile's tuple. Filenames
+ are retrieved based on lines that start with "Index:" or
+ "Property changes on:".
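+
+ For example (hypothetical path), a diff touching WebCore/ChangeLog maps
+ 'WebCore/ChangeLog' to its (base_content, new_content, is_binary, status)
+ tuple.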
+ """ + files = {} + for line in diff.splitlines(True): + if line.startswith('Index:') or line.startswith('Property changes on:'): + unused, filename = line.split(':', 1) + # On Windows if a file has property changes its filename uses '\' + # instead of '/'. + filename = filename.strip().replace('\\', '/') + files[filename] = self.GetBaseFile(filename) + return files + + + def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options, + files): + """Uploads the base files (and if necessary, the current ones as well).""" + + def UploadFile(filename, file_id, content, is_binary, status, is_base): + """Uploads a file to the server.""" + file_too_large = False + if is_base: + type = "base" + else: + type = "current" + if len(content) > MAX_UPLOAD_SIZE: + print ("Not uploading the %s file for %s because it's too large." % + (type, filename)) + file_too_large = True + content = "" + checksum = md5(content).hexdigest() + if options.verbose > 0 and not file_too_large: + print "Uploading %s file for %s" % (type, filename) + url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id) + form_fields = [("filename", filename), + ("status", status), + ("checksum", checksum), + ("is_binary", str(is_binary)), + ("is_current", str(not is_base)), + ] + if file_too_large: + form_fields.append(("file_too_large", "1")) + if options.email: + form_fields.append(("user", options.email)) + ctype, body = EncodeMultipartFormData(form_fields, + [("data", filename, content)]) + response_body = rpc_server.Send(url, body, + content_type=ctype) + if not response_body.startswith("OK"): + StatusUpdate(" --> %s" % response_body) + sys.exit(1) + + patches = dict() + [patches.setdefault(v, k) for k, v in patch_list] + for filename in patches.keys(): + base_content, new_content, is_binary, status = files[filename] + file_id_str = patches.get(filename) + if file_id_str.find("nobase") != -1: + base_content = None + file_id_str = file_id_str[file_id_str.rfind("_") + 1:] + file_id = int(file_id_str) + if base_content != None: + UploadFile(filename, file_id, base_content, is_binary, status, True) + if new_content != None: + UploadFile(filename, file_id, new_content, is_binary, status, False) + + def IsImage(self, filename): + """Returns true if the filename has an image extension.""" + mimetype = mimetypes.guess_type(filename)[0] + if not mimetype: + return False + return mimetype.startswith("image/") + + def IsBinary(self, filename): + """Returns true if the guessed mimetyped isnt't in text group.""" + mimetype = mimetypes.guess_type(filename)[0] + if not mimetype: + return False # e.g. README, "real" binaries usually have an extension + # special case for text files which don't start with text/ + if mimetype in TEXT_MIMETYPES: + return False + return not mimetype.startswith("text/") + + +class SubversionVCS(VersionControlSystem): + """Implementation of the VersionControlSystem interface for Subversion.""" + + def __init__(self, options): + super(SubversionVCS, self).__init__(options) + if self.options.revision: + match = re.match(r"(\d+)(:(\d+))?", self.options.revision) + if not match: + ErrorExit("Invalid Subversion revision %s." % self.options.revision) + self.rev_start = match.group(1) + self.rev_end = match.group(3) + else: + self.rev_start = self.rev_end = None + # Cache output from "svn list -r REVNO dirname". + # Keys: dirname, Values: 2-tuple (ouput for start rev and end rev). + self.svnls_cache = {} + # Base URL is required to fetch files deleted in an older revision. 
+ # Result is cached to not guess it over and over again in GetBaseFile(). + required = self.options.download_base or self.options.revision is not None + self.svn_base = self._GuessBase(required) + + def GuessBase(self, required): + """Wrapper for _GuessBase.""" + return self.svn_base + + def _GuessBase(self, required): + """Returns the SVN base URL. + + Args: + required: If true, exits if the url can't be guessed, otherwise None is + returned. + """ + info = RunShell(["svn", "info"]) + for line in info.splitlines(): + words = line.split() + if len(words) == 2 and words[0] == "URL:": + url = words[1] + scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) + username, netloc = urllib.splituser(netloc) + if username: + logging.info("Removed username from base URL") + if netloc.endswith("svn.python.org"): + if netloc == "svn.python.org": + if path.startswith("/projects/"): + path = path[9:] + elif netloc != "pythondev@svn.python.org": + ErrorExit("Unrecognized Python URL: %s" % url) + base = "http://svn.python.org/view/*checkout*%s/" % path + logging.info("Guessed Python base = %s", base) + elif netloc.endswith("svn.collab.net"): + if path.startswith("/repos/"): + path = path[6:] + base = "http://svn.collab.net/viewvc/*checkout*%s/" % path + logging.info("Guessed CollabNet base = %s", base) + elif netloc.endswith(".googlecode.com"): + path = path + "/" + base = urlparse.urlunparse(("http", netloc, path, params, + query, fragment)) + logging.info("Guessed Google Code base = %s", base) + else: + path = path + "/" + base = urlparse.urlunparse((scheme, netloc, path, params, + query, fragment)) + logging.info("Guessed base = %s", base) + return base + if required: + ErrorExit("Can't find URL in output from svn info") + return None + + def GenerateDiff(self, args): + cmd = ["svn", "diff"] + if self.options.revision: + cmd += ["-r", self.options.revision] + cmd.extend(args) + data = RunShell(cmd) + count = 0 + for line in data.splitlines(): + if line.startswith("Index:") or line.startswith("Property changes on:"): + count += 1 + logging.info(line) + if not count: + ErrorExit("No valid patches found in output from svn diff") + return data + + def _CollapseKeywords(self, content, keyword_str): + """Collapses SVN keywords.""" + # svn cat translates keywords but svn diff doesn't. As a result of this + # behavior patching.PatchChunks() fails with a chunk mismatch error. + # This part was originally written by the Review Board development team + # who had the same problem (http://reviews.review-board.org/r/276/). 
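+ # For example, "$Revision: 12345 $" collapses to "$Revision$", and the
+ # fixed-width form "$Revision:: 12345 $" has its value blanked to spaces.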
+ # Mapping of keywords to known aliases + svn_keywords = { + # Standard keywords + 'Date': ['Date', 'LastChangedDate'], + 'Revision': ['Revision', 'LastChangedRevision', 'Rev'], + 'Author': ['Author', 'LastChangedBy'], + 'HeadURL': ['HeadURL', 'URL'], + 'Id': ['Id'], + + # Aliases + 'LastChangedDate': ['LastChangedDate', 'Date'], + 'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'], + 'LastChangedBy': ['LastChangedBy', 'Author'], + 'URL': ['URL', 'HeadURL'], + } + + def repl(m): + if m.group(2): + return "$%s::%s$" % (m.group(1), " " * len(m.group(3))) + return "$%s$" % m.group(1) + keywords = [keyword + for name in keyword_str.split(" ") + for keyword in svn_keywords.get(name, [])] + return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content) + + def GetUnknownFiles(self): + status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True) + unknown_files = [] + for line in status.split("\n"): + if line and line[0] == "?": + unknown_files.append(line) + return unknown_files + + def ReadFile(self, filename): + """Returns the contents of a file.""" + file = open(filename, 'rb') + result = "" + try: + result = file.read() + finally: + file.close() + return result + + def GetStatus(self, filename): + """Returns the status of a file.""" + if not self.options.revision: + status = RunShell(["svn", "status", "--ignore-externals", filename]) + if not status: + ErrorExit("svn status returned no output for %s" % filename) + status_lines = status.splitlines() + # If file is in a cl, the output will begin with + # "\n--- Changelist 'cl_name':\n". See + # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt + if (len(status_lines) == 3 and + not status_lines[0] and + status_lines[1].startswith("--- Changelist")): + status = status_lines[2] + else: + status = status_lines[0] + # If we have a revision to diff against we need to run "svn list" + # for the old and the new revision and compare the results to get + # the correct status for a file. + else: + dirname, relfilename = os.path.split(filename) + if dirname not in self.svnls_cache: + cmd = ["svn", "list", "-r", self.rev_start, dirname or "."] + out, returncode = RunShellWithReturnCode(cmd) + if returncode: + ErrorExit("Failed to get status for %s." % filename) + old_files = out.splitlines() + args = ["svn", "list"] + if self.rev_end: + args += ["-r", self.rev_end] + cmd = args + [dirname or "."] + out, returncode = RunShellWithReturnCode(cmd) + if returncode: + ErrorExit("Failed to run command %s" % cmd) + self.svnls_cache[dirname] = (old_files, out.splitlines()) + old_files, new_files = self.svnls_cache[dirname] + if relfilename in old_files and relfilename not in new_files: + status = "D " + elif relfilename in old_files and relfilename in new_files: + status = "M " + else: + status = "A " + return status + + def GetBaseFile(self, filename): + status = self.GetStatus(filename) + base_content = None + new_content = None + + # If a file is copied its status will be "A +", which signifies + # "addition-with-history". See "svn st" for more information. We need to + # upload the original file or else diff parsing will fail if the file was + # edited. + if status[0] == "A" and status[3] != "+": + # We'll need to upload the new content if we're adding a binary file + # since diff's output won't contain it. 
+ mimetype = RunShell(["svn", "propget", "svn:mime-type", filename], + silent_ok=True) + base_content = "" + is_binary = bool(mimetype) and not mimetype.startswith("text/") + if is_binary and self.IsImage(filename): + new_content = self.ReadFile(filename) + elif (status[0] in ("M", "D", "R") or + (status[0] == "A" and status[3] == "+") or # Copied file. + (status[0] == " " and status[1] == "M")): # Property change. + args = [] + if self.options.revision: + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) + else: + # Don't change filename, it's needed later. + url = filename + args += ["-r", "BASE"] + cmd = ["svn"] + args + ["propget", "svn:mime-type", url] + mimetype, returncode = RunShellWithReturnCode(cmd) + if returncode: + # File does not exist in the requested revision. + # Reset mimetype, it contains an error message. + mimetype = "" + get_base = False + is_binary = bool(mimetype) and not mimetype.startswith("text/") + if status[0] == " ": + # Empty base content just to force an upload. + base_content = "" + elif is_binary: + if self.IsImage(filename): + get_base = True + if status[0] == "M": + if not self.rev_end: + new_content = self.ReadFile(filename) + else: + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end) + new_content = RunShell(["svn", "cat", url], + universal_newlines=True, silent_ok=True) + else: + base_content = "" + else: + get_base = True + + if get_base: + if is_binary: + universal_newlines = False + else: + universal_newlines = True + if self.rev_start: + # "svn cat -r REV delete_file.txt" doesn't work. cat requires + # the full URL with "@REV" appended instead of using "-r" option. + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) + base_content = RunShell(["svn", "cat", url], + universal_newlines=universal_newlines, + silent_ok=True) + else: + base_content = RunShell(["svn", "cat", filename], + universal_newlines=universal_newlines, + silent_ok=True) + if not is_binary: + args = [] + if self.rev_start: + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) + else: + url = filename + args += ["-r", "BASE"] + cmd = ["svn"] + args + ["propget", "svn:keywords", url] + keywords, returncode = RunShellWithReturnCode(cmd) + if keywords and not returncode: + base_content = self._CollapseKeywords(base_content, keywords) + else: + StatusUpdate("svn status returned unexpected output: %s" % status) + sys.exit(1) + return base_content, new_content, is_binary, status[0:5] + + +class GitVCS(VersionControlSystem): + """Implementation of the VersionControlSystem interface for Git.""" + + def __init__(self, options): + super(GitVCS, self).__init__(options) + # Map of filename -> (hash before, hash after) of base file. + # Hashes for "no such file" are represented as None. + self.hashes = {} + # Map of new filename -> old filename for renames. + self.renames = {} + + def GenerateDiff(self, extra_args): + # This is more complicated than svn's GenerateDiff because we must convert + # the diff output to include an svn-style "Index:" line as well as record + # the hashes of the files, so we can upload them along with our diff. + + # Special used by git to indicate "no such content". + NULL_HASH = "0"*40 + + extra_args = extra_args[:] + if self.options.revision: + extra_args = [self.options.revision] + extra_args + + # --no-ext-diff is broken in some versions of Git, so try to work around + # this by overriding the environment (but there is still a problem if the + # git config key "diff.external" is used). 
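+ # Copy the environment and drop GIT_EXTERNAL_DIFF so git falls back to
+ # its built-in diff machinery.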
+ env = os.environ.copy() + if 'GIT_EXTERNAL_DIFF' in env: del env['GIT_EXTERNAL_DIFF'] + gitdiff = RunShell(["git", "diff", "--no-ext-diff", "--full-index", "-M"] + + extra_args, env=env) + + def IsFileNew(filename): + return filename in self.hashes and self.hashes[filename][0] is None + + def AddSubversionPropertyChange(filename): + """Add svn's property change information into the patch if given file is + new file. + + We use Subversion's auto-props setting to retrieve its property. + See http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2 for + Subversion's [auto-props] setting. + """ + if self.options.emulate_svn_auto_props and IsFileNew(filename): + svnprops = GetSubversionPropertyChanges(filename) + if svnprops: + svndiff.append("\n" + svnprops + "\n") + + svndiff = [] + filecount = 0 + filename = None + for line in gitdiff.splitlines(): + match = re.match(r"diff --git a/(.*) b/(.*)$", line) + if match: + # Add auto property here for previously seen file. + if filename is not None: + AddSubversionPropertyChange(filename) + filecount += 1 + # Intentionally use the "after" filename so we can show renames. + filename = match.group(2) + svndiff.append("Index: %s\n" % filename) + if match.group(1) != match.group(2): + self.renames[match.group(2)] = match.group(1) + else: + # The "index" line in a git diff looks like this (long hashes elided): + # index 82c0d44..b2cee3f 100755 + # We want to save the left hash, as that identifies the base file. + match = re.match(r"index (\w+)\.\.(\w+)", line) + if match: + before, after = (match.group(1), match.group(2)) + if before == NULL_HASH: + before = None + if after == NULL_HASH: + after = None + self.hashes[filename] = (before, after) + svndiff.append(line + "\n") + if not filecount: + ErrorExit("No valid patches found in output from git diff") + # Add auto property for the last seen file. + assert filename is not None + AddSubversionPropertyChange(filename) + return "".join(svndiff) + + def GetUnknownFiles(self): + status = RunShell(["git", "ls-files", "--exclude-standard", "--others"], + silent_ok=True) + return status.splitlines() + + def GetFileContent(self, file_hash, is_binary): + """Returns the content of a file identified by its git hash.""" + data, retcode = RunShellWithReturnCode(["git", "show", file_hash], + universal_newlines=not is_binary) + if retcode: + ErrorExit("Got error status from 'git show %s'" % file_hash) + return data + + def GetBaseFile(self, filename): + hash_before, hash_after = self.hashes.get(filename, (None,None)) + base_content = None + new_content = None + is_binary = self.IsBinary(filename) + status = None + + if filename in self.renames: + status = "A +" # Match svn attribute name for renames. + if filename not in self.hashes: + # If a rename doesn't change the content, we never get a hash. + base_content = RunShell(["git", "show", "HEAD:" + filename]) + elif not hash_before: + status = "A" + base_content = "" + elif not hash_after: + status = "D" + else: + status = "M" + + is_image = self.IsImage(filename) + + # Grab the before/after content if we need it. + # We should include file contents if it's text or it's an image. + if not is_binary or is_image: + # Grab the base content if we don't have it already. + if base_content is None and hash_before: + base_content = self.GetFileContent(hash_before, is_binary) + # Only include the "after" file if it's an image; otherwise it + # it is reconstructed from the diff. 
+ if is_image and hash_after:
+ new_content = self.GetFileContent(hash_after, is_binary)
+
+ return (base_content, new_content, is_binary, status)
+
+
+class MercurialVCS(VersionControlSystem):
+ """Implementation of the VersionControlSystem interface for Mercurial."""
+
+ def __init__(self, options, repo_dir):
+ super(MercurialVCS, self).__init__(options)
+ # Absolute path to repository (we can be in a subdir)
+ self.repo_dir = os.path.normpath(repo_dir)
+ # Compute the subdir
+ cwd = os.path.normpath(os.getcwd())
+ assert cwd.startswith(self.repo_dir)
+ self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
+ if self.options.revision:
+ self.base_rev = self.options.revision
+ else:
+ self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()
+
+ def _GetRelPath(self, filename):
+ """Get the relative path of a file according to the current directory,
+ given its logical path in the repo."""
+ assert filename.startswith(self.subdir), (filename, self.subdir)
+ return filename[len(self.subdir):].lstrip(r"\/")
+
+ def GenerateDiff(self, extra_args):
+ # If no file specified, restrict to the current subdir
+ extra_args = extra_args or ["."]
+ cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
+ data = RunShell(cmd, silent_ok=True)
+ svndiff = []
+ filecount = 0
+ for line in data.splitlines():
+ m = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+ if m:
+ # Modify the line to make it look as if it comes from svn diff.
+ # With this modification no changes on the server side are required
+ # to make upload.py work with Mercurial repos.
+ # NOTE: for proper handling of moved/copied files, we have to use
+ # the second filename.
+ filename = m.group(2)
+ svndiff.append("Index: %s" % filename)
+ svndiff.append("=" * 67)
+ filecount += 1
+ logging.info(line)
+ else:
+ svndiff.append(line)
+ if not filecount:
+ ErrorExit("No valid patches found in output from hg diff")
+ return "\n".join(svndiff) + "\n"
+
+ def GetUnknownFiles(self):
+ """Return a list of files unknown to the VCS."""
+ args = []
+ status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
+ silent_ok=True)
+ unknown_files = []
+ for line in status.splitlines():
+ st, fn = line.split(" ", 1)
+ if st == "?":
+ unknown_files.append(fn)
+ return unknown_files
+
+ def GetBaseFile(self, filename):
+ # "hg status" and "hg cat" both take a path relative to the current subdir
+ # rather than to the repo root, but "hg diff" has given us the full path
+ # to the repo root.
+ base_content = "" + new_content = None + is_binary = False + oldrelpath = relpath = self._GetRelPath(filename) + # "hg status -C" returns two lines for moved/copied files, one otherwise + out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath]) + out = out.splitlines() + # HACK: strip error message about missing file/directory if it isn't in + # the working copy + if out[0].startswith('%s: ' % relpath): + out = out[1:] + if len(out) > 1: + # Moved/copied => considered as modified, use old filename to + # retrieve base contents + oldrelpath = out[1].strip() + status = "M" + else: + status, _ = out[0].split(' ', 1) + if ":" in self.base_rev: + base_rev = self.base_rev.split(":", 1)[0] + else: + base_rev = self.base_rev + if status != "A": + base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], + silent_ok=True) + is_binary = "\0" in base_content # Mercurial's heuristic + if status != "R": + new_content = open(relpath, "rb").read() + is_binary = is_binary or "\0" in new_content + if is_binary and base_content: + # Fetch again without converting newlines + base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], + silent_ok=True, universal_newlines=False) + if not is_binary or not self.IsImage(relpath): + new_content = None + return base_content, new_content, is_binary, status + + +# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync. +def SplitPatch(data): + """Splits a patch into separate pieces for each file. + + Args: + data: A string containing the output of svn diff. + + Returns: + A list of 2-tuple (filename, text) where text is the svn diff output + pertaining to filename. + """ + patches = [] + filename = None + diff = [] + for line in data.splitlines(True): + new_filename = None + if line.startswith('Index:'): + unused, new_filename = line.split(':', 1) + new_filename = new_filename.strip() + elif line.startswith('Property changes on:'): + unused, temp_filename = line.split(':', 1) + # When a file is modified, paths use '/' between directories, however + # when a property is modified '\' is used on Windows. Make them the same + # otherwise the file shows up twice. + temp_filename = temp_filename.strip().replace('\\', '/') + if temp_filename != filename: + # File has property changes but no modifications, create a new diff. + new_filename = temp_filename + if new_filename: + if filename and diff: + patches.append((filename, ''.join(diff))) + filename = new_filename + diff = [line] + continue + if diff is not None: + diff.append(line) + if filename and diff: + patches.append((filename, ''.join(diff))) + return patches + + +def UploadSeparatePatches(issue, rpc_server, patchset, data, options): + """Uploads a separate patch for each file in the diff output. + + Returns a list of [patch_key, filename] for each file. 
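+
+ For example (hypothetical server reply), a file whose upload response is
+ "OK\n1234" contributes ["1234", filename] to the result.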
+ """ + patches = SplitPatch(data) + rv = [] + for patch in patches: + if len(patch[1]) > MAX_UPLOAD_SIZE: + print ("Not uploading the patch for " + patch[0] + + " because the file is too large.") + continue + form_fields = [("filename", patch[0])] + if not options.download_base: + form_fields.append(("content_upload", "1")) + files = [("data", "data.diff", patch[1])] + ctype, body = EncodeMultipartFormData(form_fields, files) + url = "/%d/upload_patch/%d" % (int(issue), int(patchset)) + print "Uploading patch for " + patch[0] + response_body = rpc_server.Send(url, body, content_type=ctype) + lines = response_body.splitlines() + if not lines or lines[0] != "OK": + StatusUpdate(" --> %s" % response_body) + sys.exit(1) + rv.append([lines[1], patch[0]]) + return rv + + +def GuessVCSName(): + """Helper to guess the version control system. + + This examines the current directory, guesses which VersionControlSystem + we're using, and returns an string indicating which VCS is detected. + + Returns: + A pair (vcs, output). vcs is a string indicating which VCS was detected + and is one of VCS_GIT, VCS_MERCURIAL, VCS_SUBVERSION, or VCS_UNKNOWN. + output is a string containing any interesting output from the vcs + detection routine, or None if there is nothing interesting. + """ + # Mercurial has a command to get the base directory of a repository + # Try running it, but don't die if we don't have hg installed. + # NOTE: we try Mercurial first as it can sit on top of an SVN working copy. + try: + out, returncode = RunShellWithReturnCode(["hg", "root"]) + if returncode == 0: + return (VCS_MERCURIAL, out.strip()) + except OSError, (errno, message): + if errno != 2: # ENOENT -- they don't have hg installed. + raise + + # Subversion has a .svn in all working directories. + if os.path.isdir('.svn'): + logging.info("Guessed VCS = Subversion") + return (VCS_SUBVERSION, None) + + # Git has a command to test if you're in a git tree. + # Try running it, but don't die if we don't have git installed. + try: + out, returncode = RunShellWithReturnCode(["git", "rev-parse", + "--is-inside-work-tree"]) + if returncode == 0: + return (VCS_GIT, None) + except OSError, (errno, message): + if errno != 2: # ENOENT -- they don't have git installed. + raise + + return (VCS_UNKNOWN, None) + + +def GuessVCS(options): + """Helper to guess the version control system. + + This verifies any user-specified VersionControlSystem (by command line + or environment variable). If the user didn't specify one, this examines + the current directory, guesses which VersionControlSystem we're using, + and returns an instance of the appropriate class. Exit with an error + if we can't figure it out. + + Returns: + A VersionControlSystem instance. Exits if the VCS can't be guessed. + """ + vcs = options.vcs + if not vcs: + vcs = os.environ.get("CODEREVIEW_VCS") + if vcs: + v = VCS_ABBREVIATIONS.get(vcs.lower()) + if v is None: + ErrorExit("Unknown version control system %r specified." % vcs) + (vcs, extra_output) = (v, None) + else: + (vcs, extra_output) = GuessVCSName() + + if vcs == VCS_MERCURIAL: + if extra_output is None: + extra_output = RunShell(["hg", "root"]).strip() + return MercurialVCS(options, extra_output) + elif vcs == VCS_SUBVERSION: + return SubversionVCS(options) + elif vcs == VCS_GIT: + return GitVCS(options) + + ErrorExit(("Could not guess version control system. " + "Are you in a working copy directory?")) + + +def CheckReviewer(reviewer): + """Validate a reviewer -- either a nickname or an email addres. 
+
+ Args:
+ reviewer: A nickname or an email address.
+
+ Calls ErrorExit() if it is an invalid email address.
+ """
+ if "@" not in reviewer:
+ return # Assume nickname
+ parts = reviewer.split("@")
+ if len(parts) > 2:
+ ErrorExit("Invalid email address: %r" % reviewer)
+ assert len(parts) == 2
+ if "." not in parts[1]:
+ ErrorExit("Invalid email address: %r" % reviewer)
+
+
+def LoadSubversionAutoProperties():
+ """Returns the content of the [auto-props] section of Subversion's config
+ file as a dictionary.
+
+ Returns:
+ A dictionary whose key-value pairs correspond to the [auto-props]
+ section's key-value pairs.
+ In the following cases, an empty dictionary is returned:
+ - the config file doesn't exist, or
+ - 'enable-auto-props' is not set to a true-like value in [miscellany].
+ """
+ # TODO(hayato): Windows users might use a different path for the
+ # configuration file.
+ subversion_config = os.path.expanduser("~/.subversion/config")
+ if not os.path.exists(subversion_config):
+ return {}
+ config = ConfigParser.ConfigParser()
+ config.read(subversion_config)
+ if (config.has_section("miscellany") and
+ config.has_option("miscellany", "enable-auto-props") and
+ config.getboolean("miscellany", "enable-auto-props") and
+ config.has_section("auto-props")):
+ props = {}
+ for file_pattern in config.options("auto-props"):
+ props[file_pattern] = ParseSubversionPropertyValues(
+ config.get("auto-props", file_pattern))
+ return props
+ else:
+ return {}
+
+def ParseSubversionPropertyValues(props):
+ """Parses the given property value, which comes from the [auto-props]
+ section, and returns a list of (svn_prop_key, svn_prop_value) pairs.
+
+ See the following doctest for examples.
+
+ >>> ParseSubversionPropertyValues('svn:eol-style=LF')
+ [('svn:eol-style', 'LF')]
+ >>> ParseSubversionPropertyValues('svn:mime-type=image/jpeg')
+ [('svn:mime-type', 'image/jpeg')]
+ >>> ParseSubversionPropertyValues('svn:eol-style=LF;svn:executable')
+ [('svn:eol-style', 'LF'), ('svn:executable', '*')]
+ """
+ key_value_pairs = []
+ for prop in props.split(";"):
+ key_value = prop.split("=")
+ assert len(key_value) <= 2
+ if len(key_value) == 1:
+ # If no value is given, use '*', following Subversion's convention.
+ key_value_pairs.append((key_value[0], "*"))
+ else:
+ key_value_pairs.append((key_value[0], key_value[1]))
+ return key_value_pairs
+
+
+def GetSubversionPropertyChanges(filename):
+ """Returns a Subversion 'Property changes on ...' string, which is used in
+ the patch file.
+
+ Args:
+ filename: filename whose property might be set by the [auto-props] config.
+
+ Returns:
+ A string like 'Property changes on |filename| ...' if the given |filename|
+ matches any entries in the [auto-props] section. None otherwise.
+ """
+ global svn_auto_props_map
+ if svn_auto_props_map is None:
+ svn_auto_props_map = LoadSubversionAutoProperties()
+
+ all_props = []
+ for file_pattern, props in svn_auto_props_map.items():
+ if fnmatch.fnmatch(filename, file_pattern):
+ all_props.extend(props)
+ if all_props:
+ return FormatSubversionPropertyChanges(filename, all_props)
+ return None
+
+
+def FormatSubversionPropertyChanges(filename, props):
+ """Returns a Subversion 'Property changes on ...' string using the given
+ filename and properties.
+
+ Args:
+ filename: filename
+ props: A list whose elements are (svn_prop_key, svn_prop_value) pairs.
+
+ Returns:
+ A string which can be used in the patch file for Subversion.
+
+ See the following doctest for an example.
+def FormatSubversionPropertyChanges(filename, props):
+  """Returns Subversion's 'Property changes on ...' strings using the given
+  filename and properties.
+
+  Args:
+    filename: filename
+    props: A list whose elements are (svn_prop_key, svn_prop_value) pairs.
+
+  Returns:
+    A string which can be used in the patch file for Subversion.
+
+  See the following doctest for an example.
+
+  >>> print FormatSubversionPropertyChanges('foo.cc', [('svn:eol-style', 'LF')])
+  Property changes on: foo.cc
+  ___________________________________________________________________
+  Added: svn:eol-style
+     + LF
+  <BLANKLINE>
+  """
+  prop_changes_lines = [
+    "Property changes on: %s" % filename,
+    "___________________________________________________________________"]
+  for key, value in props:
+    prop_changes_lines.append("Added: " + key)
+    prop_changes_lines.append("   + " + value)
+  return "\n".join(prop_changes_lines) + "\n"
+
+
+def RealMain(argv, data=None):
+  """The real main function.
+
+  Args:
+    argv: Command line arguments.
+    data: Diff contents. If None (default) the diff is generated by
+      the VersionControlSystem implementation returned by GuessVCS().
+
+  Returns:
+    A 2-tuple (issue id, patchset id).
+    The patchset id is None if the base files are not uploaded by this
+    script (applies only to SVN checkouts).
+  """
+  logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:"
+                              "%(lineno)s %(message)s "))
+  os.environ['LC_ALL'] = 'C'
+  options, args = parser.parse_args(argv[1:])
+  global verbosity
+  verbosity = options.verbose
+  if verbosity >= 3:
+    logging.getLogger().setLevel(logging.DEBUG)
+  elif verbosity >= 2:
+    logging.getLogger().setLevel(logging.INFO)
+
+  vcs = GuessVCS(options)
+
+  base = options.base_url
+  if isinstance(vcs, SubversionVCS):
+    # Guessing the base field is only supported for Subversion.
+    # Note: Fetching base files may become deprecated in future releases.
+    guessed_base = vcs.GuessBase(options.download_base)
+    if base:
+      if guessed_base and base != guessed_base:
+        print "Using base URL \"%s\" from --base_url instead of \"%s\"" % \
+            (base, guessed_base)
+    else:
+      base = guessed_base
+
+  if not base and options.download_base:
+    options.download_base = True
+    logging.info("Enabled upload of base file")
+  if not options.assume_yes:
+    vcs.CheckForUnknownFiles()
+  if data is None:
+    data = vcs.GenerateDiff(args)
+  files = vcs.GetBaseFiles(data)
+  if verbosity >= 1:
+    print "Upload server:", options.server, "(change with -s/--server)"
+  if options.issue:
+    prompt = "Message describing this patch set: "
+  else:
+    prompt = "New issue subject: "
+  message = options.message or raw_input(prompt).strip()
+  if not message:
+    ErrorExit("A non-empty message is required")
+  rpc_server = GetRpcServer(options.server,
+                            options.email,
+                            options.host,
+                            options.save_cookies)
+  form_fields = [("subject", message)]
+  if base:
+    form_fields.append(("base", base))
+  if options.issue:
+    form_fields.append(("issue", str(options.issue)))
+  if options.email:
+    form_fields.append(("user", options.email))
+  if options.reviewers:
+    for reviewer in options.reviewers.split(','):
+      CheckReviewer(reviewer)
+    form_fields.append(("reviewers", options.reviewers))
+  if options.cc:
+    for cc in options.cc.split(','):
+      CheckReviewer(cc)
+    form_fields.append(("cc", options.cc))
+  description = options.description
+  if options.description_file:
+    if options.description:
+      ErrorExit("Can't specify description and description_file")
+    file = open(options.description_file, 'r')
+    description = file.read()
+    file.close()
+  if description:
+    form_fields.append(("description", description))
+  # Send a hash of all the base files so the server can determine if a copy
+  # already exists in an earlier patchset.
+ base_hashes = "" + for file, info in files.iteritems(): + if not info[0] is None: + checksum = md5(info[0]).hexdigest() + if base_hashes: + base_hashes += "|" + base_hashes += checksum + ":" + file + form_fields.append(("base_hashes", base_hashes)) + if options.private: + if options.issue: + print "Warning: Private flag ignored when updating an existing issue." + else: + form_fields.append(("private", "1")) + # If we're uploading base files, don't send the email before the uploads, so + # that it contains the file status. + if options.send_mail and options.download_base: + form_fields.append(("send_mail", "1")) + if not options.download_base: + form_fields.append(("content_upload", "1")) + if len(data) > MAX_UPLOAD_SIZE: + print "Patch is large, so uploading file patches separately." + uploaded_diff_file = [] + form_fields.append(("separate_patches", "1")) + else: + uploaded_diff_file = [("data", "data.diff", data)] + ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file) + response_body = rpc_server.Send("/upload", body, content_type=ctype) + patchset = None + if not options.download_base or not uploaded_diff_file: + lines = response_body.splitlines() + if len(lines) >= 2: + msg = lines[0] + patchset = lines[1].strip() + patches = [x.split(" ", 1) for x in lines[2:]] + else: + msg = response_body + else: + msg = response_body + StatusUpdate(msg) + if not response_body.startswith("Issue created.") and \ + not response_body.startswith("Issue updated."): + sys.exit(0) + issue = msg[msg.rfind("/")+1:] + + if not uploaded_diff_file: + result = UploadSeparatePatches(issue, rpc_server, patchset, data, options) + if not options.download_base: + patches = result + + if not options.download_base: + vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files) + if options.send_mail: + rpc_server.Send("/" + issue + "/mail", payload="") + return issue, patchset + + +def main(): + try: + RealMain(sys.argv) + except KeyboardInterrupt: + print + StatusUpdate("Interrupted.") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/download.py b/WebKitTools/Scripts/webkitpy/tool/commands/download.py index d960bbe..c66b95c 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/download.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/download.py @@ -89,12 +89,12 @@ class Land(AbstractSequencedCommand): steps.CloseBugForLandDiff, ] long_help = """land commits the current working copy diff (just as svn or git commit would). -land will build and run the tests before committing. +land will NOT build and run the tests before committing, but you can use the --build option for that. If a bug id is provided, or one can be found in the ChangeLog land will update the bug after committing.""" def _prepare_state(self, options, args, tool): return { - "bug_id" : (args and args[0]) or tool.checkout().bug_id_for_this_commit() + "bug_id": (args and args[0]) or tool.checkout().bug_id_for_this_commit(options.git_commit, options.squash), } diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem.py b/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem.py index 9ea34c0..7505c62 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/earlywarningsystem.py @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from StringIO import StringIO - from webkitpy.tool.commands.queues import AbstractReviewQueue from webkitpy.common.config.committers import CommitterList from webkitpy.common.config.ports import WebKitPort diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queues.py b/WebKitTools/Scripts/webkitpy/tool/commands/queues.py index f0da379..775aa44 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/queues.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/queues.py @@ -71,8 +71,11 @@ class AbstractQueue(Command, QueueEngineDelegate): def run_webkit_patch(self, args): webkit_patch_args = [self.tool.path()] # FIXME: This is a hack, we should have a more general way to pass global options. + # FIXME: We must always pass global options and their value in one argument + # because our global option code looks for the first argument which does + # not begin with "-" and assumes that is the command name. webkit_patch_args += ["--status-host=%s" % self.tool.status_server.host] - webkit_patch_args += map(str, args) + webkit_patch_args.extend(args) return self.tool.executive.run_and_throw_if_fail(webkit_patch_args) def _log_directory(self): @@ -123,7 +126,10 @@ class AbstractQueue(Command, QueueEngineDelegate): if is_error: message = "Error: %s" % message output = script_error.message_with_output(output_limit=1024*1024) # 1MB - return tool.status_server.update_status(cls.name, message, state["patch"], StringIO(output)) + # We pre-encode the string to a byte array before passing it + # to status_server, because ClientForm (part of mechanize) + # wants a file-like object with pre-encoded data. + return tool.status_server.update_status(cls.name, message, state["patch"], StringIO(output.encode("utf-8"))) class AbstractPatchQueue(AbstractQueue): diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py index f0f7c86..16eb053 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/queues_unittest.py @@ -75,7 +75,7 @@ class AbstractQueueTest(CommandsTest): queue.bind_to_tool(tool) queue.run_webkit_patch(run_args) - expected_run_args = ["echo", "--status-host=example.com"] + map(str, run_args) + expected_run_args = ["echo", "--status-host=example.com"] + run_args tool.executive.run_and_throw_if_fail.assert_called_with(expected_run_args) def test_run_webkit_patch(self): @@ -150,7 +150,7 @@ Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.c Warning, attachment 128 on bug 42 has invalid committer (non-committer@example.com) 1 patch in commit-queue [106] """, - "process_work_item": "MOCK run_and_throw_if_fail: ['echo', '--status-host=example.com', 'land-attachment', '--force-clean', '--build', '--test', '--non-interactive', '--ignore-builders', '--build-style=both', '--quiet', '76543']\n", + "process_work_item": "MOCK run_and_throw_if_fail: ['echo', '--status-host=example.com', 'land-attachment', '--force-clean', '--build', '--test', '--non-interactive', '--ignore-builders', '--build-style=both', '--quiet', 76543]\n", } self.assert_queue_outputs(CommitQueue(), tool=tool, work_item=rollout_patch, expected_stderr=expected_stderr) diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/upload.py b/WebKitTools/Scripts/webkitpy/tool/commands/upload.py index bdf060a..99d45a6 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/upload.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/upload.py @@ -30,7 +30,6 @@ import os import re 
-import StringIO
 import sys
 
 from optparse import make_option
@@ -141,16 +140,16 @@ class ObsoleteAttachments(AbstractSequencedCommand):
 
 
 class AbstractPatchUploadingCommand(AbstractSequencedCommand):
-    def _bug_id(self, args, tool, state):
+    def _bug_id(self, options, args, tool, state):
         # Prefer a bug id passed as an argument over a bug url in the diff (i.e. ChangeLogs).
         bug_id = args and args[0]
         if not bug_id:
-            bug_id = tool.checkout().bug_id_for_this_commit()
+            bug_id = tool.checkout().bug_id_for_this_commit(options.git_commit, options.squash)
         return bug_id
 
     def _prepare_state(self, options, args, tool):
         state = {}
-        state["bug_id"] = self._bug_id(args, tool, state)
+        state["bug_id"] = self._bug_id(options, args, tool, state)
         if not state["bug_id"]:
             error("No bug id passed and no bug url found in ChangeLogs.")
         return state
@@ -223,7 +222,7 @@ class Upload(AbstractPatchUploadingCommand):
 
     def _prepare_state(self, options, args, tool):
         state = {}
-        state["bug_id"] = self._bug_id(args, tool, state)
+        state["bug_id"] = self._bug_id(options, args, tool, state)
         return state
 
 
@@ -260,10 +259,6 @@ class PostCommits(AbstractDeclarativeCommand):
             comment_text += tool.scm().files_changed_summary_for_commit(commit_id)
         return comment_text
 
-    def _diff_file_for_commit(self, tool, commit_id):
-        diff = tool.scm().create_patch_from_local_commit(commit_id)
-        return StringIO.StringIO(diff) # add_patch_to_bug expects a file-like object
-
     def execute(self, options, args, tool):
         commit_ids = tool.scm().commit_ids_from_commitish_arguments(args)
         if len(commit_ids) > 10: # We could lower this limit, 10 is too many for one bug as-is.
@@ -274,7 +269,7 @@ class PostCommits(AbstractDeclarativeCommand):
             commit_message = tool.scm().commit_message_for_local_commit(commit_id)
 
             # Prefer --bug-id=, then a bug url in the commit message, then a bug url in the entire commit diff (i.e. ChangeLogs).
-            bug_id = options.bug_id or parse_bug_id(commit_message.message()) or parse_bug_id(tool.scm().create_patch_from_local_commit(commit_id))
+            bug_id = options.bug_id or parse_bug_id(commit_message.message()) or parse_bug_id(tool.scm().create_patch(git_commit=commit_id))
             if not bug_id:
                 log("Skipping %s: No bug id found in commit or specified with --bug-id." % commit_id)
                 continue
@@ -284,10 +279,10 @@ class PostCommits(AbstractDeclarativeCommand):
                 steps.ObsoletePatches(tool, options).run(state)
                 have_obsoleted_patches.add(bug_id)
 
-            diff_file = self._diff_file_for_commit(tool, commit_id)
+            diff = tool.scm().create_patch(git_commit=commit_id)
             description = options.description or commit_message.description(lstrip=True, strip_url=True)
             comment_text = self._comment_text_for_commit(options, commit_message, tool, commit_id)
-            tool.bugs.add_patch_to_bug(bug_id, diff_file, description, comment_text, mark_for_review=options.review, mark_for_commit_queue=options.request_commit)
+            tool.bugs.add_patch_to_bug(bug_id, diff, description, comment_text, mark_for_review=options.review, mark_for_commit_queue=options.request_commit)
 
 
 # FIXME: This command needs to be brought into the modern age with steps and CommitInfo.
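The PostCommits hunk above also documents the lookup order for a bug id: an explicit --bug-id= wins, then a bug URL in the commit message, then a bug URL anywhere in the commit's diff (typically contributed by a ChangeLog hunk). A self-contained sketch of that fallback chain; this parse_bug_id is a deliberately simplified stand-in for webkitpy's real helper and matches only the long show_bug.cgi URL form:

    import re

    def parse_bug_id(text):
        # Simplified stand-in: match only https?://bugs.webkit.org/show_bug.cgi?id=NNN.
        match = re.search(r"https?://bugs\.webkit\.org/show_bug\.cgi\?id=(\d+)", text)
        return match.group(1) if match else None

    def bug_id_for_commit(explicit_bug_id, commit_message, commit_diff):
        # Mirrors the fallback chain in PostCommits.execute().
        return (explicit_bug_id
                or parse_bug_id(commit_message)
                or parse_bug_id(commit_diff))

    print(bug_id_for_commit(
        None, "Fix the crash.\nhttp://bugs.webkit.org/show_bug.cgi?id=12345", ""))
    # 12345

Because the stages are chained with "or", a None or empty result at each stage simply falls through to the next source.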
@@ -403,9 +398,8 @@ class CreateBug(AbstractDeclarativeCommand): comment_text += "---\n" comment_text += tool.scm().files_changed_summary_for_commit(commit_id) - diff = tool.scm().create_patch_from_local_commit(commit_id) - diff_file = StringIO.StringIO(diff) # create_bug expects a file-like object - bug_id = tool.bugs.create_bug(bug_title, comment_text, options.component, diff_file, "Patch", cc=options.cc, mark_for_review=options.review, mark_for_commit_queue=options.request_commit) + diff = tool.scm().create_patch(git_commit=commit_id) + bug_id = tool.bugs.create_bug(bug_title, comment_text, options.component, diff, "Patch", cc=options.cc, mark_for_review=options.review, mark_for_commit_queue=options.request_commit) if bug_id and len(commit_ids) > 1: options.bug_id = bug_id @@ -419,13 +413,12 @@ class CreateBug(AbstractDeclarativeCommand): if options.prompt: (bug_title, comment_text) = self.prompt_for_bug_title_and_comment() else: - commit_message = tool.checkout().commit_message_for_this_commit() + commit_message = tool.checkout().commit_message_for_this_commit(options.git_commit, options.squash) bug_title = commit_message.description(lstrip=True, strip_url=True) comment_text = commit_message.body(lstrip=True) - diff = tool.scm().create_patch() - diff_file = StringIO.StringIO(diff) # create_bug expects a file-like object - bug_id = tool.bugs.create_bug(bug_title, comment_text, options.component, diff_file, "Patch", cc=options.cc, mark_for_review=options.review, mark_for_commit_queue=options.request_commit) + diff = tool.scm().create_patch(options.git_commit, options.squash) + bug_id = tool.bugs.create_bug(bug_title, comment_text, options.component, diff, "Patch", cc=options.cc, mark_for_review=options.review, mark_for_commit_queue=options.request_commit) def prompt_for_bug_title_and_comment(self): bug_title = User.prompt("Bug title: ") diff --git a/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py b/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py index 271df01..eec3751 100644 --- a/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/commands/upload_unittest.py @@ -58,6 +58,8 @@ class UploadCommandsTest(CommandsTest): options.description = "MOCK description" options.request_commit = False options.review = True + # Rietveld upload code requires a real SCM checkout. + options.fancy_review = False options.cc = None expected_stderr = """Running check-webkit-style MOCK: user.open_url: file://... @@ -86,6 +88,8 @@ MOCK: user.open_url: http://example.com/42 options.description = "MOCK description" options.request_commit = False options.review = True + # Rietveld upload code requires a real SCM checkout. + options.fancy_review = False options.cc = None expected_stderr = """Running check-webkit-style MOCK: user.open_url: file://... diff --git a/WebKitTools/Scripts/webkitpy/tool/main.py b/WebKitTools/Scripts/webkitpy/tool/main.py index 06cde74..2dc177d 100755 --- a/WebKitTools/Scripts/webkitpy/tool/main.py +++ b/WebKitTools/Scripts/webkitpy/tool/main.py @@ -123,6 +123,7 @@ class WebKitPatch(MultiCommandTool): # FIXME: This may be unnecessary since we pass global options to all commands during execute() as well. 
def handle_global_options(self, options): + self._options = options if options.dry_run: self.scm().dryrun = True self.bugs.dryrun = True diff --git a/WebKitTools/Scripts/webkitpy/tool/mocktool.py b/WebKitTools/Scripts/webkitpy/tool/mocktool.py index cc361ff..128362a 100644 --- a/WebKitTools/Scripts/webkitpy/tool/mocktool.py +++ b/WebKitTools/Scripts/webkitpy/tool/mocktool.py @@ -260,7 +260,7 @@ class MockBugzilla(Mock): bug_title, bug_description, component=None, - patch_file_object=None, + diff=None, patch_description=None, cc=None, blocked=None, @@ -302,7 +302,7 @@ class MockBugzilla(Mock): def add_patch_to_bug(self, bug_id, - patch_file_object, + diff, description, comment_text=None, mark_for_review=False, @@ -384,7 +384,7 @@ class MockSCM(Mock): # will actually be the root. Since getcwd() is wrong, use a globally fake root for now. self.checkout_root = self.fake_checkout_root - def create_patch(self): + def create_patch(self, git_commit, squash): return "Patch1" def commit_ids_from_commitish_arguments(self, args): @@ -399,13 +399,6 @@ class MockSCM(Mock): "https://bugs.example.org/show_bug.cgi?id=75\n") raise Exception("Bogus commit_id in commit_message_for_local_commit.") - def create_patch_from_local_commit(self, commit_id): - if commit_id == "Commitish1": - return "Patch1" - if commit_id == "Commitish2": - return "Patch2" - raise Exception("Bogus commit_id in commit_message_for_local_commit.") - def diff_for_revision(self, revision): return "DiffForRevision%s\n" \ "http://bugs.webkit.org/show_bug.cgi?id=12345" % revision @@ -431,12 +424,12 @@ class MockCheckout(object): def bug_id_for_revision(self, svn_revision): return 12345 - def modified_changelogs(self): + def modified_changelogs(self, git_commit, squash): # Ideally we'd return something more interesting here. The problem is # that LandDiff will try to actually read the patch from disk! return [] - def commit_message_for_this_commit(self): + def commit_message_for_this_commit(self, git_commit, squash): commit_message = Mock() commit_message.message = lambda:"This is a fake commit message that is at least 50 characters." return commit_message @@ -507,7 +500,8 @@ class MockExecute(Mock): input=None, error_handler=None, return_exit_code=False, - return_stderr=True): + return_stderr=True, + decode_output=False): return "MOCK output of child process" diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py b/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py index 1ad343d..abafe63 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/abstractstep.py @@ -28,6 +28,7 @@ from webkitpy.common.system.deprecated_logging import log from webkitpy.common.config.ports import WebKitPort +from webkitpy.tool.steps.options import Options class AbstractStep(object): @@ -36,10 +37,13 @@ class AbstractStep(object): self._options = options self._port = None - def _run_script(self, script_name, quiet=False, port=WebKitPort): + def _run_script(self, script_name, args=None, quiet=False, port=WebKitPort): log("Running %s" % script_name) + command = [port.script_path(script_name)] + if args: + command.extend(args) # FIXME: This should use self.port() - self._tool.executive.run_and_throw_if_fail(port.script_path(script_name), quiet) + self._tool.executive.run_and_throw_if_fail(command, quiet) # FIXME: The port should live on the tool. 
def port(self): @@ -49,8 +53,8 @@ class AbstractStep(object): return self._port _well_known_keys = { - "diff" : lambda self: self._tool.scm().create_patch(), - "changelogs" : lambda self: self._tool.checkout().modified_changelogs(), + "diff": lambda self: self._tool.scm().create_patch(self._options.git_commit, self._options.squash), + "changelogs": lambda self: self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash), } def cached_lookup(self, state, key, promise=None): @@ -63,7 +67,12 @@ class AbstractStep(object): @classmethod def options(cls): - return [] + return [ + # We need these options here because cached_lookup uses them. :( + Options.git_commit, + Options.no_squash, + Options.squash, + ] def run(self, state): raise NotImplementedError, "subclasses must implement" diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/applypatch.py b/WebKitTools/Scripts/webkitpy/tool/steps/applypatch.py index 66d0a03..6cded27 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/applypatch.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/applypatch.py @@ -33,7 +33,7 @@ from webkitpy.common.system.deprecated_logging import log class ApplyPatch(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.non_interactive, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/applypatchwithlocalcommit.py b/WebKitTools/Scripts/webkitpy/tool/steps/applypatchwithlocalcommit.py index 70ddfe5..d6b026d 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/applypatchwithlocalcommit.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/applypatchwithlocalcommit.py @@ -32,12 +32,12 @@ from webkitpy.tool.steps.options import Options class ApplyPatchWithLocalCommit(ApplyPatch): @classmethod def options(cls): - return [ + return ApplyPatch.options() + [ Options.local_commit, - ] + ApplyPatch.options() + ] def run(self, state): ApplyPatch.run(self, state) if self._options.local_commit: - commit_message = self._tool.checkout().commit_message_for_this_commit() + commit_message = self._tool.checkout().commit_message_for_this_commit(git_commit=None, squash=False) self._tool.scm().commit_locally_with_message(commit_message.message() or state["patch"].name()) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/build.py b/WebKitTools/Scripts/webkitpy/tool/steps/build.py index f0570f9..456db25 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/build.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/build.py @@ -34,7 +34,7 @@ from webkitpy.common.system.deprecated_logging import log class Build(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.build, Options.quiet, Options.build_style, diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/checkstyle.py b/WebKitTools/Scripts/webkitpy/tool/steps/checkstyle.py index 63f0114..7b2be99 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/checkstyle.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/checkstyle.py @@ -36,9 +36,12 @@ from webkitpy.common.system.deprecated_logging import error class CheckStyle(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.non_interactive, Options.check_style, + Options.git_commit, + Options.no_squash, + Options.squash, ] def run(self, state): @@ -46,7 +49,16 @@ class CheckStyle(AbstractStep): return os.chdir(self._tool.scm().checkout_root) try: - self._run_script("check-webkit-style") + args = [] + if self._options.git_commit: + args.append("--git-commit") + 
args.append(self._options.git_commit) + if self._tool.scm().should_squash(self._options.squash): + args.append("--squash") + else: + args.append("--no-squash") + + self._run_script("check-webkit-style", args) except ScriptError, e: if self._options.non_interactive: # We need to re-raise the exception here to have the diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/cleanworkingdirectory.py b/WebKitTools/Scripts/webkitpy/tool/steps/cleanworkingdirectory.py index 3768297..e13fbc2 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/cleanworkingdirectory.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/cleanworkingdirectory.py @@ -39,7 +39,7 @@ class CleanWorkingDirectory(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.force_clean, Options.clean, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/closebug.py b/WebKitTools/Scripts/webkitpy/tool/steps/closebug.py index d5059ea..e77bc24 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/closebug.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/closebug.py @@ -34,7 +34,7 @@ from webkitpy.common.system.deprecated_logging import log class CloseBug(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.close_bug, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/closebugforlanddiff.py b/WebKitTools/Scripts/webkitpy/tool/steps/closebugforlanddiff.py index 476d3af..e5a68db 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/closebugforlanddiff.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/closebugforlanddiff.py @@ -35,7 +35,7 @@ from webkitpy.common.system.deprecated_logging import log class CloseBugForLandDiff(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.close_bug, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/commit.py b/WebKitTools/Scripts/webkitpy/tool/steps/commit.py index 294b41e..7bf8b8a 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/commit.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/commit.py @@ -27,11 +27,21 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from webkitpy.tool.steps.abstractstep import AbstractStep +from webkitpy.tool.steps.options import Options class Commit(AbstractStep): + @classmethod + def options(cls): + return AbstractStep.options() + [ + Options.git_commit, + Options.no_squash, + Options.squash, + ] + def run(self, state): - commit_message = self._tool.checkout().commit_message_for_this_commit() + commit_message = self._tool.checkout().commit_message_for_this_commit(self._options.git_commit, self._options.squash) if len(commit_message.message()) < 50: raise Exception("Attempted to commit with a commit message shorter than 50 characters. 
Either your patch is missing a ChangeLog or webkit-patch may have a bug.") - state["commit_text"] = self._tool.scm().commit_with_message(commit_message.message()) + state["commit_text"] = self._tool.scm().commit_with_message(commit_message.message(), + git_commit=self._options.git_commit, squash=self._options.squash) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py b/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py index d08e477..626fcf3 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/confirmdiff.py @@ -41,7 +41,7 @@ _log = logutils.get_logger(__file__) class ConfirmDiff(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.confirm, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/createbug.py b/WebKitTools/Scripts/webkitpy/tool/steps/createbug.py index 2f3d42c..cd043d6 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/createbug.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/createbug.py @@ -33,7 +33,7 @@ from webkitpy.tool.steps.options import Options class CreateBug(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.cc, Options.component, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/ensurebuildersaregreen.py b/WebKitTools/Scripts/webkitpy/tool/steps/ensurebuildersaregreen.py index fd44564..40bc302 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/ensurebuildersaregreen.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/ensurebuildersaregreen.py @@ -34,7 +34,7 @@ from webkitpy.common.system.deprecated_logging import error class EnsureBuildersAreGreen(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.check_builders, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/ensurelocalcommitifneeded.py b/WebKitTools/Scripts/webkitpy/tool/steps/ensurelocalcommitifneeded.py index 4f799f2..d0cda46 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/ensurelocalcommitifneeded.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/ensurelocalcommitifneeded.py @@ -34,7 +34,7 @@ from webkitpy.common.system.deprecated_logging import error class EnsureLocalCommitIfNeeded(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.local_commit, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/obsoletepatches.py b/WebKitTools/Scripts/webkitpy/tool/steps/obsoletepatches.py index 9f65d41..de508c6 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/obsoletepatches.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/obsoletepatches.py @@ -35,7 +35,7 @@ from webkitpy.common.system.deprecated_logging import log class ObsoletePatches(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.obsolete_patches, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/options.py b/WebKitTools/Scripts/webkitpy/tool/steps/options.py index 7f76f55..524a252 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/options.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/options.py @@ -42,7 +42,10 @@ class Options(object): email = make_option("--email", action="store", type="string", dest="email", help="Email address to use in ChangeLogs.") fancy_review = make_option("--fancy-review", action="store_true", dest="fancy_review", default=False, help="(Experimental) Upload the patch to Rietveld code review tool.") force_clean = make_option("--force-clean", action="store_true", 
dest="force_clean", default=False, help="Clean working directory before applying patches (removes local changes and commits)") +# FIXME: Make commit ranges treat each commit separately instead of squashing them into one. + git_commit = make_option("--git-commit", action="store", dest="git_commit", help="Local git commit to upload/land. If a range, the commits are squashed into one.") local_commit = make_option("--local-commit", action="store_true", dest="local_commit", default=False, help="Make a local commit for each applied patch") + no_squash = make_option("--no-squash", action="store_false", dest="squash", help="Don't squash local commits into one on upload/land (git-only).") non_interactive = make_option("--non-interactive", action="store_true", dest="non_interactive", default=False, help="Never prompt the user, fail as fast as possible.") obsolete_patches = make_option("--no-obsolete", action="store_false", dest="obsolete_patches", default=True, help="Do not obsolete old patches before posting this one.") open_bug = make_option("--open-bug", action="store_true", dest="open_bug", default=False, help="Opens the associated bug in a browser.") @@ -52,5 +55,6 @@ class Options(object): request_commit = make_option("--request-commit", action="store_true", dest="request_commit", default=False, help="Mark the patch as needing auto-commit after review.") review = make_option("--no-review", action="store_false", dest="review", default=True, help="Do not mark the patch for review.") reviewer = make_option("-r", "--reviewer", action="store", type="string", dest="reviewer", help="Update ChangeLogs to say Reviewed by REVIEWER.") - test = make_option("--test", action="store_true", dest="test", default=False, help="Commit without running run-webkit-tests") + squash = make_option("-s", "--squash", action="store_true", dest="squash", help="Squash all local commits into one on upload/land (git-only).") + test = make_option("--test", action="store_true", dest="test", default=False, help="Run run-webkit-tests before committing.") update = make_option("--no-update", action="store_false", dest="update", default=True, help="Don't update the working directory.") diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py b/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py index 3e7ed76..198cfce 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/postcodereview.py @@ -33,11 +33,10 @@ from webkitpy.tool.steps.options import Options class PostCodeReview(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.cc, Options.description, Options.fancy_review, - Options.review, ] def run(self, state): @@ -66,7 +65,8 @@ class PostCodeReview(AbstractStep): # Unreachable with our current commands, but we might hit # this case if we support bug-less code reviews. 
message = "Code review" - created_issue = self._tool.codereview.post(message=message, + created_issue = self._tool.codereview.post(diff=self.cached_lookup(state, "diff"), + message=message, codereview_issue=codereview_issue, cc=self._options.cc) if created_issue: diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/postdiff.py b/WebKitTools/Scripts/webkitpy/tool/steps/postdiff.py index 6a3dee4..a542dba 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/postdiff.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/postdiff.py @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import StringIO - from webkitpy.tool.steps.abstractstep import AbstractStep from webkitpy.tool.steps.options import Options @@ -35,7 +33,7 @@ from webkitpy.tool.steps.options import Options class PostDiff(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.description, Options.review, Options.request_commit, @@ -44,7 +42,6 @@ class PostDiff(AbstractStep): def run(self, state): diff = self.cached_lookup(state, "diff") - diff_file = StringIO.StringIO(diff) # add_patch_to_bug expects a file-like object description = self._options.description or "Patch" comment_text = None codereview_issue = state.get("codereview_issue") @@ -52,6 +49,6 @@ class PostDiff(AbstractStep): # but it makes doing the rietveld integration a lot easier. if codereview_issue: description += "-%s" % state["codereview_issue"] - self._tool.bugs.add_patch_to_bug(state["bug_id"], diff_file, description, comment_text=comment_text, mark_for_review=self._options.review, mark_for_commit_queue=self._options.request_commit) + self._tool.bugs.add_patch_to_bug(state["bug_id"], diff, description, comment_text=comment_text, mark_for_review=self._options.review, mark_for_commit_queue=self._options.request_commit) if self._options.open_bug: self._tool.user.open_url(self._tool.bugs.bug_url_for_bug_id(state["bug_id"])) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforcommit.py b/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforcommit.py index 03b9e78..13bc00c 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforcommit.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforcommit.py @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import StringIO - from webkitpy.tool.steps.abstractstep import AbstractStep @@ -35,7 +33,7 @@ class PostDiffForCommit(AbstractStep): def run(self, state): self._tool.bugs.add_patch_to_bug( state["bug_id"], - StringIO.StringIO(self.cached_lookup(state, "diff")), + self.cached_lookup(state, "diff"), "Patch for landing", mark_for_review=False, mark_for_landing=True) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforrevert.py b/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforrevert.py index 3b9da04..bfa631f 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforrevert.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/postdiffforrevert.py @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import StringIO - from webkitpy.common.net.bugzilla import Attachment from webkitpy.tool.steps.abstractstep import AbstractStep @@ -44,7 +42,7 @@ following command:\n\n\ where ATTACHMENT_ID is the ID of this attachment." 
self._tool.bugs.add_patch_to_bug( state["bug_id"], - StringIO.StringIO(self.cached_lookup(state, "diff")), + self.cached_lookup(state, "diff"), "%s%s" % (Attachment.rollout_preamble, state["revision"]), comment_text=comment_text, mark_for_review=False, diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py index fcb40be..3a5c013 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelog.py @@ -37,10 +37,13 @@ from webkitpy.common.system.deprecated_logging import error class PrepareChangeLog(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.port, Options.quiet, Options.email, + Options.git_commit, + Options.no_squash, + Options.squash, ] def run(self, state): @@ -52,6 +55,11 @@ class PrepareChangeLog(AbstractStep): args.append("--bug=%s" % state["bug_id"]) if self._options.email: args.append("--email=%s" % self._options.email) + if self._tool.scm().should_squash(self._options.squash): + args.append("--merge-base=%s" % self._tool.scm().svn_merge_base()) + if self._options.git_commit: + args.append("--git-commit=%s" % self._options.git_commit) + try: self._tool.executive.run_and_throw_if_fail(args, self._options.quiet) except ScriptError, e: diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelogforrevert.py b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelogforrevert.py index f7d9cd3..4d299fa 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelogforrevert.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/preparechangelogforrevert.py @@ -36,7 +36,7 @@ class PrepareChangeLogForRevert(AbstractStep): def run(self, state): # This could move to prepare-ChangeLog by adding a --revert= option. 
self._run_script("prepare-ChangeLog") - changelog_paths = self._tool.checkout().modified_changelogs() + changelog_paths = self._tool.checkout().modified_changelogs(git_commit=None, squash=False) bug_url = self._tool.bugs.bug_url_for_bug_id(state["bug_id"]) if state["bug_id"] else None for changelog_path in changelog_paths: # FIXME: Seems we should prepare the message outside of changelogs.py and then just pass in diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/runtests.py b/WebKitTools/Scripts/webkitpy/tool/steps/runtests.py index 55d8c62..b1c2d3b 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/runtests.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/runtests.py @@ -33,7 +33,7 @@ from webkitpy.common.system.deprecated_logging import log class RunTests(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.test, Options.non_interactive, Options.quiet, diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/steps_unittest.py b/WebKitTools/Scripts/webkitpy/tool/steps/steps_unittest.py index 40bee90..5abfc6d 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/steps_unittest.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/steps_unittest.py @@ -48,7 +48,8 @@ class StepsTest(unittest.TestCase): def test_update_step(self): options = Mock() options.update = True - self._run_step(Update, options) + expected_stderr = "Updating working directory\n" + OutputCapture().assert_outputs(self, self._run_step, [Update, options], expected_stderr=expected_stderr) def test_prompt_for_bug_or_title_step(self): tool = MockTool() diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/update.py b/WebKitTools/Scripts/webkitpy/tool/steps/update.py index c98eba7..0f450f3 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/update.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/update.py @@ -34,7 +34,7 @@ from webkitpy.common.system.deprecated_logging import log class Update(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ Options.update, Options.port, ] diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/updatechangelogswithreviewer.py b/WebKitTools/Scripts/webkitpy/tool/steps/updatechangelogswithreviewer.py index a35ed8c..9740013 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/updatechangelogswithreviewer.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/updatechangelogswithreviewer.py @@ -37,8 +37,11 @@ from webkitpy.common.system.deprecated_logging import log, error class UpdateChangeLogsWithReviewer(AbstractStep): @classmethod def options(cls): - return [ + return AbstractStep.options() + [ + Options.git_commit, Options.reviewer, + Options.no_squash, + Options.squash, ] def _guess_reviewer_from_bug(self, bug_id): @@ -67,5 +70,5 @@ class UpdateChangeLogsWithReviewer(AbstractStep): return os.chdir(self._tool.scm().checkout_root) - for changelog_path in self._tool.checkout().modified_changelogs(): + for changelog_path in self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash): ChangeLog(changelog_path).set_reviewer(reviewer) diff --git a/WebKitTools/Scripts/webkitpy/tool/steps/validatereviewer.py b/WebKitTools/Scripts/webkitpy/tool/steps/validatereviewer.py index 80b2c5d..66ee5b7 100644 --- a/WebKitTools/Scripts/webkitpy/tool/steps/validatereviewer.py +++ b/WebKitTools/Scripts/webkitpy/tool/steps/validatereviewer.py @@ -31,11 +31,20 @@ import re from webkitpy.common.checkout.changelog import ChangeLog from webkitpy.tool.steps.abstractstep import AbstractStep +from 
webkitpy.tool.steps.options import Options
 from webkitpy.common.system.deprecated_logging import error, log
 
 
 # FIXME: Some of this logic should probably be unified with CommitterValidator?
 class ValidateReviewer(AbstractStep):
+    @classmethod
+    def options(cls):
+        return AbstractStep.options() + [
+            Options.git_commit,
+            Options.no_squash,
+            Options.squash,
+        ]
+
     # FIXME: This should probably move onto ChangeLogEntry
     def _has_valid_reviewer(self, changelog_entry):
         if changelog_entry.reviewer():
@@ -54,7 +63,7 @@ class ValidateReviewer(AbstractStep):
         # FIXME: We should figure out how to handle the current working
         # directory issue more globally.
         os.chdir(self._tool.scm().checkout_root)
-        for changelog_path in self._tool.checkout().modified_changelogs():
+        for changelog_path in self._tool.checkout().modified_changelogs(self._options.git_commit, self._options.squash):
             changelog_entry = ChangeLog(changelog_path).latest_entry()
             if self._has_valid_reviewer(changelog_entry):
                 continue
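Finally, the --squash/--no-squash pair added to options.py above is a standard optparse idiom: two option strings that store True and False into the same dest, with the default left at None, presumably so that scm().should_squash() (called in the CheckStyle and PrepareChangeLog hunks) can distinguish "no preference given" from an explicit choice. A minimal sketch of the pattern in the same make_option style; the option strings are copied from the diff, the parser wiring is illustrative:

    from optparse import OptionParser, make_option

    option_list = [
        make_option("-s", "--squash", action="store_true", dest="squash",
                    help="Squash all local commits into one on upload/land (git-only)."),
        make_option("--no-squash", action="store_false", dest="squash",
                    help="Don't squash local commits into one on upload/land (git-only)."),
    ]
    parser = OptionParser(option_list=option_list)

    options, args = parser.parse_args(["--no-squash"])
    print(options.squash)  # False
    options, args = parser.parse_args([])
    print(options.squash)  # None: neither flag was given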