diff options
Diffstat (limited to 'Tools/Scripts/extract-localizable-strings')
| -rwxr-xr-x | Tools/Scripts/extract-localizable-strings | 390 | 
1 files changed, 390 insertions, 0 deletions
diff --git a/Tools/Scripts/extract-localizable-strings b/Tools/Scripts/extract-localizable-strings new file mode 100755 index 0000000..b31550a --- /dev/null +++ b/Tools/Scripts/extract-localizable-strings @@ -0,0 +1,390 @@ +#!/usr/bin/perl -w + +# Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1.  Redistributions of source code must retain the above copyright +#     notice, this list of conditions and the following disclaimer.  +# 2.  Redistributions in binary form must reproduce the above copyright +#     notice, this list of conditions and the following disclaimer in the +#     documentation and/or other materials provided with the distribution.  +# 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of +#     its contributors may be used to endorse or promote products derived +#     from this software without specific prior written permission.  +# +# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This script is like the genstrings tool (minus most of the options) with these differences. +# +#    1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros +#       from NSBundle.h, and doesn't support tables (although they would be easy to add). +#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings; +#       @"" strings only reliably support ASCII since they are decoded based on the system encoding +#       at runtime, so give different results on US and Japanese systems for example). +#    3) It looks for strings that are not marked for localization, using both macro names that are +#       known to be used for debugging in Intrigue source code and an exceptions file. +#    4) It finds the files to work on rather than taking them as parameters, and also uses a +#       hardcoded location for both the output file and the exceptions file. +#       It would have been nice to use the project to find the source files, but it's too hard to +#       locate source files after parsing a .pbxproj file. + +# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :. + +use strict; + +sub UnescapeHexSequence($); + +my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 ); + +@ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n"; + +my $exceptionsFile = shift @ARGV; +-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n"; + +my $fileToUpdate = shift @ARGV; +-f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n"; + +my @directories = (); +my @directoriesToSkip = (); +if (@ARGV < 1) { +    push(@directories, "."); +} else { +    for my $dir (@ARGV) { +        if ($dir =~ /^-(.*)$/) { +            push @directoriesToSkip, $1; +        } else { +            push @directories, $dir; +        } +    } +} + +my $sawError = 0; + +my $localizedCount = 0; +my $keyCollisionCount = 0; +my $notLocalizedCount = 0; +my $NSLocalizeCount = 0; + +my %exception; +my %usedException; + +if (open EXCEPTIONS, $exceptionsFile) { +    while (<EXCEPTIONS>) { +        chomp; +        if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) { +            if ($exception{$_}) { +                print "$exceptionsFile:$.:exception for $_ appears twice\n"; +                print "$exceptionsFile:$exception{$_}:first appearance\n"; +            } else { +                $exception{$_} = $.; +            } +        } else { +            print "$exceptionsFile:$.:syntax error\n"; +        } +    } +    close EXCEPTIONS; +} + +my $quotedDirectoriesString = '"' . join('" "', @directories) . '"'; +for my $dir (@directoriesToSkip) { +    $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o'; +} + +my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` ); + +for my $file (sort @files) { +    next if $file =~ /\/\w+LocalizableStrings\.h$/; + +    $file =~ s-^./--; + +    open SOURCE, $file or die "can't open $file\n"; +     +    my $inComment = 0; +     +    my $expected = ""; +    my $macroLine; +    my $macro; +    my $UIString; +    my $key; +    my $comment; +     +    my $string; +    my $stringLine; +    my $nestingLevel; +     +    my $previousToken = ""; + +    while (<SOURCE>) { +        chomp; +         +        # Handle continued multi-line comment. +        if ($inComment) { +            next unless s-.*\*/--; +            $inComment = 0; +        } +     +        # Handle all the tokens in the line. +        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) { +            my $token = $1; +             +            if ($token eq "\"") { +                if ($expected and $expected ne "a quoted string") { +                    print "$file:$.:ERROR:found a quoted string but expected $expected\n"; +                    $sawError = 1; +                    $expected = ""; +                } +                if (s-^(([^\\$token]|\\.)*?)$token--) { +                    if (!defined $string) { +                        $stringLine = $.; +                        $string = $1; +                    } else { +                        $string .= $1; +                    } +                } else { +                    print "$file:$.:ERROR:mismatched quotes\n"; +                    $sawError = 1; +                    $_ = ""; +                } +                next; +            } +             +            if (defined $string) { +handleString: +                if ($expected) { +                    if (!defined $UIString) { +                        # FIXME: Validate UTF-8 here? +                        $UIString = $string; +                        $expected = ","; +                    } elsif (($macro =~ /UI_STRING_KEY$/) and !defined $key) { +                        # FIXME: Validate UTF-8 here? +                        $key = $string; +                        $expected = ","; +                    } elsif (!defined $comment) { +                        # FIXME: Validate UTF-8 here? +                        $comment = $string; +                        $expected = ")"; +                    } +                } else { +                    if (defined $nestingLevel) { +                        # In a debug macro, no need to localize. +                    } elsif ($previousToken eq "#include" or $previousToken eq "#import") { +                        # File name, no need to localize. +                    } elsif ($previousToken eq "extern" and $string eq "C") { +                        # extern "C", no need to localize. +                    } elsif ($string eq "") { +                        # Empty string can sometimes be localized, but we need not complain if not. +                    } elsif ($exception{$file}) { +                        $usedException{$file} = 1; +                    } elsif ($exception{"\"$string\""}) { +                        $usedException{"\"$string\""} = 1; +                    } elsif ($exception{"$file:\"$string\""}) { +                        $usedException{"$file:\"$string\""} = 1; +                    } else { +                        print "$file:$stringLine:\"$string\" is not marked for localization\n"; +                        $notLocalizedCount++; +                    } +                } +                $string = undef; +                last if !defined $token; +            } +             +            $previousToken = $token; + +            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) { +                print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n"; +                $nestingLevel = 0 if !defined $nestingLevel; +                $sawError = 1; +                $NSLocalizeCount++; +            } elsif ($token eq "/*") { +                if (!s-^.*?\*/--) { +                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag +                    $inComment = 1; +                } +            } elsif ($token eq "//") { +                $_ = ""; # Discard the rest of the line +            } elsif ($token eq "'") { +                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused +                    print "$file:$.:ERROR:mismatched single quote\n"; +                    $sawError = 1; +                    $_ = ""; +                } +            } else { +                if ($expected and $expected ne $token) { +                    print "$file:$.:ERROR:found $token but expected $expected\n"; +                    $sawError = 1; +                    $expected = ""; +                } +                if ($token =~ /UI_STRING(_KEY)?$/) { +                    $expected = "("; +                    $macro = $token; +                    $UIString = undef; +                    $key = undef; +                    $comment = undef; +                    $macroLine = $.; +                } elsif ($token eq "(" or $token eq "[") { +                    ++$nestingLevel if defined $nestingLevel; +                    $expected = "a quoted string" if $expected; +                } elsif ($token eq ",") { +                    $expected = "a quoted string" if $expected; +                } elsif ($token eq ")" or $token eq "]") { +                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel; +                    if ($expected) { +                        $key = $UIString if !defined $key; +                        HandleUIString($UIString, $key, $comment, $file, $macroLine); +                        $macro = ""; +                        $expected = ""; +                        $localizedCount++; +                    } +                } elsif ($isDebugMacro{$token}) { +                    $nestingLevel = 0 if !defined $nestingLevel; +                } +            } +        } +             +    } +     +    goto handleString if defined $string; +     +    if ($expected) { +        print "$file:ERROR:reached end of file but expected $expected\n"; +        $sawError = 1; +    } +     +    close SOURCE; +} + +# Unescapes C language hexadecimal escape sequences. +sub UnescapeHexSequence($) +{ +    my ($originalStr) = @_; + +    my $escapedStr = $originalStr; +    my $unescapedStr = ""; + +    for (;;) { +        if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) { +            if (256 <= hex($1)) { +                print "Hexadecimal escape sequence out of range: \\x$1\n"; +                return undef; +            } +            $unescapedStr .= pack("H*", $1); +        } elsif ($escapedStr =~ s-^(.)--) { +            $unescapedStr .= $1; +        } else { +            return $unescapedStr; +        } +    } +} + +my %stringByKey; +my %commentByKey; +my %fileByKey; +my %lineByKey; + +sub HandleUIString +{ +    my ($string, $key, $comment, $file, $line) = @_; + +    my $bad = 0; +    $string = UnescapeHexSequence($string); +    if (!defined($string)) { +        print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n"; +        $bad = 1; +    } +    $key = UnescapeHexSequence($key); +    if (!defined($key)) { +        print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n"; +        $bad = 1; +    } +    $comment = UnescapeHexSequence($comment); +    if (!defined($comment)) { +        print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n"; +        $bad = 1; +    } +    if (grep { $_ == 0xFFFD } unpack "U*", $string) { +        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n"; +        $bad = 1; +    } +    if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) { +        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n"; +        $bad = 1; +    } +    if (grep { $_ == 0xFFFD } unpack "U*", $comment) { +        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n"; +        $bad = 1; +    } +    if ($bad) { +        $sawError = 1; +        return; +    } +     +    if ($stringByKey{$key} && $stringByKey{$key} ne $string) { +        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n"; +        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n"; +        $keyCollisionCount++; +        return; +    } +    if ($commentByKey{$key} && $commentByKey{$key} ne $comment) { +        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n"; +        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n"; +        $keyCollisionCount++; +        return; +    } + +    $fileByKey{$key} = $file; +    $lineByKey{$key} = $line; +    $stringByKey{$key} = $string; +    $commentByKey{$key} = $comment; +} + +print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount; + +my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception; +if (@unusedExceptions) { +    for my $unused (@unusedExceptions) { +        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n"; +    } +    print "\n"; +} + +print "$localizedCount localizable strings\n" if $localizedCount; +print "$keyCollisionCount key collisions\n" if $keyCollisionCount; +print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount; +print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount; +print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions; + +if ($sawError) { +    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n"; +    exit 1; +} + +my $localizedStrings = ""; + +for my $key (sort keys %commentByKey) { +    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n"; +} + +# Write out the strings file in UTF-16 with a BOM. +utf8::decode($localizedStrings) if $^V ge v5.8; +my $output = pack "n*", (0xFEFF, unpack "U*", $localizedStrings); + +if (-e "$fileToUpdate") { +    open STRINGS, ">", "$fileToUpdate" or die; +    print STRINGS $output; +    close STRINGS; +} else { +    print "$fileToUpdate does not exist\n"; +    exit 1; +}  | 
