lib/CPAN/lib/Locale/Maketext/Lexicon.pm
changeset 0 414e01d06fd5
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/CPAN/lib/Locale/Maketext/Lexicon.pm	Sat Jan 26 15:50:53 2008 +0100
     1.3 @@ -0,0 +1,473 @@
     1.4 +package Locale::Maketext::Lexicon;
     1.5 +$Locale::Maketext::Lexicon::VERSION = '0.49';
     1.6 +
     1.7 +use strict;
     1.8 +
     1.9 +=head1 NAME
    1.10 +
    1.11 +Locale::Maketext::Lexicon - Use other catalog formats in Maketext
    1.12 +
    1.13 +=head1 VERSION
    1.14 +
    1.15 +This document describes version 0.49 of Locale::Maketext::Lexicon,
    1.16 +released April 13, 2005.
    1.17 +
    1.18 +=head1 SYNOPSIS
    1.19 +
    1.20 +As part of a localization class, automatically glob for available
    1.21 +lexicons:
    1.22 +
    1.23 +    package Hello::I18N;
    1.24 +    use base 'Locale::Maketext';
    1.25 +    use Locale::Maketext::Lexicon {
    1.26 +        '*' => [Gettext => '/usr/local/share/locale/*/LC_MESSAGES/hello.mo'],
    1.27 +        _decode => 1,   # decode lexicon entries into utf8-strings
    1.28 +    };
    1.29 +
    1.30 +Explicitly specify languages, during compile- or run-time:
    1.31 +
    1.32 +    package Hello::I18N;
    1.33 +    use base 'Locale::Maketext';
    1.34 +    use Locale::Maketext::Lexicon {
    1.35 +        de => [Gettext => 'hello_de.po'],
    1.36 +        fr => [
    1.37 +            Gettext => 'hello_fr.po',
    1.38 +            Gettext => 'local/hello/fr.po',
    1.39 +        ],
    1.40 +    };
    1.41 +    # ... incrementally add new lexicons
    1.42 +    Locale::Maketext::Lexicon->import({
    1.43 +        de => [Gettext => 'local/hello/de.po'],
    1.44 +    })
    1.45 +
    1.46 +Alternatively, as part of a localization subclass:
    1.47 +
    1.48 +    package Hello::I18N::de;
    1.49 +    use base 'Hello::I18N';
    1.50 +    use Locale::Maketext::Lexicon (Gettext => \*DATA);
    1.51 +    __DATA__
    1.52 +    # Some sample data
    1.53 +    msgid ""
    1.54 +    msgstr ""
    1.55 +    "Project-Id-Version: Hello 1.3.22.1\n"
    1.56 +    "MIME-Version: 1.0\n"
    1.57 +    "Content-Type: text/plain; charset=iso8859-1\n"
    1.58 +    "Content-Transfer-Encoding: 8bit\n"
    1.59 +
    1.60 +    #: Hello.pm:10
    1.61 +    msgid "Hello, World!"
    1.62 +    msgstr "Hallo, Welt!"
    1.63 +
    1.64 +    #: Hello.pm:11
    1.65 +    msgid "You have %quant(%1,piece) of mail."
    1.66 +    msgstr "Sie haben %quant(%1,Poststueck,Poststuecken)."
    1.67 +
    1.68 +=head1 DESCRIPTION
    1.69 +
    1.70 +This module provides lexicon-handling modules to read from other
    1.71 +localization formats, such as I<Gettext>, I<Msgcat>, and so on.
    1.72 +
    1.73 +If you are unfamiliar with the concept of lexicon modules, please
    1.74 +consult L<Locale::Maketext> and L<http://www.autrijus.org/webl10n/>
    1.75 +first.
    1.76 +
    1.77 +A command-line utility L<xgettext.pl> is also installed with this
    1.78 +module, for extracting translatable strings from source files.
    1.79 +
    1.80 +=head2 The C<import> function
    1.81 +
    1.82 +The C<import()> function accepts two forms of arguments:
    1.83 +
    1.84 +=over 4
    1.85 +
    1.86 +=item (I<format> => I<source> ... )
    1.87 +
    1.88 +This form takes any number of argument pairs (usually one);
    1.89 +I<source> may be a file name, a filehandle, or an array reference.
    1.90 +
    1.91 +For each such pair, it pass the contents specified by the second
    1.92 +argument to B<Locale::Maketext::Lexicon::I<format>>->parse as a
    1.93 +plain list, and export its return value as the C<%Lexicon> hash
    1.94 +in the calling package.
    1.95 +
    1.96 +In the case that there are multiple such pairs, the lexicon
    1.97 +defined by latter ones overrides earlier ones.
    1.98 +
    1.99 +=item { I<language> => [ I<format>, I<source> ... ] ... }
   1.100 +
   1.101 +This form accepts a hash reference.  It will export a C<%Lexicon>
   1.102 +into the subclasses specified by each I<language>, using the process
   1.103 +described above.  It is designed to alleviate the need to set up a
   1.104 +separate subclass for each localized language, and just use the catalog
   1.105 +files.
   1.106 +
   1.107 +This module will convert the I<language> arguments into lowercase,
   1.108 +and replace all C<-> with C<_>, so C<zh_TW> and C<zh-tw> will both
   1.109 +map to the C<zh_tw> subclass.
   1.110 +
   1.111 +If I<language> begins with C<_>, it is taken as an option that
   1.112 +controls how lexicons are parsed.  See L</Options> for a list
   1.113 +of available options.
   1.114 +
   1.115 +The C<*> is a special I<language>; it must be used in conjunction
   1.116 +with a filename that also contains C<*>; all matched files with
   1.117 +a valid language code in the place of C<*> will be automatically
   1.118 +prepared as a lexicon subclass.  If there is multiple C<*> in
   1.119 +the filename, the last one is used as the language name.
   1.120 +
   1.121 +=back
   1.122 +
   1.123 +=head2 Options
   1.124 +
   1.125 +=over 4
   1.126 +
   1.127 +=item C<_decode>
   1.128 +
   1.129 +If set to a true value, source entries will be converted into
   1.130 +utf8-strings (available in Perl 5.6.1 or later).  This feature
   1.131 +needs the B<Encode> or B<Encode::compat> module.
   1.132 +
   1.133 +Currently, only the C<Gettext> backend supports this option.
   1.134 +
   1.135 +=item C<_encoding>
   1.136 +
   1.137 +This option only has effect when C<_decode> is set to true.
   1.138 +It specifies an encoding to store lexicon entries, instead of
   1.139 +utf8-strings.
   1.140 +
   1.141 +If C<_encoding> is set to C<locale>, the encoding from the
   1.142 +current locale setting is used.
   1.143 +
   1.144 +=head2 Subclassing format handlers
   1.145 +
   1.146 +If you wish to override how sources specified in different data types
   1.147 +are handled, please use a subclass that overrides C<lexicon_get_I<TYPE>>.
   1.148 +
   1.149 +XXX: not documented well enough yet.  Patches welcome.
   1.150 +
   1.151 +=head1 NOTES
   1.152 +
   1.153 +When you attempt to localize an entry missing in the lexicon, Maketext
   1.154 +will throw an exception by default.  To inhibit this behaviour, override
   1.155 +the C<_AUTO> key in your language subclasses, for example:
   1.156 +
   1.157 +    $Hello::I18N::en::Lexicon{_AUTO} = 1; # autocreate missing keys
   1.158 +
   1.159 +If you want to implement a new C<Lexicon::*> backend module, please note
   1.160 +that C<parse()> takes an array containing the B<source strings> from the
   1.161 +specified filehandle or filename, which are I<not> C<chomp>ed.  Although
   1.162 +if the source is an array reference, its elements will probably not contain
   1.163 +any newline characters anyway.
   1.164 +
   1.165 +The C<parse()> function should return a hash reference, which will be
   1.166 +assigned to the I<typeglob> (C<*Lexicon>) of the language module.  All
   1.167 +it amounts to is that if the returned reference points to a tied hash,
   1.168 +the C<%Lexicon> will be aliased to the same tied hash if it was not
   1.169 +initialized previously.
   1.170 +
   1.171 +=cut
   1.172 +
   1.173 +our %Opts;
   1.174 +sub option { shift if ref($_[0]); $Opts{lc $_[0]} }
   1.175 +sub set_option { shift if ref($_[0]); $Opts{lc $_[0]} = $_[1] }
   1.176 +
   1.177 +sub encoding {
   1.178 +    my $encoding = option(@_, 'encoding') or return;
   1.179 +    return $encoding unless lc($encoding) eq 'locale';
   1.180 +
   1.181 +    no warnings 'uninitialized';
   1.182 +    my ($country_language, $locale_encoding);
   1.183 +
   1.184 +    local $@;
   1.185 +    eval {
   1.186 +        require I18N::Langinfo;
   1.187 +        $locale_encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
   1.188 +    } or eval {
   1.189 +        require Win32::Console;
   1.190 +        $locale_encoding = 'cp'.Win32::Console::OutputCP();
   1.191 +    };
   1.192 +    if (!$locale_encoding) {
   1.193 +        foreach my $key (qw( LANGUAGE LC_ALL LC_MESSAGES LANG )) {
   1.194 +            $ENV{$key} =~ /^([^.]+)\.([^.:]+)/ or next;
   1.195 +            ($country_language, $locale_encoding) = ($1, $2);
   1.196 +            last;
   1.197 +        }
   1.198 +    }
   1.199 +    if (defined $locale_encoding &&
   1.200 +        lc($locale_encoding) eq 'euc' &&
   1.201 +        defined $country_language) {
   1.202 +        if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
   1.203 +            $locale_encoding = 'euc-jp';
   1.204 +        } elsif ($country_language =~ /^ko_KR|korean?$/i) {
   1.205 +            $locale_encoding = 'euc-kr';
   1.206 +        } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
   1.207 +            $locale_encoding = 'euc-cn';
   1.208 +        } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
   1.209 +            $locale_encoding = 'euc-tw';
   1.210 +        }
   1.211 +    }
   1.212 +
   1.213 +    return $locale_encoding;
   1.214 +}
   1.215 +
   1.216 +sub import {
   1.217 +    my $class = shift;
   1.218 +    return unless @_;
   1.219 +
   1.220 +    my %entries;
   1.221 +    if (UNIVERSAL::isa($_[0], 'HASH')) {
   1.222 +        # a hashref with $lang as keys, [$format, $src ...] as values
   1.223 +        %entries = %{$_[0]};
   1.224 +    }
   1.225 +    elsif (@_ % 2) {
   1.226 +        %entries = ( '' => [ @_ ] );
   1.227 +    }
   1.228 +
   1.229 +    # expand the wildcard entry
   1.230 +    if (my $wild_entry = delete $entries{'*'}) {
   1.231 +        while (my ($format, $src) = splice(@$wild_entry, 0, 2)) {
   1.232 +            next if ref($src); # XXX: implement globbing for the 'Tie' backend
   1.233 +
   1.234 +            my $pattern = quotemeta($src);
   1.235 +            $pattern =~ s/\\\*(?=[^*]+$)/\([-\\w]+\)/g or next;
   1.236 +            $pattern =~ s/\\\*/.*?/g;
   1.237 +            $pattern =~ s/\\\?/./g;
   1.238 +            $pattern =~ s/\\\[/[/g;
   1.239 +            $pattern =~ s/\\\]/]/g;
   1.240 +            $pattern =~ s[\\\{(.*?)\\\\}][
   1.241 +                '(?:'.join('|', split(/,/, $1)).')'
   1.242 +            ]eg;
   1.243 +
   1.244 +            require File::Glob;
   1.245 +            foreach my $file (File::Glob::bsd_glob($src)) {
   1.246 +                $file =~ /$pattern/ or next;
   1.247 +                push @{$entries{$1}}, ($format => $file) if $1;
   1.248 +            }
   1.249 +            delete $entries{$1}
   1.250 +                unless !defined($1)
   1.251 +                    or exists $entries{$1} and @{$entries{$1}};
   1.252 +        }
   1.253 +    }
   1.254 +
   1.255 +    %Opts = ();
   1.256 +    foreach my $key (grep /^_/, keys %entries) {
   1.257 +        set_option(lc(substr($key, 1)) => delete($entries{$key}));
   1.258 +    }
   1.259 +    my $OptsRef = { %Opts };
   1.260 +
   1.261 +    while (my ($lang, $entry) = each %entries) {
   1.262 +        my $export = caller;
   1.263 +
   1.264 +        if (length $lang) {
   1.265 +            # normalize language tag to Maketext's subclass convention
   1.266 +            $lang = lc($lang);
   1.267 +            $lang =~ s/-/_/g;
   1.268 +            $export .= "::$lang";
   1.269 +        }
   1.270 +
   1.271 +        my @pairs = @{$entry||[]} or die "no format specified";
   1.272 +
   1.273 +        while (my ($format, $src) = splice(@pairs, 0, 2)) {
   1.274 +            if (defined($src) and !ref($src) and $src =~ /\*/) {
   1.275 +                unshift(@pairs, $format => $_) for File::Glob::bsd_glob($src);
   1.276 +                next;
   1.277 +            }
   1.278 +
   1.279 +            local $@;
   1.280 +            my @content = eval {
   1.281 +                $class->lexicon_get($src, scalar caller, $lang);
   1.282 +            };
   1.283 +            next if $@ and $@ eq 'next';
   1.284 +            die $@ if $@;
   1.285 +
   1.286 +            no strict 'refs';
   1.287 +            eval "use $class\::$format; 1" or die $@;
   1.288 +
   1.289 +            if (defined %{"$export\::Lexicon"}) {
   1.290 +                if (ref(tied %{"$export\::Lexicon"}) eq __PACKAGE__) {
   1.291 +                    tied(%{"$export\::Lexicon"})->_force;
   1.292 +                }
   1.293 +                # be very careful not to pollute the possibly tied lexicon
   1.294 +                *{"$export\::Lexicon"} = {
   1.295 +                    %{"$export\::Lexicon"},
   1.296 +                    %{"$class\::$format"->parse(@content)},
   1.297 +                };
   1.298 +            }
   1.299 +            else {
   1.300 +                my $promise;
   1.301 +                tie %{"$export\::Lexicon"}, __PACKAGE__, {
   1.302 +                    Opts => $OptsRef,
   1.303 +                    Export => "$export\::Lexicon",
   1.304 +                    Class => "$class\::$format",
   1.305 +                    Content => \@content,
   1.306 +                };
   1.307 +            }
   1.308 +
   1.309 +            push(@{"$export\::ISA"}, scalar caller) if length $lang;
   1.310 +        }
   1.311 +    }
   1.312 +}
   1.313 +
   1.314 +sub TIEHASH {
   1.315 +    my ($class, $args) = @_;
   1.316 +    return bless($args, $class);
   1.317 +
   1.318 +}
   1.319 +
   1.320 +{
   1.321 +    no strict 'refs';
   1.322 +    sub _force {
   1.323 +        my $args = shift;
   1.324 +        if (!$args->{Done}++) {
   1.325 +            local *Opts = $args->{Opts};
   1.326 +            *{$args->{Export}} = $args->{Class}->parse(@{$args->{Content}});
   1.327 +        }
   1.328 +        return \%{$args->{Export}};
   1.329 +    }
   1.330 +    sub FETCH { _force($_[0])->{$_[1]} }
   1.331 +    sub EXISTS { _force($_[0])->{$_[1]} }
   1.332 +    sub DELETE { delete _force($_[0])->{$_[1]} }
   1.333 +    sub SCALAR { scalar %{_force($_[0])} }
   1.334 +    sub STORE { _force($_[0])->{$_[1]} = $_[2] }
   1.335 +    sub CLEAR { %{_force($_[0])->{$_[1]}} = () }
   1.336 +    sub NEXTKEY { each %{_force($_[0])} }
   1.337 +    sub FIRSTKEY {
   1.338 +        my $hash = _force($_[0]);
   1.339 +        my $a = scalar keys %$hash;
   1.340 +        each %$hash;
   1.341 +    }
   1.342 +}
   1.343 +
   1.344 +sub lexicon_get {
   1.345 +    my ($class, $src, $caller, $lang) = @_;
   1.346 +    return unless defined $src;
   1.347 +
   1.348 +    foreach my $type (qw(ARRAY HASH SCALAR GLOB), ref($src)) {
   1.349 +        next unless UNIVERSAL::isa($src, $type);
   1.350 +
   1.351 +        my $method = 'lexicon_get_' . lc($type);
   1.352 +        die "cannot handle source $type for $src: no $method defined"
   1.353 +            unless $class->can($method);
   1.354 +
   1.355 +        return $class->$method($src, $caller, $lang);
   1.356 +    }
   1.357 +
   1.358 +    # default handler
   1.359 +    return $class->lexicon_get_($src, $caller, $lang);
   1.360 +}
   1.361 +
   1.362 +# for scalarrefs and arrayrefs we just dereference the $src
   1.363 +sub lexicon_get_scalar { ${$_[1]} }
   1.364 +sub lexicon_get_array  { @{$_[1]} }
   1.365 +
   1.366 +sub lexicon_get_hash   {
   1.367 +    my ($class, $src, $caller, $lang) = @_;
   1.368 +    return map { $_ => $src->{$_} } sort keys %$src;
   1.369 +}
   1.370 +
   1.371 +sub lexicon_get_glob   {
   1.372 +    my ($class, $src, $caller, $lang) = @_;
   1.373 +
   1.374 +    no strict 'refs';
   1.375 +
   1.376 +    # be extra magical and check for DATA section
   1.377 +    if (eof($src) and $src eq \*{"$caller\::DATA"} or $src eq \*{"main\::DATA"}) {
   1.378 +        # okay, the *DATA isn't initiated yet. let's read.
   1.379 +        #
   1.380 +        require FileHandle;
   1.381 +        my $fh = FileHandle->new;
   1.382 +        my $package = ( ($src eq \*{"main\::DATA"}) ? 'main' : $caller );
   1.383 +
   1.384 +        if ( $package eq 'main' and -e $0 ) {
   1.385 +            $fh->open($0) or die "Can't open $0: $!";
   1.386 +        }
   1.387 +        else {
   1.388 +            my $level = 1;
   1.389 +            while ( my ($pkg, $filename) = caller($level++) ) {
   1.390 +                next unless $pkg eq $package;
   1.391 +                next unless -e $filename;
   1.392 +                next;
   1.393 +
   1.394 +                $fh->open($filename) or die "Can't open $filename: $!";
   1.395 +                last;
   1.396 +            }
   1.397 +        }
   1.398 +
   1.399 +        while (<$fh>) {
   1.400 +            # okay, this isn't foolproof, but good enough
   1.401 +            last if /^__DATA__$/;
   1.402 +        }
   1.403 +
   1.404 +        return <$fh>;
   1.405 +    }
   1.406 +
   1.407 +    # fh containing the lines
   1.408 +    my $pos = tell($src);
   1.409 +    my @lines = <$src>;
   1.410 +    seek($src, $pos, 0);
   1.411 +    return @lines;
   1.412 +}
   1.413 +
   1.414 +# assume filename - search path, open and return its contents
   1.415 +sub lexicon_get_ {
   1.416 +    my ($class, $src, $caller, $lang) = @_;
   1.417 +
   1.418 +    require FileHandle;
   1.419 +    require File::Spec;
   1.420 +
   1.421 +    my $fh = FileHandle->new;
   1.422 +    my @path = split('::', $caller);
   1.423 +    push @path, $lang if length $lang;
   1.424 +
   1.425 +    $src = (grep { -e } map {
   1.426 +        my @subpath = @path[0..$_];
   1.427 +        map { File::Spec->catfile($_, @subpath, $src) } @INC;
   1.428 +    } -1 .. $#path)[-1] unless -e $src;
   1.429 +
   1.430 +    defined $src or die 'next';
   1.431 +
   1.432 +    $fh->open($src) or die "Cannot read $src (called by $caller): $!";
   1.433 +    binmode($fh);
   1.434 +    return <$fh>;
   1.435 +}
   1.436 +
   1.437 +1;
   1.438 +
   1.439 +=head1 ACKNOWLEDGMENTS
   1.440 +
   1.441 +Thanks to Jesse Vincent for suggesting this module to be written.
   1.442 +
   1.443 +Thanks also to Sean M. Burke for coming up with B<Locale::Maketext>
   1.444 +in the first place, and encouraging me to experiment with alternative
   1.445 +Lexicon syntaxes.
   1.446 +
   1.447 +Thanks also to Yi Ma Mao for providing the MO file parsing subroutine,
   1.448 +as well as inspiring me to implement file globbing and transcoding
   1.449 +support.
   1.450 +
   1.451 +See the F<AUTHORS> file in the distribution for a list of people who
   1.452 +have sent helpful patches, ideas or comments.
   1.453 +
   1.454 +=head1 SEE ALSO
   1.455 +
   1.456 +L<xgettext.pl> for extracting translatable strings from common template
   1.457 +systems and perl source files.
   1.458 +
   1.459 +L<Locale::Maketext>, L<Locale::Maketext::Lexicon::Auto>,
   1.460 +L<Locale::Maketext::Lexicon::Gettext>, L<Locale::Maketext::Lexicon::Msgcat>,
   1.461 +L<Locale::Maketext::Lexicon::Tie>
   1.462 +
   1.463 +=head1 AUTHORS
   1.464 +
   1.465 +Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>
   1.466 +
   1.467 +=head1 COPYRIGHT
   1.468 +
   1.469 +Copyright 2002, 2003, 2004, 2005 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.
   1.470 +
   1.471 +This program is free software; you can redistribute it and/or 
   1.472 +modify it under the same terms as Perl itself.
   1.473 +
   1.474 +See L<http://www.perl.com/perl/misc/Artistic.html>
   1.475 +
   1.476 +=cut