lib/TWiki/I18N.pm
changeset 0 414e01d06fd5
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/TWiki/I18N.pm	Sat Jan 26 15:50:53 2008 +0100
     1.3 @@ -0,0 +1,424 @@
     1.4 +# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/
     1.5 +#
     1.6 +# Copyright (C) 1999-2008 TWiki Contributors.
     1.7 +# All Rights Reserved. TWiki Contributors
     1.8 +# are listed in the AUTHORS file in the root of this distribution.
     1.9 +# NOTE: Please extend that file, not this notice.
    1.10 +#
    1.11 +# This program is free software; you can redistribute it and/or
    1.12 +# modify it under the terms of the GNU General Public License
    1.13 +# as published by the Free Software Foundation; either version 2
    1.14 +# of the License, or (at your option) any later version. For
    1.15 +# more details read LICENSE in the root of this distribution.
    1.16 +#
    1.17 +# This program is distributed in the hope that it will be useful,
    1.18 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.19 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    1.20 +#
    1.21 +# As per the GPL, removal of this notice is prohibited.
    1.22 +
    1.23 +=pod
    1.24 +
    1.25 +---+ package TWiki::I18N
    1.26 +
    1.27 +Support for strings translation and language detection.
    1.28 +
    1.29 +=cut
    1.30 +
    1.31 +package TWiki::I18N;
    1.32 +
    1.33 +use strict;
    1.34 +use Assert;
    1.35 +
    1.36 +use vars qw( $initialised @initErrors );
    1.37 +
    1.38 +=pod
    1.39 +
    1.40 +---++ ClassMethod available_languages
    1.41 +
    1.42 +Lists languages tags for languages available at TWiki installation. Returns a
    1.43 +list containing the tags of the available languages.
    1.44 +
    1.45 +__Note__: the languages available to users are determined in the =configure=
    1.46 +interface.
    1.47 +
    1.48 +=cut
    1.49 +
    1.50 +sub available_languages {
    1.51 +
    1.52 +    my @available ;
    1.53 +
    1.54 +    while ( my ( $langCode, $langOptions ) = each %{$TWiki::cfg{Languages}} ) {
    1.55 +        if ( $langOptions->{Enabled} ) {
    1.56 +            push(@available, _normalize_language_tag($langCode));
    1.57 +        }
    1.58 +    }
    1.59 +
    1.60 +    return @available;
    1.61 +}
    1.62 +
    1.63 +# utility function: normalize language tags like ab_CD to ab-cd
    1.64 +# also renove any character there is not a letter [a-z] or a hyphen.
    1.65 +sub _normalize_language_tag {
    1.66 +    my $tag = shift;
    1.67 +    $tag = lc($tag);;
    1.68 +    $tag =~ s/\_/-/g;
    1.69 +    $tag =~ s/[^a-z-]//g;
    1.70 +    return $tag;
    1.71 +}
    1.72 +
    1.73 +# initialisation block
    1.74 +BEGIN {
    1.75 +    # we only need to proceed if user wants internationalisation support
    1.76 +    return unless $TWiki::cfg{UserInterfaceInternationalisation};
    1.77 +
    1.78 +    # no languages enabled is the same as disabling {UserInterfaceInternationalisation}
    1.79 +    my @languages = available_languages();
    1.80 +    return unless (scalar(@languages));
    1.81 +
    1.82 +    # we first assume it's ok
    1.83 +    $initialised = 1;
    1.84 +
    1.85 +    eval "use base 'Locale::Maketext'";
    1.86 +    if ( $@ ) {
    1.87 +        $initialised = 0;
    1.88 +        push(@initErrors, "I18N: Couldn't load required perl module Locale::Maketext: " . $@."\nInstall the module or turn off {UserInterfaceInternationalisation}");
    1.89 +    }
    1.90 +
    1.91 +    unless( $TWiki::cfg{LocalesDir} && -e $TWiki::cfg{LocalesDir} ) {
    1.92 +        push(@initErrors, 'I18N: {LocalesDir} not configured. Define it or turn off {UserInterfaceInternationalisation}');
    1.93 +        $initialised = 0;
    1.94 +    }
    1.95 +
    1.96 +    # dynamically build languages to be loaded according to admin-enabled
    1.97 +    # languages.
    1.98 +    my $dependencies = "use Locale::Maketext::Lexicon{'en'=>['Auto'],";
    1.99 +    foreach my $lang (@languages) {
   1.100 +        $dependencies .= "'$lang'=>['Gettext'=>'$TWiki::cfg{LocalesDir}/$lang.po' ], ";
   1.101 +    }
   1.102 +    $dependencies .= '};';
   1.103 +
   1.104 +    eval $dependencies;
   1.105 +    if ( $@ ) {
   1.106 +        $initialised = 0;
   1.107 +        push(@initErrors, "I18N - Couldn't load required perl module Locale::Maketext::Lexicon: " . $@ . "\nInstall the module or turn off {UserInterfaceInternationalisation}");
   1.108 +    }
   1.109 +}
   1.110 +
   1.111 +=pod
   1.112 +
   1.113 +---++ ClassMethod new ( $session )
   1.114 +
   1.115 +Constructor. Gets the language object corresponding to the current users
   1.116 +language. If $session is not a TWiki object reference, just calls
   1.117 +Local::Maketext::new (the superclass constructor)
   1.118 +
   1.119 +=cut
   1.120 +
   1.121 +sub new {
   1.122 +    my $class = shift;
   1.123 +    my( $session ) = @_;
   1.124 +
   1.125 +    unless( ref($session) && $session->isa('TWiki') ) {
   1.126 +        # it's recursive
   1.127 +        return $class->SUPER::new(@_);
   1.128 +    }
   1.129 +
   1.130 +    unless ($initialised) {
   1.131 +        foreach my $error (@initErrors) {
   1.132 +            $session->writeWarning($error);
   1.133 +        }
   1.134 +    }
   1.135 +
   1.136 +    # guesses the language from the CGI environment
   1.137 +    # TODO:
   1.138 +    #   web/user/session setting must override the language detected from the
   1.139 +    #   browser.
   1.140 +    my $this;
   1.141 +    if ($initialised) {
   1.142 +        $session->enterContext( 'i18n_enabled' );
   1.143 +        my $userLanguage = _normalize_language_tag($session->{prefs}->getPreferencesValue('LANGUAGE'));
   1.144 +        if ($userLanguage) {
   1.145 +            $this = TWiki::I18N->get_handle($userLanguage);
   1.146 +        } else {
   1.147 +            $this = TWiki::I18N->get_handle();
   1.148 +        }
   1.149 +    } else {
   1.150 +        require TWiki::I18N::Fallback;
   1.151 +
   1.152 +        $this = new TWiki::I18N::Fallback();
   1.153 +
   1.154 +        # we couldn't initialise 'optional' I18N infrastructure, warn that we
   1.155 +        # can only use English if I18N has been requested with configure
   1.156 +        $session->writeWarning('Could not load I18N infrastructure; falling back to English')
   1.157 +          if $TWiki::cfg{UserInterfaceInternationalisation};
   1.158 +    }
   1.159 +
   1.160 +    # keep a reference to the session object
   1.161 +    $this->{session} = $session;
   1.162 +
   1.163 +    # languages we know about
   1.164 +    $this->{enabled_languages} = { en => 'English' };
   1.165 +    $this->{checked_enabled}   = undef;
   1.166 +
   1.167 +    # what to do with failed translations (only needed when already initialised
   1.168 +    # and language is not English);
   1.169 +    if ($initialised and ($this->language ne 'en')) {
   1.170 +        my $fallback_handle = TWiki::I18N->get_handle('en');
   1.171 +        $this->fail_with(
   1.172 +            sub {
   1.173 +                shift; # get rid of the handle
   1.174 +                return $fallback_handle->maketext( @_ );
   1.175 +            }
   1.176 +           );
   1.177 +    }
   1.178 +
   1.179 +    # finally! :-p
   1.180 +    return $this;
   1.181 +}
   1.182 +
   1.183 +=begin twiki
   1.184 +
   1.185 +---++ ObjectMethod finish()
   1.186 +Break circular references.
   1.187 +
   1.188 +=cut
   1.189 +
   1.190 +# Note to developers; please undef *all* fields in the object explicitly,
   1.191 +# whether they are references or not. That way this method is "golden
   1.192 +# documentation" of the live fields in the object.
   1.193 +sub finish {
   1.194 +    my $this = shift;
   1.195 +    undef $this->{enabled_languages};
   1.196 +    undef $this->{checked_enabled};
   1.197 +    undef $this->{session};
   1.198 +}
   1.199 +
   1.200 +=pod
   1.201 +
   1.202 +---++ ObjectMethod maketext( $text ) -> $translation
   1.203 +
   1.204 +Translates the given string (assumed to be written in English) into the
   1.205 +current language, as detected in the constructor, and converts it into
   1.206 +the site charset.
   1.207 +
   1.208 +Wraps around Locale::Maketext's maketext method, adding charset conversion and checking
   1.209 +
   1.210 +Return value: translated string, or the argument itself if no translation is
   1.211 +found for thet argument.
   1.212 +
   1.213 +=cut
   1.214 +
   1.215 +sub maketext {
   1.216 +    my ( $this, $text, @args ) = @_;
   1.217 +
   1.218 +    # these can be user-supplied data. They can be in {Site}{CharSet}. Convert
   1.219 +    # into "internal representation" as expected by TWiki::I18N::maketext
   1.220 +    @args = map { $this->fromSiteCharSet($_) } @args;
   1.221 +
   1.222 +    if ($text =~ /^_/ && $text ne '_language_name') {
   1.223 +        require CGI;
   1.224 +        import CGI();
   1.225 +
   1.226 +        return CGI::span (
   1.227 +            { -style => 'color:red;' } ,
   1.228 +            "Error: MAKETEXT argument's can't start with an underscore (\"_\")." );
   1.229 +    }
   1.230 +
   1.231 +    my $result = $this->SUPER::maketext($text, @args);
   1.232 +    if ($result && $this->{session}) {
   1.233 +        # external calls get the resultant text in the right charset:
   1.234 +        $result = $this->toSiteCharSet($result);
   1.235 +    }
   1.236 +
   1.237 +    return $result;
   1.238 +}
   1.239 +
   1.240 +=pod
   1.241 +
   1.242 +---++ ObjectMethod language() -> $language_tag
   1.243 +
   1.244 +Indicates the language tag of the current user's language, as detected from the
   1.245 +information sent by the browser. Returns the empty string if the language
   1.246 +could not be determined.
   1.247 +
   1.248 +=cut
   1.249 +
   1.250 +sub language {
   1.251 +    my $this = shift;
   1.252 +
   1.253 +    return $this->language_tag();
   1.254 +}
   1.255 +
   1.256 +=pod
   1.257 +
   1.258 +---++ ObjectMethod enabled_languages() -> %languages
   1.259 +
   1.260 +Returns an array with language tags as keys and language (native) names as
   1.261 +values, for all the languages enabled in this TWiki.TWikiSite. Useful for
   1.262 +listing available languages to the user.
   1.263 +
   1.264 +=cut
   1.265 +
   1.266 +sub enabled_languages {
   1.267 +    my $this = shift;
   1.268 +
   1.269 +    unless ($this->{checked_enabled}) {
   1.270 +        _discover_languages( $this );
   1.271 +    }
   1.272 +
   1.273 +    $this->{checked_enabled} = 1;
   1.274 +    return $this->{enabled_languages};
   1.275 +
   1.276 +}
   1.277 +
   1.278 +
   1.279 +# discovers the available language.
   1.280 +sub _discover_languages {
   1.281 +    my $this = shift;
   1.282 +
   1.283 +    #use the cache, if available
   1.284 +    if ( open LANGUAGE,"<$TWiki::cfg{LocalesDir}/languages.cache"  ) {
   1.285 +    foreach my $line (<LANGUAGE>) {
   1.286 +            my ($key,$name)=split('=',$line);
   1.287 +             chop($name);
   1.288 +            _add_language( $this,$key,$name);
   1.289 +        }
   1.290 +    } else {
   1.291 +        #TODO: if the cache file don't exist, perhaps a warning should be issued to the logs?
   1.292 +        open LANGUAGE,">$TWiki::cfg{LocalesDir}/languages.cache";
   1.293 +        foreach my $tag ( available_languages() ) {
   1.294 +            my $h = TWiki::I18N->get_handle($tag);
   1.295 +            my $name = $h->maketext("_language_name");
   1.296 +            $name = $this->toSiteCharSet($name); 
   1.297 +            _add_language( $this,$tag, $name);
   1.298 +            print LANGUAGE "$tag=$name\n";
   1.299 +        }
   1.300 +    }
   1.301 +
   1.302 +    close LANGUAGE;
   1.303 +    $this->{checked_enabled} = 1;
   1.304 +
   1.305 +}
   1.306 +
   1.307 +
   1.308 +=pod
   1.309 +
   1.310 +---++ ObjectMethod fromSiteCharSet ( $text ) -> $encoded
   1.311 +
   1.312 +This method receives =$text=, assumed to be encoded in {Site}{CharSet}, and
   1.313 +converts it to a internal representation.
   1.314 +
   1.315 +Currently this representation will be a UTF-8 string, but this may change in
   1.316 +the future. This way, you can't assume any property on the returned value, and
   1.317 +should only use the returned value of this function as input to toSiteCharSet.
   1.318 +If you change the returnd value, either by removing, updating or appending
   1.319 +characters, be sure to touch only ASCII characters (i.e., characters that have
   1.320 +ord() less than 128).
   1.321 +
   1.322 +=cut
   1.323 +
   1.324 +sub fromSiteCharSet {
   1.325 +    my ( $this, $text ) = @_;
   1.326 +
   1.327 +    return $text if( !defined  $TWiki::cfg{Site}{CharSet} ||
   1.328 +                       $TWiki::cfg{Site}{CharSet} =~ m/^utf-?8$/i);
   1.329 +
   1.330 +    if ($] < 5.008) {
   1.331 +        # use Unicode::MapUTF8 for Perl older than 5.8
   1.332 +        require Unicode::MapUTF8;
   1.333 +        my $encoding = $TWiki::cfg{Site}{CharSet};
   1.334 +        if ( Unicode::MapUTF8::utf8_supported_charset($encoding) ) {
   1.335 +          return Unicode::MapUTF8::to_utf8 ({
   1.336 +                                             -string => $text,
   1.337 +                                             -charset => $encoding
   1.338 +                                            });
   1.339 +        } else {
   1.340 +          $this->{session}->writeWarning
   1.341 +            ( 'Conversion from $encoding no supported, '.
   1.342 +              'or name not recognised - check perldoc Unicode::MapUTF8' );
   1.343 +          return $text;
   1.344 +        }
   1.345 +    } else {
   1.346 +        # good Perl version, just use Encode
   1.347 +        require Encode;
   1.348 +        import Encode;
   1.349 +        my $encoding = Encode::resolve_alias( $TWiki::cfg{Site}{CharSet} );
   1.350 +        if ( not $encoding ) {
   1.351 +            $this->{session}->writeWarning
   1.352 +              ( 'Conversion to "'.$TWiki::cfg{Site}{CharSet}.
   1.353 +                '" not supported, or name not recognised - check '.
   1.354 +                '"perldoc Encode::Supported"' );
   1.355 +            return undef;
   1.356 +        } else {
   1.357 +            my $octets = Encode::decode ( $encoding, $text, &Encode::FB_PERLQQ() );
   1.358 +            return Encode::encode ( 'utf-8', $octets );
   1.359 +        }
   1.360 +    }
   1.361 +}
   1.362 +
   1.363 +=pod
   1.364 +
   1.365 +
   1.366 +---++ ObjectMethod toSiteCharSet ( $encoded ) -> $text
   1.367 +
   1.368 +This method receives a string, assumed to be encoded in TWiki's internal string
   1.369 +representation (as generated by the fromSiteCharSet method, and converts it
   1.370 +into {Site}{CharSet}.
   1.371 +
   1.372 +When converting into {Site}{CharSet}, characters that are not present at that
   1.373 +charset are represented as HTML numerical character entities (NCR's), in the
   1.374 +format <code>&amp;#NNNN;</code>, where NNNN is the character's Unicode
   1.375 +codepoint.
   1.376 +
   1.377 +See also: the =fromSiteCharSet= method.
   1.378 +
   1.379 +=cut
   1.380 +
   1.381 +sub toSiteCharSet {
   1.382 +    my ( $this, $encoded ) = @_;
   1.383 +
   1.384 +    return $encoded if( !defined $TWiki::cfg{Site}{CharSet} ||
   1.385 +                          $TWiki::cfg{Site}{CharSet} =~ m/^utf-?8$/i);
   1.386 +
   1.387 +    if ( $] < 5.008 ) {
   1.388 +        # use Unicode::MapUTF8 for Perl older than 5.8
   1.389 +        require Unicode::MapUTF8;
   1.390 +        my $encoding = $TWiki::cfg{Site}{CharSet};
   1.391 +        if ( Unicode::MapUTF8::utf8_supported_charset($encoding) ) {
   1.392 +          return Unicode::MapUTF8::from_utf8 ({
   1.393 +                                             -string => $encoded,
   1.394 +                                             -charset => $encoding
   1.395 +                                            });
   1.396 +        } else {
   1.397 +          $this->{session}->writeWarning
   1.398 +            ( 'Conversion to $encoding no supported, '.
   1.399 +              'or name not recognised - check perldoc Unicode::MapUTF8' );
   1.400 +          return $encoded;
   1.401 +        }
   1.402 +    } else {
   1.403 +        require Encode;
   1.404 +        import Encode;
   1.405 +        my $encoding = Encode::resolve_alias ( $TWiki::cfg{Site}{CharSet} );
   1.406 +        if ( not $encoding ) {
   1.407 +            $this->{session}->writeWarning
   1.408 +              ( 'Conversion from "'.$TWiki::cfg{Site}{CharSet}.
   1.409 +                '" not supported, or name not recognised - check '.
   1.410 +                '"perldoc Encode::Supported"' );
   1.411 +            return $encoded;
   1.412 +        } else {
   1.413 +            # converts to {Site}{CharSet}, generating HTML NCR's when needed
   1.414 +            my $octets = Encode::decode ( 'utf-8', $encoded );
   1.415 +            return Encode::encode ( $encoding, $octets, &Encode::FB_HTMLCREF() );
   1.416 +        }
   1.417 +    }
   1.418 +}
   1.419 +
   1.420 +
   1.421 +# private utility method: add a pair tag/language name
   1.422 +sub _add_language {
   1.423 +    my ( $this, $tag, $name ) = @_;  
   1.424 +    ${$this->{enabled_languages}}{$tag} = $name;
   1.425 +}
   1.426 +
   1.427 +1;