lib/TWiki/I18N.pm
author Colas Nahaboo <colas@nahaboo.net>
Sat, 26 Jan 2008 15:50:53 +0100
changeset 0 414e01d06fd5
permissions -rw-r--r--
RELEASE 4.2.0 freetown
colas@0
     1
# Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/
colas@0
     2
#
colas@0
     3
# Copyright (C) 1999-2008 TWiki Contributors.
colas@0
     4
# All Rights Reserved. TWiki Contributors
colas@0
     5
# are listed in the AUTHORS file in the root of this distribution.
colas@0
     6
# NOTE: Please extend that file, not this notice.
colas@0
     7
#
colas@0
     8
# This program is free software; you can redistribute it and/or
colas@0
     9
# modify it under the terms of the GNU General Public License
colas@0
    10
# as published by the Free Software Foundation; either version 2
colas@0
    11
# of the License, or (at your option) any later version. For
colas@0
    12
# more details read LICENSE in the root of this distribution.
colas@0
    13
#
colas@0
    14
# This program is distributed in the hope that it will be useful,
colas@0
    15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
colas@0
    16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
colas@0
    17
#
colas@0
    18
# As per the GPL, removal of this notice is prohibited.
colas@0
    19
colas@0
    20
=pod
colas@0
    21
colas@0
    22
---+ package TWiki::I18N
colas@0
    23
colas@0
    24
Support for strings translation and language detection.
colas@0
    25
colas@0
    26
=cut
colas@0
    27
colas@0
    28
package TWiki::I18N;
colas@0
    29
colas@0
    30
use strict;
colas@0
    31
use Assert;
colas@0
    32
colas@0
    33
use vars qw( $initialised @initErrors );
colas@0
    34
colas@0
    35
=pod
colas@0
    36
colas@0
    37
---++ ClassMethod available_languages
colas@0
    38
colas@0
    39
Lists languages tags for languages available at TWiki installation. Returns a
colas@0
    40
list containing the tags of the available languages.
colas@0
    41
colas@0
    42
__Note__: the languages available to users are determined in the =configure=
colas@0
    43
interface.
colas@0
    44
colas@0
    45
=cut
colas@0
    46
colas@0
    47
sub available_languages {
colas@0
    48
colas@0
    49
    my @available ;
colas@0
    50
colas@0
    51
    while ( my ( $langCode, $langOptions ) = each %{$TWiki::cfg{Languages}} ) {
colas@0
    52
        if ( $langOptions->{Enabled} ) {
colas@0
    53
            push(@available, _normalize_language_tag($langCode));
colas@0
    54
        }
colas@0
    55
    }
colas@0
    56
colas@0
    57
    return @available;
colas@0
    58
}
colas@0
    59
colas@0
    60
# utility function: normalize language tags like ab_CD to ab-cd
colas@0
    61
# also renove any character there is not a letter [a-z] or a hyphen.
colas@0
    62
sub _normalize_language_tag {
colas@0
    63
    my $tag = shift;
colas@0
    64
    $tag = lc($tag);;
colas@0
    65
    $tag =~ s/\_/-/g;
colas@0
    66
    $tag =~ s/[^a-z-]//g;
colas@0
    67
    return $tag;
colas@0
    68
}
colas@0
    69
colas@0
    70
# initialisation block
colas@0
    71
BEGIN {
colas@0
    72
    # we only need to proceed if user wants internationalisation support
colas@0
    73
    return unless $TWiki::cfg{UserInterfaceInternationalisation};
colas@0
    74
colas@0
    75
    # no languages enabled is the same as disabling {UserInterfaceInternationalisation}
colas@0
    76
    my @languages = available_languages();
colas@0
    77
    return unless (scalar(@languages));
colas@0
    78
colas@0
    79
    # we first assume it's ok
colas@0
    80
    $initialised = 1;
colas@0
    81
colas@0
    82
    eval "use base 'Locale::Maketext'";
colas@0
    83
    if ( $@ ) {
colas@0
    84
        $initialised = 0;
colas@0
    85
        push(@initErrors, "I18N: Couldn't load required perl module Locale::Maketext: " . $@."\nInstall the module or turn off {UserInterfaceInternationalisation}");
colas@0
    86
    }
colas@0
    87
colas@0
    88
    unless( $TWiki::cfg{LocalesDir} && -e $TWiki::cfg{LocalesDir} ) {
colas@0
    89
        push(@initErrors, 'I18N: {LocalesDir} not configured. Define it or turn off {UserInterfaceInternationalisation}');
colas@0
    90
        $initialised = 0;
colas@0
    91
    }
colas@0
    92
colas@0
    93
    # dynamically build languages to be loaded according to admin-enabled
colas@0
    94
    # languages.
colas@0
    95
    my $dependencies = "use Locale::Maketext::Lexicon{'en'=>['Auto'],";
colas@0
    96
    foreach my $lang (@languages) {
colas@0
    97
        $dependencies .= "'$lang'=>['Gettext'=>'$TWiki::cfg{LocalesDir}/$lang.po' ], ";
colas@0
    98
    }
colas@0
    99
    $dependencies .= '};';
colas@0
   100
colas@0
   101
    eval $dependencies;
colas@0
   102
    if ( $@ ) {
colas@0
   103
        $initialised = 0;
colas@0
   104
        push(@initErrors, "I18N - Couldn't load required perl module Locale::Maketext::Lexicon: " . $@ . "\nInstall the module or turn off {UserInterfaceInternationalisation}");
colas@0
   105
    }
colas@0
   106
}
colas@0
   107
colas@0
   108
=pod
colas@0
   109
colas@0
   110
---++ ClassMethod new ( $session )
colas@0
   111
colas@0
   112
Constructor. Gets the language object corresponding to the current users
colas@0
   113
language. If $session is not a TWiki object reference, just calls
colas@0
   114
Local::Maketext::new (the superclass constructor)
colas@0
   115
colas@0
   116
=cut
colas@0
   117
colas@0
   118
sub new {
colas@0
   119
    my $class = shift;
colas@0
   120
    my( $session ) = @_;
colas@0
   121
colas@0
   122
    unless( ref($session) && $session->isa('TWiki') ) {
colas@0
   123
        # it's recursive
colas@0
   124
        return $class->SUPER::new(@_);
colas@0
   125
    }
colas@0
   126
colas@0
   127
    unless ($initialised) {
colas@0
   128
        foreach my $error (@initErrors) {
colas@0
   129
            $session->writeWarning($error);
colas@0
   130
        }
colas@0
   131
    }
colas@0
   132
colas@0
   133
    # guesses the language from the CGI environment
colas@0
   134
    # TODO:
colas@0
   135
    #   web/user/session setting must override the language detected from the
colas@0
   136
    #   browser.
colas@0
   137
    my $this;
colas@0
   138
    if ($initialised) {
colas@0
   139
        $session->enterContext( 'i18n_enabled' );
colas@0
   140
        my $userLanguage = _normalize_language_tag($session->{prefs}->getPreferencesValue('LANGUAGE'));
colas@0
   141
        if ($userLanguage) {
colas@0
   142
            $this = TWiki::I18N->get_handle($userLanguage);
colas@0
   143
        } else {
colas@0
   144
            $this = TWiki::I18N->get_handle();
colas@0
   145
        }
colas@0
   146
    } else {
colas@0
   147
        require TWiki::I18N::Fallback;
colas@0
   148
colas@0
   149
        $this = new TWiki::I18N::Fallback();
colas@0
   150
colas@0
   151
        # we couldn't initialise 'optional' I18N infrastructure, warn that we
colas@0
   152
        # can only use English if I18N has been requested with configure
colas@0
   153
        $session->writeWarning('Could not load I18N infrastructure; falling back to English')
colas@0
   154
          if $TWiki::cfg{UserInterfaceInternationalisation};
colas@0
   155
    }
colas@0
   156
colas@0
   157
    # keep a reference to the session object
colas@0
   158
    $this->{session} = $session;
colas@0
   159
colas@0
   160
    # languages we know about
colas@0
   161
    $this->{enabled_languages} = { en => 'English' };
colas@0
   162
    $this->{checked_enabled}   = undef;
colas@0
   163
colas@0
   164
    # what to do with failed translations (only needed when already initialised
colas@0
   165
    # and language is not English);
colas@0
   166
    if ($initialised and ($this->language ne 'en')) {
colas@0
   167
        my $fallback_handle = TWiki::I18N->get_handle('en');
colas@0
   168
        $this->fail_with(
colas@0
   169
            sub {
colas@0
   170
                shift; # get rid of the handle
colas@0
   171
                return $fallback_handle->maketext( @_ );
colas@0
   172
            }
colas@0
   173
           );
colas@0
   174
    }
colas@0
   175
colas@0
   176
    # finally! :-p
colas@0
   177
    return $this;
colas@0
   178
}
colas@0
   179
colas@0
   180
=begin twiki
colas@0
   181
colas@0
   182
---++ ObjectMethod finish()
colas@0
   183
Break circular references.
colas@0
   184
colas@0
   185
=cut
colas@0
   186
colas@0
   187
# Note to developers; please undef *all* fields in the object explicitly,
colas@0
   188
# whether they are references or not. That way this method is "golden
colas@0
   189
# documentation" of the live fields in the object.
colas@0
   190
sub finish {
colas@0
   191
    my $this = shift;
colas@0
   192
    undef $this->{enabled_languages};
colas@0
   193
    undef $this->{checked_enabled};
colas@0
   194
    undef $this->{session};
colas@0
   195
}
colas@0
   196
colas@0
   197
=pod
colas@0
   198
colas@0
   199
---++ ObjectMethod maketext( $text ) -> $translation
colas@0
   200
colas@0
   201
Translates the given string (assumed to be written in English) into the
colas@0
   202
current language, as detected in the constructor, and converts it into
colas@0
   203
the site charset.
colas@0
   204
colas@0
   205
Wraps around Locale::Maketext's maketext method, adding charset conversion and checking
colas@0
   206
colas@0
   207
Return value: translated string, or the argument itself if no translation is
colas@0
   208
found for thet argument.
colas@0
   209
colas@0
   210
=cut
colas@0
   211
colas@0
   212
sub maketext {
colas@0
   213
    my ( $this, $text, @args ) = @_;
colas@0
   214
colas@0
   215
    # these can be user-supplied data. They can be in {Site}{CharSet}. Convert
colas@0
   216
    # into "internal representation" as expected by TWiki::I18N::maketext
colas@0
   217
    @args = map { $this->fromSiteCharSet($_) } @args;
colas@0
   218
colas@0
   219
    if ($text =~ /^_/ && $text ne '_language_name') {
colas@0
   220
        require CGI;
colas@0
   221
        import CGI();
colas@0
   222
colas@0
   223
        return CGI::span (
colas@0
   224
            { -style => 'color:red;' } ,
colas@0
   225
            "Error: MAKETEXT argument's can't start with an underscore (\"_\")." );
colas@0
   226
    }
colas@0
   227
colas@0
   228
    my $result = $this->SUPER::maketext($text, @args);
colas@0
   229
    if ($result && $this->{session}) {
colas@0
   230
        # external calls get the resultant text in the right charset:
colas@0
   231
        $result = $this->toSiteCharSet($result);
colas@0
   232
    }
colas@0
   233
colas@0
   234
    return $result;
colas@0
   235
}
colas@0
   236
colas@0
   237
=pod
colas@0
   238
colas@0
   239
---++ ObjectMethod language() -> $language_tag
colas@0
   240
colas@0
   241
Indicates the language tag of the current user's language, as detected from the
colas@0
   242
information sent by the browser. Returns the empty string if the language
colas@0
   243
could not be determined.
colas@0
   244
colas@0
   245
=cut
colas@0
   246
colas@0
   247
sub language {
colas@0
   248
    my $this = shift;
colas@0
   249
colas@0
   250
    return $this->language_tag();
colas@0
   251
}
colas@0
   252
colas@0
   253
=pod
colas@0
   254
colas@0
   255
---++ ObjectMethod enabled_languages() -> %languages
colas@0
   256
colas@0
   257
Returns an array with language tags as keys and language (native) names as
colas@0
   258
values, for all the languages enabled in this TWiki.TWikiSite. Useful for
colas@0
   259
listing available languages to the user.
colas@0
   260
colas@0
   261
=cut
colas@0
   262
colas@0
   263
sub enabled_languages {
colas@0
   264
    my $this = shift;
colas@0
   265
colas@0
   266
    unless ($this->{checked_enabled}) {
colas@0
   267
        _discover_languages( $this );
colas@0
   268
    }
colas@0
   269
colas@0
   270
    $this->{checked_enabled} = 1;
colas@0
   271
    return $this->{enabled_languages};
colas@0
   272
colas@0
   273
}
colas@0
   274
colas@0
   275
colas@0
   276
# discovers the available language.
colas@0
   277
sub _discover_languages {
colas@0
   278
    my $this = shift;
colas@0
   279
colas@0
   280
    #use the cache, if available
colas@0
   281
    if ( open LANGUAGE,"<$TWiki::cfg{LocalesDir}/languages.cache"  ) {
colas@0
   282
    foreach my $line (<LANGUAGE>) {
colas@0
   283
            my ($key,$name)=split('=',$line);
colas@0
   284
             chop($name);
colas@0
   285
            _add_language( $this,$key,$name);
colas@0
   286
        }
colas@0
   287
    } else {
colas@0
   288
        #TODO: if the cache file don't exist, perhaps a warning should be issued to the logs?
colas@0
   289
        open LANGUAGE,">$TWiki::cfg{LocalesDir}/languages.cache";
colas@0
   290
        foreach my $tag ( available_languages() ) {
colas@0
   291
            my $h = TWiki::I18N->get_handle($tag);
colas@0
   292
            my $name = $h->maketext("_language_name");
colas@0
   293
            $name = $this->toSiteCharSet($name); 
colas@0
   294
            _add_language( $this,$tag, $name);
colas@0
   295
            print LANGUAGE "$tag=$name\n";
colas@0
   296
        }
colas@0
   297
    }
colas@0
   298
colas@0
   299
    close LANGUAGE;
colas@0
   300
    $this->{checked_enabled} = 1;
colas@0
   301
colas@0
   302
}
colas@0
   303
colas@0
   304
colas@0
   305
=pod
colas@0
   306
colas@0
   307
---++ ObjectMethod fromSiteCharSet ( $text ) -> $encoded
colas@0
   308
colas@0
   309
This method receives =$text=, assumed to be encoded in {Site}{CharSet}, and
colas@0
   310
converts it to a internal representation.
colas@0
   311
colas@0
   312
Currently this representation will be a UTF-8 string, but this may change in
colas@0
   313
the future. This way, you can't assume any property on the returned value, and
colas@0
   314
should only use the returned value of this function as input to toSiteCharSet.
colas@0
   315
If you change the returnd value, either by removing, updating or appending
colas@0
   316
characters, be sure to touch only ASCII characters (i.e., characters that have
colas@0
   317
ord() less than 128).
colas@0
   318
colas@0
   319
=cut
colas@0
   320
colas@0
   321
sub fromSiteCharSet {
colas@0
   322
    my ( $this, $text ) = @_;
colas@0
   323
colas@0
   324
    return $text if( !defined  $TWiki::cfg{Site}{CharSet} ||
colas@0
   325
                       $TWiki::cfg{Site}{CharSet} =~ m/^utf-?8$/i);
colas@0
   326
colas@0
   327
    if ($] < 5.008) {
colas@0
   328
        # use Unicode::MapUTF8 for Perl older than 5.8
colas@0
   329
        require Unicode::MapUTF8;
colas@0
   330
        my $encoding = $TWiki::cfg{Site}{CharSet};
colas@0
   331
        if ( Unicode::MapUTF8::utf8_supported_charset($encoding) ) {
colas@0
   332
          return Unicode::MapUTF8::to_utf8 ({
colas@0
   333
                                             -string => $text,
colas@0
   334
                                             -charset => $encoding
colas@0
   335
                                            });
colas@0
   336
        } else {
colas@0
   337
          $this->{session}->writeWarning
colas@0
   338
            ( 'Conversion from $encoding no supported, '.
colas@0
   339
              'or name not recognised - check perldoc Unicode::MapUTF8' );
colas@0
   340
          return $text;
colas@0
   341
        }
colas@0
   342
    } else {
colas@0
   343
        # good Perl version, just use Encode
colas@0
   344
        require Encode;
colas@0
   345
        import Encode;
colas@0
   346
        my $encoding = Encode::resolve_alias( $TWiki::cfg{Site}{CharSet} );
colas@0
   347
        if ( not $encoding ) {
colas@0
   348
            $this->{session}->writeWarning
colas@0
   349
              ( 'Conversion to "'.$TWiki::cfg{Site}{CharSet}.
colas@0
   350
                '" not supported, or name not recognised - check '.
colas@0
   351
                '"perldoc Encode::Supported"' );
colas@0
   352
            return undef;
colas@0
   353
        } else {
colas@0
   354
            my $octets = Encode::decode ( $encoding, $text, &Encode::FB_PERLQQ() );
colas@0
   355
            return Encode::encode ( 'utf-8', $octets );
colas@0
   356
        }
colas@0
   357
    }
colas@0
   358
}
colas@0
   359
colas@0
   360
=pod
colas@0
   361
colas@0
   362
colas@0
   363
---++ ObjectMethod toSiteCharSet ( $encoded ) -> $text
colas@0
   364
colas@0
   365
This method receives a string, assumed to be encoded in TWiki's internal string
colas@0
   366
representation (as generated by the fromSiteCharSet method, and converts it
colas@0
   367
into {Site}{CharSet}.
colas@0
   368
colas@0
   369
When converting into {Site}{CharSet}, characters that are not present at that
colas@0
   370
charset are represented as HTML numerical character entities (NCR's), in the
colas@0
   371
format <code>&amp;#NNNN;</code>, where NNNN is the character's Unicode
colas@0
   372
codepoint.
colas@0
   373
colas@0
   374
See also: the =fromSiteCharSet= method.
colas@0
   375
colas@0
   376
=cut
colas@0
   377
colas@0
   378
sub toSiteCharSet {
colas@0
   379
    my ( $this, $encoded ) = @_;
colas@0
   380
colas@0
   381
    return $encoded if( !defined $TWiki::cfg{Site}{CharSet} ||
colas@0
   382
                          $TWiki::cfg{Site}{CharSet} =~ m/^utf-?8$/i);
colas@0
   383
colas@0
   384
    if ( $] < 5.008 ) {
colas@0
   385
        # use Unicode::MapUTF8 for Perl older than 5.8
colas@0
   386
        require Unicode::MapUTF8;
colas@0
   387
        my $encoding = $TWiki::cfg{Site}{CharSet};
colas@0
   388
        if ( Unicode::MapUTF8::utf8_supported_charset($encoding) ) {
colas@0
   389
          return Unicode::MapUTF8::from_utf8 ({
colas@0
   390
                                             -string => $encoded,
colas@0
   391
                                             -charset => $encoding
colas@0
   392
                                            });
colas@0
   393
        } else {
colas@0
   394
          $this->{session}->writeWarning
colas@0
   395
            ( 'Conversion to $encoding no supported, '.
colas@0
   396
              'or name not recognised - check perldoc Unicode::MapUTF8' );
colas@0
   397
          return $encoded;
colas@0
   398
        }
colas@0
   399
    } else {
colas@0
   400
        require Encode;
colas@0
   401
        import Encode;
colas@0
   402
        my $encoding = Encode::resolve_alias ( $TWiki::cfg{Site}{CharSet} );
colas@0
   403
        if ( not $encoding ) {
colas@0
   404
            $this->{session}->writeWarning
colas@0
   405
              ( 'Conversion from "'.$TWiki::cfg{Site}{CharSet}.
colas@0
   406
                '" not supported, or name not recognised - check '.
colas@0
   407
                '"perldoc Encode::Supported"' );
colas@0
   408
            return $encoded;
colas@0
   409
        } else {
colas@0
   410
            # converts to {Site}{CharSet}, generating HTML NCR's when needed
colas@0
   411
            my $octets = Encode::decode ( 'utf-8', $encoded );
colas@0
   412
            return Encode::encode ( $encoding, $octets, &Encode::FB_HTMLCREF() );
colas@0
   413
        }
colas@0
   414
    }
colas@0
   415
}
colas@0
   416
colas@0
   417
colas@0
   418
# private utility method: add a pair tag/language name
colas@0
   419
sub _add_language {
colas@0
   420
    my ( $this, $tag, $name ) = @_;  
colas@0
   421
    ${$this->{enabled_languages}}{$tag} = $name;
colas@0
   422
}
colas@0
   423
colas@0
   424
1;