1 package Locale::Maketext::Lexicon::Gettext;
2 $Locale::Maketext::Lexicon::Gettext::VERSION = '0.14';
8 Locale::Maketext::Lexicon::Gettext - PO and MO file parser for Maketext
12 Called via B<Locale::Maketext::Lexicon>:
15 use base 'Locale::Maketext';
16 use Locale::Maketext::Lexicon {
17 de => [Gettext => 'hello/de.mo'],
20 Directly calling C<parse()>:
22 use Locale::Maketext::Lexicon::Gettext;
23 my %Lexicon = %{ Locale::Maketext::Lexicon::Gettext->parse(<DATA>) };
30 msgid "You have %quant(%1,piece) of mail."
31 msgstr "Sie haben %quant(%1,Poststueck,Poststuecken)."
35 This module implements a perl-based C<Gettext> parser for
36 B<Locale::Maketext>. It transforms all C<%1>, C<%2>, <%*>... sequences
37 to C<[_1]>, C<[_2]>, C<[_*]>, and so on. It accepts either plain PO
38 file, or a MO file which will be handled with a pure-perl parser
39 adapted from Imacat's C<Locale::Maketext::Gettext>.
41 Since version 0.03, this module also looks for C<%I<function>(I<args...>)>
42 in the lexicon strings, and transform it to C<[I<function>,I<args...>]>.
43 Any C<%1>, C<%2>... sequences inside the I<args> will have their percent
44 signs (C<%>) replaced by underscores (C<_>).
46 The name of I<function> above should begin with a letter or underscore,
47 followed by any number of alphanumeric characters and/or underscores.
48 As an exception, the function name may also consist of a single asterisk
49 (C<*>) or pound sign (C<#>), which are C<Locale::Maketext>'s shorthands
50 for C<quant> and C<numf>, respectively.
52 As an additional feature, this module also parses MIME-header style
53 metadata specified in the null msgstr (C<"">), and add them to the
54 C<%Lexicon> with a C<__> prefix. For example, the example above will
55 set C<__Content-Type> to C<text/plain; charset=iso8859-1>, without
56 the newline or the colon.
58 Any normal entry that duplicates a metadata entry takes precedence.
59 Hence, a C<msgid "__Content-Type"> line occurs anywhere should override
66 When parsing PO files, fuzzy entries (entries marked with C<#, fuzzy>)
67 are silently ignored. If you wish to use fuzzy entries, specify a true
68 value to the C<_use_fuzzy> option:
70 use Locale::Maketext::Lexicon {
71 de => [Gettext => 'hello/de.mo'],
77 When parsing PO files, empty entries (entries with C<msgstr "">) are
78 silently ignored. If you wish to allow empty entries, specify a true
79 value to the C<_allow_empty> option:
81 use Locale::Maketext::Lexicon {
82 de => [Gettext => 'hello/de.mo'],
88 my ($InputEncoding, $OutputEncoding, $DoEncoding);
90 sub input_encoding { $InputEncoding };
91 sub output_encoding { $OutputEncoding };
95 my (%var, $key, @ret);
98 $InputEncoding = $OutputEncoding = $DoEncoding = undef;
101 Carp::cluck "Undefined source called\n" unless defined $_[0];
103 # Check for magic string of MO files
104 return parse_mo(join('', @_))
105 if ($_[0] =~ /^\x95\x04\x12\xde/ or $_[0] =~ /^\xde\x12\x04\x95/);
107 local $^W; # no 'uninitialized' warnings, please.
109 require Locale::Maketext::Lexicon;
110 my $UseFuzzy = Locale::Maketext::Lexicon::option('use_fuzzy');
111 my $AllowEmpty = Locale::Maketext::Lexicon::option('allow_empty');
113 if ( length($var{msgstr}) and ($UseFuzzy or !$var{fuzzy}) ) {
114 push @ret, (map transform($_), @var{'msgid', 'msgstr'});
116 elsif ( $AllowEmpty ) {
117 push @ret, (transform($var{msgid}), '');
119 push @metadata, parse_metadata($var{msgstr})
120 if $var{msgid} eq '';
126 s/[\015\012]*\z//; # fix CRLF issues
128 /^(msgid|msgstr) +"(.*)" *$/ ? do { # leading strings
133 /^"(.*)" *$/ ? do { # continued strings
137 /^#, +(.*) *$/ ? do { # control variables
138 $var{$_} = 1 for split(/,\s+/, $1);
141 /^ *$/ && %var ? do { # interpolate string escapes
145 # do not silently skip last entry
146 $process->() if keys %var != 0;
148 push @ret, map { transform($_) } @var{'msgid', 'msgstr'}
149 if length $var{msgstr};
150 push @metadata, parse_metadata($var{msgstr})
151 if $var{msgid} eq '';
153 return {@metadata, @ret};
158 (/^([^\x00-\x1f\x80-\xff :=]+):\s*(.*)$/) ?
159 ($1 eq 'Content-Type') ? do {
161 if ($enc =~ /\bcharset=\s*([-\w]+)/i) {
162 $InputEncoding = $1 || '';
163 $OutputEncoding = Locale::Maketext::Lexicon::encoding() || '';
164 $InputEncoding = 'utf8' if $InputEncoding =~ /^utf-?8$/i;
165 $OutputEncoding = 'utf8' if $OutputEncoding =~ /^utf-?8$/i;
166 if ( Locale::Maketext::Lexicon::option('decode') and
167 (!$OutputEncoding or $InputEncoding ne $OutputEncoding)) {
168 require Encode::compat if $] < 5.007001;
173 ("__Content-Type", $enc);
176 } split(/\r*\n+\r*/, transform(pop));
182 if ($DoEncoding and $InputEncoding) {
183 $str = ($InputEncoding eq 'utf8')
184 ? Encode::decode_utf8($str)
185 : Encode::decode($InputEncoding, $str)
188 $str =~ s/\\([0x]..|c?.)/qq{"\\$1"}/eeg;
190 if ($DoEncoding and $OutputEncoding) {
191 $str = ($OutputEncoding eq 'utf8')
192 ? Encode::encode_utf8($str)
193 : Encode::encode($OutputEncoding, $str)
196 $str =~ s/([~\[\]])/~$1/g;
197 $str =~ s/(?<![%\\])%([A-Za-z#*]\w*)\(([^\)]*)\)/[$1,~~~$2~~~]/g;
198 $str = join('', map {
199 /^~~~.*~~~$/ ? unescape(substr($_, 3, -3)) : $_
200 } split(/(~~~.*?~~~)/, $str));
201 $str =~ s/(?<![%\\])%(\d+|\*)/\[_$1]/g;
208 /^%(?:\d+|\*)$/ ? ("_" . substr($_, 1)) : $_
209 } split(/,/, $_[0]));
212 # This subroutine was derived from Locale::Maketext::Gettext::readmo()
213 # under the Perl License; the original author is Yi Ma Mao (IMACAT).
216 my $tmpl = (substr($content, 0, 4) eq "\xde\x12\x04\x95") ? 'V' : 'N';
218 # Check the MO format revision number
219 # There is only one revision now: revision 0.
220 return if unpack($tmpl, substr($content, 4, 4)) > 0;
222 my ($num, $offo, $offt);
224 $num = unpack $tmpl, substr($content, 8, 4);
225 # Offset to the beginning of the original strings
226 $offo = unpack $tmpl, substr($content, 12, 4);
227 # Offset to the beginning of the translated strings
228 $offt = unpack $tmpl, substr($content, 16, 4);
230 my (@metadata, @ret);
231 for (0 .. $num - 1) {
232 my ($len, $off, $stro, $strt);
233 # The first word is the length of the string
234 $len = unpack $tmpl, substr($content, $offo+$_*8, 4);
235 # The second word is the offset of the string
236 $off = unpack $tmpl, substr($content, $offo+$_*8+4, 4);
238 $stro = substr($content, $off, $len);
240 # The first word is the length of the string
241 $len = unpack $tmpl, substr($content, $offt+$_*8, 4);
242 # The second word is the offset of the string
243 $off = unpack $tmpl, substr($content, $offt+$_*8+4, 4);
245 $strt = substr($content, $off, $len);
248 push @metadata, parse_metadata($strt) if $stro eq '';
249 push @ret, (map transform($_), $stro, $strt) if length $strt;
252 return {@metadata, @ret};
259 L<Locale::Maketext>, L<Locale::Maketext::Lexicon>
263 Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>
267 Copyright 2002, 2003, 2004 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.
269 This program is free software; you can redistribute it and/or
270 modify it under the same terms as Perl itself.
272 See L<http://www.perl.com/perl/misc/Artistic.html>