colas@0
|
1 |
package Locale::Maketext::Lexicon::Gettext;
|
colas@0
|
2 |
$Locale::Maketext::Lexicon::Gettext::VERSION = '0.14';
|
colas@0
|
3 |
|
colas@0
|
4 |
use strict;
|
colas@0
|
5 |
|
colas@0
|
6 |
=head1 NAME
|
colas@0
|
7 |
|
colas@0
|
8 |
Locale::Maketext::Lexicon::Gettext - PO and MO file parser for Maketext
|
colas@0
|
9 |
|
colas@0
|
10 |
=head1 SYNOPSIS
|
colas@0
|
11 |
|
colas@0
|
12 |
Called via B<Locale::Maketext::Lexicon>:
|
colas@0
|
13 |
|
colas@0
|
14 |
package Hello::I18N;
|
colas@0
|
15 |
use base 'Locale::Maketext';
|
colas@0
|
16 |
use Locale::Maketext::Lexicon {
|
colas@0
|
17 |
de => [Gettext => 'hello/de.mo'],
|
colas@0
|
18 |
};
|
colas@0
|
19 |
|
colas@0
|
20 |
Directly calling C<parse()>:
|
colas@0
|
21 |
|
colas@0
|
22 |
use Locale::Maketext::Lexicon::Gettext;
|
colas@0
|
23 |
my %Lexicon = %{ Locale::Maketext::Lexicon::Gettext->parse(<DATA>) };
|
colas@0
|
24 |
__DATA__
|
colas@0
|
25 |
#: Hello.pm:10
|
colas@0
|
26 |
msgid "Hello, World!"
|
colas@0
|
27 |
msgstr "Hallo, Welt!"
|
colas@0
|
28 |
|
colas@0
|
29 |
#: Hello.pm:11
|
colas@0
|
30 |
msgid "You have %quant(%1,piece) of mail."
|
colas@0
|
31 |
msgstr "Sie haben %quant(%1,Poststueck,Poststuecken)."
|
colas@0
|
32 |
|
colas@0
|
33 |
=head1 DESCRIPTION
|
colas@0
|
34 |
|
colas@0
|
35 |
This module implements a perl-based C<Gettext> parser for
|
colas@0
|
36 |
B<Locale::Maketext>. It transforms all C<%1>, C<%2>, <%*>... sequences
|
colas@0
|
37 |
to C<[_1]>, C<[_2]>, C<[_*]>, and so on. It accepts either plain PO
|
colas@0
|
38 |
file, or a MO file which will be handled with a pure-perl parser
|
colas@0
|
39 |
adapted from Imacat's C<Locale::Maketext::Gettext>.
|
colas@0
|
40 |
|
colas@0
|
41 |
Since version 0.03, this module also looks for C<%I<function>(I<args...>)>
|
colas@0
|
42 |
in the lexicon strings, and transform it to C<[I<function>,I<args...>]>.
|
colas@0
|
43 |
Any C<%1>, C<%2>... sequences inside the I<args> will have their percent
|
colas@0
|
44 |
signs (C<%>) replaced by underscores (C<_>).
|
colas@0
|
45 |
|
colas@0
|
46 |
The name of I<function> above should begin with a letter or underscore,
|
colas@0
|
47 |
followed by any number of alphanumeric characters and/or underscores.
|
colas@0
|
48 |
As an exception, the function name may also consist of a single asterisk
|
colas@0
|
49 |
(C<*>) or pound sign (C<#>), which are C<Locale::Maketext>'s shorthands
|
colas@0
|
50 |
for C<quant> and C<numf>, respectively.
|
colas@0
|
51 |
|
colas@0
|
52 |
As an additional feature, this module also parses MIME-header style
|
colas@0
|
53 |
metadata specified in the null msgstr (C<"">), and add them to the
|
colas@0
|
54 |
C<%Lexicon> with a C<__> prefix. For example, the example above will
|
colas@0
|
55 |
set C<__Content-Type> to C<text/plain; charset=iso8859-1>, without
|
colas@0
|
56 |
the newline or the colon.
|
colas@0
|
57 |
|
colas@0
|
58 |
Any normal entry that duplicates a metadata entry takes precedence.
|
colas@0
|
59 |
Hence, a C<msgid "__Content-Type"> line occurs anywhere should override
|
colas@0
|
60 |
the above value.
|
colas@0
|
61 |
|
colas@0
|
62 |
=head1 OPTIONS
|
colas@0
|
63 |
|
colas@0
|
64 |
=head2 use_fuzzy
|
colas@0
|
65 |
|
colas@0
|
66 |
When parsing PO files, fuzzy entries (entries marked with C<#, fuzzy>)
|
colas@0
|
67 |
are silently ignored. If you wish to use fuzzy entries, specify a true
|
colas@0
|
68 |
value to the C<_use_fuzzy> option:
|
colas@0
|
69 |
|
colas@0
|
70 |
use Locale::Maketext::Lexicon {
|
colas@0
|
71 |
de => [Gettext => 'hello/de.mo'],
|
colas@0
|
72 |
_use_fuzzy => 1,
|
colas@0
|
73 |
};
|
colas@0
|
74 |
|
colas@0
|
75 |
=head2 allow_empty
|
colas@0
|
76 |
|
colas@0
|
77 |
When parsing PO files, empty entries (entries with C<msgstr "">) are
|
colas@0
|
78 |
silently ignored. If you wish to allow empty entries, specify a true
|
colas@0
|
79 |
value to the C<_allow_empty> option:
|
colas@0
|
80 |
|
colas@0
|
81 |
use Locale::Maketext::Lexicon {
|
colas@0
|
82 |
de => [Gettext => 'hello/de.mo'],
|
colas@0
|
83 |
_allow_empty => 1,
|
colas@0
|
84 |
};
|
colas@0
|
85 |
|
colas@0
|
86 |
=cut
|
colas@0
|
87 |
|
colas@0
|
88 |
my ($InputEncoding, $OutputEncoding, $DoEncoding);
|
colas@0
|
89 |
|
colas@0
|
90 |
sub input_encoding { $InputEncoding };
|
colas@0
|
91 |
sub output_encoding { $OutputEncoding };
|
colas@0
|
92 |
|
colas@0
|
93 |
sub parse {
|
colas@0
|
94 |
my $self = shift;
|
colas@0
|
95 |
my (%var, $key, @ret);
|
colas@0
|
96 |
my @metadata;
|
colas@0
|
97 |
|
colas@0
|
98 |
$InputEncoding = $OutputEncoding = $DoEncoding = undef;
|
colas@0
|
99 |
|
colas@0
|
100 |
use Carp;
|
colas@0
|
101 |
Carp::cluck "Undefined source called\n" unless defined $_[0];
|
colas@0
|
102 |
|
colas@0
|
103 |
# Check for magic string of MO files
|
colas@0
|
104 |
return parse_mo(join('', @_))
|
colas@0
|
105 |
if ($_[0] =~ /^\x95\x04\x12\xde/ or $_[0] =~ /^\xde\x12\x04\x95/);
|
colas@0
|
106 |
|
colas@0
|
107 |
local $^W; # no 'uninitialized' warnings, please.
|
colas@0
|
108 |
|
colas@0
|
109 |
require Locale::Maketext::Lexicon;
|
colas@0
|
110 |
my $UseFuzzy = Locale::Maketext::Lexicon::option('use_fuzzy');
|
colas@0
|
111 |
my $AllowEmpty = Locale::Maketext::Lexicon::option('allow_empty');
|
colas@0
|
112 |
my $process = sub {
|
colas@0
|
113 |
if ( length($var{msgstr}) and ($UseFuzzy or !$var{fuzzy}) ) {
|
colas@0
|
114 |
push @ret, (map transform($_), @var{'msgid', 'msgstr'});
|
colas@0
|
115 |
}
|
colas@0
|
116 |
elsif ( $AllowEmpty ) {
|
colas@0
|
117 |
push @ret, (transform($var{msgid}), '');
|
colas@0
|
118 |
}
|
colas@0
|
119 |
push @metadata, parse_metadata($var{msgstr})
|
colas@0
|
120 |
if $var{msgid} eq '';
|
colas@0
|
121 |
%var = ();
|
colas@0
|
122 |
};
|
colas@0
|
123 |
|
colas@0
|
124 |
# Parse PO files
|
colas@0
|
125 |
foreach (@_) {
|
colas@0
|
126 |
s/[\015\012]*\z//; # fix CRLF issues
|
colas@0
|
127 |
|
colas@0
|
128 |
/^(msgid|msgstr) +"(.*)" *$/ ? do { # leading strings
|
colas@0
|
129 |
$var{$1} = $2;
|
colas@0
|
130 |
$key = $1;
|
colas@0
|
131 |
} :
|
colas@0
|
132 |
|
colas@0
|
133 |
/^"(.*)" *$/ ? do { # continued strings
|
colas@0
|
134 |
$var{$key} .= $1;
|
colas@0
|
135 |
} :
|
colas@0
|
136 |
|
colas@0
|
137 |
/^#, +(.*) *$/ ? do { # control variables
|
colas@0
|
138 |
$var{$_} = 1 for split(/,\s+/, $1);
|
colas@0
|
139 |
} :
|
colas@0
|
140 |
|
colas@0
|
141 |
/^ *$/ && %var ? do { # interpolate string escapes
|
colas@0
|
142 |
$process->($_);
|
colas@0
|
143 |
} : ();
|
colas@0
|
144 |
}
|
colas@0
|
145 |
# do not silently skip last entry
|
colas@0
|
146 |
$process->() if keys %var != 0;
|
colas@0
|
147 |
|
colas@0
|
148 |
push @ret, map { transform($_) } @var{'msgid', 'msgstr'}
|
colas@0
|
149 |
if length $var{msgstr};
|
colas@0
|
150 |
push @metadata, parse_metadata($var{msgstr})
|
colas@0
|
151 |
if $var{msgid} eq '';
|
colas@0
|
152 |
|
colas@0
|
153 |
return {@metadata, @ret};
|
colas@0
|
154 |
}
|
colas@0
|
155 |
|
colas@0
|
156 |
sub parse_metadata {
|
colas@0
|
157 |
return map {
|
colas@0
|
158 |
(/^([^\x00-\x1f\x80-\xff :=]+):\s*(.*)$/) ?
|
colas@0
|
159 |
($1 eq 'Content-Type') ? do {
|
colas@0
|
160 |
my $enc = $2;
|
colas@0
|
161 |
if ($enc =~ /\bcharset=\s*([-\w]+)/i) {
|
colas@0
|
162 |
$InputEncoding = $1 || '';
|
colas@0
|
163 |
$OutputEncoding = Locale::Maketext::Lexicon::encoding() || '';
|
colas@0
|
164 |
$InputEncoding = 'utf8' if $InputEncoding =~ /^utf-?8$/i;
|
colas@0
|
165 |
$OutputEncoding = 'utf8' if $OutputEncoding =~ /^utf-?8$/i;
|
colas@0
|
166 |
if ( Locale::Maketext::Lexicon::option('decode') and
|
colas@0
|
167 |
(!$OutputEncoding or $InputEncoding ne $OutputEncoding)) {
|
colas@0
|
168 |
require Encode::compat if $] < 5.007001;
|
colas@0
|
169 |
require Encode;
|
colas@0
|
170 |
$DoEncoding = 1;
|
colas@0
|
171 |
}
|
colas@0
|
172 |
}
|
colas@0
|
173 |
("__Content-Type", $enc);
|
colas@0
|
174 |
} : ("__$1", $2)
|
colas@0
|
175 |
: ();
|
colas@0
|
176 |
} split(/\r*\n+\r*/, transform(pop));
|
colas@0
|
177 |
}
|
colas@0
|
178 |
|
colas@0
|
179 |
sub transform {
|
colas@0
|
180 |
my $str = shift;
|
colas@0
|
181 |
|
colas@0
|
182 |
if ($DoEncoding and $InputEncoding) {
|
colas@0
|
183 |
$str = ($InputEncoding eq 'utf8')
|
colas@0
|
184 |
? Encode::decode_utf8($str)
|
colas@0
|
185 |
: Encode::decode($InputEncoding, $str)
|
colas@0
|
186 |
}
|
colas@0
|
187 |
|
colas@0
|
188 |
$str =~ s/\\([0x]..|c?.)/qq{"\\$1"}/eeg;
|
colas@0
|
189 |
|
colas@0
|
190 |
if ($DoEncoding and $OutputEncoding) {
|
colas@0
|
191 |
$str = ($OutputEncoding eq 'utf8')
|
colas@0
|
192 |
? Encode::encode_utf8($str)
|
colas@0
|
193 |
: Encode::encode($OutputEncoding, $str)
|
colas@0
|
194 |
}
|
colas@0
|
195 |
|
colas@0
|
196 |
$str =~ s/([~\[\]])/~$1/g;
|
colas@0
|
197 |
$str =~ s/(?<![%\\])%([A-Za-z#*]\w*)\(([^\)]*)\)/[$1,~~~$2~~~]/g;
|
colas@0
|
198 |
$str = join('', map {
|
colas@0
|
199 |
/^~~~.*~~~$/ ? unescape(substr($_, 3, -3)) : $_
|
colas@0
|
200 |
} split(/(~~~.*?~~~)/, $str));
|
colas@0
|
201 |
$str =~ s/(?<![%\\])%(\d+|\*)/\[_$1]/g;
|
colas@0
|
202 |
|
colas@0
|
203 |
return $str;
|
colas@0
|
204 |
}
|
colas@0
|
205 |
|
colas@0
|
206 |
sub unescape {
|
colas@0
|
207 |
join(',', map {
|
colas@0
|
208 |
/^%(?:\d+|\*)$/ ? ("_" . substr($_, 1)) : $_
|
colas@0
|
209 |
} split(/,/, $_[0]));
|
colas@0
|
210 |
}
|
colas@0
|
211 |
|
colas@0
|
212 |
# This subroutine was derived from Locale::Maketext::Gettext::readmo()
|
colas@0
|
213 |
# under the Perl License; the original author is Yi Ma Mao (IMACAT).
|
colas@0
|
214 |
sub parse_mo {
|
colas@0
|
215 |
my $content = shift;
|
colas@0
|
216 |
my $tmpl = (substr($content, 0, 4) eq "\xde\x12\x04\x95") ? 'V' : 'N';
|
colas@0
|
217 |
|
colas@0
|
218 |
# Check the MO format revision number
|
colas@0
|
219 |
# There is only one revision now: revision 0.
|
colas@0
|
220 |
return if unpack($tmpl, substr($content, 4, 4)) > 0;
|
colas@0
|
221 |
|
colas@0
|
222 |
my ($num, $offo, $offt);
|
colas@0
|
223 |
# Number of strings
|
colas@0
|
224 |
$num = unpack $tmpl, substr($content, 8, 4);
|
colas@0
|
225 |
# Offset to the beginning of the original strings
|
colas@0
|
226 |
$offo = unpack $tmpl, substr($content, 12, 4);
|
colas@0
|
227 |
# Offset to the beginning of the translated strings
|
colas@0
|
228 |
$offt = unpack $tmpl, substr($content, 16, 4);
|
colas@0
|
229 |
|
colas@0
|
230 |
my (@metadata, @ret);
|
colas@0
|
231 |
for (0 .. $num - 1) {
|
colas@0
|
232 |
my ($len, $off, $stro, $strt);
|
colas@0
|
233 |
# The first word is the length of the string
|
colas@0
|
234 |
$len = unpack $tmpl, substr($content, $offo+$_*8, 4);
|
colas@0
|
235 |
# The second word is the offset of the string
|
colas@0
|
236 |
$off = unpack $tmpl, substr($content, $offo+$_*8+4, 4);
|
colas@0
|
237 |
# Original string
|
colas@0
|
238 |
$stro = substr($content, $off, $len);
|
colas@0
|
239 |
|
colas@0
|
240 |
# The first word is the length of the string
|
colas@0
|
241 |
$len = unpack $tmpl, substr($content, $offt+$_*8, 4);
|
colas@0
|
242 |
# The second word is the offset of the string
|
colas@0
|
243 |
$off = unpack $tmpl, substr($content, $offt+$_*8+4, 4);
|
colas@0
|
244 |
# Translated string
|
colas@0
|
245 |
$strt = substr($content, $off, $len);
|
colas@0
|
246 |
|
colas@0
|
247 |
# Hash it
|
colas@0
|
248 |
push @metadata, parse_metadata($strt) if $stro eq '';
|
colas@0
|
249 |
push @ret, (map transform($_), $stro, $strt) if length $strt;
|
colas@0
|
250 |
}
|
colas@0
|
251 |
|
colas@0
|
252 |
return {@metadata, @ret};
|
colas@0
|
253 |
}
|
colas@0
|
254 |
|
colas@0
|
255 |
1;
|
colas@0
|
256 |
|
colas@0
|
257 |
=head1 SEE ALSO
|
colas@0
|
258 |
|
colas@0
|
259 |
L<Locale::Maketext>, L<Locale::Maketext::Lexicon>
|
colas@0
|
260 |
|
colas@0
|
261 |
=head1 AUTHORS
|
colas@0
|
262 |
|
colas@0
|
263 |
Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>
|
colas@0
|
264 |
|
colas@0
|
265 |
=head1 COPYRIGHT
|
colas@0
|
266 |
|
colas@0
|
267 |
Copyright 2002, 2003, 2004 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.
|
colas@0
|
268 |
|
colas@0
|
269 |
This program is free software; you can redistribute it and/or
|
colas@0
|
270 |
modify it under the same terms as Perl itself.
|
colas@0
|
271 |
|
colas@0
|
272 |
See L<http://www.perl.com/perl/misc/Artistic.html>
|
colas@0
|
273 |
|
colas@0
|
274 |
=cut
|