lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/Node.pm
author Colas Nahaboo <colas@nahaboo.net>
Sat, 26 Jan 2008 15:50:53 +0100
changeset 0 414e01d06fd5
child 1 e2915a7cbdfa
permissions -rw-r--r--
RELEASE 4.2.0 freetown
colas@0
     1
# Copyright (C) 2005 ILOG http://www.ilog.fr
colas@0
     2
# and TWiki Contributors. All Rights Reserved. TWiki Contributors
colas@0
     3
# are listed in the AUTHORS file in the root of this distribution.
colas@0
     4
# NOTE: Please extend that file, not this notice.
colas@0
     5
#
colas@0
     6
# This program is free software; you can redistribute it and/or
colas@0
     7
# modify it under the terms of the GNU General Public License
colas@0
     8
# as published by the Free Software Foundation; either version 2
colas@0
     9
# of the License, or (at your option) any later version. For
colas@0
    10
# more details read LICENSE in the root of the TWiki distribution.
colas@0
    11
#
colas@0
    12
# This program is distributed in the hope that it will be useful,
colas@0
    13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
colas@0
    14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
colas@0
    15
#
colas@0
    16
# As per the GPL, removal of this notice is prohibited.
colas@0
    17
colas@0
    18
# The generator works by expanding and HTML parse tree to "decorated"
colas@0
    19
# text, where the decorators are non-printable characters. These characters
colas@0
    20
# act to express format requirements - for example, the need to have a
colas@0
    21
# newline before some text, or the need for a space. Whitespace is then
colas@0
    22
# collapsed down to the minimum that satisfies the format requirements.
colas@0
    23
colas@0
    24
=pod
colas@0
    25
colas@0
    26
---+ package TWiki::Plugins::WysiwygPlugin::HTML2TML::Node;
colas@0
    27
colas@0
    28
Object for storing a parsed HTML tag, and processing it
colas@0
    29
to generate TML from the parse tree.
colas@0
    30
colas@0
    31
See also TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf
colas@0
    32
colas@0
    33
=cut
colas@0
    34
colas@0
    35
package TWiki::Plugins::WysiwygPlugin::HTML2TML::Node;
colas@0
    36
use base 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Base';
colas@0
    37
colas@0
    38
use strict;
colas@0
    39
colas@0
    40
use TWiki::Func; # needed for regular expressions
colas@0
    41
use Assert;
colas@0
    42
colas@0
    43
use vars qw( $reww );
colas@0
    44
colas@0
    45
require TWiki::Plugins::WysiwygPlugin::Constants;
colas@0
    46
require TWiki::Plugins::WysiwygPlugin::HTML2TML::WC;
colas@0
    47
colas@0
    48
=pod
colas@0
    49
colas@0
    50
---++ ObjectMethod new( $context, $tag, \%attrs )
colas@0
    51
colas@0
    52
Construct a new HTML tag node using the given tag name
colas@0
    53
and attribute hash.
colas@0
    54
colas@0
    55
=cut
colas@0
    56
colas@0
    57
sub new {
colas@0
    58
    my( $class, $context, $tag, $attrs ) = @_;
colas@0
    59
colas@0
    60
    my $this = {};
colas@0
    61
colas@0
    62
    $this->{context} = $context;
colas@0
    63
    $this->{tag} = $tag;
colas@0
    64
    $this->{attrs} = {};
colas@0
    65
    if( $attrs ) {
colas@0
    66
        foreach my $attr ( keys %$attrs ) {
colas@0
    67
            $this->{attrs}->{lc($attr)} = $attrs->{$attr};
colas@0
    68
        }
colas@0
    69
    }
colas@0
    70
    $this->{head} = $this->{tail} = undef;
colas@0
    71
colas@0
    72
    return bless( $this, $class );
colas@0
    73
}
colas@0
    74
colas@0
    75
# debug
colas@0
    76
sub stringify {
colas@0
    77
    my( $this, $shallow ) = @_;
colas@0
    78
    my $r = '';
colas@0
    79
    if( $this->{tag} ) {
colas@0
    80
        $r .= '<'.$this->{tag};
colas@0
    81
        foreach my $attr ( keys %{$this->{attrs}} ) {
colas@0
    82
            $r .= " ".$attr."='".$this->{attrs}->{$attr}."'";
colas@0
    83
        }
colas@0
    84
        $r .= '>';
colas@0
    85
    }
colas@0
    86
    if( $shallow ) {
colas@0
    87
        $r .= '...';
colas@0
    88
    } else {
colas@0
    89
        my $kid = $this->{head};
colas@0
    90
        while ($kid) {
colas@0
    91
            $r .= $kid->stringify();
colas@0
    92
            $kid = $kid->{next};
colas@0
    93
        }
colas@0
    94
    }
colas@0
    95
    if( $this->{tag} ) {
colas@0
    96
        $r .= '</'.$this->{tag}.'>';
colas@0
    97
    }
colas@0
    98
    return $r;
colas@0
    99
}
colas@0
   100
colas@0
   101
=pod
colas@0
   102
colas@0
   103
---++ ObjectMethod addChild( $node )
colas@0
   104
colas@0
   105
Add a child node to the ordered list of children of this node
colas@0
   106
colas@0
   107
=cut
colas@0
   108
colas@0
   109
sub addChild {
colas@0
   110
    my( $this, $node ) = @_;
colas@0
   111
colas@0
   112
    ASSERT($node != $this) if DEBUG;
colas@0
   113
colas@0
   114
    $node->{next} = undef;
colas@0
   115
    $node->{parent} = $this;
colas@0
   116
    my $kid = $this->{tail};
colas@0
   117
    if ($kid) {
colas@0
   118
        $kid->{next} = $node;
colas@0
   119
        $node->{prev} = $kid;
colas@0
   120
    } else {
colas@0
   121
        $node->{prev} = undef;
colas@0
   122
        $this->{head} = $node;
colas@0
   123
    }
colas@0
   124
    $this->{tail} = $node;
colas@0
   125
}
colas@0
   126
colas@0
   127
# top and tail a string
colas@0
   128
sub _trim {
colas@0
   129
    my $s = shift;
colas@0
   130
colas@0
   131
    # Item5076: removed CHECKn from the following exprs, because loss of it
colas@0
   132
    # breaks line-sensitive TML content inside flattened content.
colas@0
   133
    $s =~ s/^[ \t\n$WC::CHECKw$WC::CHECKs]+/$WC::CHECKw/o;
colas@0
   134
    $s =~ s/[ \t\n$WC::CHECKw]+$/$WC::CHECKw/o;
colas@0
   135
    return $s;
colas@0
   136
}
colas@0
   137
colas@0
   138
# Both object method and static method
colas@0
   139
sub hasClass {
colas@0
   140
    my ($this, $class) = @_;
colas@0
   141
    return 0 unless $this;
colas@0
   142
    if (UNIVERSAL::isa($this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node')) {
colas@0
   143
        return hasClass($this->{attrs}, $class);
colas@0
   144
    }
colas@0
   145
    return 0 unless defined $this->{class};
colas@0
   146
    return $this->{class} =~ /\b$class\b/ ? 1 : 0;
colas@0
   147
}
colas@0
   148
colas@0
   149
# Both object method and static method
colas@0
   150
sub _removeClass {
colas@0
   151
    my ($this, $class) = @_;
colas@0
   152
    return 0 unless $this;
colas@0
   153
    if (UNIVERSAL::isa($this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node')) {
colas@0
   154
        return _removeClass($this->{attrs}, $class);
colas@0
   155
    }
colas@0
   156
    return 0 unless hasClass($this, $class);
colas@0
   157
    $this->{class} =~ s/\b$class\b//;
colas@0
   158
    $this->{class} =~ s/\s+/ /g;
colas@0
   159
    $this->{class} =~ s/^\s+//;
colas@0
   160
    $this->{class} =~ s/\s+$//;
colas@0
   161
    if (!$this->{class}) {
colas@0
   162
        delete $this->{class};
colas@0
   163
    }
colas@0
   164
    return 1;
colas@0
   165
}
colas@0
   166
colas@0
   167
# Both object method and static method
colas@0
   168
sub _addClass {
colas@0
   169
    my ($this, $class) = @_;
colas@0
   170
    if (UNIVERSAL::isa($this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node')) {
colas@0
   171
        _addClass($this->{attrs}, $class);
colas@0
   172
        return;
colas@0
   173
    }
colas@0
   174
    _removeClass($this, $class); # avoid duplication
colas@0
   175
    if ($this->{class}) {
colas@0
   176
        $this->{class} .= ' '.$class;
colas@0
   177
    } else {
colas@0
   178
        $this->{class} = $class;
colas@0
   179
    }
colas@0
   180
}
colas@0
   181
colas@0
   182
# Move the content of $node into $this
colas@0
   183
sub _eat {
colas@0
   184
    my ($this, $node) = @_;
colas@0
   185
    my $kid = $this->{tail};
colas@0
   186
    if ($kid) {
colas@0
   187
        $kid->{next} = $node->{head};
colas@0
   188
        if ($node->{head}) {
colas@0
   189
            $node->{head}->{prev} = $kid;
colas@0
   190
        }
colas@0
   191
    } else {
colas@0
   192
        $this->{head} = $node->{head};
colas@0
   193
    }
colas@0
   194
    $this->{tail} = $node->{tail};
colas@0
   195
    $kid = $node->{head};
colas@0
   196
    while ($kid) {
colas@0
   197
        $kid->{parent} = $this;
colas@0
   198
        $kid = $kid->{next};
colas@0
   199
    }
colas@0
   200
    $node->{head} = $node->{tail} = undef;
colas@0
   201
}
colas@0
   202
colas@0
   203
=pod
colas@0
   204
colas@0
   205
---++ ObjectMethod rootGenerate($opts) -> $text
colas@0
   206
colas@0
   207
Generates TML from this HTML node. The generation is done
colas@0
   208
top down and bottom up, so that higher level nodes can make
colas@0
   209
decisions on whether to allow TML conversion in lower nodes,
colas@0
   210
and lower level nodes can constrain conversion in higher level
colas@0
   211
nodes.
colas@0
   212
colas@0
   213
$opts is a bitset. $WC::VERY_CLEAN will cause the generator
colas@0
   214
to drop unrecognised HTML (e.g. divs and spans that don't
colas@0
   215
generate TML)
colas@0
   216
colas@0
   217
=cut
colas@0
   218
colas@0
   219
sub rootGenerate {
colas@0
   220
    my( $this, $opts ) = @_;
colas@0
   221
colas@0
   222
    $this->cleanParseTree();
colas@0
   223
colas@0
   224
    # Perform some transformations on the parse tree
colas@0
   225
    $this->_collapse();
colas@0
   226
colas@0
   227
    my( $f, $text ) = $this->generate($opts);
colas@0
   228
colas@0
   229
    # Debug support
colas@0
   230
    #print STDERR "Converted ",WC::debugEncode($text),"\n";
colas@0
   231
colas@0
   232
    # Move leading \n out of protected region. Delicate hack fix required to
colas@0
   233
    # maintain TWiki variables at the start of lines.
colas@0
   234
    $text =~ s/$WC::PON$WC::NBBR/$WC::CHECKn$WC::PON/g;
colas@0
   235
colas@0
   236
    # isolate whitespace checks and convert to $NBSP
colas@0
   237
    $text =~ s/$WC::CHECKw$WC::CHECKw+/$WC::CHECKw/go;
colas@0
   238
    $text =~ s/([$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::TAB$WC::NBBR]($WC::PON|$WC::POFF)?)$WC::CHECKw/$1/go;
colas@0
   239
    $text =~ s/$WC::CHECKw(($WC::PON|$WC::POFF)?[$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::NBBR])/$1/go;
colas@0
   240
    $text =~ s/^($WC::CHECKw)+//gos;
colas@0
   241
    $text =~ s/($WC::CHECKw)+$//gos;
colas@0
   242
    $text =~ s/($WC::CHECKw)+/$WC::NBSP/go;
colas@0
   243
colas@0
   244
    # isolate $CHECKs and convert to $NBSP
colas@0
   245
    $text =~ s/$WC::CHECKs$WC::CHECKs+/$WC::CHECKs/go;
colas@0
   246
    $text =~ s/([ $WC::NBSP$WC::TAB])$WC::CHECKs/$1/go;
colas@0
   247
    $text =~ s/$WC::CHECKs( |$WC::NBSP)/$1/go;
colas@0
   248
    $text =~ s/($WC::CHECKs)+/$WC::NBSP/go;
colas@0
   249
colas@0
   250
    $text =~ s/<br( \/)?>$WC::NBBR/$WC::NBBR/g; # Remove BR before P
colas@0
   251
colas@0
   252
    #die "Converted ",WC::debugEncode($text),"\n";
colas@0
   253
colas@0
   254
    my @regions = split(/([$WC::PON$WC::POFF])/o, $text);
colas@0
   255
    my $protect = 0;
colas@0
   256
    $text = '';
colas@0
   257
    foreach my $tml (@regions) {
colas@0
   258
        if ($tml eq $WC::PON) {
colas@0
   259
            $protect++;
colas@0
   260
            next;
colas@0
   261
        } elsif ($tml eq $WC::POFF) {
colas@0
   262
            $protect--;
colas@0
   263
            next;
colas@0
   264
        }
colas@0
   265
colas@0
   266
        # isolate $NBBR and convert to \n.
colas@0
   267
        unless ($protect) {
colas@0
   268
            $tml =~ s/\n$WC::NBBR/$WC::NBBR$WC::NBBR/go;
colas@0
   269
            $tml =~ s/$WC::NBBR\n/$WC::NBBR$WC::NBBR/go;
colas@0
   270
            $tml =~ s/$WC::NBBR( |$WC::NBSP)+$WC::NBBR/$WC::NBBR$WC::NBBR/go;
colas@0
   271
            $tml =~ s/ +$WC::NBBR/$WC::NBBR/go;
colas@0
   272
            $tml =~ s/$WC::NBBR +/$WC::NBBR/go;
colas@0
   273
            $tml =~ s/$WC::NBBR$WC::NBBR+/$WC::NBBR$WC::NBBR/go;
colas@0
   274
colas@0
   275
            # Now convert adjacent NBBRs to recreate empty lines
colas@0
   276
            # 1 NBBR  -> 1 newline
colas@0
   277
            # 2 NBBRs -> <p /> - 1 blank line - 2 newlines
colas@0
   278
            # 3 NBBRs -> 3 newlines
colas@0
   279
            # 4 NBBRs -> <p /><p /> - 3 newlines
colas@0
   280
            # 5 NBBRs -> 4 newlines
colas@0
   281
            # 6 NBBRs -> <p /><p /><p /> - 3 blank lines - 4 newlines
colas@0
   282
            # 7 NBBRs -> 5 newlines
colas@0
   283
            # 8 NBBRs -> <p /><p /><p /><p /> - 4 blank lines - 5 newlines
colas@0
   284
            $tml =~ s.($WC::NBBR$WC::NBBR$WC::NBBR$WC::NBBR+).
colas@0
   285
              "\n" x ((length($1) + 1) / 2 + 1)
colas@0
   286
                .geo;
colas@0
   287
        }
colas@0
   288
        # isolate $CHECKn and convert to $NBBR
colas@0
   289
        $tml =~ s/$WC::CHECKn([$WC::NBSP $WC::TAB])*$WC::CHECKn/$WC::CHECKn/go;
colas@0
   290
        $tml =~ s/$WC::CHECKn$WC::CHECKn+/$WC::CHECKn/go;
colas@0
   291
        $tml =~ s/(?<=$WC::NBBR)$WC::CHECKn//gom;
colas@0
   292
        $tml =~ s/$WC::CHECKn(?=$WC::NBBR)//gom;
colas@0
   293
        $tml =~ s/$WC::CHECKn+/$WC::NBBR/gos;
colas@0
   294
colas@0
   295
        $tml =~ s/$WC::NBBR/\n/gos;
colas@0
   296
colas@0
   297
        # Convert tabs to NBSP
colas@0
   298
        $tml =~ s/$WC::TAB/$WC::NBSP$WC::NBSP$WC::NBSP/go;
colas@0
   299
colas@0
   300
        # isolate $NBSP and convert to space
colas@0
   301
        unless ($protect) {
colas@0
   302
            $tml =~ s/ +$WC::NBSP/$WC::NBSP/go;
colas@0
   303
            $tml =~ s/$WC::NBSP +/$WC::NBSP/go;
colas@0
   304
        }
colas@0
   305
        $tml =~ s/$WC::NBSP/ /go;
colas@0
   306
colas@0
   307
        $tml =~ s/$WC::CHECK1$WC::CHECK1+/$WC::CHECK1/go;
colas@0
   308
        $tml =~ s/$WC::CHECK2$WC::CHECK2+/$WC::CHECK2/go;
colas@0
   309
        $tml =~ s/$WC::CHECK2$WC::CHECK1/$WC::CHECK2/go;
colas@0
   310
colas@0
   311
        $tml =~ s/(^|[\s\(])$WC::CHECK1/$1/gso;
colas@0
   312
        $tml =~ s/$WC::CHECK2($|[\s\,\.\;\:\!\?\)\*])/$1/gso;
colas@0
   313
colas@0
   314
        $tml =~ s/$WC::CHECK1(\s|$)/$1/gso;
colas@0
   315
        $tml =~ s/(^|\s)$WC::CHECK2/$1/gso;
colas@0
   316
colas@0
   317
        $tml =~ s/$WC::CHECK1/ /go;
colas@0
   318
        $tml =~ s/$WC::CHECK2/ /go;
colas@0
   319
        #print STDERR WC::debugEncode($before);
colas@0
   320
        #print STDERR " -> '",WC::debugEncode($tml),"'\n";
colas@0
   321
        $text .= $tml;
colas@0
   322
    }
colas@0
   323
    # Collapse adjacent tags
colas@0
   324
    foreach my $tag qw(noautolink verbatim literal) {
colas@0
   325
        $text =~ s#</$tag>(\s*)<$tag>#$1#gs;
colas@0
   326
    }
colas@0
   327
    # Top and tail, and terminate with a single newline
colas@0
   328
    $text =~ s/^\n*//s;
colas@0
   329
    $text =~ s/\s*$/\n/s;
colas@0
   330
colas@0
   331
    # Item5127: Remove BR just before EOLs
colas@0
   332
    $text =~ s/<br( \/)?>\n/\n/g;
colas@0
   333
colas@0
   334
    return $text;
colas@0
   335
}
colas@0
   336
colas@0
   337
# Collapse adjacent VERBATIM nodes together
colas@0
   338
# Collapse a <p> than contains only a protected span into a protected P
colas@0
   339
# Collapse em in em
colas@0
   340
# Collapse adjacent text nodes
colas@0
   341
sub _collapse {
colas@0
   342
    my $this = shift;
colas@0
   343
colas@0
   344
    my @jobs = ( $this );
colas@0
   345
    while (scalar(@jobs)) {
colas@0
   346
        my $node = shift(@jobs);
colas@0
   347
        if (defined($node->{tag}) && $node->hasClass('TMLverbatim')) {
colas@0
   348
            my $next = $node->{next};
colas@0
   349
            my @edible;
colas@0
   350
            my $collapsible;
colas@0
   351
            while ($next &&
colas@0
   352
                     ((!$next->{tag} && $next->{text} =~ /^\s*$/) ||
colas@0
   353
                          ($node->{tag} eq $next->{tag} &&
colas@0
   354
                             $next->hasClass('TMLverbatim')))) {
colas@0
   355
                push(@edible, $next);
colas@0
   356
                $collapsible ||= $next->hasClass('TMLverbatim');
colas@0
   357
                $next = $next->{next};
colas@0
   358
            }
colas@0
   359
            if ($collapsible) {
colas@0
   360
                foreach my $meal (@edible) {
colas@0
   361
                    $meal->_remove();
colas@0
   362
                    if ($meal->{tag}) {
colas@0
   363
                        require TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf;
colas@0
   364
                        $node->addChild(new TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf($WC::NBBR));
colas@0
   365
                        $node->_eat($meal);
colas@0
   366
                    }
colas@0
   367
                }
colas@0
   368
            }
colas@0
   369
        }
colas@0
   370
        if ($node->{tag} eq 'p' &&
colas@0
   371
              $node->{head} && $node->{head} == $node->{tail}) {
colas@0
   372
            my $kid = $node->{head};
colas@0
   373
            if ($kid->{tag} eq 'SPAN' &&
colas@0
   374
                  $kid->hasClass('WYSIWYG_PROTECTED')) {
colas@0
   375
                $kid->_remove();
colas@0
   376
                $node->_eat($kid);
colas@0
   377
                $node->_addClass('WYSIWYG_PROTECTED');
colas@0
   378
            }
colas@0
   379
        }
colas@0
   380
colas@0
   381
        # If this is an emphasis (b, i, code, tt, strong) then
colas@0
   382
        # flatten out any child nodes that express the same emphasis.
colas@0
   383
        # This has to be done because TWiki emphases are single level.
colas@0
   384
        if ($WC::EMPHTAG{$node->{tag}}) {
colas@0
   385
            my $kid = $node->{head};
colas@0
   386
            while ($kid) {
colas@0
   387
                if ($WC::EMPHTAG{$kid->{tag}} &&
colas@0
   388
                      $WC::EMPHTAG{$kid->{tag}} eq
colas@0
   389
                        $WC::EMPHTAG{$node->{tag}}) {
colas@0
   390
                    $kid = $kid->_inline();
colas@0
   391
                } else {
colas@0
   392
                    $kid = $kid->{next};
colas@0
   393
                }
colas@0
   394
            }
colas@0
   395
        }
colas@0
   396
        $node->_combineLeaves();
colas@0
   397
colas@0
   398
        my $kid = $node->{head};
colas@0
   399
        while ($kid) {
colas@0
   400
            push(@jobs, $kid);
colas@0
   401
            $kid = $kid->{next};
colas@0
   402
        }
colas@0
   403
    }
colas@0
   404
}
colas@0
   405
colas@0
   406
# the actual generate function. rootGenerate is only applied to the root node.
colas@0
   407
sub generate {
colas@0
   408
    my( $this, $options ) = @_;
colas@0
   409
    my $fn;
colas@0
   410
    my $flags;
colas@0
   411
    my $text;
colas@0
   412
colas@0
   413
     if ($this->_isProtectedByAttrs()) {
colas@0
   414
         return $this->_defaultTag($options);
colas@0
   415
     }
colas@0
   416
colas@0
   417
    my $tag = $this->{tag};
colas@0
   418
colas@0
   419
    if ($this->hasClass('WYSIWYG_LITERAL')) {
colas@0
   420
        if ($tag eq 'div' || $tag eq 'p' || $tag eq 'span') {
colas@0
   421
            $text = '';
colas@0
   422
            my $kid = $this->{head};
colas@0
   423
            while ($kid) {
colas@0
   424
                $text .= $kid->stringify();
colas@0
   425
                $kid = $kid->{next};
colas@0
   426
            }
colas@0
   427
        } else {
colas@0
   428
            $this->_removeClass('WYSIWYG_LITERAL');
colas@0
   429
            $text = $this->stringify();
colas@0
   430
        }
colas@0
   431
        return ( 0, '<literal>'.$text.'</literal>' );
colas@0
   432
    }
colas@0
   433
colas@0
   434
    if( $options & $WC::NO_HTML ) {
colas@0
   435
        # NO_HTML implies NO_TML
colas@0
   436
        my $brats = $this->_flatten( $options );
colas@0
   437
        return ( 0, $brats );
colas@0
   438
    }
colas@0
   439
colas@0
   440
    if( $options & $WC::NO_TML ) {
colas@0
   441
        return ( 0, $this->stringify() );
colas@0
   442
    }
colas@0
   443
colas@0
   444
    # make the names of the function versions
colas@0
   445
    $tag =~ s/!//; # DOCTYPE
colas@0
   446
    my $tmlFn = '_handle'.uc($tag);
colas@0
   447
colas@0
   448
    # See if we have a TML translation function for this tag
colas@0
   449
    # the translation functions will work out the rendering
colas@0
   450
    # of their own children.
colas@0
   451
    if( $this->{tag} && defined( &$tmlFn ) ) {
colas@0
   452
        no strict 'refs';
colas@0
   453
        ( $flags, $text ) = &$tmlFn( $this, $options );
colas@0
   454
        use strict 'refs';
colas@0
   455
        # if the function returns undef, drop through
colas@0
   456
        return ( $flags, $text ) if defined $text;
colas@0
   457
    }
colas@0
   458
colas@0
   459
    # No translation, so we need the text of the children
colas@0
   460
    ( $flags, $text ) = $this->_flatten( $options );
colas@0
   461
colas@0
   462
    # just return the text if there is no tag name
colas@0
   463
    return ( $flags, $text ) unless $this->{tag};
colas@0
   464
colas@0
   465
    return $this->_defaultTag( $options );
colas@0
   466
}
colas@0
   467
colas@0
   468
# Return the children flattened out subject to the options
colas@0
   469
sub _flatten {
colas@0
   470
    my( $this, $options ) = @_;
colas@0
   471
    my $text = '';
colas@0
   472
    my $flags = 0;
colas@0
   473
colas@0
   474
    my $protected = ($options & $WC::PROTECTED) ||
colas@0
   475
      $this->hasClass('WYSIWYG_PROTECTED') ||
colas@0
   476
        $this->hasClass('WYSIWYG_STICKY') || 0;
colas@0
   477
colas@0
   478
    if ($protected) {
colas@0
   479
        # Expand brs, which are used in the protected encoding in place of
colas@0
   480
        # newlines, and protect whitespace
colas@0
   481
        $options |= $WC::BR2NL | $WC::KEEP_WS;
colas@0
   482
    }
colas@0
   483
colas@0
   484
    my $kid = $this->{head};
colas@0
   485
    while ($kid) {
colas@0
   486
        my( $f, $t ) = $kid->generate( $options );
colas@0
   487
        if (!($options & $WC::KEEP_WS)
colas@0
   488
              && $text && $text =~ /\w$/ && $t =~ /^\w/) {
colas@0
   489
            # if the last child ends in a \w and this child
colas@0
   490
            # starts in a \w, we need to insert a space
colas@0
   491
            $text .= ' ';
colas@0
   492
        }
colas@0
   493
        $text .= $t;
colas@0
   494
        $flags |= $f;
colas@0
   495
        $kid = $kid->{next};
colas@0
   496
    }
colas@0
   497
    if ($protected) {
colas@0
   498
        $text =~ s/[$WC::PON$WC::POFF]//g;
colas@0
   499
colas@0
   500
        unless ($options & $WC::KEEP_ENTITIES) {
colas@0
   501
            require HTML::Entities;
colas@0
   502
            $text = HTML::Entities::decode_entities($text);
colas@0
   503
            # &nbsp; decodes to \240, which we want to make a space.
colas@0
   504
            $text =~ s/\240/$WC::NBSP/g;
colas@0
   505
        }
colas@0
   506
        $text =~ s/ /$WC::NBSP/g;
colas@0
   507
        $text =~ s/\n/$WC::NBBR/g;
colas@0
   508
        $text = $WC::PON.$text.$WC::POFF;
colas@0
   509
    }
colas@0
   510
colas@0
   511
    $text = _trim($text) unless ($options & $WC::KEEP_WS);
colas@0
   512
colas@0
   513
    return ( $flags, $text );
colas@0
   514
}
colas@0
   515
colas@0
   516
# $cutClasses is an RE matching class names to cut
colas@0
   517
sub _htmlParams {
colas@0
   518
    my ( $attrs, $options ) = @_;
colas@0
   519
    my @params;
colas@0
   520
colas@0
   521
    while (my ($k, $v) = each %$attrs ) {
colas@0
   522
        next unless $k;
colas@0
   523
        if( $k eq 'class' ) {
colas@0
   524
            # if cleaning aggressively, remove class attributes completely
colas@0
   525
            next if ($options & $WC::VERY_CLEAN);
colas@0
   526
            foreach my $c qw(WYSIWYG_PROTECTED WYSIWYG_STICKY TMLverbatim WYSIWYG_LINK) {
colas@0
   527
                $v =~ s/\b$c\b//;
colas@0
   528
            }
colas@0
   529
            $v =~ s/\s+/ /;
colas@0
   530
            $v =~ s/^\s*(.*?)\s*$/$1/;
colas@0
   531
            next unless $v;
colas@0
   532
        }
colas@0
   533
        my $q = $v =~ /"/ ? "'" : '"';
colas@0
   534
        push( @params, $k.'='.$q.$v.$q );
colas@0
   535
    }
colas@0
   536
    my $p = join( ' ', @params );
colas@0
   537
    return '' unless $p;
colas@0
   538
    return ' '.$p;
colas@0
   539
}
colas@0
   540
colas@0
   541
# generate the default representation of an HTML tag
colas@0
   542
sub _defaultTag {
colas@0
   543
    my( $this, $options ) = @_;
colas@0
   544
    my( $flags, $text ) = $this->_flatten( $options );
colas@0
   545
    my $tag = $this->{tag};
colas@0
   546
    my $p = _htmlParams( $this->{attrs}, $options );
colas@0
   547
colas@0
   548
    if( $text =~ /^\s*$/ && $WC::SELFCLOSING{$tag}) {
colas@0
   549
        return ( $flags, '<'.$tag.$p.' />' );
colas@0
   550
    } else {
colas@0
   551
        return ( $flags, '<'.$tag.$p.'>'.$text.'</'.$tag.'>' );
colas@0
   552
    }
colas@0
   553
}
colas@0
   554
colas@0
   555
# Check to see if the HTML tag is protected by the presence of
colas@0
   556
# specific attributes that block conversion to TML. The conversion
colas@0
   557
# table is defined in 
colas@0
   558
sub _isProtectedByAttrs {
colas@0
   559
    my $this = shift;
colas@0
   560
colas@0
   561
    require TWiki::Plugins::WysiwygPlugin;
colas@0
   562
    foreach my $attr (keys %{$this->{attrs}}) {
colas@0
   563
        next unless length($this->{attrs}->{$attr}); # ignore nulls
colas@0
   564
        return $attr if TWiki::Plugins::WysiwygPlugin::protectedByAttr(
colas@0
   565
            $this->{tag}, $attr);
colas@0
   566
    }
colas@0
   567
    return 0;
colas@0
   568
}
colas@0
   569
colas@0
   570
# perform conversion on a list type
colas@0
   571
sub _convertList {
colas@0
   572
    my( $this, $indent ) = @_;
colas@0
   573
    my $basebullet;
colas@0
   574
    my $isdl = ( $this->{tag} eq 'dl' );
colas@0
   575
colas@0
   576
    if( $isdl ) {
colas@0
   577
        $basebullet = '';
colas@0
   578
    } elsif( $this->{tag} eq 'ol' ) {
colas@0
   579
        $basebullet = '1';
colas@0
   580
    } else {
colas@0
   581
        $basebullet = '*';
colas@0
   582
    }
colas@0
   583
colas@0
   584
    my $f;
colas@0
   585
    my $text = '';
colas@0
   586
    my $pendingDT = 0;
colas@0
   587
    my $kid = $this->{head};
colas@0
   588
    while ($kid) {
colas@0
   589
        # be tolerant of dl, ol and ul with no li
colas@0
   590
        if( $kid->{tag} =~ m/^[dou]l$/i ) {
colas@0
   591
            $text .= $kid->_convertList( $indent.$WC::TAB );
colas@0
   592
            $kid = $kid->{next};
colas@0
   593
            next;
colas@0
   594
        }
colas@0
   595
        unless ($kid->{tag} =~ m/^(dt|dd|li)$/i) {
colas@0
   596
            $kid = $kid->{next};
colas@0
   597
            next;
colas@0
   598
        }
colas@0
   599
        if( $isdl && ( $kid->{tag} eq 'dt' )) {
colas@0
   600
            # DT, set the bullet type for subsequent DT
colas@0
   601
            $basebullet = $kid->_flatten( $WC::NO_BLOCK_TML );
colas@0
   602
            $basebullet =~ s/[\s$WC::CHECKw$WC::CHECKs]+$//;
colas@0
   603
            $basebullet .= ':';
colas@0
   604
            $basebullet =~ s/$WC::CHECKn/ /g;
colas@0
   605
            $basebullet =~ s/^\s+//;
colas@0
   606
            $basebullet = '$ '.$basebullet;
colas@0
   607
            $pendingDT = 1; # remember in case there is no DD
colas@0
   608
            $kid = $kid->{next};
colas@0
   609
            next;
colas@0
   610
        }
colas@0
   611
        my $bullet = $basebullet;
colas@0
   612
        if( $basebullet eq '1' && $kid->{attrs}->{type} ) {
colas@0
   613
            $bullet = $kid->{attrs}->{type}.'.';
colas@0
   614
        }
colas@0
   615
        my $spawn = '';
colas@0
   616
        my $grandkid = $kid->{head};
colas@0
   617
        while ($grandkid) {
colas@0
   618
            my $t;
colas@0
   619
            if( $grandkid->{tag} =~ /^[dou]l$/i ) {
colas@0
   620
                #$spawn = _trim( $spawn );
colas@0
   621
                $t = $grandkid->_convertList( $indent.$WC::TAB );
colas@0
   622
            } else {
colas@0
   623
                ( $f, $t ) = $grandkid->generate( $WC::NO_BLOCK_TML );
colas@0
   624
                $t =~ s/$WC::CHECKn/ /g;
colas@0
   625
            }
colas@0
   626
            $spawn .= $t;
colas@0
   627
            $grandkid = $grandkid->{next};
colas@0
   628
        }
colas@0
   629
        #$spawn = _trim($spawn);
colas@0
   630
        $text .= $WC::CHECKn.$indent.$bullet.$WC::CHECKs.$spawn.$WC::CHECKn;
colas@0
   631
        $pendingDT = 0;
colas@0
   632
        $basebullet = '' if $isdl;
colas@0
   633
        $kid = $kid->{next};
colas@0
   634
    }
colas@0
   635
    if( $pendingDT ) {
colas@0
   636
        # DT with no corresponding DD
colas@0
   637
        $text .= $WC::CHECKn.$indent.$basebullet.$WC::CHECKn;
colas@0
   638
    }
colas@0
   639
    return $text;
colas@0
   640
}
colas@0
   641
colas@0
   642
# probe down into a list type to determine if it
colas@0
   643
# can be converted to TML.
colas@0
   644
sub _isConvertableList {
colas@0
   645
    my( $this, $options ) = @_;
colas@0
   646
colas@0
   647
    return 0 if ($this->_isProtectedByAttrs());
colas@0
   648
colas@0
   649
    my $kid = $this->{head};
colas@0
   650
    while ($kid) {
colas@0
   651
        # check for malformed list. We can still handle it,
colas@0
   652
        # by simply ignoring illegal text.
colas@0
   653
        # be tolerant of dl, ol and ul with no li
colas@0
   654
        if( $kid->{tag} =~ m/^[dou]l$/i ) {
colas@0
   655
            return 0 unless $kid->_isConvertableList( $options );
colas@0
   656
        } elsif( $kid->{tag} =~ m/^(dt|dd|li)$/i ) {
colas@0
   657
            unless( $kid->_isConvertableListItem( $options, $this )) {
colas@0
   658
                return 0;
colas@0
   659
            }
colas@0
   660
        }
colas@0
   661
        $kid = $kid->{next};
colas@0
   662
    }
colas@0
   663
    return 1;
colas@0
   664
}
colas@0
   665
colas@0
   666
# probe down into a list item to determine if the
colas@0
   667
# containing list can be converted to TML.
colas@0
   668
sub _isConvertableListItem {
colas@0
   669
    my( $this, $options, $parent ) = @_;
colas@0
   670
    my( $flags, $text );
colas@0
   671
colas@0
   672
    return 0 if ($this->_isProtectedByAttrs());
colas@0
   673
colas@0
   674
    if( $parent->{tag} eq 'dl' ) {
colas@0
   675
        return 0 unless( $this->{tag} =~ /^d[td]$/i );
colas@0
   676
    } else {
colas@0
   677
        return 0 unless( $this->{tag} eq 'li' );
colas@0
   678
    }
colas@0
   679
colas@0
   680
    my $kid = $this->{head};
colas@0
   681
    while ($kid) {
colas@0
   682
        if( $kid->{tag} =~ /^[oud]l$/i ) {
colas@0
   683
            unless( $kid->_isConvertableList( $options )) {
colas@0
   684
                return 0;
colas@0
   685
            }
colas@0
   686
        } else {
colas@0
   687
            ( $flags, $text ) = $kid->generate( $options );
colas@0
   688
            if( $flags & $WC::BLOCK_TML ) {
colas@0
   689
                return 0;
colas@0
   690
            }
colas@0
   691
        }
colas@0
   692
        $kid = $kid->{next};
colas@0
   693
    }
colas@0
   694
    return 1;
colas@0
   695
}
colas@0
   696
colas@0
   697
# probe down into a list type to determine if it
colas@0
   698
# can be converted to TML.
colas@0
   699
sub _isConvertableTable {
colas@0
   700
    my( $this, $options, $table ) = @_;
colas@0
   701
colas@0
   702
    if ($this->_isProtectedByAttrs()) {
colas@0
   703
        return 0;
colas@0
   704
    }
colas@0
   705
colas@0
   706
    my $kid = $this->{head};
colas@0
   707
    while ($kid) {
colas@0
   708
        if( $kid->{tag} =~ /^(colgroup|thead|tbody|tfoot|col)$/ ) {
colas@0
   709
            unless ($kid->_isConvertableTable( $options, $table )) {
colas@0
   710
                return 0;
colas@0
   711
            }
colas@0
   712
        } elsif( $kid->{tag} ) {
colas@0
   713
            unless ($kid->{tag} eq 'tr') {
colas@0
   714
                return 0;
colas@0
   715
            }
colas@0
   716
            my $row = $kid->_isConvertableTableRow( $options );
colas@0
   717
            unless ($row) {
colas@0
   718
                return 0;
colas@0
   719
            }
colas@0
   720
            push( @$table, $row );
colas@0
   721
        }
colas@0
   722
        $kid = $kid->{next};
colas@0
   723
    }
colas@0
   724
    return 1;
colas@0
   725
}
colas@0
   726
colas@0
   727
# Tidy up whitespace on the sides of a table cell, and also strip trailing
colas@0
   728
# BRs, as added by some table editors.
colas@0
   729
sub _TDtrim {
colas@0
   730
    my $td = shift;
colas@0
   731
    $td =~ s/^($WC::NBSP|$WC::NBBR|$WC::CHECKn|$WC::CHECKs|$WC::CHECKw|$WC::CHECK1|$WC::CHECK2|$WC::TAB|\s)+//so;
colas@0
   732
    $td =~ s/(<br \/>|<br>|$WC::NBSP|$WC::NBBR|$WC::CHECKn|$WC::CHECKs|$WC::CHECKw|$WC::CHECK1|$WC::CHECK2|$WC::TAB|\s)+$//so;
colas@0
   733
    return $td;
colas@0
   734
}
colas@0
   735
colas@0
   736
# probe down into a list item to determine if the
colas@0
   737
# containing table can be converted to TML.
colas@0
   738
sub _isConvertableTableRow {
colas@0
   739
    my( $this, $options ) = @_;
colas@0
   740
colas@0
   741
    return 0 if ($this->_isProtectedByAttrs());
colas@0
   742
colas@0
   743
    my( $flags, $text );
colas@0
   744
    my @row;
colas@0
   745
    my $ignoreCols = 0;
colas@0
   746
    my $kid = $this->{head};
colas@0
   747
    while ($kid) {
colas@0
   748
        if ($kid->{tag} eq 'th') {
colas@0
   749
            ( $flags, $text ) = $kid->_flatten( $options );
colas@0
   750
            $text = _TDtrim( $text );
colas@0
   751
            $text = "*$text*" if length($text);
colas@0
   752
        } elsif ($kid->{tag} eq 'td' ) {
colas@0
   753
            ( $flags, $text ) = $kid->_flatten( $options );
colas@0
   754
            $text = _TDtrim( $text );
colas@0
   755
        } elsif( !$kid->{tag} ) {
colas@0
   756
            $kid = $kid->{next};
colas@0
   757
            next;
colas@0
   758
        } else {
colas@0
   759
            # some other sort of (unexpected) tag
colas@0
   760
            return 0;
colas@0
   761
        }
colas@0
   762
        return 0 if( $flags & $WC::BLOCK_TML );
colas@0
   763
colas@0
   764
        if( $kid->{attrs} ) {
colas@0
   765
            my $a = _deduceAlignment( $kid );
colas@0
   766
            if( $text && $a eq 'right' ) {
colas@0
   767
                $text = $WC::NBSP.$text;
colas@0
   768
            } elsif( $text && $a eq 'center' ) {
colas@0
   769
                $text = $WC::NBSP.$text.$WC::NBSP;
colas@0
   770
            } elsif( $text && $a eq 'left' ) {
colas@0
   771
                $text .= $WC::NBSP;
colas@0
   772
            }
colas@0
   773
            if( $kid->{attrs}->{rowspan} && $kid->{attrs}->{rowspan} > 1 ) {
colas@0
   774
                return 0;
colas@0
   775
            }
colas@0
   776
        }
colas@0
   777
        $text =~ s/&nbsp;/$WC::NBSP/g;
colas@0
   778
        #if (--$ignoreCols > 0) {
colas@0
   779
        #    # colspanned
colas@0
   780
        #    $text = '';
colas@0
   781
        #} els
colas@0
   782
        if ($text =~ /^$WC::NBSP*$/) {
colas@0
   783
            $text = $WC::NBSP;
colas@0
   784
        } else {
colas@0
   785
            $text = $WC::NBSP.$text.$WC::NBSP;
colas@0
   786
        }
colas@0
   787
        if( $kid->{attrs} && $kid->{attrs}->{colspan} &&
colas@0
   788
              $kid->{attrs}->{colspan} > 1 ) {
colas@0
   789
            $ignoreCols = $kid->{attrs}->{colspan};
colas@0
   790
        }
colas@0
   791
        # Pad to allow wikiwords to work
colas@0
   792
        push( @row, $text );
colas@0
   793
        while ($ignoreCols > 1) {
colas@0
   794
            push( @row, '' );
colas@0
   795
            $ignoreCols--;
colas@0
   796
        }
colas@0
   797
        $kid = $kid->{next};
colas@0
   798
    }
colas@0
   799
    return \@row;
colas@0
   800
}
colas@0
   801
colas@0
   802
# Work out the alignment of a table cell from the style and/or class
colas@0
   803
sub _deduceAlignment {
colas@0
   804
    my $td = shift;
colas@0
   805
colas@0
   806
    if( $td->{attrs}->{align} ) {
colas@0
   807
        return lc( $td->{attrs}->{align} );
colas@0
   808
    } else {
colas@0
   809
        if( $td->{attrs}->{style} &&
colas@0
   810
              $td->{attrs}->{style} =~ /text-align\s*:\s*(left|right|center)/ ) {
colas@0
   811
            return $1;
colas@0
   812
        }
colas@0
   813
        if ($td->hasClass(qr/align-(left|right|center)/)) {
colas@0
   814
            return $1;
colas@0
   815
        }
colas@0
   816
    }
colas@0
   817
    return '';
colas@0
   818
}
colas@0
   819
colas@0
   820
# convert a heading tag
colas@0
   821
sub _H {
colas@0
   822
    my( $this, $options, $depth ) = @_;
colas@0
   823
    my( $flags, $contents ) = $this->_flatten( $options );
colas@0
   824
    return ( 0, undef ) if( $flags & $WC::BLOCK_TML );
colas@0
   825
    my $notoc = '';
colas@0
   826
    if($this->hasClass('notoc')) {
colas@0
   827
        $notoc = '!!';
colas@0
   828
    }
colas@0
   829
    $contents =~ s/^\s+/ /;
colas@0
   830
    $contents =~ s/\s+$//;
colas@0
   831
    my $res = $WC::CHECKn.'---'.('+' x $depth).$notoc.
colas@0
   832
      $WC::CHECKs.$contents.$WC::CHECKn;
colas@0
   833
    return ( $flags | $WC::BLOCK_TML, $res );
colas@0
   834
}
colas@0
   835
colas@0
   836
# generate an emphasis
colas@0
   837
sub _emphasis {
colas@0
   838
    my( $this, $options, $ch ) = @_;
colas@0
   839
    my( $flags, $contents ) = $this->_flatten( $options | $WC::NO_BLOCK_TML );
colas@0
   840
    return ( 0, undef ) if( !defined( $contents ) || ( $flags & $WC::BLOCK_TML ));
colas@0
   841
    # Remove whitespace from either side of the contents, retaining the
colas@0
   842
    # whitespace
colas@0
   843
    $contents =~ s/&nbsp;/$WC::NBSP/go;
colas@0
   844
    $contents =~ /^($WC::WS)(.*?)($WC::WS)$/;
colas@0
   845
    my ($pre, $post) = ($1, $3);
colas@0
   846
    $contents = $2;
colas@0
   847
    return (0, undef) if( $contents =~ /^</ || $contents =~ />$/ );
colas@0
   848
    return (0, '') unless( $contents =~ /\S/ );
colas@0
   849
colas@0
   850
    # Now see if we can collapse the emphases
colas@0
   851
    if ($ch eq '_' && $contents =~ s/^\*(.*)\*$/$1/ ||
colas@0
   852
          $ch eq '*' && $contents =~ s/^_(?!_)(.*)(?<!_)_$/$1/) {
colas@0
   853
        $ch = '__';
colas@0
   854
    } elsif ($ch eq '=' && $contents =~ s/^\*(.*)\*$/$1/ ||
colas@0
   855
          $ch eq '*' && $contents =~ s/^=(?!=)(.*)(?<!=)=$/$1/) {
colas@0
   856
        $ch = '==';
colas@0
   857
    } elsif ($contents =~ /^([*_=]).*\1$/) {
colas@0
   858
        return (0, undef);
colas@0
   859
    }
colas@0
   860
colas@0
   861
    return ( $flags, $pre.$WC::CHECKw.$ch.$contents.$ch.$WC::CHECK2.$post );
colas@0
   862
}
colas@0
   863
colas@0
   864
# generate verbatim for P, SPAN or PRE
colas@0
   865
sub _verbatim {
colas@0
   866
    my ($this, $tag, $options) = @_;
colas@0
   867
colas@0
   868
    $options |= $WC::PROTECTED|$WC::KEEP_ENTITIES|$WC::BR2NL | $WC::KEEP_WS;
colas@0
   869
    my( $flags, $text ) = $this->_flatten($options);
colas@0
   870
    # decode once, and once only
colas@0
   871
    require HTML::Entities;
colas@0
   872
    $text = HTML::Entities::decode_entities($text);
colas@0
   873
    # &nbsp; decodes to \240, which we want to make a space.
colas@0
   874
    $text =~ s/\240/$WC::NBSP/g;
colas@0
   875
    my $p = _htmlParams($this->{attrs}, $options);
colas@0
   876
    return ($flags, "<$tag$p>$text</$tag>");
colas@0
   877
}
colas@0
   878
colas@0
   879
# pseudo-tags that may leak through in TWikiVariables
colas@0
   880
# We have to handle this to avoid a matching close tag </nop>
colas@0
   881
sub _handleNOP {
colas@0
   882
    my( $this, $options ) = @_;
colas@0
   883
    my( $flags, $text ) = $this->_flatten( $options );
colas@0
   884
    return ($flags, '<nop>'.$text);
colas@0
   885
}
colas@0
   886
colas@0
   887
sub _handleNOPRESULT {
colas@0
   888
    my( $this, $options ) = @_;
colas@0
   889
    my( $flags, $text ) = $this->_flatten( $options );
colas@0
   890
    return ($flags, '<nop>'.$text);
colas@0
   891
}
colas@0
   892
colas@0
   893
# tags we ignore completely (contents as well)
colas@0
   894
sub _handleDOCTYPE { return ( 0, '' ); }
colas@0
   895
colas@0
   896
sub _LIST {
colas@0
   897
    my( $this, $options ) = @_;
colas@0
   898
    if( ( $options & $WC::NO_BLOCK_TML ) ||
colas@0
   899
        !$this->_isConvertableList( $options | $WC::NO_BLOCK_TML )) {
colas@0
   900
        return ( 0, undef );
colas@0
   901
    }
colas@0
   902
    return ( $WC::BLOCK_TML, $this->_convertList( $WC::TAB ));
colas@0
   903
}
colas@0
   904
colas@0
   905
# Performs initial cleanup of the parse tree before generation. Walks the
colas@0
   906
# tree, making parent links and removing attributes that don't add value.
colas@0
   907
# This simplifies determining whether a node is to be kept, or flattened
colas@0
   908
# out.
colas@0
   909
# $opts may include $WC::VERY_CLEAN
colas@0
   910
sub cleanNode {
colas@0
   911
    my( $this, $opts ) = @_;
colas@0
   912
    my $a;
colas@0
   913
colas@0
   914
    # Always delete these attrs
colas@0
   915
    foreach $a qw( lang _moz_dirty ) {
colas@0
   916
        delete $this->{attrs}->{$a}
colas@0
   917
          if( defined( $this->{attrs}->{$a} ));
colas@0
   918
    }
colas@0
   919
colas@0
   920
    # Delete these attrs if their value is empty
colas@0
   921
    foreach $a qw( class style ) {
colas@0
   922
        if( defined( $this->{attrs}->{$a} ) &&
colas@0
   923
              $this->{attrs}->{$a} !~ /\S/ ) {
colas@0
   924
            delete $this->{attrs}->{$a};
colas@0
   925
        }
colas@0
   926
    }
colas@0
   927
}
colas@0
   928
colas@0
   929
######################################################
colas@0
   930
# Handlers for different HTML tag types. Each handler returns
colas@0
   931
# a pair (flags,text) containing the result of the expansion.
colas@0
   932
#
colas@0
   933
# There are four ways of handling a tag:
colas@0
   934
# 1. Return (0,undef) which will cause the tag to be output
colas@0
   935
#    as HTML tags.
colas@0
   936
# 2. Return _flatten which will cause the tag to be ignored,
colas@0
   937
#    but the content expanded
colas@0
   938
# 3. Return (0, '') which will cause the tag not to be output
colas@0
   939
# 4. Something else more complex
colas@0
   940
#
colas@0
   941
# Note that tags like TFOOT and DT are handled inside the table
colas@0
   942
# and list processors.
colas@0
   943
# They only have handler methods in case the tag is seen outside
colas@0
   944
# the content of a table or list. In this case they are usually
colas@0
   945
# simply removed from the output.
colas@0
   946
#
colas@0
   947
sub _handleA {
colas@0
   948
    my( $this, $options ) = @_;
colas@0
   949
colas@0
   950
    my( $flags, $text ) = $this->_flatten( $options | $WC::NO_BLOCK_TML );
colas@0
   951
    if( $text && $text =~ /\S/ && $this->{attrs}->{href}) {
colas@0
   952
        # there's text and an href
colas@0
   953
        my $href = $this->{attrs}->{href};
colas@0
   954
        # decode URL params in the href
colas@0
   955
        $href =~ s/%([0-9A-F]{2})/chr(hex($1))/gei;
colas@0
   956
        if( $this->{context} && $this->{context}->{rewriteURL} ) {
colas@0
   957
            $href = $this->{context}->{rewriteURL}->(
colas@0
   958
                $href, $this->{context} );
colas@0
   959
        }
colas@0
   960
        $reww = TWiki::Func::getRegularExpression('wikiWordRegex')
colas@0
   961
          unless $reww;
colas@0
   962
        my $nop = ($options & $WC::NOP_ALL) ? '<nop>' : '';
colas@0
   963
        if( $href =~ /^(\w+\.)?($reww)(#\w+)?$/ ) {
colas@0
   964
            my $web = $1 || '';
colas@0
   965
            my $topic = $2;
colas@0
   966
            my $anchor = $3 || '';
colas@0
   967
            my $cleantext = $text;
colas@0
   968
            $cleantext =~ s/<nop>//g;
colas@0
   969
            $cleantext =~ s/^$this->{context}->{web}\.//;
colas@0
   970
colas@0
   971
            # if the clean text is the known topic we can ignore it
colas@0
   972
            if( ($cleantext eq $href || $href =~ /\.$cleantext$/)) {
colas@0
   973
                return (0, $WC::CHECK1.$nop.$web.$topic.$anchor.$WC::CHECK2);
colas@0
   974
            }
colas@0
   975
        }
colas@0
   976
colas@0
   977
        if( $href =~ /${WC::PROTOCOL}[^?]*$/ && $text eq $href ) {
colas@0
   978
            return (0, $WC::CHECK1.$nop.$text.$WC::CHECK2);
colas@0
   979
        }
colas@0
   980
        if( $text eq $href ) {
colas@0
   981
            return (0, $WC::CHECKw.'['.$nop.'['.$href.']]' );
colas@0
   982
        }
colas@0
   983
        return (0, $WC::CHECKw.'['.$nop.'['.$href.']['.$text.
colas@0
   984
                  ']]' );
colas@0
   985
    } elsif( $this->{attrs}->{name} ) {
colas@0
   986
        # allow anchors to be expanded normally. This won't generate
colas@0
   987
        # wiki anchors, but it's a small price to pay - it would
colas@0
   988
        # be too complex to generate wiki anchors, given their
colas@0
   989
        # line-oriented nature.
colas@0
   990
        return (0, undef);
colas@0
   991
    }
colas@0
   992
    # Otherwise generate nothing
colas@0
   993
    return (0, '');
colas@0
   994
}
colas@0
   995
colas@0
   996
sub _handleABBR { return _flatten( @_ ); };
colas@0
   997
sub _handleACRONYM { return _flatten( @_ ); };
colas@0
   998
sub _handleADDRESS { return _flatten( @_ ); };
colas@0
   999
colas@0
  1000
sub _handleB { return _emphasis( @_, '*' ); }
colas@0
  1001
sub _handleBASE { return ( 0, '' ); }
colas@0
  1002
sub _handleBASEFONT { return ( 0, '' ); }
colas@0
  1003
colas@0
  1004
sub _handleBIG { return( 0, '' ); };
colas@0
  1005
# BLOCKQUOTE
colas@0
  1006
sub _handleBODY { return _flatten( @_ ); }
colas@0
  1007
# BUTTON
colas@0
  1008
colas@0
  1009
sub _handleBR {
colas@0
  1010
    my( $this, $options ) = @_;
colas@0
  1011
    my($f, $kids ) = $this->_flatten( $options );
colas@0
  1012
    # Test conditions for keeping a <br>. These are:
colas@0
  1013
    # 1. We haven't explicitly been told to convert to \n (by BR2NL)
colas@0
  1014
    # 2. We have been told that block TML is illegal
colas@0
  1015
    # 3. The previous node is an inline element node or text node
colas@0
  1016
    # 4. The next node is an inline element or text node
colas@0
  1017
    my $sep = "\n";
colas@0
  1018
    if ($options & $WC::BR2NL) {
colas@0
  1019
    } elsif ($options & $WC::NO_BLOCK_TML) {
colas@0
  1020
        $sep = '<br />';
colas@0
  1021
    } elsif ($this->prevIsInline()) {
colas@0
  1022
        if ($this->isInline()) {
colas@0
  1023
            # Both <br> and </br> cause a NL
colas@0
  1024
            # if this is empty, look at next
colas@0
  1025
            if ($kids !~ /^[\000-\037]*$/ &&
colas@0
  1026
                  $kids !~ /^[\000-\037]*$WC::NBBR/ ||
colas@0
  1027
                    $this->nextIsInline()) {
colas@0
  1028
                $sep = '<br />';
colas@0
  1029
            }
colas@0
  1030
        }
colas@0
  1031
    }
colas@0
  1032
    return ($f, $sep.$kids);
colas@0
  1033
}
colas@0
  1034
colas@0
  1035
sub _handleCAPTION { return (0, '' ); }
colas@0
  1036
# CENTER
colas@0
  1037
# CITE
colas@0
  1038
colas@0
  1039
sub _handleCODE { return _emphasis( @_, '=' ); }
colas@0
  1040
colas@0
  1041
sub _handleCOL { return _flatten( @_ ); };
colas@0
  1042
sub _handleCOLGROUP { return _flatten( @_ ); };
colas@0
  1043
sub _handleDD { return _flatten( @_ ); };
colas@0
  1044
sub _handleDEL { return _flatten( @_ ); };
colas@0
  1045
sub _handleDFN { return _flatten( @_ ); };
colas@0
  1046
# DIR
colas@0
  1047
colas@0
  1048
sub _handleDIV { return _handleP(@_); }
colas@0
  1049
colas@0
  1050
sub _handleDL { return _LIST( @_ ); }
colas@0
  1051
sub _handleDT { return _flatten( @_ ); };
colas@0
  1052
colas@0
  1053
sub _handleEM { return _emphasis( @_, '_' ); }
colas@0
  1054
colas@0
  1055
sub _handleFIELDSET { return _flatten( @_ ); };
colas@0
  1056
colas@0
  1057
sub _handleFONT {
colas@0
  1058
    my( $this, $options ) = @_;
colas@0
  1059
colas@0
  1060
    my %atts = %{$this->{attrs}};
colas@0
  1061
    # Try to convert font tags into %COLOUR%..%ENDCOLOR%
colas@0
  1062
    # First extract the colour
colas@0
  1063
    my $colour;
colas@0
  1064
    if ($atts{style}) {
colas@0
  1065
        my $style = $atts{style};
colas@0
  1066
        if ($style =~ s/(^|\s|;)color\s*:\s*([^\s;]+);?//i) {
colas@0
  1067
            $colour = $2;
colas@0
  1068
            delete $atts{style} if $style =~ /^[\s;]*$/;
colas@0
  1069
        }
colas@0
  1070
    }
colas@0
  1071
    if ($atts{color}) {
colas@0
  1072
        $colour = $atts{color};
colas@0
  1073
        delete $atts{color};
colas@0
  1074
    }
colas@0
  1075
    # The presence of the class forces it to be converted to a
colas@0
  1076
    # TWiki variable
colas@0
  1077
    if (!_removeClass(\%atts, 'WYSIWYG_COLOUR')) {
colas@0
  1078
        delete $atts{class};
colas@0
  1079
        if (scalar(keys %atts) > 0 || !$colour || $colour !~ /^([a-z]+|#[0-9A-Fa-f]{6})$/i) {
colas@0
  1080
            return ( 0, undef );
colas@0
  1081
        }
colas@0
  1082
    }
colas@0
  1083
    # OK, just the colour
colas@0
  1084
    $colour = $WC::KNOWN_COLOUR{uc($colour)};
colas@0
  1085
    if (!$colour) {
colas@0
  1086
        # Not a recognised colour
colas@0
  1087
        return ( 0, undef );
colas@0
  1088
    }
colas@0
  1089
    my( $f, $kids ) = $this->_flatten( $options );
colas@0
  1090
    return ($f, '%'.uc($colour).'%'.$kids.'%ENDCOLOR%');
colas@0
  1091
};
colas@0
  1092
colas@0
  1093
# FORM
colas@0
  1094
sub _handleFRAME    { return _flatten( @_ ); };
colas@0
  1095
sub _handleFRAMESET { return _flatten( @_ ); };
colas@0
  1096
sub _handleHEAD     { return ( 0, '' ); }
colas@0
  1097
colas@0
  1098
sub _handleHR {
colas@0
  1099
    my( $this, $options ) = @_;
colas@0
  1100
colas@0
  1101
    my( $f, $kids ) = $this->_flatten( $options );
colas@0
  1102
    return ($f, '<hr />'.$kids) if( $options & $WC::NO_BLOCK_TML );
colas@0
  1103
    return ( $f | $WC::BLOCK_TML, $WC::CHECKn.'---'.$WC::CHECKn.$kids);
colas@0
  1104
}
colas@0
  1105
colas@0
  1106
sub _handleHTML   { return _flatten( @_ ); }
colas@0
  1107
sub _handleH1     { return _H( @_, 1 ); }
colas@0
  1108
sub _handleH2     { return _H( @_, 2 ); }
colas@0
  1109
sub _handleH3     { return _H( @_, 3 ); }
colas@0
  1110
sub _handleH4     { return _H( @_, 4 ); }
colas@0
  1111
sub _handleH5     { return _H( @_, 5 ); }
colas@0
  1112
sub _handleH6     { return _H( @_, 6 ); }
colas@0
  1113
sub _handleI      { return _emphasis( @_, '_' ); }
colas@0
  1114
colas@0
  1115
sub _handleIMG {
colas@0
  1116
    my( $this, $options ) = @_;
colas@0
  1117
colas@0
  1118
    # Hack out mce_src, which is TinyMCE-specific and causes indigestion
colas@0
  1119
    # when the topic is reloaded
colas@0
  1120
    delete $this->{attrs}->{mce_src} if defined $this->{attrs}->{mce_src};
colas@0
  1121
colas@0
  1122
    if( $this->{context} && $this->{context}->{rewriteURL} ) {
colas@0
  1123
        my $href = $this->{attrs}->{src};
colas@0
  1124
        # decode URL params in the href
colas@0
  1125
        $href =~ s/%([0-9A-F]{2})/chr(hex($1))/gei;
colas@0
  1126
        $href = &{$this->{context}->{rewriteURL}}(
colas@0
  1127
            $href, $this->{context} );
colas@0
  1128
        $this->{attrs}->{src} = $href;
colas@0
  1129
    }
colas@0
  1130
colas@0
  1131
    return (0, undef) unless $this->{context} &&
colas@0
  1132
      defined $this->{context}->{convertImage};
colas@0
  1133
colas@0
  1134
    my $alt = &{$this->{context}->{convertImage}}(
colas@0
  1135
        $this->{attrs}->{src},
colas@0
  1136
        $this->{context} );
colas@0
  1137
    if( $alt ) {
colas@0
  1138
        return (0, $alt);
colas@0
  1139
    }
colas@0
  1140
    return ( 0, undef );
colas@0
  1141
}
colas@0
  1142
colas@0
  1143
# INPUT
colas@0
  1144
# INS
colas@0
  1145
# ISINDEX
colas@0
  1146
sub _handleKBD      { return _handleTT( @_ ); }
colas@0
  1147
# LABEL
colas@0
  1148
# LI
colas@0
  1149
sub _handleLINK     { return( 0, '' ); };
colas@0
  1150
# MAP
colas@0
  1151
# MENU
colas@0
  1152
sub _handleMETA     { return ( 0, '' ); }
colas@0
  1153
sub _handleNOFRAMES { return ( 0, '' ); }
colas@0
  1154
sub _handleNOSCRIPT { return ( 0, '' ); }
colas@0
  1155
sub _handleOL       { return _LIST( @_ ); }
colas@0
  1156
# OPTGROUP
colas@0
  1157
# OPTION
colas@0
  1158
colas@0
  1159
sub _handleP {
colas@0
  1160
    my( $this, $options ) = @_;
colas@0
  1161
colas@0
  1162
    if ($this->hasClass('TMLverbatim')) {
colas@0
  1163
        return $this->_verbatim('verbatim', $options);
colas@0
  1164
    }
colas@0
  1165
    if ($this->hasClass('WYSIWYG_STICKY')) {
colas@0
  1166
        return $this->_verbatim('sticky', $options);
colas@0
  1167
    }
colas@0
  1168
colas@0
  1169
    my( $f, $kids ) = $this->_flatten( $options );
colas@0
  1170
    return ($f, '<p>'.$kids.'</p>') if( $options & $WC::NO_BLOCK_TML );
colas@0
  1171
    my $pre = '';
colas@0
  1172
    if ($this->prevIsInline()) {
colas@0
  1173
        $pre = $WC::NBBR;
colas@0
  1174
    }
colas@0
  1175
    return ($f | $WC::BLOCK_TML, $pre.$WC::NBBR.$kids.$WC::NBBR);
colas@0
  1176
}
colas@0
  1177
colas@0
  1178
# PARAM
colas@0
  1179
colas@0
  1180
sub _handlePRE {
colas@0
  1181
    my( $this, $options ) = @_;
colas@0
  1182
colas@0
  1183
    my $tag = 'pre';
colas@0
  1184
    if( $this->hasClass('TMLverbatim')) {
colas@0
  1185
        return $this->_verbatim('verbatim', $options);
colas@0
  1186
    }
colas@0
  1187
    if ($this->hasClass('WYSIWYG_STICKY')) {
colas@0
  1188
        return $this->_verbatim('sticky', $options);
colas@0
  1189
    }
colas@0
  1190
    unless( $options & $WC::NO_BLOCK_TML ) {
colas@0
  1191
        my( $flags, $text ) = $this->_flatten(
colas@0
  1192
            $options | $WC::NO_BLOCK_TML | $WC::BR2NL | $WC::KEEP_WS );
colas@0
  1193
        my $p = _htmlParams( $this->{attrs}, $options);
colas@0
  1194
        return ($WC::BLOCK_TML, "<$tag$p>$text</$tag>");
colas@0
  1195
    }
colas@0
  1196
    return ( 0, undef );
colas@0
  1197
}
colas@0
  1198
colas@0
  1199
sub _handleQ    { return _flatten( @_ ); };
colas@0
  1200
# S
colas@0
  1201
sub _handleSAMP { return _handleTT( @_ ); };
colas@0
  1202
# SCRIPT
colas@0
  1203
# SELECT
colas@0
  1204
# SMALL
colas@0
  1205
colas@0
  1206
sub _handleSPAN {
colas@0
  1207
    my( $this, $options ) = @_;
colas@0
  1208
colas@0
  1209
    my %atts = %{$this->{attrs}};
colas@0
  1210
    if (_removeClass(\%atts, 'TMLverbatim')) {
colas@0
  1211
        return $this->_verbatim('verbatim', $options);
colas@0
  1212
    }
colas@0
  1213
    if (_removeClass(\%atts, 'WYSIWYG_STICKY')) {
colas@0
  1214
        return $this->_verbatim('sticky', $options);
colas@0
  1215
    }
colas@0
  1216
colas@0
  1217
    if( _removeClass(\%atts, 'WYSIWYG_LINK')) {
colas@0
  1218
        $options |= $WC::NO_BLOCK_TML;
colas@0
  1219
    }
colas@0
  1220
colas@0
  1221
    if( _removeClass(\%atts, 'WYSIWYG_TT')) {
colas@0
  1222
        return _emphasis( @_, '=' );
colas@0
  1223
    }
colas@0
  1224
colas@0
  1225
    # Remove all other classes
colas@0
  1226
    delete $atts{class};
colas@0
  1227
colas@0
  1228
    if( $options & $WC::VERY_CLEAN ) {
colas@0
  1229
        # remove style attribute if cleaning aggressively. Have to do this
colas@0
  1230
        # because TWiki generates these.
colas@0
  1231
        delete $atts{style} if defined $atts{style}
colas@0
  1232
    }
colas@0
  1233
colas@0
  1234
    # ignore the span tag if there are no other attrs
colas@0
  1235
    if( scalar(keys %atts) == 0 ) {
colas@0
  1236
        return $this->_flatten( $options );
colas@0
  1237
    }
colas@0
  1238
colas@0
  1239
    # otherwise use the default generator.
colas@0
  1240
    return (0, undef);
colas@0
  1241
}
colas@0
  1242
colas@0
  1243
# STRIKE
colas@0
  1244
colas@0
  1245
sub _handleSTRONG { return _emphasis( @_, '*' ); }
colas@0
  1246
colas@0
  1247
sub _handleSTYLE { return ( 0, '' ); }
colas@0
  1248
# SUB
colas@0
  1249
# SUP
colas@0
  1250
colas@0
  1251
sub _handleTABLE {
colas@0
  1252
    my( $this, $options ) = @_;
colas@0
  1253
    return ( 0, undef) if( $options & $WC::NO_BLOCK_TML );
colas@0
  1254
colas@0
  1255
    # Should really look at the table attrs, but to heck with it
colas@0
  1256
colas@0
  1257
    return ( 0, undef ) if( $options & $WC::NO_BLOCK_TML );
colas@0
  1258
colas@0
  1259
    my @table;
colas@0
  1260
    return ( 0, undef ) unless
colas@0
  1261
      $this->_isConvertableTable( $options | $WC::NO_BLOCK_TML, \@table );
colas@0
  1262
colas@0
  1263
    my $maxrow = 0;
colas@0
  1264
    my $row;
colas@0
  1265
    foreach $row ( @table ) {
colas@0
  1266
        my $rw = scalar( @$row );
colas@0
  1267
        $maxrow = $rw if( $rw > $maxrow );
colas@0
  1268
    }
colas@0
  1269
    foreach $row ( @table ) {
colas@0
  1270
        while( scalar( @$row ) < $maxrow) {
colas@0
  1271
            push( @$row, '' );
colas@0
  1272
        }
colas@0
  1273
    }
colas@0
  1274
    my $text = $WC::CHECKn;
colas@0
  1275
    foreach $row ( @table ) {
colas@0
  1276
        # isConvertableTableRow has already formatted the cell
colas@0
  1277
        $text .= $WC::CHECKn.'|'.join('|', @$row).'|'.$WC::CHECKn;
colas@0
  1278
    }
colas@0
  1279
colas@0
  1280
    return ( $WC::BLOCK_TML, $text );
colas@0
  1281
}
colas@0
  1282
colas@0
  1283
# TBODY
colas@0
  1284
# TD
colas@0
  1285
colas@0
  1286
# TEXTAREA {
colas@0
  1287
# TFOOT
colas@0
  1288
# TH
colas@0
  1289
# THEAD
colas@0
  1290
sub _handleTITLE { return (0, '' ); }
colas@0
  1291
# TR
colas@0
  1292
sub _handleTT    { return _handleCODE( @_ ); }
colas@0
  1293
# U
colas@0
  1294
sub _handleUL    { return _LIST( @_ ); }
colas@0
  1295
sub _handleVAR   { return ( 0, '' ); }
colas@0
  1296
colas@0
  1297
1;