lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/Node.pm
changeset 0 414e01d06fd5
child 1 e2915a7cbdfa
equal deleted inserted replaced
-1:000000000000 0:414e01d06fd5
       
     1 # Copyright (C) 2005 ILOG http://www.ilog.fr
       
     2 # and TWiki Contributors. All Rights Reserved. TWiki Contributors
       
     3 # are listed in the AUTHORS file in the root of this distribution.
       
     4 # NOTE: Please extend that file, not this notice.
       
     5 #
       
     6 # This program is free software; you can redistribute it and/or
       
     7 # modify it under the terms of the GNU General Public License
       
     8 # as published by the Free Software Foundation; either version 2
       
     9 # of the License, or (at your option) any later version. For
       
    10 # more details read LICENSE in the root of the TWiki distribution.
       
    11 #
       
    12 # This program is distributed in the hope that it will be useful,
       
    13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
       
    15 #
       
    16 # As per the GPL, removal of this notice is prohibited.
       
    17 
       
    18 # The generator works by expanding and HTML parse tree to "decorated"
       
    19 # text, where the decorators are non-printable characters. These characters
       
    20 # act to express format requirements - for example, the need to have a
       
    21 # newline before some text, or the need for a space. Whitespace is then
       
    22 # collapsed down to the minimum that satisfies the format requirements.
       
    23 
       
    24 =pod
       
    25 
       
    26 ---+ package TWiki::Plugins::WysiwygPlugin::HTML2TML::Node;
       
    27 
       
    28 Object for storing a parsed HTML tag, and processing it
       
    29 to generate TML from the parse tree.
       
    30 
       
    31 See also TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf
       
    32 
       
    33 =cut
       
    34 
       
    35 package TWiki::Plugins::WysiwygPlugin::HTML2TML::Node;
       
    36 use base 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Base';
       
    37 
       
    38 use strict;
       
    39 
       
    40 use TWiki::Func; # needed for regular expressions
       
    41 use Assert;
       
    42 
       
    43 use vars qw( $reww );
       
    44 
       
    45 require TWiki::Plugins::WysiwygPlugin::Constants;
       
    46 require TWiki::Plugins::WysiwygPlugin::HTML2TML::WC;
       
    47 
       
    48 =pod
       
    49 
       
    50 ---++ ObjectMethod new( $context, $tag, \%attrs )
       
    51 
       
    52 Construct a new HTML tag node using the given tag name
       
    53 and attribute hash.
       
    54 
       
    55 =cut
       
    56 
       
    57 sub new {
       
    58     my( $class, $context, $tag, $attrs ) = @_;
       
    59 
       
    60     my $this = {};
       
    61 
       
    62     $this->{context} = $context;
       
    63     $this->{tag} = $tag;
       
    64     $this->{attrs} = {};
       
    65     if( $attrs ) {
       
    66         foreach my $attr ( keys %$attrs ) {
       
    67             $this->{attrs}->{lc($attr)} = $attrs->{$attr};
       
    68         }
       
    69     }
       
    70     $this->{head} = $this->{tail} = undef;
       
    71 
       
    72     return bless( $this, $class );
       
    73 }
       
    74 
       
    75 # debug
       
    76 sub stringify {
       
    77     my( $this, $shallow ) = @_;
       
    78     my $r = '';
       
    79     if( $this->{tag} ) {
       
    80         $r .= '<'.$this->{tag};
       
    81         foreach my $attr ( keys %{$this->{attrs}} ) {
       
    82             $r .= " ".$attr."='".$this->{attrs}->{$attr}."'";
       
    83         }
       
    84         $r .= '>';
       
    85     }
       
    86     if( $shallow ) {
       
    87         $r .= '...';
       
    88     } else {
       
    89         my $kid = $this->{head};
       
    90         while ($kid) {
       
    91             $r .= $kid->stringify();
       
    92             $kid = $kid->{next};
       
    93         }
       
    94     }
       
    95     if( $this->{tag} ) {
       
    96         $r .= '</'.$this->{tag}.'>';
       
    97     }
       
    98     return $r;
       
    99 }
       
   100 
       
   101 =pod
       
   102 
       
   103 ---++ ObjectMethod addChild( $node )
       
   104 
       
   105 Add a child node to the ordered list of children of this node
       
   106 
       
   107 =cut
       
   108 
       
   109 sub addChild {
       
   110     my( $this, $node ) = @_;
       
   111 
       
   112     ASSERT($node != $this) if DEBUG;
       
   113 
       
   114     $node->{next} = undef;
       
   115     $node->{parent} = $this;
       
   116     my $kid = $this->{tail};
       
   117     if ($kid) {
       
   118         $kid->{next} = $node;
       
   119         $node->{prev} = $kid;
       
   120     } else {
       
   121         $node->{prev} = undef;
       
   122         $this->{head} = $node;
       
   123     }
       
   124     $this->{tail} = $node;
       
   125 }
       
   126 
       
   127 # top and tail a string
       
   128 sub _trim {
       
   129     my $s = shift;
       
   130 
       
   131     # Item5076: removed CHECKn from the following exprs, because loss of it
       
   132     # breaks line-sensitive TML content inside flattened content.
       
   133     $s =~ s/^[ \t\n$WC::CHECKw$WC::CHECKs]+/$WC::CHECKw/o;
       
   134     $s =~ s/[ \t\n$WC::CHECKw]+$/$WC::CHECKw/o;
       
   135     return $s;
       
   136 }
       
   137 
       
   138 # Both object method and static method
       
   139 sub hasClass {
       
   140     my ($this, $class) = @_;
       
   141     return 0 unless $this;
       
   142     if (UNIVERSAL::isa($this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node')) {
       
   143         return hasClass($this->{attrs}, $class);
       
   144     }
       
   145     return 0 unless defined $this->{class};
       
   146     return $this->{class} =~ /\b$class\b/ ? 1 : 0;
       
   147 }
       
   148 
       
   149 # Both object method and static method
       
   150 sub _removeClass {
       
   151     my ($this, $class) = @_;
       
   152     return 0 unless $this;
       
   153     if (UNIVERSAL::isa($this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node')) {
       
   154         return _removeClass($this->{attrs}, $class);
       
   155     }
       
   156     return 0 unless hasClass($this, $class);
       
   157     $this->{class} =~ s/\b$class\b//;
       
   158     $this->{class} =~ s/\s+/ /g;
       
   159     $this->{class} =~ s/^\s+//;
       
   160     $this->{class} =~ s/\s+$//;
       
   161     if (!$this->{class}) {
       
   162         delete $this->{class};
       
   163     }
       
   164     return 1;
       
   165 }
       
   166 
       
   167 # Both object method and static method
       
   168 sub _addClass {
       
   169     my ($this, $class) = @_;
       
   170     if (UNIVERSAL::isa($this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node')) {
       
   171         _addClass($this->{attrs}, $class);
       
   172         return;
       
   173     }
       
   174     _removeClass($this, $class); # avoid duplication
       
   175     if ($this->{class}) {
       
   176         $this->{class} .= ' '.$class;
       
   177     } else {
       
   178         $this->{class} = $class;
       
   179     }
       
   180 }
       
   181 
       
   182 # Move the content of $node into $this
       
   183 sub _eat {
       
   184     my ($this, $node) = @_;
       
   185     my $kid = $this->{tail};
       
   186     if ($kid) {
       
   187         $kid->{next} = $node->{head};
       
   188         if ($node->{head}) {
       
   189             $node->{head}->{prev} = $kid;
       
   190         }
       
   191     } else {
       
   192         $this->{head} = $node->{head};
       
   193     }
       
   194     $this->{tail} = $node->{tail};
       
   195     $kid = $node->{head};
       
   196     while ($kid) {
       
   197         $kid->{parent} = $this;
       
   198         $kid = $kid->{next};
       
   199     }
       
   200     $node->{head} = $node->{tail} = undef;
       
   201 }
       
   202 
       
   203 =pod
       
   204 
       
   205 ---++ ObjectMethod rootGenerate($opts) -> $text
       
   206 
       
   207 Generates TML from this HTML node. The generation is done
       
   208 top down and bottom up, so that higher level nodes can make
       
   209 decisions on whether to allow TML conversion in lower nodes,
       
   210 and lower level nodes can constrain conversion in higher level
       
   211 nodes.
       
   212 
       
   213 $opts is a bitset. $WC::VERY_CLEAN will cause the generator
       
   214 to drop unrecognised HTML (e.g. divs and spans that don't
       
   215 generate TML)
       
   216 
       
   217 =cut
       
   218 
       
   219 sub rootGenerate {
       
   220     my( $this, $opts ) = @_;
       
   221 
       
   222     $this->cleanParseTree();
       
   223 
       
   224     # Perform some transformations on the parse tree
       
   225     $this->_collapse();
       
   226 
       
   227     my( $f, $text ) = $this->generate($opts);
       
   228 
       
   229     # Debug support
       
   230     #print STDERR "Converted ",WC::debugEncode($text),"\n";
       
   231 
       
   232     # Move leading \n out of protected region. Delicate hack fix required to
       
   233     # maintain TWiki variables at the start of lines.
       
   234     $text =~ s/$WC::PON$WC::NBBR/$WC::CHECKn$WC::PON/g;
       
   235 
       
   236     # isolate whitespace checks and convert to $NBSP
       
   237     $text =~ s/$WC::CHECKw$WC::CHECKw+/$WC::CHECKw/go;
       
   238     $text =~ s/([$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::TAB$WC::NBBR]($WC::PON|$WC::POFF)?)$WC::CHECKw/$1/go;
       
   239     $text =~ s/$WC::CHECKw(($WC::PON|$WC::POFF)?[$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::NBBR])/$1/go;
       
   240     $text =~ s/^($WC::CHECKw)+//gos;
       
   241     $text =~ s/($WC::CHECKw)+$//gos;
       
   242     $text =~ s/($WC::CHECKw)+/$WC::NBSP/go;
       
   243 
       
   244     # isolate $CHECKs and convert to $NBSP
       
   245     $text =~ s/$WC::CHECKs$WC::CHECKs+/$WC::CHECKs/go;
       
   246     $text =~ s/([ $WC::NBSP$WC::TAB])$WC::CHECKs/$1/go;
       
   247     $text =~ s/$WC::CHECKs( |$WC::NBSP)/$1/go;
       
   248     $text =~ s/($WC::CHECKs)+/$WC::NBSP/go;
       
   249 
       
   250     $text =~ s/<br( \/)?>$WC::NBBR/$WC::NBBR/g; # Remove BR before P
       
   251 
       
   252     #die "Converted ",WC::debugEncode($text),"\n";
       
   253 
       
   254     my @regions = split(/([$WC::PON$WC::POFF])/o, $text);
       
   255     my $protect = 0;
       
   256     $text = '';
       
   257     foreach my $tml (@regions) {
       
   258         if ($tml eq $WC::PON) {
       
   259             $protect++;
       
   260             next;
       
   261         } elsif ($tml eq $WC::POFF) {
       
   262             $protect--;
       
   263             next;
       
   264         }
       
   265 
       
   266         # isolate $NBBR and convert to \n.
       
   267         unless ($protect) {
       
   268             $tml =~ s/\n$WC::NBBR/$WC::NBBR$WC::NBBR/go;
       
   269             $tml =~ s/$WC::NBBR\n/$WC::NBBR$WC::NBBR/go;
       
   270             $tml =~ s/$WC::NBBR( |$WC::NBSP)+$WC::NBBR/$WC::NBBR$WC::NBBR/go;
       
   271             $tml =~ s/ +$WC::NBBR/$WC::NBBR/go;
       
   272             $tml =~ s/$WC::NBBR +/$WC::NBBR/go;
       
   273             $tml =~ s/$WC::NBBR$WC::NBBR+/$WC::NBBR$WC::NBBR/go;
       
   274 
       
   275             # Now convert adjacent NBBRs to recreate empty lines
       
   276             # 1 NBBR  -> 1 newline
       
   277             # 2 NBBRs -> <p /> - 1 blank line - 2 newlines
       
   278             # 3 NBBRs -> 3 newlines
       
   279             # 4 NBBRs -> <p /><p /> - 3 newlines
       
   280             # 5 NBBRs -> 4 newlines
       
   281             # 6 NBBRs -> <p /><p /><p /> - 3 blank lines - 4 newlines
       
   282             # 7 NBBRs -> 5 newlines
       
   283             # 8 NBBRs -> <p /><p /><p /><p /> - 4 blank lines - 5 newlines
       
   284             $tml =~ s.($WC::NBBR$WC::NBBR$WC::NBBR$WC::NBBR+).
       
   285               "\n" x ((length($1) + 1) / 2 + 1)
       
   286                 .geo;
       
   287         }
       
   288         # isolate $CHECKn and convert to $NBBR
       
   289         $tml =~ s/$WC::CHECKn([$WC::NBSP $WC::TAB])*$WC::CHECKn/$WC::CHECKn/go;
       
   290         $tml =~ s/$WC::CHECKn$WC::CHECKn+/$WC::CHECKn/go;
       
   291         $tml =~ s/(?<=$WC::NBBR)$WC::CHECKn//gom;
       
   292         $tml =~ s/$WC::CHECKn(?=$WC::NBBR)//gom;
       
   293         $tml =~ s/$WC::CHECKn+/$WC::NBBR/gos;
       
   294 
       
   295         $tml =~ s/$WC::NBBR/\n/gos;
       
   296 
       
   297         # Convert tabs to NBSP
       
   298         $tml =~ s/$WC::TAB/$WC::NBSP$WC::NBSP$WC::NBSP/go;
       
   299 
       
   300         # isolate $NBSP and convert to space
       
   301         unless ($protect) {
       
   302             $tml =~ s/ +$WC::NBSP/$WC::NBSP/go;
       
   303             $tml =~ s/$WC::NBSP +/$WC::NBSP/go;
       
   304         }
       
   305         $tml =~ s/$WC::NBSP/ /go;
       
   306 
       
   307         $tml =~ s/$WC::CHECK1$WC::CHECK1+/$WC::CHECK1/go;
       
   308         $tml =~ s/$WC::CHECK2$WC::CHECK2+/$WC::CHECK2/go;
       
   309         $tml =~ s/$WC::CHECK2$WC::CHECK1/$WC::CHECK2/go;
       
   310 
       
   311         $tml =~ s/(^|[\s\(])$WC::CHECK1/$1/gso;
       
   312         $tml =~ s/$WC::CHECK2($|[\s\,\.\;\:\!\?\)\*])/$1/gso;
       
   313 
       
   314         $tml =~ s/$WC::CHECK1(\s|$)/$1/gso;
       
   315         $tml =~ s/(^|\s)$WC::CHECK2/$1/gso;
       
   316 
       
   317         $tml =~ s/$WC::CHECK1/ /go;
       
   318         $tml =~ s/$WC::CHECK2/ /go;
       
   319         #print STDERR WC::debugEncode($before);
       
   320         #print STDERR " -> '",WC::debugEncode($tml),"'\n";
       
   321         $text .= $tml;
       
   322     }
       
   323     # Collapse adjacent tags
       
   324     foreach my $tag qw(noautolink verbatim literal) {
       
   325         $text =~ s#</$tag>(\s*)<$tag>#$1#gs;
       
   326     }
       
   327     # Top and tail, and terminate with a single newline
       
   328     $text =~ s/^\n*//s;
       
   329     $text =~ s/\s*$/\n/s;
       
   330 
       
   331     # Item5127: Remove BR just before EOLs
       
   332     $text =~ s/<br( \/)?>\n/\n/g;
       
   333 
       
   334     return $text;
       
   335 }
       
   336 
       
   337 # Collapse adjacent VERBATIM nodes together
       
   338 # Collapse a <p> than contains only a protected span into a protected P
       
   339 # Collapse em in em
       
   340 # Collapse adjacent text nodes
       
   341 sub _collapse {
       
   342     my $this = shift;
       
   343 
       
   344     my @jobs = ( $this );
       
   345     while (scalar(@jobs)) {
       
   346         my $node = shift(@jobs);
       
   347         if (defined($node->{tag}) && $node->hasClass('TMLverbatim')) {
       
   348             my $next = $node->{next};
       
   349             my @edible;
       
   350             my $collapsible;
       
   351             while ($next &&
       
   352                      ((!$next->{tag} && $next->{text} =~ /^\s*$/) ||
       
   353                           ($node->{tag} eq $next->{tag} &&
       
   354                              $next->hasClass('TMLverbatim')))) {
       
   355                 push(@edible, $next);
       
   356                 $collapsible ||= $next->hasClass('TMLverbatim');
       
   357                 $next = $next->{next};
       
   358             }
       
   359             if ($collapsible) {
       
   360                 foreach my $meal (@edible) {
       
   361                     $meal->_remove();
       
   362                     if ($meal->{tag}) {
       
   363                         require TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf;
       
   364                         $node->addChild(new TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf($WC::NBBR));
       
   365                         $node->_eat($meal);
       
   366                     }
       
   367                 }
       
   368             }
       
   369         }
       
   370         if ($node->{tag} eq 'p' &&
       
   371               $node->{head} && $node->{head} == $node->{tail}) {
       
   372             my $kid = $node->{head};
       
   373             if ($kid->{tag} eq 'SPAN' &&
       
   374                   $kid->hasClass('WYSIWYG_PROTECTED')) {
       
   375                 $kid->_remove();
       
   376                 $node->_eat($kid);
       
   377                 $node->_addClass('WYSIWYG_PROTECTED');
       
   378             }
       
   379         }
       
   380 
       
   381         # If this is an emphasis (b, i, code, tt, strong) then
       
   382         # flatten out any child nodes that express the same emphasis.
       
   383         # This has to be done because TWiki emphases are single level.
       
   384         if ($WC::EMPHTAG{$node->{tag}}) {
       
   385             my $kid = $node->{head};
       
   386             while ($kid) {
       
   387                 if ($WC::EMPHTAG{$kid->{tag}} &&
       
   388                       $WC::EMPHTAG{$kid->{tag}} eq
       
   389                         $WC::EMPHTAG{$node->{tag}}) {
       
   390                     $kid = $kid->_inline();
       
   391                 } else {
       
   392                     $kid = $kid->{next};
       
   393                 }
       
   394             }
       
   395         }
       
   396         $node->_combineLeaves();
       
   397 
       
   398         my $kid = $node->{head};
       
   399         while ($kid) {
       
   400             push(@jobs, $kid);
       
   401             $kid = $kid->{next};
       
   402         }
       
   403     }
       
   404 }
       
   405 
       
   406 # the actual generate function. rootGenerate is only applied to the root node.
       
   407 sub generate {
       
   408     my( $this, $options ) = @_;
       
   409     my $fn;
       
   410     my $flags;
       
   411     my $text;
       
   412 
       
   413      if ($this->_isProtectedByAttrs()) {
       
   414          return $this->_defaultTag($options);
       
   415      }
       
   416 
       
   417     my $tag = $this->{tag};
       
   418 
       
   419     if ($this->hasClass('WYSIWYG_LITERAL')) {
       
   420         if ($tag eq 'div' || $tag eq 'p' || $tag eq 'span') {
       
   421             $text = '';
       
   422             my $kid = $this->{head};
       
   423             while ($kid) {
       
   424                 $text .= $kid->stringify();
       
   425                 $kid = $kid->{next};
       
   426             }
       
   427         } else {
       
   428             $this->_removeClass('WYSIWYG_LITERAL');
       
   429             $text = $this->stringify();
       
   430         }
       
   431         return ( 0, '<literal>'.$text.'</literal>' );
       
   432     }
       
   433 
       
   434     if( $options & $WC::NO_HTML ) {
       
   435         # NO_HTML implies NO_TML
       
   436         my $brats = $this->_flatten( $options );
       
   437         return ( 0, $brats );
       
   438     }
       
   439 
       
   440     if( $options & $WC::NO_TML ) {
       
   441         return ( 0, $this->stringify() );
       
   442     }
       
   443 
       
   444     # make the names of the function versions
       
   445     $tag =~ s/!//; # DOCTYPE
       
   446     my $tmlFn = '_handle'.uc($tag);
       
   447 
       
   448     # See if we have a TML translation function for this tag
       
   449     # the translation functions will work out the rendering
       
   450     # of their own children.
       
   451     if( $this->{tag} && defined( &$tmlFn ) ) {
       
   452         no strict 'refs';
       
   453         ( $flags, $text ) = &$tmlFn( $this, $options );
       
   454         use strict 'refs';
       
   455         # if the function returns undef, drop through
       
   456         return ( $flags, $text ) if defined $text;
       
   457     }
       
   458 
       
   459     # No translation, so we need the text of the children
       
   460     ( $flags, $text ) = $this->_flatten( $options );
       
   461 
       
   462     # just return the text if there is no tag name
       
   463     return ( $flags, $text ) unless $this->{tag};
       
   464 
       
   465     return $this->_defaultTag( $options );
       
   466 }
       
   467 
       
   468 # Return the children flattened out subject to the options
       
   469 sub _flatten {
       
   470     my( $this, $options ) = @_;
       
   471     my $text = '';
       
   472     my $flags = 0;
       
   473 
       
   474     my $protected = ($options & $WC::PROTECTED) ||
       
   475       $this->hasClass('WYSIWYG_PROTECTED') ||
       
   476         $this->hasClass('WYSIWYG_STICKY') || 0;
       
   477 
       
   478     if ($protected) {
       
   479         # Expand brs, which are used in the protected encoding in place of
       
   480         # newlines, and protect whitespace
       
   481         $options |= $WC::BR2NL | $WC::KEEP_WS;
       
   482     }
       
   483 
       
   484     my $kid = $this->{head};
       
   485     while ($kid) {
       
   486         my( $f, $t ) = $kid->generate( $options );
       
   487         if (!($options & $WC::KEEP_WS)
       
   488               && $text && $text =~ /\w$/ && $t =~ /^\w/) {
       
   489             # if the last child ends in a \w and this child
       
   490             # starts in a \w, we need to insert a space
       
   491             $text .= ' ';
       
   492         }
       
   493         $text .= $t;
       
   494         $flags |= $f;
       
   495         $kid = $kid->{next};
       
   496     }
       
   497     if ($protected) {
       
   498         $text =~ s/[$WC::PON$WC::POFF]//g;
       
   499 
       
   500         unless ($options & $WC::KEEP_ENTITIES) {
       
   501             require HTML::Entities;
       
   502             $text = HTML::Entities::decode_entities($text);
       
   503             # &nbsp; decodes to \240, which we want to make a space.
       
   504             $text =~ s/\240/$WC::NBSP/g;
       
   505         }
       
   506         $text =~ s/ /$WC::NBSP/g;
       
   507         $text =~ s/\n/$WC::NBBR/g;
       
   508         $text = $WC::PON.$text.$WC::POFF;
       
   509     }
       
   510 
       
   511     $text = _trim($text) unless ($options & $WC::KEEP_WS);
       
   512 
       
   513     return ( $flags, $text );
       
   514 }
       
   515 
       
   516 # $cutClasses is an RE matching class names to cut
       
   517 sub _htmlParams {
       
   518     my ( $attrs, $options ) = @_;
       
   519     my @params;
       
   520 
       
   521     while (my ($k, $v) = each %$attrs ) {
       
   522         next unless $k;
       
   523         if( $k eq 'class' ) {
       
   524             # if cleaning aggressively, remove class attributes completely
       
   525             next if ($options & $WC::VERY_CLEAN);
       
   526             foreach my $c qw(WYSIWYG_PROTECTED WYSIWYG_STICKY TMLverbatim WYSIWYG_LINK) {
       
   527                 $v =~ s/\b$c\b//;
       
   528             }
       
   529             $v =~ s/\s+/ /;
       
   530             $v =~ s/^\s*(.*?)\s*$/$1/;
       
   531             next unless $v;
       
   532         }
       
   533         my $q = $v =~ /"/ ? "'" : '"';
       
   534         push( @params, $k.'='.$q.$v.$q );
       
   535     }
       
   536     my $p = join( ' ', @params );
       
   537     return '' unless $p;
       
   538     return ' '.$p;
       
   539 }
       
   540 
       
   541 # generate the default representation of an HTML tag
       
   542 sub _defaultTag {
       
   543     my( $this, $options ) = @_;
       
   544     my( $flags, $text ) = $this->_flatten( $options );
       
   545     my $tag = $this->{tag};
       
   546     my $p = _htmlParams( $this->{attrs}, $options );
       
   547 
       
   548     if( $text =~ /^\s*$/ && $WC::SELFCLOSING{$tag}) {
       
   549         return ( $flags, '<'.$tag.$p.' />' );
       
   550     } else {
       
   551         return ( $flags, '<'.$tag.$p.'>'.$text.'</'.$tag.'>' );
       
   552     }
       
   553 }
       
   554 
       
   555 # Check to see if the HTML tag is protected by the presence of
       
   556 # specific attributes that block conversion to TML. The conversion
       
   557 # table is defined in 
       
   558 sub _isProtectedByAttrs {
       
   559     my $this = shift;
       
   560 
       
   561     require TWiki::Plugins::WysiwygPlugin;
       
   562     foreach my $attr (keys %{$this->{attrs}}) {
       
   563         next unless length($this->{attrs}->{$attr}); # ignore nulls
       
   564         return $attr if TWiki::Plugins::WysiwygPlugin::protectedByAttr(
       
   565             $this->{tag}, $attr);
       
   566     }
       
   567     return 0;
       
   568 }
       
   569 
       
   570 # perform conversion on a list type
       
   571 sub _convertList {
       
   572     my( $this, $indent ) = @_;
       
   573     my $basebullet;
       
   574     my $isdl = ( $this->{tag} eq 'dl' );
       
   575 
       
   576     if( $isdl ) {
       
   577         $basebullet = '';
       
   578     } elsif( $this->{tag} eq 'ol' ) {
       
   579         $basebullet = '1';
       
   580     } else {
       
   581         $basebullet = '*';
       
   582     }
       
   583 
       
   584     my $f;
       
   585     my $text = '';
       
   586     my $pendingDT = 0;
       
   587     my $kid = $this->{head};
       
   588     while ($kid) {
       
   589         # be tolerant of dl, ol and ul with no li
       
   590         if( $kid->{tag} =~ m/^[dou]l$/i ) {
       
   591             $text .= $kid->_convertList( $indent.$WC::TAB );
       
   592             $kid = $kid->{next};
       
   593             next;
       
   594         }
       
   595         unless ($kid->{tag} =~ m/^(dt|dd|li)$/i) {
       
   596             $kid = $kid->{next};
       
   597             next;
       
   598         }
       
   599         if( $isdl && ( $kid->{tag} eq 'dt' )) {
       
   600             # DT, set the bullet type for subsequent DT
       
   601             $basebullet = $kid->_flatten( $WC::NO_BLOCK_TML );
       
   602             $basebullet =~ s/[\s$WC::CHECKw$WC::CHECKs]+$//;
       
   603             $basebullet .= ':';
       
   604             $basebullet =~ s/$WC::CHECKn/ /g;
       
   605             $basebullet =~ s/^\s+//;
       
   606             $basebullet = '$ '.$basebullet;
       
   607             $pendingDT = 1; # remember in case there is no DD
       
   608             $kid = $kid->{next};
       
   609             next;
       
   610         }
       
   611         my $bullet = $basebullet;
       
   612         if( $basebullet eq '1' && $kid->{attrs}->{type} ) {
       
   613             $bullet = $kid->{attrs}->{type}.'.';
       
   614         }
       
   615         my $spawn = '';
       
   616         my $grandkid = $kid->{head};
       
   617         while ($grandkid) {
       
   618             my $t;
       
   619             if( $grandkid->{tag} =~ /^[dou]l$/i ) {
       
   620                 #$spawn = _trim( $spawn );
       
   621                 $t = $grandkid->_convertList( $indent.$WC::TAB );
       
   622             } else {
       
   623                 ( $f, $t ) = $grandkid->generate( $WC::NO_BLOCK_TML );
       
   624                 $t =~ s/$WC::CHECKn/ /g;
       
   625             }
       
   626             $spawn .= $t;
       
   627             $grandkid = $grandkid->{next};
       
   628         }
       
   629         #$spawn = _trim($spawn);
       
   630         $text .= $WC::CHECKn.$indent.$bullet.$WC::CHECKs.$spawn.$WC::CHECKn;
       
   631         $pendingDT = 0;
       
   632         $basebullet = '' if $isdl;
       
   633         $kid = $kid->{next};
       
   634     }
       
   635     if( $pendingDT ) {
       
   636         # DT with no corresponding DD
       
   637         $text .= $WC::CHECKn.$indent.$basebullet.$WC::CHECKn;
       
   638     }
       
   639     return $text;
       
   640 }
       
   641 
       
   642 # probe down into a list type to determine if it
       
   643 # can be converted to TML.
       
   644 sub _isConvertableList {
       
   645     my( $this, $options ) = @_;
       
   646 
       
   647     return 0 if ($this->_isProtectedByAttrs());
       
   648 
       
   649     my $kid = $this->{head};
       
   650     while ($kid) {
       
   651         # check for malformed list. We can still handle it,
       
   652         # by simply ignoring illegal text.
       
   653         # be tolerant of dl, ol and ul with no li
       
   654         if( $kid->{tag} =~ m/^[dou]l$/i ) {
       
   655             return 0 unless $kid->_isConvertableList( $options );
       
   656         } elsif( $kid->{tag} =~ m/^(dt|dd|li)$/i ) {
       
   657             unless( $kid->_isConvertableListItem( $options, $this )) {
       
   658                 return 0;
       
   659             }
       
   660         }
       
   661         $kid = $kid->{next};
       
   662     }
       
   663     return 1;
       
   664 }
       
   665 
       
   666 # probe down into a list item to determine if the
       
   667 # containing list can be converted to TML.
       
   668 sub _isConvertableListItem {
       
   669     my( $this, $options, $parent ) = @_;
       
   670     my( $flags, $text );
       
   671 
       
   672     return 0 if ($this->_isProtectedByAttrs());
       
   673 
       
   674     if( $parent->{tag} eq 'dl' ) {
       
   675         return 0 unless( $this->{tag} =~ /^d[td]$/i );
       
   676     } else {
       
   677         return 0 unless( $this->{tag} eq 'li' );
       
   678     }
       
   679 
       
   680     my $kid = $this->{head};
       
   681     while ($kid) {
       
   682         if( $kid->{tag} =~ /^[oud]l$/i ) {
       
   683             unless( $kid->_isConvertableList( $options )) {
       
   684                 return 0;
       
   685             }
       
   686         } else {
       
   687             ( $flags, $text ) = $kid->generate( $options );
       
   688             if( $flags & $WC::BLOCK_TML ) {
       
   689                 return 0;
       
   690             }
       
   691         }
       
   692         $kid = $kid->{next};
       
   693     }
       
   694     return 1;
       
   695 }
       
   696 
       
   697 # probe down into a list type to determine if it
       
   698 # can be converted to TML.
       
   699 sub _isConvertableTable {
       
   700     my( $this, $options, $table ) = @_;
       
   701 
       
   702     if ($this->_isProtectedByAttrs()) {
       
   703         return 0;
       
   704     }
       
   705 
       
   706     my $kid = $this->{head};
       
   707     while ($kid) {
       
   708         if( $kid->{tag} =~ /^(colgroup|thead|tbody|tfoot|col)$/ ) {
       
   709             unless ($kid->_isConvertableTable( $options, $table )) {
       
   710                 return 0;
       
   711             }
       
   712         } elsif( $kid->{tag} ) {
       
   713             unless ($kid->{tag} eq 'tr') {
       
   714                 return 0;
       
   715             }
       
   716             my $row = $kid->_isConvertableTableRow( $options );
       
   717             unless ($row) {
       
   718                 return 0;
       
   719             }
       
   720             push( @$table, $row );
       
   721         }
       
   722         $kid = $kid->{next};
       
   723     }
       
   724     return 1;
       
   725 }
       
   726 
       
   727 # Tidy up whitespace on the sides of a table cell, and also strip trailing
       
   728 # BRs, as added by some table editors.
       
   729 sub _TDtrim {
       
   730     my $td = shift;
       
   731     $td =~ s/^($WC::NBSP|$WC::NBBR|$WC::CHECKn|$WC::CHECKs|$WC::CHECKw|$WC::CHECK1|$WC::CHECK2|$WC::TAB|\s)+//so;
       
   732     $td =~ s/(<br \/>|<br>|$WC::NBSP|$WC::NBBR|$WC::CHECKn|$WC::CHECKs|$WC::CHECKw|$WC::CHECK1|$WC::CHECK2|$WC::TAB|\s)+$//so;
       
   733     return $td;
       
   734 }
       
   735 
       
   736 # probe down into a list item to determine if the
       
   737 # containing table can be converted to TML.
       
   738 sub _isConvertableTableRow {
       
   739     my( $this, $options ) = @_;
       
   740 
       
   741     return 0 if ($this->_isProtectedByAttrs());
       
   742 
       
   743     my( $flags, $text );
       
   744     my @row;
       
   745     my $ignoreCols = 0;
       
   746     my $kid = $this->{head};
       
   747     while ($kid) {
       
   748         if ($kid->{tag} eq 'th') {
       
   749             ( $flags, $text ) = $kid->_flatten( $options );
       
   750             $text = _TDtrim( $text );
       
   751             $text = "*$text*" if length($text);
       
   752         } elsif ($kid->{tag} eq 'td' ) {
       
   753             ( $flags, $text ) = $kid->_flatten( $options );
       
   754             $text = _TDtrim( $text );
       
   755         } elsif( !$kid->{tag} ) {
       
   756             $kid = $kid->{next};
       
   757             next;
       
   758         } else {
       
   759             # some other sort of (unexpected) tag
       
   760             return 0;
       
   761         }
       
   762         return 0 if( $flags & $WC::BLOCK_TML );
       
   763 
       
   764         if( $kid->{attrs} ) {
       
   765             my $a = _deduceAlignment( $kid );
       
   766             if( $text && $a eq 'right' ) {
       
   767                 $text = $WC::NBSP.$text;
       
   768             } elsif( $text && $a eq 'center' ) {
       
   769                 $text = $WC::NBSP.$text.$WC::NBSP;
       
   770             } elsif( $text && $a eq 'left' ) {
       
   771                 $text .= $WC::NBSP;
       
   772             }
       
   773             if( $kid->{attrs}->{rowspan} && $kid->{attrs}->{rowspan} > 1 ) {
       
   774                 return 0;
       
   775             }
       
   776         }
       
   777         $text =~ s/&nbsp;/$WC::NBSP/g;
       
   778         #if (--$ignoreCols > 0) {
       
   779         #    # colspanned
       
   780         #    $text = '';
       
   781         #} els
       
   782         if ($text =~ /^$WC::NBSP*$/) {
       
   783             $text = $WC::NBSP;
       
   784         } else {
       
   785             $text = $WC::NBSP.$text.$WC::NBSP;
       
   786         }
       
   787         if( $kid->{attrs} && $kid->{attrs}->{colspan} &&
       
   788               $kid->{attrs}->{colspan} > 1 ) {
       
   789             $ignoreCols = $kid->{attrs}->{colspan};
       
   790         }
       
   791         # Pad to allow wikiwords to work
       
   792         push( @row, $text );
       
   793         while ($ignoreCols > 1) {
       
   794             push( @row, '' );
       
   795             $ignoreCols--;
       
   796         }
       
   797         $kid = $kid->{next};
       
   798     }
       
   799     return \@row;
       
   800 }
       
   801 
       
   802 # Work out the alignment of a table cell from the style and/or class
       
   803 sub _deduceAlignment {
       
   804     my $td = shift;
       
   805 
       
   806     if( $td->{attrs}->{align} ) {
       
   807         return lc( $td->{attrs}->{align} );
       
   808     } else {
       
   809         if( $td->{attrs}->{style} &&
       
   810               $td->{attrs}->{style} =~ /text-align\s*:\s*(left|right|center)/ ) {
       
   811             return $1;
       
   812         }
       
   813         if ($td->hasClass(qr/align-(left|right|center)/)) {
       
   814             return $1;
       
   815         }
       
   816     }
       
   817     return '';
       
   818 }
       
   819 
       
   820 # convert a heading tag
       
   821 sub _H {
       
   822     my( $this, $options, $depth ) = @_;
       
   823     my( $flags, $contents ) = $this->_flatten( $options );
       
   824     return ( 0, undef ) if( $flags & $WC::BLOCK_TML );
       
   825     my $notoc = '';
       
   826     if($this->hasClass('notoc')) {
       
   827         $notoc = '!!';
       
   828     }
       
   829     $contents =~ s/^\s+/ /;
       
   830     $contents =~ s/\s+$//;
       
   831     my $res = $WC::CHECKn.'---'.('+' x $depth).$notoc.
       
   832       $WC::CHECKs.$contents.$WC::CHECKn;
       
   833     return ( $flags | $WC::BLOCK_TML, $res );
       
   834 }
       
   835 
       
   836 # generate an emphasis
       
   837 sub _emphasis {
       
   838     my( $this, $options, $ch ) = @_;
       
   839     my( $flags, $contents ) = $this->_flatten( $options | $WC::NO_BLOCK_TML );
       
   840     return ( 0, undef ) if( !defined( $contents ) || ( $flags & $WC::BLOCK_TML ));
       
   841     # Remove whitespace from either side of the contents, retaining the
       
   842     # whitespace
       
   843     $contents =~ s/&nbsp;/$WC::NBSP/go;
       
   844     $contents =~ /^($WC::WS)(.*?)($WC::WS)$/;
       
   845     my ($pre, $post) = ($1, $3);
       
   846     $contents = $2;
       
   847     return (0, undef) if( $contents =~ /^</ || $contents =~ />$/ );
       
   848     return (0, '') unless( $contents =~ /\S/ );
       
   849 
       
   850     # Now see if we can collapse the emphases
       
   851     if ($ch eq '_' && $contents =~ s/^\*(.*)\*$/$1/ ||
       
   852           $ch eq '*' && $contents =~ s/^_(?!_)(.*)(?<!_)_$/$1/) {
       
   853         $ch = '__';
       
   854     } elsif ($ch eq '=' && $contents =~ s/^\*(.*)\*$/$1/ ||
       
   855           $ch eq '*' && $contents =~ s/^=(?!=)(.*)(?<!=)=$/$1/) {
       
   856         $ch = '==';
       
   857     } elsif ($contents =~ /^([*_=]).*\1$/) {
       
   858         return (0, undef);
       
   859     }
       
   860 
       
   861     return ( $flags, $pre.$WC::CHECKw.$ch.$contents.$ch.$WC::CHECK2.$post );
       
   862 }
       
   863 
       
   864 # generate verbatim for P, SPAN or PRE
       
   865 sub _verbatim {
       
   866     my ($this, $tag, $options) = @_;
       
   867 
       
   868     $options |= $WC::PROTECTED|$WC::KEEP_ENTITIES|$WC::BR2NL | $WC::KEEP_WS;
       
   869     my( $flags, $text ) = $this->_flatten($options);
       
   870     # decode once, and once only
       
   871     require HTML::Entities;
       
   872     $text = HTML::Entities::decode_entities($text);
       
   873     # &nbsp; decodes to \240, which we want to make a space.
       
   874     $text =~ s/\240/$WC::NBSP/g;
       
   875     my $p = _htmlParams($this->{attrs}, $options);
       
   876     return ($flags, "<$tag$p>$text</$tag>");
       
   877 }
       
   878 
       
   879 # pseudo-tags that may leak through in TWikiVariables
       
   880 # We have to handle this to avoid a matching close tag </nop>
       
   881 sub _handleNOP {
       
   882     my( $this, $options ) = @_;
       
   883     my( $flags, $text ) = $this->_flatten( $options );
       
   884     return ($flags, '<nop>'.$text);
       
   885 }
       
   886 
       
   887 sub _handleNOPRESULT {
       
   888     my( $this, $options ) = @_;
       
   889     my( $flags, $text ) = $this->_flatten( $options );
       
   890     return ($flags, '<nop>'.$text);
       
   891 }
       
   892 
       
   893 # tags we ignore completely (contents as well)
       
   894 sub _handleDOCTYPE { return ( 0, '' ); }
       
   895 
       
   896 sub _LIST {
       
   897     my( $this, $options ) = @_;
       
   898     if( ( $options & $WC::NO_BLOCK_TML ) ||
       
   899         !$this->_isConvertableList( $options | $WC::NO_BLOCK_TML )) {
       
   900         return ( 0, undef );
       
   901     }
       
   902     return ( $WC::BLOCK_TML, $this->_convertList( $WC::TAB ));
       
   903 }
       
   904 
       
   905 # Performs initial cleanup of the parse tree before generation. Walks the
       
   906 # tree, making parent links and removing attributes that don't add value.
       
   907 # This simplifies determining whether a node is to be kept, or flattened
       
   908 # out.
       
   909 # $opts may include $WC::VERY_CLEAN
       
   910 sub cleanNode {
       
   911     my( $this, $opts ) = @_;
       
   912     my $a;
       
   913 
       
   914     # Always delete these attrs
       
   915     foreach $a qw( lang _moz_dirty ) {
       
   916         delete $this->{attrs}->{$a}
       
   917           if( defined( $this->{attrs}->{$a} ));
       
   918     }
       
   919 
       
   920     # Delete these attrs if their value is empty
       
   921     foreach $a qw( class style ) {
       
   922         if( defined( $this->{attrs}->{$a} ) &&
       
   923               $this->{attrs}->{$a} !~ /\S/ ) {
       
   924             delete $this->{attrs}->{$a};
       
   925         }
       
   926     }
       
   927 }
       
   928 
       
   929 ######################################################
       
   930 # Handlers for different HTML tag types. Each handler returns
       
   931 # a pair (flags,text) containing the result of the expansion.
       
   932 #
       
   933 # There are four ways of handling a tag:
       
   934 # 1. Return (0,undef) which will cause the tag to be output
       
   935 #    as HTML tags.
       
   936 # 2. Return _flatten which will cause the tag to be ignored,
       
   937 #    but the content expanded
       
   938 # 3. Return (0, '') which will cause the tag not to be output
       
   939 # 4. Something else more complex
       
   940 #
       
   941 # Note that tags like TFOOT and DT are handled inside the table
       
   942 # and list processors.
       
   943 # They only have handler methods in case the tag is seen outside
       
   944 # the content of a table or list. In this case they are usually
       
   945 # simply removed from the output.
       
   946 #
       
   947 sub _handleA {
       
   948     my( $this, $options ) = @_;
       
   949 
       
   950     my( $flags, $text ) = $this->_flatten( $options | $WC::NO_BLOCK_TML );
       
   951     if( $text && $text =~ /\S/ && $this->{attrs}->{href}) {
       
   952         # there's text and an href
       
   953         my $href = $this->{attrs}->{href};
       
   954         # decode URL params in the href
       
   955         $href =~ s/%([0-9A-F]{2})/chr(hex($1))/gei;
       
   956         if( $this->{context} && $this->{context}->{rewriteURL} ) {
       
   957             $href = $this->{context}->{rewriteURL}->(
       
   958                 $href, $this->{context} );
       
   959         }
       
   960         $reww = TWiki::Func::getRegularExpression('wikiWordRegex')
       
   961           unless $reww;
       
   962         my $nop = ($options & $WC::NOP_ALL) ? '<nop>' : '';
       
   963         if( $href =~ /^(\w+\.)?($reww)(#\w+)?$/ ) {
       
   964             my $web = $1 || '';
       
   965             my $topic = $2;
       
   966             my $anchor = $3 || '';
       
   967             my $cleantext = $text;
       
   968             $cleantext =~ s/<nop>//g;
       
   969             $cleantext =~ s/^$this->{context}->{web}\.//;
       
   970 
       
   971             # if the clean text is the known topic we can ignore it
       
   972             if( ($cleantext eq $href || $href =~ /\.$cleantext$/)) {
       
   973                 return (0, $WC::CHECK1.$nop.$web.$topic.$anchor.$WC::CHECK2);
       
   974             }
       
   975         }
       
   976 
       
   977         if( $href =~ /${WC::PROTOCOL}[^?]*$/ && $text eq $href ) {
       
   978             return (0, $WC::CHECK1.$nop.$text.$WC::CHECK2);
       
   979         }
       
   980         if( $text eq $href ) {
       
   981             return (0, $WC::CHECKw.'['.$nop.'['.$href.']]' );
       
   982         }
       
   983         return (0, $WC::CHECKw.'['.$nop.'['.$href.']['.$text.
       
   984                   ']]' );
       
   985     } elsif( $this->{attrs}->{name} ) {
       
   986         # allow anchors to be expanded normally. This won't generate
       
   987         # wiki anchors, but it's a small price to pay - it would
       
   988         # be too complex to generate wiki anchors, given their
       
   989         # line-oriented nature.
       
   990         return (0, undef);
       
   991     }
       
   992     # Otherwise generate nothing
       
   993     return (0, '');
       
   994 }
       
   995 
       
   996 sub _handleABBR { return _flatten( @_ ); };
       
   997 sub _handleACRONYM { return _flatten( @_ ); };
       
   998 sub _handleADDRESS { return _flatten( @_ ); };
       
   999 
       
  1000 sub _handleB { return _emphasis( @_, '*' ); }
       
  1001 sub _handleBASE { return ( 0, '' ); }
       
  1002 sub _handleBASEFONT { return ( 0, '' ); }
       
  1003 
       
  1004 sub _handleBIG { return( 0, '' ); };
       
  1005 # BLOCKQUOTE
       
  1006 sub _handleBODY { return _flatten( @_ ); }
       
  1007 # BUTTON
       
  1008 
       
  1009 sub _handleBR {
       
  1010     my( $this, $options ) = @_;
       
  1011     my($f, $kids ) = $this->_flatten( $options );
       
  1012     # Test conditions for keeping a <br>. These are:
       
  1013     # 1. We haven't explicitly been told to convert to \n (by BR2NL)
       
  1014     # 2. We have been told that block TML is illegal
       
  1015     # 3. The previous node is an inline element node or text node
       
  1016     # 4. The next node is an inline element or text node
       
  1017     my $sep = "\n";
       
  1018     if ($options & $WC::BR2NL) {
       
  1019     } elsif ($options & $WC::NO_BLOCK_TML) {
       
  1020         $sep = '<br />';
       
  1021     } elsif ($this->prevIsInline()) {
       
  1022         if ($this->isInline()) {
       
  1023             # Both <br> and </br> cause a NL
       
  1024             # if this is empty, look at next
       
  1025             if ($kids !~ /^[\000-\037]*$/ &&
       
  1026                   $kids !~ /^[\000-\037]*$WC::NBBR/ ||
       
  1027                     $this->nextIsInline()) {
       
  1028                 $sep = '<br />';
       
  1029             }
       
  1030         }
       
  1031     }
       
  1032     return ($f, $sep.$kids);
       
  1033 }
       
  1034 
       
  1035 sub _handleCAPTION { return (0, '' ); }
       
  1036 # CENTER
       
  1037 # CITE
       
  1038 
       
  1039 sub _handleCODE { return _emphasis( @_, '=' ); }
       
  1040 
       
  1041 sub _handleCOL { return _flatten( @_ ); };
       
  1042 sub _handleCOLGROUP { return _flatten( @_ ); };
       
  1043 sub _handleDD { return _flatten( @_ ); };
       
  1044 sub _handleDEL { return _flatten( @_ ); };
       
  1045 sub _handleDFN { return _flatten( @_ ); };
       
  1046 # DIR
       
  1047 
       
  1048 sub _handleDIV { return _handleP(@_); }
       
  1049 
       
  1050 sub _handleDL { return _LIST( @_ ); }
       
  1051 sub _handleDT { return _flatten( @_ ); };
       
  1052 
       
  1053 sub _handleEM { return _emphasis( @_, '_' ); }
       
  1054 
       
  1055 sub _handleFIELDSET { return _flatten( @_ ); };
       
  1056 
       
  1057 sub _handleFONT {
       
  1058     my( $this, $options ) = @_;
       
  1059 
       
  1060     my %atts = %{$this->{attrs}};
       
  1061     # Try to convert font tags into %COLOUR%..%ENDCOLOR%
       
  1062     # First extract the colour
       
  1063     my $colour;
       
  1064     if ($atts{style}) {
       
  1065         my $style = $atts{style};
       
  1066         if ($style =~ s/(^|\s|;)color\s*:\s*([^\s;]+);?//i) {
       
  1067             $colour = $2;
       
  1068             delete $atts{style} if $style =~ /^[\s;]*$/;
       
  1069         }
       
  1070     }
       
  1071     if ($atts{color}) {
       
  1072         $colour = $atts{color};
       
  1073         delete $atts{color};
       
  1074     }
       
  1075     # The presence of the class forces it to be converted to a
       
  1076     # TWiki variable
       
  1077     if (!_removeClass(\%atts, 'WYSIWYG_COLOUR')) {
       
  1078         delete $atts{class};
       
  1079         if (scalar(keys %atts) > 0 || !$colour || $colour !~ /^([a-z]+|#[0-9A-Fa-f]{6})$/i) {
       
  1080             return ( 0, undef );
       
  1081         }
       
  1082     }
       
  1083     # OK, just the colour
       
  1084     $colour = $WC::KNOWN_COLOUR{uc($colour)};
       
  1085     if (!$colour) {
       
  1086         # Not a recognised colour
       
  1087         return ( 0, undef );
       
  1088     }
       
  1089     my( $f, $kids ) = $this->_flatten( $options );
       
  1090     return ($f, '%'.uc($colour).'%'.$kids.'%ENDCOLOR%');
       
  1091 };
       
  1092 
       
  1093 # FORM
       
  1094 sub _handleFRAME    { return _flatten( @_ ); };
       
  1095 sub _handleFRAMESET { return _flatten( @_ ); };
       
  1096 sub _handleHEAD     { return ( 0, '' ); }
       
  1097 
       
  1098 sub _handleHR {
       
  1099     my( $this, $options ) = @_;
       
  1100 
       
  1101     my( $f, $kids ) = $this->_flatten( $options );
       
  1102     return ($f, '<hr />'.$kids) if( $options & $WC::NO_BLOCK_TML );
       
  1103     return ( $f | $WC::BLOCK_TML, $WC::CHECKn.'---'.$WC::CHECKn.$kids);
       
  1104 }
       
  1105 
       
  1106 sub _handleHTML   { return _flatten( @_ ); }
       
  1107 sub _handleH1     { return _H( @_, 1 ); }
       
  1108 sub _handleH2     { return _H( @_, 2 ); }
       
  1109 sub _handleH3     { return _H( @_, 3 ); }
       
  1110 sub _handleH4     { return _H( @_, 4 ); }
       
  1111 sub _handleH5     { return _H( @_, 5 ); }
       
  1112 sub _handleH6     { return _H( @_, 6 ); }
       
  1113 sub _handleI      { return _emphasis( @_, '_' ); }
       
  1114 
       
  1115 sub _handleIMG {
       
  1116     my( $this, $options ) = @_;
       
  1117 
       
  1118     # Hack out mce_src, which is TinyMCE-specific and causes indigestion
       
  1119     # when the topic is reloaded
       
  1120     delete $this->{attrs}->{mce_src} if defined $this->{attrs}->{mce_src};
       
  1121 
       
  1122     if( $this->{context} && $this->{context}->{rewriteURL} ) {
       
  1123         my $href = $this->{attrs}->{src};
       
  1124         # decode URL params in the href
       
  1125         $href =~ s/%([0-9A-F]{2})/chr(hex($1))/gei;
       
  1126         $href = &{$this->{context}->{rewriteURL}}(
       
  1127             $href, $this->{context} );
       
  1128         $this->{attrs}->{src} = $href;
       
  1129     }
       
  1130 
       
  1131     return (0, undef) unless $this->{context} &&
       
  1132       defined $this->{context}->{convertImage};
       
  1133 
       
  1134     my $alt = &{$this->{context}->{convertImage}}(
       
  1135         $this->{attrs}->{src},
       
  1136         $this->{context} );
       
  1137     if( $alt ) {
       
  1138         return (0, $alt);
       
  1139     }
       
  1140     return ( 0, undef );
       
  1141 }
       
  1142 
       
  1143 # INPUT
       
  1144 # INS
       
  1145 # ISINDEX
       
  1146 sub _handleKBD      { return _handleTT( @_ ); }
       
  1147 # LABEL
       
  1148 # LI
       
  1149 sub _handleLINK     { return( 0, '' ); };
       
  1150 # MAP
       
  1151 # MENU
       
  1152 sub _handleMETA     { return ( 0, '' ); }
       
  1153 sub _handleNOFRAMES { return ( 0, '' ); }
       
  1154 sub _handleNOSCRIPT { return ( 0, '' ); }
       
  1155 sub _handleOL       { return _LIST( @_ ); }
       
  1156 # OPTGROUP
       
  1157 # OPTION
       
  1158 
       
  1159 sub _handleP {
       
  1160     my( $this, $options ) = @_;
       
  1161 
       
  1162     if ($this->hasClass('TMLverbatim')) {
       
  1163         return $this->_verbatim('verbatim', $options);
       
  1164     }
       
  1165     if ($this->hasClass('WYSIWYG_STICKY')) {
       
  1166         return $this->_verbatim('sticky', $options);
       
  1167     }
       
  1168 
       
  1169     my( $f, $kids ) = $this->_flatten( $options );
       
  1170     return ($f, '<p>'.$kids.'</p>') if( $options & $WC::NO_BLOCK_TML );
       
  1171     my $pre = '';
       
  1172     if ($this->prevIsInline()) {
       
  1173         $pre = $WC::NBBR;
       
  1174     }
       
  1175     return ($f | $WC::BLOCK_TML, $pre.$WC::NBBR.$kids.$WC::NBBR);
       
  1176 }
       
  1177 
       
  1178 # PARAM
       
  1179 
       
  1180 sub _handlePRE {
       
  1181     my( $this, $options ) = @_;
       
  1182 
       
  1183     my $tag = 'pre';
       
  1184     if( $this->hasClass('TMLverbatim')) {
       
  1185         return $this->_verbatim('verbatim', $options);
       
  1186     }
       
  1187     if ($this->hasClass('WYSIWYG_STICKY')) {
       
  1188         return $this->_verbatim('sticky', $options);
       
  1189     }
       
  1190     unless( $options & $WC::NO_BLOCK_TML ) {
       
  1191         my( $flags, $text ) = $this->_flatten(
       
  1192             $options | $WC::NO_BLOCK_TML | $WC::BR2NL | $WC::KEEP_WS );
       
  1193         my $p = _htmlParams( $this->{attrs}, $options);
       
  1194         return ($WC::BLOCK_TML, "<$tag$p>$text</$tag>");
       
  1195     }
       
  1196     return ( 0, undef );
       
  1197 }
       
  1198 
       
  1199 sub _handleQ    { return _flatten( @_ ); };
       
  1200 # S
       
  1201 sub _handleSAMP { return _handleTT( @_ ); };
       
  1202 # SCRIPT
       
  1203 # SELECT
       
  1204 # SMALL
       
  1205 
       
  1206 sub _handleSPAN {
       
  1207     my( $this, $options ) = @_;
       
  1208 
       
  1209     my %atts = %{$this->{attrs}};
       
  1210     if (_removeClass(\%atts, 'TMLverbatim')) {
       
  1211         return $this->_verbatim('verbatim', $options);
       
  1212     }
       
  1213     if (_removeClass(\%atts, 'WYSIWYG_STICKY')) {
       
  1214         return $this->_verbatim('sticky', $options);
       
  1215     }
       
  1216 
       
  1217     if( _removeClass(\%atts, 'WYSIWYG_LINK')) {
       
  1218         $options |= $WC::NO_BLOCK_TML;
       
  1219     }
       
  1220 
       
  1221     if( _removeClass(\%atts, 'WYSIWYG_TT')) {
       
  1222         return _emphasis( @_, '=' );
       
  1223     }
       
  1224 
       
  1225     # Remove all other classes
       
  1226     delete $atts{class};
       
  1227 
       
  1228     if( $options & $WC::VERY_CLEAN ) {
       
  1229         # remove style attribute if cleaning aggressively. Have to do this
       
  1230         # because TWiki generates these.
       
  1231         delete $atts{style} if defined $atts{style}
       
  1232     }
       
  1233 
       
  1234     # ignore the span tag if there are no other attrs
       
  1235     if( scalar(keys %atts) == 0 ) {
       
  1236         return $this->_flatten( $options );
       
  1237     }
       
  1238 
       
  1239     # otherwise use the default generator.
       
  1240     return (0, undef);
       
  1241 }
       
  1242 
       
  1243 # STRIKE
       
  1244 
       
  1245 sub _handleSTRONG { return _emphasis( @_, '*' ); }
       
  1246 
       
  1247 sub _handleSTYLE { return ( 0, '' ); }
       
  1248 # SUB
       
  1249 # SUP
       
  1250 
       
  1251 sub _handleTABLE {
       
  1252     my( $this, $options ) = @_;
       
  1253     return ( 0, undef) if( $options & $WC::NO_BLOCK_TML );
       
  1254 
       
  1255     # Should really look at the table attrs, but to heck with it
       
  1256 
       
  1257     return ( 0, undef ) if( $options & $WC::NO_BLOCK_TML );
       
  1258 
       
  1259     my @table;
       
  1260     return ( 0, undef ) unless
       
  1261       $this->_isConvertableTable( $options | $WC::NO_BLOCK_TML, \@table );
       
  1262 
       
  1263     my $maxrow = 0;
       
  1264     my $row;
       
  1265     foreach $row ( @table ) {
       
  1266         my $rw = scalar( @$row );
       
  1267         $maxrow = $rw if( $rw > $maxrow );
       
  1268     }
       
  1269     foreach $row ( @table ) {
       
  1270         while( scalar( @$row ) < $maxrow) {
       
  1271             push( @$row, '' );
       
  1272         }
       
  1273     }
       
  1274     my $text = $WC::CHECKn;
       
  1275     foreach $row ( @table ) {
       
  1276         # isConvertableTableRow has already formatted the cell
       
  1277         $text .= $WC::CHECKn.'|'.join('|', @$row).'|'.$WC::CHECKn;
       
  1278     }
       
  1279 
       
  1280     return ( $WC::BLOCK_TML, $text );
       
  1281 }
       
  1282 
       
  1283 # TBODY
       
  1284 # TD
       
  1285 
       
  1286 # TEXTAREA {
       
  1287 # TFOOT
       
  1288 # TH
       
  1289 # THEAD
       
  1290 sub _handleTITLE { return (0, '' ); }
       
  1291 # TR
       
  1292 sub _handleTT    { return _handleCODE( @_ ); }
       
  1293 # U
       
  1294 sub _handleUL    { return _LIST( @_ ); }
       
  1295 sub _handleVAR   { return ( 0, '' ); }
       
  1296 
       
  1297 1;