lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/WC.pm
author Colas Nahaboo <colas@nahaboo.net>
Sat, 26 Jan 2008 15:50:53 +0100
changeset 0 414e01d06fd5
permissions -rw-r--r--
RELEASE 4.2.0 freetown
     1 # Copyright (C) 2005 ILOG http://www.ilog.fr
     2 # and TWiki Contributors. All Rights Reserved. TWiki Contributors
     3 # are listed in the AUTHORS file in the root of this distribution.
     4 # NOTE: Please extend that file, not this notice.
     5 #
     6 # This program is free software; you can redistribute it and/or
     7 # modify it under the terms of the GNU General Public License
     8 # as published by the Free Software Foundation; either version 2
     9 # of the License, or (at your option) any later version. For
    10 # more details read LICENSE in the root of the TWiki distribution.
    11 #
    12 # This program is distributed in the hope that it will be useful,
    13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
    14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    15 #
    16 # As per the GPL, removal of this notice is prohibited.
    17 
    18 =pod
    19 
    20 ---+ package WC
    21 
    22 Constants
    23 
    24 =cut
    25 
    26 package WC;
    27 
    28 =pod
    29 
    30 ---++ Generator flags
    31 | $NO_TML | Flag that gets passed _down_ into generator functions. Constrains output to HTML only. |
    32 | $NO_BLOCK_TML | Flag that gets passed _down_ into generator functions. Don't generate block TML e.g. tables, lists |
    33 | $NOP_ALL | Flag that gets passed _down_ into generator functions. NOP all variables and WikiWords. |
    34 | $BLOCK_TML | Flag passed up from generator functions; set if expansion includes block TML |
    35 | $VERY_CLEAN | Flag passed to indicate that HTML must be aggressively cleaned (unrecognised or unuseful tags stripped out) |
    36 | $BR2NL | Flag set to force BR tags to be converted to newlines. |
    37 | $KEEP_WS | Set to force the generator to keep all whitespace. Otherwise whitespace gets collapsed (as it is when HTML is rendered) |
    38 | $PROTECTED | In a block marked as PROTECTED |
    39 | $KEEP_ENTITIES | Don't decode HTML entities |
    40 
    41 =cut
    42 
    43 our ($NO_TML, $NO_HTML, $NO_BLOCK_TML, $NOP_ALL, $BLOCK_TML, $BR2NL);
    44 our ($CHECKn, $CHECKw, $CHECKs, $NBSP, $NBBR, $TAB, $PON, $POFF, $WS);
    45 our ($VERY_CLEAN, $PROTECTED, $KEEP_ENTITIES, $KEEP_WS);
    46 
    47 $NO_HTML       = 1 << 0;
    48 $NO_TML        = 1 << 1;
    49 $NO_BLOCK_TML  = 1 << 2;
    50 $NOP_ALL       = 1 << 3;
    51 $VERY_CLEAN    = 1 << 4;
    52 $BR2NL         = 1 << 5;
    53 $KEEP_WS       = 1 << 6;
    54 $PROTECTED     = 1 << 7;
    55 $KEEP_ENTITIES = 1 << 8;
    56 
    57 $BLOCK_TML    = $NO_BLOCK_TML;
    58 
    59 my %cc = (
    60     'NBSP'   => 14, # unbreakable space
    61     'NBBR'   => 15, # para break required
    62     'CHECKn' => 16, # require adjacent newline (\n or $NBBR)
    63     'CHECKs' => 17, # require adjacent space character (' ' or $NBSP)
    64     'CHECKw' => 18, # require adjacent whitespace (\s|$NBBR|$NBSP)
    65     'CHECK1' => 19, # start of wiki-word
    66     'CHECK2' => 20, # end of wiki-word
    67     'TAB'    => 21, # list indent
    68     'PON'    => 22, # protect on
    69     'POFF'   => 23, # protect off
    70 );
    71 
    72 =pod
    73 
    74 ---++ Forced whitespace
    75 These single-character shortcuts are used to assert the presence of
    76 non-breaking whitespace.
    77 
    78 | $NBSP | Non-breaking space |
    79 | $NBBR | Non-breaking linebreak |
    80 
    81 =cut
    82 
    83 $NBSP   = chr($cc{NBSP});
    84 $NBBR   = chr($cc{NBBR});
    85 
    86 =pod
    87 
    88 ---++ Inline Assertions
    89 The generator works by expanding to "decorated" text, where the decorators
    90 are characters below ' '. These characters act to express format
    91 requirements - for example, the need to have a newline before some text,
     92 or the need for a space. The generator inserts these format requirements into
     93 the text stream, and they are then optimised down to the minimum in a
     94 post-process.
    95 
    96 | $CHECKn | there must be an adjacent newline (\n or $NBBR) |
    97 | $CHECKs | there must be an adjacent space (' ' or $NBSP) |
    98 | $CHECKw | There must be adjacent whitespace (\s or $NBBR or $NBSP) |
    99 | $CHECK1 | Marks the start of an inline wikiword. |
   100 | $CHECK2 | Marks the end of an inline wikiword. |
   101 | $TAB    | Shorthand for an indent level in a list |
   102 
   103 =cut
   104 
   105 $CHECKn = chr($cc{CHECKn});
   106 $CHECKs = chr($cc{CHECKs});
   107 $CHECKw = chr($cc{CHECKw});
   108 $CHECK1 = chr($cc{CHECK1});
   109 $CHECK2 = chr($cc{CHECK2});
   110 $TAB    = chr($cc{TAB});
   111 $PON    = chr($cc{PON});
   112 $POFF   = chr($cc{POFF});
   113 $WS     = qr/[$NBSP$NBBR$CHECKn$CHECKs$CHECKw$CHECK1$CHECK2$TAB\s]*/;
   114 
   115 =pod
   116 
   117 ---++ REs
   118 REs for matching delimiters of wikiwords, must be consistent with TML2HTML.pm
   119 
   120 | $STARTWW | Zero-width match for the start of a wikiword |
   121 | $ENDWW | Zero-width match for the end of a wikiword |
   122 | $PROTOCOL | match for a valid URL protocol e.g. http, mailto etc |
   123 
   124 =cut
   125 
   126 sub debugEncode {
   127     my $string = shift;
   128     while (my ($k, $v) = each %cc) {
   129         my $c = chr($v);
   130         $string =~ s/$c/\%$k/g;
   131     }
   132     return $string;
   133 }
   134 
   135 # Maps of tag types
   136 our (%SELFCLOSING, $EMPHTAG);
   137 
   138 %SELFCLOSING = (img => 1);
   139 
   140 # Map that specifies tags to be renamed to a canonical name
   141 %EMPHTAG = (
   142     b => 'strong',
   143     i => 'em',
   144     tt => 'code',
   145     strong => 'strong',
   146     em => 'em',
   147     code => 'code',
   148 );
   149 
   150 1;