lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/WC.pm
changeset 0 414e01d06fd5
equal deleted inserted replaced
-1:000000000000 0:414e01d06fd5
       
     1 # Copyright (C) 2005 ILOG http://www.ilog.fr
       
     2 # and TWiki Contributors. All Rights Reserved. TWiki Contributors
       
     3 # are listed in the AUTHORS file in the root of this distribution.
       
     4 # NOTE: Please extend that file, not this notice.
       
     5 #
       
     6 # This program is free software; you can redistribute it and/or
       
     7 # modify it under the terms of the GNU General Public License
       
     8 # as published by the Free Software Foundation; either version 2
       
     9 # of the License, or (at your option) any later version. For
       
    10 # more details read LICENSE in the root of the TWiki distribution.
       
    11 #
       
    12 # This program is distributed in the hope that it will be useful,
       
    13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
       
    15 #
       
    16 # As per the GPL, removal of this notice is prohibited.
       
    17 
       
    18 =pod
       
    19 
       
    20 ---+ package WC
       
    21 
       
    22 Constants
       
    23 
       
    24 =cut
       
    25 
       
    26 package WC;
       
    27 
       
    28 =pod
       
    29 
       
    30 ---++ Generator flags
       
    31 | $NO_TML | Flag that gets passed _down_ into generator functions. Constrains output to HTML only. |
       
    32 | $NO_BLOCK_TML | Flag that gets passed _down_ into generator functions. Don't generate block TML e.g. tables, lists |
       
    33 | $NOP_ALL | Flag that gets passed _down_ into generator functions. NOP all variables and WikiWords. |
       
    34 | $BLOCK_TML | Flag passed up from generator functions; set if expansion includes block TML |
       
    35 | $VERY_CLEAN | Flag passed to indicate that HTML must be aggressively cleaned (unrecognised or unuseful tags stripped out) |
       
    36 | $BR2NL | Flag set to force BR tags to be converted to newlines. |
       
    37 | $KEEP_WS | Set to force the generator to keep all whitespace. Otherwise whitespace gets collapsed (as it is when HTML is rendered) |
       
    38 | $PROTECTED | In a block marked as PROTECTED |
       
    39 | $KEEP_ENTITIES | Don't decode HTML entities |
       
    40 
       
    41 =cut
       
    42 
       
    # Package variables published by this module, grouped by role.
    # Generator flags:
    our ($NO_HTML, $NO_TML, $NO_BLOCK_TML, $NOP_ALL, $VERY_CLEAN);
    our ($BR2NL, $KEEP_WS, $PROTECTED, $KEEP_ENTITIES, $BLOCK_TML);
    # Marker characters and the whitespace pattern (assigned further down):
    our ($NBSP, $NBBR, $CHECKn, $CHECKs, $CHECKw, $TAB, $PON, $POFF, $WS);

    # Every flag is a distinct power of two, lowest bit first.
    # NOTE(review): $NO_HTML has no row in the "Generator flags" table in the
    # POD above; its meaning should be confirmed and documented there.
    $NO_HTML       = 0x001;
    $NO_TML        = 0x002;
    $NO_BLOCK_TML  = 0x004;
    $NOP_ALL       = 0x008;
    $VERY_CLEAN    = 0x010;
    $BR2NL         = 0x020;
    $KEEP_WS       = 0x040;
    $PROTECTED     = 0x080;
    $KEEP_ENTITIES = 0x100;

    # $BLOCK_TML deliberately carries the same bit value as $NO_BLOCK_TML.
    $BLOCK_TML = $NO_BLOCK_TML;
       
    58 
       
    # Code points (all below ' ') of the decorator characters used in the
    # generated text stream, keyed by marker name.
    my %cc = (
        # unbreakable space
        NBSP   => 14,
        # para break required
        NBBR   => 15,
        # require adjacent newline (\n or $NBBR)
        CHECKn => 16,
        # require adjacent space character (' ' or $NBSP)
        CHECKs => 17,
        # require adjacent whitespace (\s|$NBBR|$NBSP)
        CHECKw => 18,
        # start of wiki-word
        CHECK1 => 19,
        # end of wiki-word
        CHECK2 => 20,
        # list indent
        TAB    => 21,
        # protect on
        PON    => 22,
        # protect off
        POFF   => 23,
    );
       
    71 
       
    72 =pod
       
    73 
       
    74 ---++ Forced whitespace
       
    75 These single-character shortcuts are used to assert the presence of
       
    76 non-breaking whitespace.
       
    77 
       
    78 | $NBSP | Non-breaking space |
       
    79 | $NBBR | Non-breaking linebreak |
       
    80 
       
    81 =cut
       
    82 
       
    # Turn the NBSP and NBBR code points from %cc into the one-character
    # strings used as forced-whitespace markers.
    ($NBSP, $NBBR) = map { chr($cc{$_}) } qw(NBSP NBBR);
       
    85 
       
    86 =pod
       
    87 
       
    88 ---++ Inline Assertions
       
    89 The generator works by expanding to "decorated" text, where the decorators
       
    90 are characters below ' '. These characters act to express format
       
    91 requirements - for example, the need to have a newline before some text,
       
    92 or the need for a space. The generator sticks these format requirements into
       
    93 the text stream, and they are then optimised down to the minimum in a post-
       
    94 process.
       
    95 
       
    96 | $CHECKn | there must be an adjacent newline (\n or $NBBR) |
       
    97 | $CHECKs | there must be an adjacent space (' ' or $NBSP) |
       
    98 | $CHECKw | There must be adjacent whitespace (\s or $NBBR or $NBSP) |
       
    99 | $CHECK1 | Marks the start of an inline wikiword. |
       
   100 | $CHECK2 | Marks the end of an inline wikiword. |
       
   101 | $TAB    | Shorthand for an indent level in a list |
       
   102 
       
   103 =cut
       
   104 
       
   # $CHECK1 and $CHECK2 are assigned here and interpolated into $WS, but the
   # file's "our" lists never mention them. Declare them so they are declared
   # package variables like every other marker (and so the module can compile
   # under "use strict").
   our ($CHECK1, $CHECK2);

   # Each marker is the single character whose code point is listed in %cc.
   $CHECKn = chr($cc{CHECKn});
   $CHECKs = chr($cc{CHECKs});
   $CHECKw = chr($cc{CHECKw});
   $CHECK1 = chr($cc{CHECK1});
   $CHECK2 = chr($cc{CHECK2});
   $TAB    = chr($cc{TAB});
   $PON    = chr($cc{PON});
   $POFF   = chr($cc{POFF});

   # Matches a (possibly empty) run of real whitespace and/or the markers
   # listed in the character class.
   # NOTE(review): $PON and $POFF are not part of the class - presumably
   # intentional; confirm against the users of $WS.
   $WS     = qr/[$NBSP$NBBR$CHECKn$CHECKs$CHECKw$CHECK1$CHECK2$TAB\s]*/;
       
   114 
       
   115 =pod
       
   116 
       
   117 ---++ REs
       
   118 REs for matching delimiters of wikiwords, must be consistent with TML2HTML.pm
       
   119 
       
   120 | $STARTWW | Zero-width match for the start of a wikiword |
       
   121 | $ENDWW | Zero-width match for the end of a wikiword |
       
   122 | $PROTOCOL | match for a valid URL protocol e.g. http, mailto etc |
       
   123 
       
   124 =cut
       
   125 
       
   # debugEncode($string) -> $encoded
   #
   # Returns a copy of $string in which every control character listed in %cc
   # has been replaced by '%' followed by the name of its %cc key (for example
   # chr(14) becomes "%NBSP"), so decorated text can be read by a human.
   sub debugEncode {
       my ($text) = @_;
       # The markers are distinct characters and the replacements contain no
       # control characters, so the order of the substitutions is irrelevant.
       for my $name (keys %cc) {
           my $marker = chr($cc{$name});
           $text =~ s/$marker/\%$name/g;
       }
       return $text;
   }
       
   134 
       
   # Maps of tag types.
   # Both maps are hashes; %EMPHTAG was previously declared as the scalar
   # $EMPHTAG, which left the hash populated below undeclared.
   our (%SELFCLOSING, %EMPHTAG);

   # Tags flagged as self-closing.
   %SELFCLOSING = (img => 1);

   # Map that specifies tags to be renamed to a canonical name
   %EMPHTAG = (
       b      => 'strong',
       i      => 'em',
       tt     => 'code',
       strong => 'strong',
       em     => 'em',
       code   => 'code',
   );
       
   149 
       
   150 1;