lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/WC.pm
changeset 0 414e01d06fd5
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/WC.pm	Sat Jan 26 15:50:53 2008 +0100
     1.3 @@ -0,0 +1,150 @@
     1.4 +# Copyright (C) 2005 ILOG http://www.ilog.fr
     1.5 +# and TWiki Contributors. All Rights Reserved. TWiki Contributors
     1.6 +# are listed in the AUTHORS file in the root of this distribution.
     1.7 +# NOTE: Please extend that file, not this notice.
     1.8 +#
     1.9 +# This program is free software; you can redistribute it and/or
    1.10 +# modify it under the terms of the GNU General Public License
    1.11 +# as published by the Free Software Foundation; either version 2
    1.12 +# of the License, or (at your option) any later version. For
    1.13 +# more details read LICENSE in the root of the TWiki distribution.
    1.14 +#
    1.15 +# This program is distributed in the hope that it will be useful,
    1.16 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.17 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    1.18 +#
    1.19 +# As per the GPL, removal of this notice is prohibited.
    1.20 +
    1.21 +=pod
    1.22 +
    1.23 +---+ package WC
    1.24 +
    1.25 +Constants
    1.26 +
    1.27 +=cut
    1.28 +
    1.29 +package WC;
    1.30 +
    1.31 +=pod
    1.32 +
    1.33 +---++ Generator flags
    1.34 +| $NO_TML | Flag that gets passed _down_ into generator functions. Constrains output to HTML only. |
    1.35 +| $NO_BLOCK_TML | Flag that gets passed _down_ into generator functions. Don't generate block TML e.g. tables, lists |
    1.36 +| $NOP_ALL | Flag that gets passed _down_ into generator functions. NOP all variables and WikiWords. |
    1.37 +| $BLOCK_TML | Flag passed up from generator functions; set if expansion includes block TML |
    1.38 +| $VERY_CLEAN | Flag passed to indicate that HTML must be aggressively cleaned (unrecognised or unuseful tags stripped out) |
    1.39 +| $BR2NL | Flag set to force BR tags to be converted to newlines. |
    1.40 +| $KEEP_WS | Set to force the generator to keep all whitespace. Otherwise whitespace gets collapsed (as it is when HTML is rendered) |
    1.41 +| $PROTECTED | In a block marked as PROTECTED |
    1.42 +| $KEEP_ENTITIES | Don't decode HTML entities |
    1.43 +
    1.44 +=cut
    1.45 +
    1.46 +our ($NO_HTML, $NO_TML, $NO_BLOCK_TML, $NOP_ALL, $BLOCK_TML, $BR2NL);
    1.47 +our ($CHECKn, $CHECKw, $CHECKs, $CHECK1, $CHECK2);    # $CHECK1/$CHECK2 are assigned below, so declare them too
    1.48 +our ($NBSP, $NBBR, $TAB, $PON, $POFF, $WS, $VERY_CLEAN, $PROTECTED, $KEEP_ENTITIES, $KEEP_WS);
    1.49 +# Generator flags: one distinct bit each (presumably OR-ed together by callers; confirm).
    1.50 +$NO_HTML       = 1 << 0;    # not in the POD table above; presumably the converse of $NO_TML
    1.51 +$NO_TML        = 1 << 1;    # constrain output to HTML only
    1.52 +$NO_BLOCK_TML  = 1 << 2;    # don't generate block TML (tables, lists)
    1.53 +$NOP_ALL       = 1 << 3;    # NOP all variables and WikiWords
    1.54 +$VERY_CLEAN    = 1 << 4;    # aggressively clean the HTML
    1.55 +$BR2NL         = 1 << 5;    # force BR tags to be converted to newlines
    1.56 +$KEEP_WS       = 1 << 6;    # keep all whitespace instead of collapsing it
    1.57 +$PROTECTED     = 1 << 7;    # inside a block marked as PROTECTED
    1.58 +$KEEP_ENTITIES = 1 << 8;    # don't decode HTML entities
    1.59 +# NOTE(review): $BLOCK_TML reuses the $NO_BLOCK_TML bit (POD: one is passed down, the other up) - confirm intended.
    1.60 +$BLOCK_TML    = $NO_BLOCK_TML;
    1.61 +
    1.62 +my %cc = (    # marker name => code point of its control character (all below ' ')
    1.63 +    NBSP   => 14,    # space that must not be broken
    1.64 +    NBBR   => 15,    # a paragraph break is required here
    1.65 +    CHECKn => 16,    # a newline (\n or $NBBR) must be adjacent
    1.66 +    CHECKs => 17,    # a space character (' ' or $NBSP) must be adjacent
    1.67 +    CHECKw => 18,    # whitespace (\s|$NBBR|$NBSP) must be adjacent
    1.68 +    CHECK1 => 19,    # a wiki-word starts here
    1.69 +    CHECK2 => 20,    # a wiki-word ends here
    1.70 +    TAB    => 21,    # one level of list indent
    1.71 +    PON    => 22,    # protection switched on
    1.72 +    POFF   => 23,    # protection switched off
    1.73 +);
    1.74 +
    1.75 +=pod
    1.76 +
    1.77 +---++ Forced whitespace
    1.78 +These single-character shortcuts are used to assert the presence of
    1.79 +non-breaking whitespace.
    1.80 +
    1.81 +| $NBSP | Non-breaking space |
    1.82 +| $NBBR | Non-breaking linebreak |
    1.83 +
    1.84 +=cut
    1.85 +
    1.86 +($NBSP, $NBBR) =    # one-character strings built from the %cc code points
    1.87 +    map { chr($_) } @cc{qw(NBSP NBBR)};
    1.88 +
    1.89 +=pod
    1.90 +
    1.91 +---++ Inline Assertions
    1.92 +The generator works by expanding to "decorated" text, where the decorators
    1.93 +are characters below ' '. These characters act to express format
    1.94 +requirements - for example, the need to have a newline before some text,
    1.95 +or the need for a space. The generator sticks these format requirements into
    1.96 +the text stream, and they are then optimised down to the minimum in a post-
    1.97 +process.
    1.98 +
    1.99 +| $CHECKn | there must be an adjacent newline (\n or $NBBR) |
   1.100 +| $CHECKs | there must be an adjacent space (' ' or $NBSP) |
   1.101 +| $CHECKw | There must be adjacent whitespace (\s or $NBBR or $NBSP) |
   1.102 +| $CHECK1 | Marks the start of an inline wikiword. |
   1.103 +| $CHECK2 | Marks the end of an inline wikiword. |
   1.104 +| $TAB    | Shorthand for an indent level in a list |
   1.105 +
   1.106 +=cut
   1.107 +
   1.108 +# Turn the remaining %cc code points into their one-character marker strings.
   1.109 +($CHECKn, $CHECKs, $CHECKw) = map { chr($_) } @cc{qw(CHECKn CHECKs CHECKw)};
   1.110 +($CHECK1, $CHECK2)          = map { chr($_) } @cc{qw(CHECK1 CHECK2)};
   1.111 +($TAB, $PON, $POFF)         = map { chr($_) } @cc{qw(TAB PON POFF)};
   1.112 +
   1.113 +# $WS matches a (possibly empty) run of real whitespace, forced-whitespace
   1.114 +# markers, CHECK* assertion markers and $TAB.  $PON and $POFF are not in
   1.115 +# the character class.
   1.116 +$WS     = qr/[$NBSP$NBBR$CHECKn$CHECKs$CHECKw$CHECK1$CHECK2$TAB\s]*/;
   1.117 +
   1.118 +=pod
   1.119 +
   1.120 +---++ REs
   1.121 +REs for matching delimiters of wikiwords, must be consistent with TML2HTML.pm
   1.122 +
   1.123 +| $STARTWW | Zero-width match for the start of a wikiword |
   1.124 +| $ENDWW | Zero-width match for the end of a wikiword |
   1.125 +| $PROTOCOL | match for a valid URL protocol e.g. http, mailto etc |
   1.126 +
   1.127 +=cut
   1.128 +
   1.129 +sub debugEncode {    # returns a copy of the argument with every %cc marker char spelled out as %NAME
   1.130 +    my ($text) = @_;
   1.131 +    for my $name (keys %cc) {
   1.132 +        my $marker = chr $cc{$name};
   1.133 +        $text =~ s/$marker/\%$name/g;    # e.g. chr(14) becomes "%NBSP"
   1.134 +    }
   1.135 +    return $text;
   1.136 +}
   1.137 +
   1.138 +# Tag lookup tables, keyed by tag name. Both are package globals (hashes).
   1.139 +our (%SELFCLOSING, %EMPHTAG);    # was $EMPHTAG: wrong sigil left the hash below undeclared
   1.140 +
   1.141 +%SELFCLOSING = (img => 1);    # tags flagged as self-closing; only img is listed
   1.142 +
   1.143 +# Emphasis tags mapped to the canonical name they are renamed to
   1.144 +%EMPHTAG = (
   1.145 +    b      => 'strong',
   1.146 +    i      => 'em',
   1.147 +    tt     => 'code',
   1.148 +    strong => 'strong',    # canonical names map to themselves
   1.149 +    em     => 'em',
   1.150 +    code   => 'code',
   1.151 +);
   1.152 +
   1.153 +1;