lib/TWiki/Plugins/WysiwygPlugin/HTML2TML/WC.pm
author Colas Nahaboo <colas@nahaboo.net>
Sat, 26 Jan 2008 15:50:53 +0100
changeset 0 414e01d06fd5
permissions -rw-r--r--
RELEASE 4.2.0 freetown
colas@0
     1
# Copyright (C) 2005 ILOG http://www.ilog.fr
colas@0
     2
# and TWiki Contributors. All Rights Reserved. TWiki Contributors
colas@0
     3
# are listed in the AUTHORS file in the root of this distribution.
colas@0
     4
# NOTE: Please extend that file, not this notice.
colas@0
     5
#
colas@0
     6
# This program is free software; you can redistribute it and/or
colas@0
     7
# modify it under the terms of the GNU General Public License
colas@0
     8
# as published by the Free Software Foundation; either version 2
colas@0
     9
# of the License, or (at your option) any later version. For
colas@0
    10
# more details read LICENSE in the root of the TWiki distribution.
colas@0
    11
#
colas@0
    12
# This program is distributed in the hope that it will be useful,
colas@0
    13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
colas@0
    14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
colas@0
    15
#
colas@0
    16
# As per the GPL, removal of this notice is prohibited.
colas@0
    17
colas@0
    18
=pod
colas@0
    19
colas@0
    20
---+ package WC
colas@0
    21
colas@0
    22
Constants
colas@0
    23
colas@0
    24
=cut
colas@0
    25
colas@0
    26
package WC;
colas@0
    27
colas@0
    28
=pod
colas@0
    29
colas@0
    30
---++ Generator flags
colas@0
    31
| $NO_TML | Flag that gets passed _down_ into generator functions. Constrains output to HTML only. |
colas@0
    32
| $NO_BLOCK_TML | Flag that gets passed _down_ into generator functions. Don't generate block TML e.g. tables, lists |
colas@0
    33
| $NOP_ALL | Flag that gets passed _down_ into generator functions. NOP all variables and WikiWords. |
colas@0
    34
| $BLOCK_TML | Flag passed up from generator functions; set if expansion includes block TML |
colas@0
    35
| $VERY_CLEAN | Flag passed to indicate that HTML must be aggressively cleaned (unrecognised or unuseful tags stripped out) |
colas@0
    36
| $BR2NL | Flag set to force BR tags to be converted to newlines. |
colas@0
    37
| $KEEP_WS | Set to force the generator to keep all whitespace. Otherwise whitespace gets collapsed (as it is when HTML is rendered) |
colas@0
    38
| $PROTECTED | In a block marked as PROTECTED |
colas@0
    39
| $KEEP_ENTITIES | Don't decode HTML entities |
colas@0
    40
colas@0
    41
=cut
colas@0
    42
colas@0
    43
# Package globals: generator flags, control-character shortcuts and the
# whitespace RE. $CHECK1 and $CHECK2 are assigned further down in this file,
# so they are declared here together with the other inline-assertion
# characters.
our ($NO_TML, $NO_HTML, $NO_BLOCK_TML, $NOP_ALL, $BLOCK_TML, $BR2NL);
our ($CHECKn, $CHECKs, $CHECKw, $CHECK1, $CHECK2);
our ($NBSP, $NBBR, $TAB, $PON, $POFF, $WS);
our ($VERY_CLEAN, $PROTECTED, $KEEP_ENTITIES, $KEEP_WS);

# Each flag below is a distinct single bit.
$NO_HTML       = 1 << 0;
$NO_TML        = 1 << 1;
$NO_BLOCK_TML  = 1 << 2;
$NOP_ALL       = 1 << 3;
$VERY_CLEAN    = 1 << 4;
$BR2NL         = 1 << 5;
$KEEP_WS       = 1 << 6;
$PROTECTED     = 1 << 7;
$KEEP_ENTITIES = 1 << 8;

# $BLOCK_TML is given the same value as $NO_BLOCK_TML.
$BLOCK_TML     = $NO_BLOCK_TML;
colas@0
    58
colas@0
    59
# Control-character codes used for the in-stream decorators. The codes are
# the contiguous range 14..23, assigned in this order:
#   14 NBSP    unbreakable space
#   15 NBBR    para break required
#   16 CHECKn  require adjacent newline (\n or $NBBR)
#   17 CHECKs  require adjacent space character (' ' or $NBSP)
#   18 CHECKw  require adjacent whitespace (\s|$NBBR|$NBSP)
#   19 CHECK1  start of wiki-word
#   20 CHECK2  end of wiki-word
#   21 TAB     list indent
#   22 PON     protect on
#   23 POFF    protect off
my %cc;
@cc{qw(NBSP NBBR CHECKn CHECKs CHECKw CHECK1 CHECK2 TAB PON POFF)} = (14 .. 23);
colas@0
    71
colas@0
    72
=pod
colas@0
    73
colas@0
    74
---++ Forced whitespace
colas@0
    75
These single-character shortcuts are used to assert the presence of
colas@0
    76
non-breaking whitespace.
colas@0
    77
colas@0
    78
| $NBSP | Non-breaking space |
colas@0
    79
| $NBBR | Non-breaking linebreak |
colas@0
    80
colas@0
    81
=cut
colas@0
    82
colas@0
    83
# Forced-whitespace characters, built from the NBSP and NBBR codes in %cc.
($NBSP, $NBBR) = map { chr $_ } @cc{qw(NBSP NBBR)};
colas@0
    85
colas@0
    86
=pod
colas@0
    87
colas@0
    88
---++ Inline Assertions
colas@0
    89
The generator works by expanding to "decorated" text, where the decorators
colas@0
    90
are characters below ' '. These characters act to express format
colas@0
    91
requirements - for example, the need to have a newline before some text,
colas@0
    92
or the need for a space. The generator sticks these format requirements into
colas@0
    93
the text stream, and they are then optimised down to the minimum in a post-
colas@0
    94
process.
colas@0
    95
colas@0
    96
| $CHECKn | there must be an adjacent newline (\n or $NBBR) |
colas@0
    97
| $CHECKs | there must be an adjacent space (' ' or $NBSP) |
colas@0
    98
| $CHECKw | There must be adjacent whitespace (\s or $NBBR or $NBSP) |
colas@0
    99
| $CHECK1 | Marks the start of an inline wikiword. |
colas@0
   100
| $CHECK2 | Marks the end of an inline wikiword. |
colas@0
   101
| $TAB    | Shorthand for an indent level in a list |
colas@0
   102
colas@0
   103
=cut
colas@0
   104
colas@0
   105
# Inline-assertion and protection characters: each variable receives the
# single character whose code is stored under the same name in %cc.
($CHECKn, $CHECKs, $CHECKw, $CHECK1, $CHECK2, $TAB, $PON, $POFF) =
    map { chr $_ } @cc{qw(CHECKn CHECKs CHECKw CHECK1 CHECK2 TAB PON POFF)};

# Matches any run (possibly empty) of the decorator characters listed in the
# class below and ordinary whitespace. $PON and $POFF are not in the class.
$WS     = qr/[$NBSP$NBBR$CHECKn$CHECKs$CHECKw$CHECK1$CHECK2$TAB\s]*/;
colas@0
   114
colas@0
   115
=pod
colas@0
   116
colas@0
   117
---++ REs
colas@0
   118
REs for matching delimiters of wikiwords, must be consistent with TML2HTML.pm
colas@0
   119
colas@0
   120
| $STARTWW | Zero-width match for the start of a wikiword |
colas@0
   121
| $ENDWW | Zero-width match for the end of a wikiword |
colas@0
   122
| $PROTOCOL | match for a valid URL protocol e.g. http, mailto etc |
colas@0
   123
colas@0
   124
=cut
colas@0
   125
colas@0
   126
# debugEncode($string) -> $encoded
# Returns a copy of $string in which every control character listed in %cc
# is replaced by '%' followed by that character's name in %cc (for example,
# chr(14) becomes '%NBSP'). The argument itself is not modified.
sub debugEncode {
    my ($string) = @_;
    # Each pass targets a different control character and the replacement
    # text contains none of them, so the visiting order does not matter.
    for my $name (keys %cc) {
        my $char = chr $cc{$name};
        $string =~ s/$char/\%$name/g;
    }
    return $string;
}
colas@0
   134
colas@0
   135
# Maps of tag types
colas@0
   136
# Both tag maps are hashes, so both are declared with the % sigil. Declaring
# $EMPHTAG would create an unrelated scalar and leave %EMPHTAG undeclared.
our (%SELFCLOSING, %EMPHTAG);

# Tags treated as self-closing.
%SELFCLOSING = (img => 1);

# Map that specifies tags to be renamed to a canonical name
%EMPHTAG = (
    b      => 'strong',
    i      => 'em',
    tt     => 'code',
    strong => 'strong',
    em     => 'em',
    code   => 'code',
);
colas@0
   149
colas@0
   150
1;