lib/CPAN/lib/Text/Diff.pm
changeset 0 414e01d06fd5
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/CPAN/lib/Text/Diff.pm	Sat Jan 26 15:50:53 2008 +0100
     1.3 @@ -0,0 +1,725 @@
     1.4 +package Text::Diff;
     1.5 +
     1.6 +$VERSION = 0.35;
     1.7 +
     1.8 +=head1 NAME
     1.9 +
    1.10 +Text::Diff - Perform diffs on files and record sets
    1.11 +
    1.12 +=head1 SYNOPSIS
    1.13 +
    1.14 +    use Text::Diff;
    1.15 +
    1.16 +    ## Mix and match filenames, strings, file handles, producer subs,
    1.17 +    ## or arrays of records; returns diff in a string.
    1.18 +    ## WARNING: can return B<large> diffs for large files.
    1.19 +    my $diff = diff "file1.txt", "file2.txt", { STYLE => "Context" };
    1.20 +    my $diff = diff \$string1,   \$string2,   \%options;
    1.21 +    my $diff = diff \*FH1,       \*FH2;
    1.22 +    my $diff = diff \&reader1,   \&reader2;
    1.23 +    my $diff = diff \@records1,  \@records2;
    1.24 +
    1.25 +    ## May also mix input types:
    1.26 +    my $diff = diff \@records1,  "file_B.txt";
    1.27 +
    1.28 +=head1 DESCRIPTION
    1.29 +
    1.30 +C<diff()> provides a basic set of services akin to the GNU C<diff> utility.  It
    1.31 +is not anywhere near as feature complete as GNU C<diff>, but it is better
    1.32 +integrated with Perl and available on all platforms.  It is often faster than
    1.33 +shelling out to a system's C<diff> executable for small files, and generally
    1.34 +slower on larger files.
    1.35 +
    1.36 +Relies on L<Algorithm::Diff> for, well, the algorithm.  This may not produce
    1.37 +the same exact diff as a system's local C<diff> executable, but it will be a
    1.38 +valid diff and comprehensible by C<patch>.  We haven't seen any differences
    1.39 +between Algorithm::Diff's logic and GNU diff's, but we have not examined them
    1.40 +to make sure they are indeed identical.
    1.41 +
    1.42 +B<Note>: If you don't want to import the C<diff> function, do one of the
    1.43 +following:
    1.44 +
    1.45 +   use Text::Diff ();
    1.46 +
    1.47 +   require Text::Diff;
    1.48 +
    1.49 +That's a pretty rare occurence, so C<diff()> is exported by default.
    1.50 +
    1.51 +=cut
    1.52 +
    1.53 +use Exporter;
    1.54 +@ISA = qw( Exporter );
    1.55 +@EXPORT = qw( diff );
    1.56 +
    1.57 +use strict;
    1.58 +use Carp;
    1.59 +use Algorithm::Diff qw( traverse_sequences );
    1.60 +
    1.61 +## Hunks are made of ops.  An op is the starting index for each
    1.62 +## sequence and the opcode:
    1.63 +use constant A       => 0;   # Array index before match/discard
    1.64 +use constant B       => 1;
    1.65 +use constant OPCODE  => 2;   # "-", " ", "+"
    1.66 +use constant FLAG    => 3;   # What to display if not OPCODE "!"
    1.67 +
    1.68 +
    1.69 +=head1 OPTIONS
    1.70 +
    1.71 +diff() takes two parameters from which to draw input and a set of
    1.72 +options to control it's output.  The options are:
    1.73 +
    1.74 +=over
    1.75 +
    1.76 +=item FILENAME_A, MTIME_A, FILENAME_B, MTIME_B
    1.77 +
    1.78 +The name of the file and the modification time "files"
    1.79 +
    1.80 +These are filled in automatically for each file when diff() is passed a
    1.81 +filename, unless a defined value is passed in.
    1.82 +
    1.83 +If a filename is not passed in and FILENAME_A and FILENAME_B are not provided
    1.84 +or C<undef>, the header will not be printed.
    1.85 +
    1.86 +Unused on C<OldStyle> diffs.
    1.87 +
    1.88 +=item OFFSET_A, OFFSET_B
    1.89 +
    1.90 +The index of the first line / element.  These default to 1 for all
    1.91 +parameter types except ARRAY references, for which the default is 0.  This
    1.92 +is because ARRAY references are presumed to be data structures, while the
    1.93 +others are line oriented text.
    1.94 +
    1.95 +=item STYLE
    1.96 +
    1.97 +"Unified", "Context", "OldStyle", or an object or class reference for a class
    1.98 +providing C<file_header()>, C<hunk_header()>, C<hunk()>, C<hunk_footer()> and
    1.99 +C<file_footer()> methods.  The two footer() methods are provided for
   1.100 +overloading only; none of the formats provide them.
   1.101 +
   1.102 +Defaults to "Unified" (unlike standard C<diff>, but Unified is what's most
   1.103 +often used in submitting patches and is the most human readable of the three.
   1.104 +
   1.105 +If the package indicated by the STYLE has no hunk() method, c<diff()> will
   1.106 +load it automatically (lazy loading).  Since all such packages should inherit
   1.107 +from Text::Diff::Base, this should be marvy.
   1.108 +
   1.109 +Styles may be specified as class names (C<STYLE => "Foo"), in which case they
   1.110 +will be C<new()>ed with no parameters, or as objects (C<STYLE => Foo->new>).
   1.111 +
   1.112 +=item CONTEXT
   1.113 +
   1.114 +How many lines before and after each diff to display.  Ignored on old-style
   1.115 +diffs.  Defaults to 3.
   1.116 +
   1.117 +=item OUTPUT
   1.118 +
   1.119 +Examples and their equivalent subroutines:
   1.120 +
   1.121 +    OUTPUT   => \*FOOHANDLE,   # like: sub { print FOOHANDLE shift() }
   1.122 +    OUTPUT   => \$output,      # like: sub { $output .= shift }
   1.123 +    OUTPUT   => \@output,      # like: sub { push @output, shift }
   1.124 +    OUTPUT   => sub { $output .= shift },
   1.125 +
   1.126 +If no C<OUTPUT> is supplied, returns the diffs in a string.  If
   1.127 +C<OUTPUT> is a C<CODE> ref, it will be called once with the (optional)
   1.128 +file header, and once for each hunk body with the text to emit.  If
   1.129 +C<OUTPUT> is an L<IO::Handle>, output will be emitted to that handle.
   1.130 +
   1.131 +=item FILENAME_PREFIX_A, FILENAME_PREFIX_B
   1.132 +
   1.133 +The string to print before the filename in the header. Unused on C<OldStyle>
   1.134 +diffs.  Defaults are C<"---">, C<"+++"> for Unified and C<"***">, C<"+++"> for
   1.135 +Context.
   1.136 +
   1.137 +=item KEYGEN, KEYGEN_ARGS
   1.138 +
   1.139 +These are passed to L<Algorithm::Diff/traverse_sequences>.
   1.140 +
   1.141 +=back
   1.142 +
   1.143 +B<Note>: if neither C<FILENAME_> option is defined, the header will not be
   1.144 +printed.  If at one is present, the other and both MTIME_ options must be
   1.145 +present or "Use of undefined variable" warnings will be generated (except
   1.146 +on C<OldStyle> diffs, which ignores these options).
   1.147 +
   1.148 +=cut
   1.149 +
   1.150 +my %internal_styles = (
   1.151 +    Unified  => undef,
   1.152 +    Context  => undef,
   1.153 +    OldStyle => undef,
   1.154 +    Table    => undef,   ## "internal", but in another module
   1.155 +);
   1.156 +
   1.157 +sub diff {
   1.158 +    my @seqs = ( shift, shift );
   1.159 +    my $options = shift || {};
   1.160 +
   1.161 +    for my $i ( 0..1 ) {
   1.162 +        my $seq = $seqs[$i];
   1.163 +	my $type = ref $seq;
   1.164 +
   1.165 +        while ( $type eq "CODE" ) {
   1.166 +	    $seqs[$i] = $seq = $seq->( $options );
   1.167 +	    $type = ref $seq;
   1.168 +	}
   1.169 +
   1.170 +	my $AorB = !$i ? "A" : "B";
   1.171 +
   1.172 +        if ( $type eq "ARRAY" ) {
   1.173 +            ## This is most efficient :)
   1.174 +            $options->{"OFFSET_$AorB"} = 0
   1.175 +                unless defined $options->{"OFFSET_$AorB"};
   1.176 +        }
   1.177 +        elsif ( $type eq "SCALAR" ) {
   1.178 +            $seqs[$i] = [split( /^/m, $$seq )];
   1.179 +            $options->{"OFFSET_$AorB"} = 1
   1.180 +                unless defined $options->{"OFFSET_$AorB"};
   1.181 +        }
   1.182 +        elsif ( ! $type ) {
   1.183 +            $options->{"OFFSET_$AorB"} = 1
   1.184 +                unless defined $options->{"OFFSET_$AorB"};
   1.185 +	    $options->{"FILENAME_$AorB"} = $seq
   1.186 +	        unless defined $options->{"FILENAME_$AorB"};
   1.187 +	    $options->{"MTIME_$AorB"} = (stat($seq))[9]
   1.188 +	        unless defined $options->{"MTIME_$AorB"};
   1.189 +
   1.190 +            local $/ = "\n";
   1.191 +            open F, "<$seq" or carp "$!: $seq";
   1.192 +            $seqs[$i] = [<F>];
   1.193 +            close F;
   1.194 +
   1.195 +        }
   1.196 +        elsif ( $type eq "GLOB" || UNIVERSAL::isa( $seq, "IO::Handle" ) ) {
   1.197 +            $options->{"OFFSET_$AorB"} = 1
   1.198 +                unless defined $options->{"OFFSET_$AorB"};
   1.199 +            local $/ = "\n";
   1.200 +            $seqs[$i] = [<$seq>];
   1.201 +        }
   1.202 +        else {
   1.203 +            confess "Can't handle input of type ", ref;
   1.204 +        }
   1.205 +    }
   1.206 +
   1.207 +    ## Config vars
   1.208 +    my $output;
   1.209 +    my $output_handler = $options->{OUTPUT};
   1.210 +    my $type = ref $output_handler ;
   1.211 +    if ( ! defined $output_handler ) {
   1.212 +        $output = "";
   1.213 +        $output_handler = sub { $output .= shift };
   1.214 +    }
   1.215 +    elsif ( $type eq "CODE" ) {
   1.216 +        ## No problems, mate.
   1.217 +    }
   1.218 +    elsif ( $type eq "SCALAR" ) {
   1.219 +        my $out_ref = $output_handler;
   1.220 +        $output_handler = sub { $$out_ref .= shift };
   1.221 +    }
   1.222 +    elsif ( $type eq "ARRAY" ) {
   1.223 +        my $out_ref = $output_handler;
   1.224 +        $output_handler = sub { push @$out_ref, shift };
   1.225 +    }
   1.226 +    elsif ( $type eq "GLOB" || UNIVERSAL::isa $output_handler, "IO::Handle" ) {
   1.227 +        my $output_handle = $output_handler;
   1.228 +        $output_handler = sub { print $output_handle shift };
   1.229 +    }
   1.230 +    else {
   1.231 +        croak "Unrecognized output type: $type";
   1.232 +    }
   1.233 +
   1.234 +    my $style  = $options->{STYLE};
   1.235 +    $style = "Unified" unless defined $options->{STYLE};
   1.236 +    $style = "Text::Diff::$style" if exists $internal_styles{$style};
   1.237 +
   1.238 +    if ( ! $style->can( "hunk" ) ) {
   1.239 +	eval "require $style; 1" or die $@;
   1.240 +    }
   1.241 +
   1.242 +    $style = $style->new
   1.243 +	if ! ref $style && $style->can( "new" );
   1.244 +
   1.245 +    my $ctx_lines = $options->{CONTEXT};
   1.246 +    $ctx_lines = 3 unless defined $ctx_lines;
   1.247 +    $ctx_lines = 0 if $style->isa( "Text::Diff::OldStyle" );
   1.248 +
   1.249 +    my @keygen_args = $options->{KEYGEN_ARGS}
   1.250 +        ? @{$options->{KEYGEN_ARGS}}
   1.251 +        : ();
   1.252 +
   1.253 +    ## State vars
   1.254 +    my $diffs = 0; ## Number of discards this hunk
   1.255 +    my $ctx   = 0; ## Number of " " (ctx_lines) ops pushed after last diff.
   1.256 +    my @ops;       ## ops (" ", +, -) in this hunk
   1.257 +    my $hunks = 0; ## Number of hunks
   1.258 +
   1.259 +    my $emit_ops = sub {
   1.260 +        $output_handler->( $style->file_header( @seqs,     $options ) )
   1.261 +	    unless $hunks++;
   1.262 +        $output_handler->( $style->hunk_header( @seqs, @_, $options ) );
   1.263 +        $output_handler->( $style->hunk       ( @seqs, @_, $options ) );
   1.264 +        $output_handler->( $style->hunk_footer( @seqs, @_, $options ) );
   1.265 +    };
   1.266 +
   1.267 +    ## We keep 2*ctx_lines so that if a diff occurs
   1.268 +    ## at 2*ctx_lines we continue to grow the hunk instead
   1.269 +    ## of emitting diffs and context as we go. We
   1.270 +    ## need to know the total length of both of the two
   1.271 +    ## subsequences so the line count can be printed in the
   1.272 +    ## header.
   1.273 +    my $dis_a = sub {push @ops, [@_[0,1],"-"]; ++$diffs ; $ctx = 0 };
   1.274 +    my $dis_b = sub {push @ops, [@_[0,1],"+"]; ++$diffs ; $ctx = 0 };
   1.275 +
   1.276 +    traverse_sequences(
   1.277 +        @seqs,
   1.278 +        {
   1.279 +            MATCH => sub {
   1.280 +                push @ops, [@_[0,1]," "];
   1.281 +
   1.282 +                if ( $diffs && ++$ctx > $ctx_lines * 2 ) {
   1.283 +        	   $emit_ops->( [ splice @ops, 0, $#ops - $ctx_lines ] );
   1.284 +        	   $ctx = $diffs = 0;
   1.285 +                }
   1.286 +
   1.287 +                ## throw away context lines that aren't needed any more
   1.288 +                shift @ops if ! $diffs && @ops > $ctx_lines;
   1.289 +            },
   1.290 +            DISCARD_A => $dis_a,
   1.291 +            DISCARD_B => $dis_b,
   1.292 +        },
   1.293 +        $options->{KEYGEN},  # pass in user arguments for key gen function
   1.294 +        @keygen_args,
   1.295 +    );
   1.296 +
   1.297 +    if ( $diffs ) {
   1.298 +        $#ops -= $ctx - $ctx_lines if $ctx > $ctx_lines;
   1.299 +        $emit_ops->( \@ops );
   1.300 +    }
   1.301 +
   1.302 +    $output_handler->( $style->file_footer( @seqs, $options ) ) if $hunks;
   1.303 +
   1.304 +    return defined $output ? $output : $hunks;
   1.305 +}
   1.306 +
   1.307 +
   1.308 +sub _header {
   1.309 +    my ( $h ) = @_;
   1.310 +    my ( $p1, $fn1, $t1, $p2, $fn2, $t2 ) = @{$h}{
   1.311 +        "FILENAME_PREFIX_A",
   1.312 +        "FILENAME_A",
   1.313 +        "MTIME_A",
   1.314 +        "FILENAME_PREFIX_B",
   1.315 +        "FILENAME_B",
   1.316 +        "MTIME_B"
   1.317 +    };
   1.318 +
   1.319 +    ## remember to change Text::Diff::Table if this logic is tweaked.
   1.320 +    return "" unless defined $fn1 && defined $fn2;
   1.321 +
   1.322 +    return join( "",
   1.323 +        $p1, " ", $fn1, defined $t1 ? "\t" . localtime $t1 : (), "\n",
   1.324 +        $p2, " ", $fn2, defined $t2 ? "\t" . localtime $t2 : (), "\n",
   1.325 +    );
   1.326 +}
   1.327 +
   1.328 +## _range encapsulates the building of, well, ranges.  Turns out there are
   1.329 +## a few nuances.
   1.330 +sub _range {
   1.331 +    my ( $ops, $a_or_b, $format ) = @_;
   1.332 +
   1.333 +    my $start = $ops->[ 0]->[$a_or_b];
   1.334 +    my $after = $ops->[-1]->[$a_or_b];
   1.335 +
   1.336 +    ## The sequence indexes in the lines are from *before* the OPCODE is
   1.337 +    ## executed, so we bump the last index up unless the OP indicates
   1.338 +    ## it didn't change.
   1.339 +    ++$after
   1.340 +        unless $ops->[-1]->[OPCODE] eq ( $a_or_b == A ? "+" : "-" );
   1.341 +
   1.342 +    ## convert from 0..n index to 1..(n+1) line number.  The unless modifier
   1.343 +    ## handles diffs with no context, where only one file is affected.  In this
   1.344 +    ## case $start == $after indicates an empty range, and the $start must
   1.345 +    ## not be incremented.
   1.346 +    my $empty_range = $start == $after;
   1.347 +    ++$start unless $empty_range;
   1.348 +
   1.349 +    return
   1.350 +        $start == $after
   1.351 +            ? $format eq "unified" && $empty_range
   1.352 +                ? "$start,0"
   1.353 +                : $start
   1.354 +            : $format eq "unified"
   1.355 +                ? "$start,".($after-$start+1)
   1.356 +                : "$start,$after";
   1.357 +}
   1.358 +
   1.359 +
   1.360 +sub _op_to_line {
   1.361 +    my ( $seqs, $op, $a_or_b, $op_prefixes ) = @_;
   1.362 +
   1.363 +    my $opcode = $op->[OPCODE];
   1.364 +    return () unless defined $op_prefixes->{$opcode};
   1.365 +
   1.366 +    my $op_sym = defined $op->[FLAG] ? $op->[FLAG] : $opcode;
   1.367 +    $op_sym = $op_prefixes->{$op_sym};
   1.368 +    return () unless defined $op_sym;
   1.369 +
   1.370 +    $a_or_b = $op->[OPCODE] ne "+" ? 0 : 1 unless defined $a_or_b;
   1.371 +    return ( $op_sym, $seqs->[$a_or_b][$op->[$a_or_b]] );
   1.372 +}
   1.373 +
   1.374 +
   1.375 +=head1 Formatting Classes
   1.376 +
   1.377 +These functions implement the output formats.  They are grouped in to classes
   1.378 +so diff() can use class names to call the correct set of output routines and so
   1.379 +that you may inherit from them easily.  There are no constructors or instance
   1.380 +methods for these classes, though subclasses may provide them if need be.
   1.381 +
   1.382 +Each class has file_header(), hunk_header(), hunk(), and footer() methods
   1.383 +identical to those documented in the Text::Diff::Unified section.  header() is
   1.384 +called before the hunk() is first called, footer() afterwards.  The default
   1.385 +footer function is an empty method provided for overloading:
   1.386 +
   1.387 +    sub footer { return "End of patch\n" }
   1.388 +
   1.389 +Some output formats are provided by external modules (which are loaded
   1.390 +automatically), such as L<Text::Diff::Table>.  These are
   1.391 +are documented here to keep the documentation simple.
   1.392 +
   1.393 +=over
   1.394 +
   1.395 +=head2 Text::Diff::Base
   1.396 +
   1.397 +Returns "" for all methods (other than C<new()>).
   1.398 +
   1.399 +=cut
   1.400 +
   1.401 +{
   1.402 +    package Text::Diff::Base;
   1.403 +    sub new         {
   1.404 +        my $proto = shift;
   1.405 +	return bless { @_ }, ref $proto || $proto;
   1.406 +    }
   1.407 +
   1.408 +    sub file_header { return "" }
   1.409 +    sub hunk_header { return "" }
   1.410 +    sub hunk        { return "" }
   1.411 +    sub hunk_footer { return "" }
   1.412 +    sub file_footer { return "" }
   1.413 +}
   1.414 +
   1.415 +
   1.416 +=head2 Text::Diff::Unified
   1.417 +
   1.418 +    --- A   Mon Nov 12 23:49:30 2001
   1.419 +    +++ B   Mon Nov 12 23:49:30 2001
   1.420 +    @@ -2,13 +2,13 @@
   1.421 +     2
   1.422 +     3
   1.423 +     4
   1.424 +    -5d
   1.425 +    +5a
   1.426 +     6
   1.427 +     7
   1.428 +     8
   1.429 +     9
   1.430 +    +9a
   1.431 +     10
   1.432 +     11
   1.433 +    -11d
   1.434 +     12
   1.435 +     13
   1.436 +
   1.437 +=over
   1.438 +
   1.439 +=item file_header
   1.440 +
   1.441 +    $s = Text::Diff::Unified->file_header( $options );
   1.442 +
   1.443 +Returns a string containing a unified header.  The sole parameter is the
   1.444 +options hash passed in to diff(), containing at least:
   1.445 +
   1.446 +    FILENAME_A  => $fn1,
   1.447 +    MTIME_A     => $mtime1,
   1.448 +    FILENAME_B  => $fn2,
   1.449 +    MTIME_B     => $mtime2
   1.450 +
   1.451 +May also contain
   1.452 +
   1.453 +    FILENAME_PREFIX_A    => "---",
   1.454 +    FILENAME_PREFIX_B    => "+++",
   1.455 +
   1.456 +to override the default prefixes (default values shown).
   1.457 +
   1.458 +=cut
   1.459 +
   1.460 +@Text::Diff::Unified::ISA = qw( Text::Diff::Base );
   1.461 +
   1.462 +sub Text::Diff::Unified::file_header {
   1.463 +    shift; ## No instance data
   1.464 +    my $options = pop ;
   1.465 +
   1.466 +    _header(
   1.467 +        { FILENAME_PREFIX_A => "---", FILENAME_PREFIX_B => "+++", %$options }
   1.468 +    );
   1.469 +}
   1.470 +
   1.471 +=item hunk_header
   1.472 +
   1.473 +    Text::Diff::Unified->hunk_header( \@ops, $options );
   1.474 +
   1.475 +Returns a string containing the output of one hunk of unified diff.
   1.476 +
   1.477 +=cut
   1.478 +
   1.479 +sub Text::Diff::Unified::hunk_header {
   1.480 +    shift; ## No instance data
   1.481 +    pop; ## Ignore options
   1.482 +    my $ops = pop;
   1.483 +
   1.484 +    return join( "",
   1.485 +        "@@ -",
   1.486 +        _range( $ops, A, "unified" ),
   1.487 +        " +",
   1.488 +        _range( $ops, B, "unified" ),
   1.489 +        " @@\n",
   1.490 +    );
   1.491 +}
   1.492 +
   1.493 +
   1.494 +=item Text::Diff::Unified::hunk
   1.495 +
   1.496 +    Text::Diff::Unified->hunk( \@seq_a, \@seq_b, \@ops, $options );
   1.497 +
   1.498 +Returns a string containing the output of one hunk of unified diff.
   1.499 +
   1.500 +=cut
   1.501 +
   1.502 +sub Text::Diff::Unified::hunk {
   1.503 +    shift; ## No instance data
   1.504 +    pop; ## Ignore options
   1.505 +    my $ops = pop;
   1.506 +
   1.507 +    my $prefixes = { "+" => "+", " " => " ", "-" => "-" };
   1.508 +
   1.509 +    return join "", map _op_to_line( \@_, $_, undef, $prefixes ), @$ops
   1.510 +}
   1.511 +
   1.512 +
   1.513 +=back
   1.514 +
   1.515 +=head2 Text::Diff::Table
   1.516 +
   1.517 + +--+----------------------------------+--+------------------------------+
   1.518 + |  |../Test-Differences-0.2/MANIFEST  |  |../Test-Differences/MANIFEST  |
   1.519 + |  |Thu Dec 13 15:38:49 2001          |  |Sat Dec 15 02:09:44 2001      |
   1.520 + +--+----------------------------------+--+------------------------------+
   1.521 + |  |                                  * 1|Changes                       *
   1.522 + | 1|Differences.pm                    | 2|Differences.pm                |
   1.523 + | 2|MANIFEST                          | 3|MANIFEST                      |
   1.524 + |  |                                  * 4|MANIFEST.SKIP                 *
   1.525 + | 3|Makefile.PL                       | 5|Makefile.PL                   |
   1.526 + |  |                                  * 6|t/00escape.t                  *
   1.527 + | 4|t/00flatten.t                     | 7|t/00flatten.t                 |
   1.528 + | 5|t/01text_vs_data.t                | 8|t/01text_vs_data.t            |
   1.529 + | 6|t/10test.t                        | 9|t/10test.t                    |
   1.530 + +--+----------------------------------+--+------------------------------+
   1.531 +
   1.532 +This format also goes to some pains to highlight "invisible" characters on
   1.533 +differing elements by selectively escaping whitespace:
   1.534 +
   1.535 + +--+--------------------------+--------------------------+
   1.536 + |  |demo_ws_A.txt             |demo_ws_B.txt             |
   1.537 + |  |Fri Dec 21 08:36:32 2001  |Fri Dec 21 08:36:50 2001  |
   1.538 + +--+--------------------------+--------------------------+
   1.539 + | 1|identical                 |identical                 |
   1.540 + * 2|        spaced in         |        also spaced in    *
   1.541 + * 3|embedded space            |embedded        tab       *
   1.542 + | 4|identical                 |identical                 |
   1.543 + * 5|        spaced in         |\ttabbed in               *
   1.544 + * 6|trailing spaces\s\s\n     |trailing tabs\t\t\n       *
   1.545 + | 7|identical                 |identical                 |
   1.546 + * 8|lf line\n                 |crlf line\r\n             *
   1.547 + * 9|embedded ws               |embedded\tws              *
   1.548 + +--+--------------------------+--------------------------+
   1.549 +
   1.550 +See L</Text::Diff::Table> for more details, including how the whitespace
   1.551 +escaping works.
   1.552 +
   1.553 +=head2 Text::Diff::Context
   1.554 +
   1.555 +    *** A   Mon Nov 12 23:49:30 2001
   1.556 +    --- B   Mon Nov 12 23:49:30 2001
   1.557 +    ***************
   1.558 +    *** 2,14 ****
   1.559 +      2
   1.560 +      3
   1.561 +      4
   1.562 +    ! 5d
   1.563 +      6
   1.564 +      7
   1.565 +      8
   1.566 +      9
   1.567 +      10
   1.568 +      11
   1.569 +    - 11d
   1.570 +      12
   1.571 +      13
   1.572 +    --- 2,14 ----
   1.573 +      2
   1.574 +      3
   1.575 +      4
   1.576 +    ! 5a
   1.577 +      6
   1.578 +      7
   1.579 +      8
   1.580 +      9
   1.581 +    + 9a
   1.582 +      10
   1.583 +      11
   1.584 +      12
   1.585 +      13
   1.586 +
   1.587 +Note: hunk_header() returns only "***************\n".
   1.588 +
   1.589 +=cut
   1.590 +
   1.591 +
   1.592 +@Text::Diff::Context::ISA = qw( Text::Diff::Base );
   1.593 +
   1.594 +sub Text::Diff::Context::file_header {
   1.595 +    _header { FILENAME_PREFIX_A=>"***", FILENAME_PREFIX_B=>"---", %{$_[-1]} };
   1.596 +}
   1.597 +
   1.598 +
   1.599 +sub Text::Diff::Context::hunk_header {
   1.600 +    return "***************\n";
   1.601 +}
   1.602 +
   1.603 +sub Text::Diff::Context::hunk {
   1.604 +    shift; ## No instance data
   1.605 +    pop; ## Ignore options
   1.606 +    my $ops = pop;
   1.607 +    ## Leave the sequences in @_[0,1]
   1.608 +
   1.609 +    my $a_range = _range( $ops, A, "" );
   1.610 +    my $b_range = _range( $ops, B, "" );
   1.611 +
   1.612 +    ## Sigh.  Gotta make sure that differences that aren't adds/deletions
   1.613 +    ## get prefixed with "!", and that the old opcodes are removed.
   1.614 +    my $after;
   1.615 +    for ( my $start = 0; $start <= $#$ops ; $start = $after ) {
   1.616 +        ## Scan until next difference
   1.617 +        $after = $start + 1;
   1.618 +        my $opcode = $ops->[$start]->[OPCODE];
   1.619 +        next if $opcode eq " ";
   1.620 +
   1.621 +        my $bang_it;
   1.622 +        while ( $after <= $#$ops && $ops->[$after]->[OPCODE] ne " " ) {
   1.623 +            $bang_it ||= $ops->[$after]->[OPCODE] ne $opcode;
   1.624 +            ++$after;
   1.625 +        }
   1.626 +
   1.627 +        if ( $bang_it ) {
   1.628 +            for my $i ( $start..($after-1) ) {
   1.629 +                $ops->[$i]->[FLAG] = "!";
   1.630 +            }
   1.631 +        }
   1.632 +    }
   1.633 +
   1.634 +    my $b_prefixes = { "+" => "+ ",  " " => "  ", "-" => undef, "!" => "! " };
   1.635 +    my $a_prefixes = { "+" => undef, " " => "  ", "-" => "- ",  "!" => "! " };
   1.636 +
   1.637 +    return join( "",
   1.638 +        "*** ", $a_range, " ****\n",
   1.639 +        map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
   1.640 +        "--- ", $b_range, " ----\n",
   1.641 +        map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
   1.642 +    );
   1.643 +}
   1.644 +=head2 Text::Diff::OldStyle
   1.645 +
   1.646 +    5c5
   1.647 +    < 5d
   1.648 +    ---
   1.649 +    > 5a
   1.650 +    9a10
   1.651 +    > 9a
   1.652 +    12d12
   1.653 +    < 11d
   1.654 +
   1.655 +Note: no file_header().
   1.656 +
   1.657 +=cut
   1.658 +
   1.659 +@Text::Diff::OldStyle::ISA = qw( Text::Diff::Base );
   1.660 +
   1.661 +sub _op {
   1.662 +    my $ops = shift;
   1.663 +    my $op = $ops->[0]->[OPCODE];
   1.664 +    $op = "c" if grep $_->[OPCODE] ne $op, @$ops;
   1.665 +    $op = "a" if $op eq "+";
   1.666 +    $op = "d" if $op eq "-";
   1.667 +    return $op;
   1.668 +}
   1.669 +
   1.670 +sub Text::Diff::OldStyle::hunk_header {
   1.671 +    shift; ## No instance data
   1.672 +    pop; ## ignore options
   1.673 +    my $ops = pop;
   1.674 +
   1.675 +    my $op = _op $ops;
   1.676 +
   1.677 +    return join "", _range( $ops, A, "" ), $op, _range( $ops, B, "" ), "\n";
   1.678 +}
   1.679 +
   1.680 +sub Text::Diff::OldStyle::hunk {
   1.681 +    shift; ## No instance data
   1.682 +    pop; ## ignore options
   1.683 +    my $ops = pop;
   1.684 +    ## Leave the sequences in @_[0,1]
   1.685 +
   1.686 +    my $a_prefixes = { "+" => undef,  " " => undef, "-" => "< "  };
   1.687 +    my $b_prefixes = { "+" => "> ",   " " => undef, "-" => undef };
   1.688 +
   1.689 +    my $op = _op $ops;
   1.690 +
   1.691 +    return join( "",
   1.692 +        map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
   1.693 +        $op eq "c" ? "---\n" : (),
   1.694 +        map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
   1.695 +    );
   1.696 +}
   1.697 +
   1.698 +=head1 LIMITATIONS
   1.699 +
   1.700 +Must suck both input files entirely in to memory and store them with a normal
   1.701 +amount of Perlish overhead (one array location) per record.  This is implied by
   1.702 +the implementation of Algorithm::Diff, which takes two arrays.  If
   1.703 +Algorithm::Diff ever offers an incremental mode, this can be changed (contact
   1.704 +the maintainers of Algorithm::Diff and Text::Diff if you need this; it
   1.705 +shouldn't be too terribly hard to tie arrays in this fashion).
   1.706 +
   1.707 +Does not provide most of the more refined GNU diff options: recursive directory
   1.708 +tree scanning, ignoring blank lines / whitespace, etc., etc.  These can all be
   1.709 +added as time permits and need arises, many are rather easy; patches quite
   1.710 +welcome.
   1.711 +
   1.712 +Uses closures internally, this may lead to leaks on C<perl> versions 5.6.1 and
   1.713 +prior if used many times over a process' life time.
   1.714 +
   1.715 +=head1 AUTHOR
   1.716 +
   1.717 +Barrie Slaymaker <barries@slaysys.com>.
   1.718 +
   1.719 +=head1 COPYRIGHT & LICENSE
   1.720 +
   1.721 +Copyright 2001, Barrie Slaymaker.  All Rights Reserved.
   1.722 +
   1.723 +You may use this under the terms of either the Artistic License or GNU Public
   1.724 +License v 2.0 or greater.
   1.725 +
   1.726 +=cut
   1.727 +
   1.728 +1;