lib/CPAN/lib/Algorithm/Diff.pm
changeset 0 414e01d06fd5
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/CPAN/lib/Algorithm/Diff.pm	Sat Jan 26 15:50:53 2008 +0100
     1.3 @@ -0,0 +1,1713 @@
     1.4 +package Algorithm::Diff;
     1.5 +# Skip to first "=head" line for documentation.
     1.6 +use strict;
     1.7 +
     1.8 +use integer;    # see below in _replaceNextLargerWith() for mod to make
     1.9 +                # if you don't use this
    1.10 +use vars qw( $VERSION @EXPORT_OK );
    1.11 +$VERSION = 1.19_01;
    1.12 +#          ^ ^^ ^^-- Incremented at will
    1.13 +#          | \+----- Incremented for non-trivial changes to features
    1.14 +#          \-------- Incremented for fundamental changes
    1.15 +require Exporter;
    1.16 +*import    = \&Exporter::import;
    1.17 +@EXPORT_OK = qw(
    1.18 +    prepare LCS LCDidx LCS_length
    1.19 +    diff sdiff compact_diff
    1.20 +    traverse_sequences traverse_balanced
    1.21 +);
    1.22 +
    1.23 +# McIlroy-Hunt diff algorithm
    1.24 +# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
    1.25 +# by Ned Konz, perl@bike-nomad.com
    1.26 +# Updates by Tye McQueen, http://perlmonks.org/?node=tye
    1.27 +
    1.28 +# Create a hash that maps each element of $aCollection to the set of
    1.29 +# positions it occupies in $aCollection, restricted to the elements
    1.30 +# within the range of indexes specified by $start and $end.
    1.31 +# The fourth parameter is a subroutine reference that will be called to
    1.32 +# generate a string to use as a key.
    1.33 +# Additional parameters, if any, will be passed to this subroutine.
    1.34 +#
    1.35 +# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
    1.36 +
    1.37 +sub _withPositionsOfInInterval
    1.38 +{
    1.39 +    my $aCollection = shift;    # array ref
    1.40 +    my $start       = shift;
    1.41 +    my $end         = shift;
    1.42 +    my $keyGen      = shift;
    1.43 +    my %d;
    1.44 +    my $index;
    1.45 +    for ( $index = $start ; $index <= $end ; $index++ )
    1.46 +    {
    1.47 +        my $element = $aCollection->[$index];
    1.48 +        my $key = &$keyGen( $element, @_ );
    1.49 +        if ( exists( $d{$key} ) )
    1.50 +        {
    1.51 +            unshift ( @{ $d{$key} }, $index );
    1.52 +        }
    1.53 +        else
    1.54 +        {
    1.55 +            $d{$key} = [$index];
    1.56 +        }
    1.57 +    }
    1.58 +    return wantarray ? %d : \%d;
    1.59 +}
    1.60 +
    1.61 +# Find the place at which aValue would normally be inserted into the
    1.62 +# array. If that place is already occupied by aValue, do nothing, and
    1.63 +# return undef. If the place does not exist (i.e., it is off the end of
    1.64 +# the array), add it to the end, otherwise replace the element at that
    1.65 +# point with aValue.  It is assumed that the array's values are numeric.
    1.66 +# This is where the bulk (75%) of the time is spent in this module, so
    1.67 +# try to make it fast!
    1.68 +
    1.69 +sub _replaceNextLargerWith
    1.70 +{
    1.71 +    my ( $array, $aValue, $high ) = @_;
    1.72 +    $high ||= $#$array;
    1.73 +
    1.74 +    # off the end?
    1.75 +    if ( $high == -1 || $aValue > $array->[-1] )
    1.76 +    {
    1.77 +        push ( @$array, $aValue );
    1.78 +        return $high + 1;
    1.79 +    }
    1.80 +
    1.81 +    # binary search for insertion point...
    1.82 +    my $low = 0;
    1.83 +    my $index;
    1.84 +    my $found;
    1.85 +    while ( $low <= $high )
    1.86 +    {
    1.87 +        $index = ( $high + $low ) / 2;
    1.88 +
    1.89 +        # $index = int(( $high + $low ) / 2);  # without 'use integer'
    1.90 +        $found = $array->[$index];
    1.91 +
    1.92 +        if ( $aValue == $found )
    1.93 +        {
    1.94 +            return undef;
    1.95 +        }
    1.96 +        elsif ( $aValue > $found )
    1.97 +        {
    1.98 +            $low = $index + 1;
    1.99 +        }
   1.100 +        else
   1.101 +        {
   1.102 +            $high = $index - 1;
   1.103 +        }
   1.104 +    }
   1.105 +
   1.106 +    # now insertion point is in $low.
   1.107 +    $array->[$low] = $aValue;    # overwrite next larger
   1.108 +    return $low;
   1.109 +}
   1.110 +
   1.111 +# This method computes the longest common subsequence in $a and $b.
   1.112 +
   1.113 +# Result is array or ref, whose contents is such that
   1.114 +#   $a->[ $i ] == $b->[ $result[ $i ] ]
   1.115 +# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
   1.116 +
   1.117 +# An additional argument may be passed; this is a hash or key generating
   1.118 +# function that should return a string that uniquely identifies the given
   1.119 +# element.  It should be the case that if the key is the same, the elements
   1.120 +# will compare the same. If this parameter is undef or missing, the key
   1.121 +# will be the element as a string.
   1.122 +
   1.123 +# By default, comparisons will use "eq" and elements will be turned into keys
   1.124 +# using the default stringizing operator '""'.
   1.125 +
   1.126 +# Additional parameters, if any, will be passed to the key generation
   1.127 +# routine.
   1.128 +
   1.129 +sub _longestCommonSubsequence
   1.130 +{
   1.131 +    my $a        = shift;    # array ref or hash ref
   1.132 +    my $b        = shift;    # array ref or hash ref
   1.133 +    my $counting = shift;    # scalar
   1.134 +    my $keyGen   = shift;    # code ref
   1.135 +    my $compare;             # code ref
   1.136 +
   1.137 +    if ( ref($a) eq 'HASH' )
   1.138 +    {                        # prepared hash must be in $b
   1.139 +        my $tmp = $b;
   1.140 +        $b = $a;
   1.141 +        $a = $tmp;
   1.142 +    }
   1.143 +
   1.144 +    # Check for bogus (non-ref) argument values
   1.145 +    if ( !ref($a) || !ref($b) )
   1.146 +    {
   1.147 +        my @callerInfo = caller(1);
   1.148 +        die 'error: must pass array or hash references to ' . $callerInfo[3];
   1.149 +    }
   1.150 +
   1.151 +    # set up code refs
   1.152 +    # Note that these are optimized.
   1.153 +    if ( !defined($keyGen) )    # optimize for strings
   1.154 +    {
   1.155 +        $keyGen = sub { $_[0] };
   1.156 +        $compare = sub { my ( $a, $b ) = @_; $a eq $b };
   1.157 +    }
   1.158 +    else
   1.159 +    {
   1.160 +        $compare = sub {
   1.161 +            my $a = shift;
   1.162 +            my $b = shift;
   1.163 +            &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
   1.164 +        };
   1.165 +    }
   1.166 +
   1.167 +    my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
   1.168 +    my ( $prunedCount, $bMatches ) = ( 0, {} );
   1.169 +
   1.170 +    if ( ref($b) eq 'HASH' )    # was $bMatches prepared for us?
   1.171 +    {
   1.172 +        $bMatches = $b;
   1.173 +    }
   1.174 +    else
   1.175 +    {
   1.176 +        my ( $bStart, $bFinish ) = ( 0, $#$b );
   1.177 +
   1.178 +        # First we prune off any common elements at the beginning
   1.179 +        while ( $aStart <= $aFinish
   1.180 +            and $bStart <= $bFinish
   1.181 +            and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
   1.182 +        {
   1.183 +            $matchVector->[ $aStart++ ] = $bStart++;
   1.184 +            $prunedCount++;
   1.185 +        }
   1.186 +
   1.187 +        # now the end
   1.188 +        while ( $aStart <= $aFinish
   1.189 +            and $bStart <= $bFinish
   1.190 +            and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
   1.191 +        {
   1.192 +            $matchVector->[ $aFinish-- ] = $bFinish--;
   1.193 +            $prunedCount++;
   1.194 +        }
   1.195 +
   1.196 +        # Now compute the equivalence classes of positions of elements
   1.197 +        $bMatches =
   1.198 +          _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
   1.199 +    }
   1.200 +    my $thresh = [];
   1.201 +    my $links  = [];
   1.202 +
   1.203 +    my ( $i, $ai, $j, $k );
   1.204 +    for ( $i = $aStart ; $i <= $aFinish ; $i++ )
   1.205 +    {
   1.206 +        $ai = &$keyGen( $a->[$i], @_ );
   1.207 +        if ( exists( $bMatches->{$ai} ) )
   1.208 +        {
   1.209 +            $k = 0;
   1.210 +            for $j ( @{ $bMatches->{$ai} } )
   1.211 +            {
   1.212 +
   1.213 +                # optimization: most of the time this will be true
   1.214 +                if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
   1.215 +                {
   1.216 +                    $thresh->[$k] = $j;
   1.217 +                }
   1.218 +                else
   1.219 +                {
   1.220 +                    $k = _replaceNextLargerWith( $thresh, $j, $k );
   1.221 +                }
   1.222 +
   1.223 +                # oddly, it's faster to always test this (CPU cache?).
   1.224 +                if ( defined($k) )
   1.225 +                {
   1.226 +                    $links->[$k] =
   1.227 +                      [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
   1.228 +                }
   1.229 +            }
   1.230 +        }
   1.231 +    }
   1.232 +
   1.233 +    if (@$thresh)
   1.234 +    {
   1.235 +        return $prunedCount + @$thresh if $counting;
   1.236 +        for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
   1.237 +        {
   1.238 +            $matchVector->[ $link->[1] ] = $link->[2];
   1.239 +        }
   1.240 +    }
   1.241 +    elsif ($counting)
   1.242 +    {
   1.243 +        return $prunedCount;
   1.244 +    }
   1.245 +
   1.246 +    return wantarray ? @$matchVector : $matchVector;
   1.247 +}
   1.248 +
   1.249 +sub traverse_sequences
   1.250 +{
   1.251 +    my $a                 = shift;          # array ref
   1.252 +    my $b                 = shift;          # array ref
   1.253 +    my $callbacks         = shift || {};
   1.254 +    my $keyGen            = shift;
   1.255 +    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
   1.256 +    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
   1.257 +    my $finishedACallback = $callbacks->{'A_FINISHED'};
   1.258 +    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
   1.259 +    my $finishedBCallback = $callbacks->{'B_FINISHED'};
   1.260 +    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
   1.261 +
   1.262 +    # Process all the lines in @$matchVector
   1.263 +    my $lastA = $#$a;
   1.264 +    my $lastB = $#$b;
   1.265 +    my $bi    = 0;
   1.266 +    my $ai;
   1.267 +
   1.268 +    for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
   1.269 +    {
   1.270 +        my $bLine = $matchVector->[$ai];
   1.271 +        if ( defined($bLine) )    # matched
   1.272 +        {
   1.273 +            &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
   1.274 +            &$matchCallback( $ai,    $bi++, @_ );
   1.275 +        }
   1.276 +        else
   1.277 +        {
   1.278 +            &$discardACallback( $ai, $bi, @_ );
   1.279 +        }
   1.280 +    }
   1.281 +
   1.282 +    # The last entry (if any) processed was a match.
   1.283 +    # $ai and $bi point just past the last matching lines in their sequences.
   1.284 +
   1.285 +    while ( $ai <= $lastA or $bi <= $lastB )
   1.286 +    {
   1.287 +
   1.288 +        # last A?
   1.289 +        if ( $ai == $lastA + 1 and $bi <= $lastB )
   1.290 +        {
   1.291 +            if ( defined($finishedACallback) )
   1.292 +            {
   1.293 +                &$finishedACallback( $lastA, @_ );
   1.294 +                $finishedACallback = undef;
   1.295 +            }
   1.296 +            else
   1.297 +            {
   1.298 +                &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
   1.299 +            }
   1.300 +        }
   1.301 +
   1.302 +        # last B?
   1.303 +        if ( $bi == $lastB + 1 and $ai <= $lastA )
   1.304 +        {
   1.305 +            if ( defined($finishedBCallback) )
   1.306 +            {
   1.307 +                &$finishedBCallback( $lastB, @_ );
   1.308 +                $finishedBCallback = undef;
   1.309 +            }
   1.310 +            else
   1.311 +            {
   1.312 +                &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
   1.313 +            }
   1.314 +        }
   1.315 +
   1.316 +        &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
   1.317 +        &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
   1.318 +    }
   1.319 +
   1.320 +    return 1;
   1.321 +}
   1.322 +
   1.323 +sub traverse_balanced
   1.324 +{
   1.325 +    my $a                 = shift;              # array ref
   1.326 +    my $b                 = shift;              # array ref
   1.327 +    my $callbacks         = shift || {};
   1.328 +    my $keyGen            = shift;
   1.329 +    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
   1.330 +    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
   1.331 +    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
   1.332 +    my $changeCallback    = $callbacks->{'CHANGE'};
   1.333 +    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
   1.334 +
   1.335 +    # Process all the lines in match vector
   1.336 +    my $lastA = $#$a;
   1.337 +    my $lastB = $#$b;
   1.338 +    my $bi    = 0;
   1.339 +    my $ai    = 0;
   1.340 +    my $ma    = -1;
   1.341 +    my $mb;
   1.342 +
   1.343 +    while (1)
   1.344 +    {
   1.345 +
   1.346 +        # Find next match indices $ma and $mb
   1.347 +        do {
   1.348 +            $ma++;
   1.349 +        } while(
   1.350 +                $ma <= $#$matchVector
   1.351 +            &&  !defined $matchVector->[$ma]
   1.352 +        );
   1.353 +
   1.354 +        last if $ma > $#$matchVector;    # end of matchVector?
   1.355 +        $mb = $matchVector->[$ma];
   1.356 +
   1.357 +        # Proceed with discard a/b or change events until
   1.358 +        # next match
   1.359 +        while ( $ai < $ma || $bi < $mb )
   1.360 +        {
   1.361 +
   1.362 +            if ( $ai < $ma && $bi < $mb )
   1.363 +            {
   1.364 +
   1.365 +                # Change
   1.366 +                if ( defined $changeCallback )
   1.367 +                {
   1.368 +                    &$changeCallback( $ai++, $bi++, @_ );
   1.369 +                }
   1.370 +                else
   1.371 +                {
   1.372 +                    &$discardACallback( $ai++, $bi, @_ );
   1.373 +                    &$discardBCallback( $ai, $bi++, @_ );
   1.374 +                }
   1.375 +            }
   1.376 +            elsif ( $ai < $ma )
   1.377 +            {
   1.378 +                &$discardACallback( $ai++, $bi, @_ );
   1.379 +            }
   1.380 +            else
   1.381 +            {
   1.382 +
   1.383 +                # $bi < $mb
   1.384 +                &$discardBCallback( $ai, $bi++, @_ );
   1.385 +            }
   1.386 +        }
   1.387 +
   1.388 +        # Match
   1.389 +        &$matchCallback( $ai++, $bi++, @_ );
   1.390 +    }
   1.391 +
   1.392 +    while ( $ai <= $lastA || $bi <= $lastB )
   1.393 +    {
   1.394 +        if ( $ai <= $lastA && $bi <= $lastB )
   1.395 +        {
   1.396 +
   1.397 +            # Change
   1.398 +            if ( defined $changeCallback )
   1.399 +            {
   1.400 +                &$changeCallback( $ai++, $bi++, @_ );
   1.401 +            }
   1.402 +            else
   1.403 +            {
   1.404 +                &$discardACallback( $ai++, $bi, @_ );
   1.405 +                &$discardBCallback( $ai, $bi++, @_ );
   1.406 +            }
   1.407 +        }
   1.408 +        elsif ( $ai <= $lastA )
   1.409 +        {
   1.410 +            &$discardACallback( $ai++, $bi, @_ );
   1.411 +        }
   1.412 +        else
   1.413 +        {
   1.414 +
   1.415 +            # $bi <= $lastB
   1.416 +            &$discardBCallback( $ai, $bi++, @_ );
   1.417 +        }
   1.418 +    }
   1.419 +
   1.420 +    return 1;
   1.421 +}
   1.422 +
   1.423 +sub prepare
   1.424 +{
   1.425 +    my $a       = shift;    # array ref
   1.426 +    my $keyGen  = shift;    # code ref
   1.427 +
   1.428 +    # set up code ref
   1.429 +    $keyGen = sub { $_[0] } unless defined($keyGen);
   1.430 +
   1.431 +    return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
   1.432 +}
   1.433 +
   1.434 +sub LCS
   1.435 +{
   1.436 +    my $a = shift;                  # array ref
   1.437 +    my $b = shift;                  # array ref or hash ref
   1.438 +    my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
   1.439 +    my @retval;
   1.440 +    my $i;
   1.441 +    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
   1.442 +    {
   1.443 +        if ( defined( $matchVector->[$i] ) )
   1.444 +        {
   1.445 +            push ( @retval, $a->[$i] );
   1.446 +        }
   1.447 +    }
   1.448 +    return wantarray ? @retval : \@retval;
   1.449 +}
   1.450 +
   1.451 +sub LCS_length
   1.452 +{
   1.453 +    my $a = shift;                          # array ref
   1.454 +    my $b = shift;                          # array ref or hash ref
   1.455 +    return _longestCommonSubsequence( $a, $b, 1, @_ );
   1.456 +}
   1.457 +
   1.458 +sub LCSidx
   1.459 +{
   1.460 +    my $a= shift @_;
   1.461 +    my $b= shift @_;
   1.462 +    my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
   1.463 +    my @am= grep defined $match->[$_], 0..$#$match;
   1.464 +    my @bm= @{$match}[@am];
   1.465 +    return \@am, \@bm;
   1.466 +}
   1.467 +
   1.468 +sub compact_diff
   1.469 +{
   1.470 +    my $a= shift @_;
   1.471 +    my $b= shift @_;
   1.472 +    my( $am, $bm )= LCSidx( $a, $b, @_ );
   1.473 +    my @cdiff;
   1.474 +    my( $ai, $bi )= ( 0, 0 );
   1.475 +    push @cdiff, $ai, $bi;
   1.476 +    while( 1 ) {
   1.477 +        while(  @$am  &&  $ai == $am->[0]  &&  $bi == $bm->[0]  ) {
   1.478 +            shift @$am;
   1.479 +            shift @$bm;
   1.480 +            ++$ai, ++$bi;
   1.481 +        }
   1.482 +        push @cdiff, $ai, $bi;
   1.483 +        last   if  ! @$am;
   1.484 +        $ai = $am->[0];
   1.485 +        $bi = $bm->[0];
   1.486 +        push @cdiff, $ai, $bi;
   1.487 +    }
   1.488 +    push @cdiff, 0+@$a, 0+@$b
   1.489 +        if  $ai < @$a || $bi < @$b;
   1.490 +    return wantarray ? @cdiff : \@cdiff;
   1.491 +}
   1.492 +
   1.493 +sub diff
   1.494 +{
   1.495 +    my $a      = shift;    # array ref
   1.496 +    my $b      = shift;    # array ref
   1.497 +    my $retval = [];
   1.498 +    my $hunk   = [];
   1.499 +    my $discard = sub {
   1.500 +        push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
   1.501 +    };
   1.502 +    my $add = sub {
   1.503 +        push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
   1.504 +    };
   1.505 +    my $match = sub {
   1.506 +        push @$retval, $hunk
   1.507 +            if 0 < @$hunk;
   1.508 +        $hunk = []
   1.509 +    };
   1.510 +    traverse_sequences( $a, $b,
   1.511 +        { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
   1.512 +    &$match();
   1.513 +    return wantarray ? @$retval : $retval;
   1.514 +}
   1.515 +
   1.516 +sub sdiff
   1.517 +{
   1.518 +    my $a      = shift;    # array ref
   1.519 +    my $b      = shift;    # array ref
   1.520 +    my $retval = [];
   1.521 +    my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
   1.522 +    my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
   1.523 +    my $change = sub {
   1.524 +        push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
   1.525 +    };
   1.526 +    my $match = sub {
   1.527 +        push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
   1.528 +    };
   1.529 +    traverse_balanced(
   1.530 +        $a,
   1.531 +        $b,
   1.532 +        {
   1.533 +            MATCH     => $match,
   1.534 +            DISCARD_A => $discard,
   1.535 +            DISCARD_B => $add,
   1.536 +            CHANGE    => $change,
   1.537 +        },
   1.538 +        @_
   1.539 +    );
   1.540 +    return wantarray ? @$retval : $retval;
   1.541 +}
   1.542 +
   1.543 +########################################
   1.544 +my $Root= __PACKAGE__;
   1.545 +package Algorithm::Diff::_impl;
   1.546 +use strict;
   1.547 +
   1.548 +sub _Idx()  { 0 } # $me->[_Idx]: Ref to array of hunk indices
   1.549 +            # 1   # $me->[1]: Ref to first sequence
   1.550 +            # 2   # $me->[2]: Ref to second sequence
   1.551 +sub _End()  { 3 } # $me->[_End]: Diff between forward and reverse pos
   1.552 +sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
   1.553 +sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
   1.554 +sub _Pos()  { 6 } # $me->[_Pos]: Which hunk is currently selected
   1.555 +sub _Off()  { 7 } # $me->[_Off]: Offset into _Idx for current position
   1.556 +sub _Min() { -2 } # Added to _Off to get min instead of max+1
   1.557 +
   1.558 +sub Die
   1.559 +{
   1.560 +    require Carp;
   1.561 +    Carp::confess( @_ );
   1.562 +}
   1.563 +
   1.564 +sub _ChkPos
   1.565 +{
   1.566 +    my( $me )= @_;
   1.567 +    return   if  $me->[_Pos];
   1.568 +    my $meth= ( caller(1) )[3];
   1.569 +    Die( "Called $meth on 'reset' object" );
   1.570 +}
   1.571 +
   1.572 +sub _ChkSeq
   1.573 +{
   1.574 +    my( $me, $seq )= @_;
   1.575 +    return $seq + $me->[_Off]
   1.576 +        if  1 == $seq  ||  2 == $seq;
   1.577 +    my $meth= ( caller(1) )[3];
   1.578 +    Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
   1.579 +}
   1.580 +
   1.581 +sub getObjPkg
   1.582 +{
   1.583 +    my( $us )= @_;
   1.584 +    return ref $us   if  ref $us;
   1.585 +    return $us . "::_obj";
   1.586 +}
   1.587 +
   1.588 +sub new
   1.589 +{
   1.590 +    my( $us, $seq1, $seq2, $opts ) = @_;
   1.591 +    my @args;
   1.592 +    for( $opts->{keyGen} ) {
   1.593 +        push @args, $_   if  $_;
   1.594 +    }
   1.595 +    for( $opts->{keyGenArgs} ) {
   1.596 +        push @args, @$_   if  $_;
   1.597 +    }
   1.598 +    my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
   1.599 +    my $same= 1;
   1.600 +    if(  0 == $cdif->[2]  &&  0 == $cdif->[3]  ) {
   1.601 +        $same= 0;
   1.602 +        splice @$cdif, 0, 2;
   1.603 +    }
   1.604 +    my @obj= ( $cdif, $seq1, $seq2 );
   1.605 +    $obj[_End] = (1+@$cdif)/2;
   1.606 +    $obj[_Same] = $same;
   1.607 +    $obj[_Base] = 0;
   1.608 +    my $me = bless \@obj, $us->getObjPkg();
   1.609 +    $me->Reset( 0 );
   1.610 +    return $me;
   1.611 +}
   1.612 +
   1.613 +sub Reset
   1.614 +{
   1.615 +    my( $me, $pos )= @_;
   1.616 +    $pos= int( $pos || 0 );
   1.617 +    $pos += $me->[_End]
   1.618 +        if  $pos < 0;
   1.619 +    $pos= 0
   1.620 +        if  $pos < 0  ||  $me->[_End] <= $pos;
   1.621 +    $me->[_Pos]= $pos || !1;
   1.622 +    $me->[_Off]= 2*$pos - 1;
   1.623 +    return $me;
   1.624 +}
   1.625 +
   1.626 +sub Base
   1.627 +{
   1.628 +    my( $me, $base )= @_;
   1.629 +    my $oldBase= $me->[_Base];
   1.630 +    $me->[_Base]= 0+$base   if  defined $base;
   1.631 +    return $oldBase;
   1.632 +}
   1.633 +
   1.634 +sub Copy
   1.635 +{
   1.636 +    my( $me, $pos, $base )= @_;
   1.637 +    my @obj= @$me;
   1.638 +    my $you= bless \@obj, ref($me);
   1.639 +    $you->Reset( $pos )   if  defined $pos;
   1.640 +    $you->Base( $base );
   1.641 +    return $you;
   1.642 +}
   1.643 +
   1.644 +sub Next {
   1.645 +    my( $me, $steps )= @_;
   1.646 +    $steps= 1   if  ! defined $steps;
   1.647 +    if( $steps ) {
   1.648 +        my $pos= $me->[_Pos];
   1.649 +        my $new= $pos + $steps;
   1.650 +        $new= 0   if  $pos  &&  $new < 0;
   1.651 +        $me->Reset( $new )
   1.652 +    }
   1.653 +    return $me->[_Pos];
   1.654 +}
   1.655 +
   1.656 +sub Prev {
   1.657 +    my( $me, $steps )= @_;
   1.658 +    $steps= 1   if  ! defined $steps;
   1.659 +    my $pos= $me->Next(-$steps);
   1.660 +    $pos -= $me->[_End]   if  $pos;
   1.661 +    return $pos;
   1.662 +}
   1.663 +
   1.664 +sub Diff {
   1.665 +    my( $me )= @_;
   1.666 +    $me->_ChkPos();
   1.667 +    return 0   if  $me->[_Same] == ( 1 & $me->[_Pos] );
   1.668 +    my $ret= 0;
   1.669 +    my $off= $me->[_Off];
   1.670 +    for my $seq ( 1, 2 ) {
   1.671 +        $ret |= $seq
   1.672 +            if  $me->[_Idx][ $off + $seq + _Min ]
   1.673 +            <   $me->[_Idx][ $off + $seq ];
   1.674 +    }
   1.675 +    return $ret;
   1.676 +}
   1.677 +
   1.678 +sub Min {
   1.679 +    my( $me, $seq, $base )= @_;
   1.680 +    $me->_ChkPos();
   1.681 +    my $off= $me->_ChkSeq($seq);
   1.682 +    $base= $me->[_Base] if !defined $base;
   1.683 +    return $base + $me->[_Idx][ $off + _Min ];
   1.684 +}
   1.685 +
   1.686 +sub Max {
   1.687 +    my( $me, $seq, $base )= @_;
   1.688 +    $me->_ChkPos();
   1.689 +    my $off= $me->_ChkSeq($seq);
   1.690 +    $base= $me->[_Base] if !defined $base;
   1.691 +    return $base + $me->[_Idx][ $off ] -1;
   1.692 +}
   1.693 +
   1.694 +sub Range {
   1.695 +    my( $me, $seq, $base )= @_;
   1.696 +    $me->_ChkPos();
   1.697 +    my $off = $me->_ChkSeq($seq);
   1.698 +    if( !wantarray ) {
   1.699 +        return  $me->[_Idx][ $off ]
   1.700 +            -   $me->[_Idx][ $off + _Min ];
   1.701 +    }
   1.702 +    $base= $me->[_Base] if !defined $base;
   1.703 +    return  ( $base + $me->[_Idx][ $off + _Min ] )
   1.704 +        ..  ( $base + $me->[_Idx][ $off ] - 1 );
   1.705 +}
   1.706 +
   1.707 +sub Items {
   1.708 +    my( $me, $seq )= @_;
   1.709 +    $me->_ChkPos();
   1.710 +    my $off = $me->_ChkSeq($seq);
   1.711 +    if( !wantarray ) {
   1.712 +        return  $me->[_Idx][ $off ]
   1.713 +            -   $me->[_Idx][ $off + _Min ];
   1.714 +    }
   1.715 +    return
   1.716 +        @{$me->[$seq]}[
   1.717 +                $me->[_Idx][ $off + _Min ]
   1.718 +            ..  ( $me->[_Idx][ $off ] - 1 )
   1.719 +        ];
   1.720 +}
   1.721 +
   1.722 +sub Same {
   1.723 +    my( $me )= @_;
   1.724 +    $me->_ChkPos();
   1.725 +    return wantarray ? () : 0
   1.726 +        if  $me->[_Same] != ( 1 & $me->[_Pos] );
   1.727 +    return $me->Items(1);
   1.728 +}
   1.729 +
   1.730 +my %getName;
   1.731 +BEGIN {
   1.732 +    %getName= (
   1.733 +        same => \&Same,
   1.734 +        diff => \&Diff,
   1.735 +        base => \&Base,
   1.736 +        min  => \&Min,
   1.737 +        max  => \&Max,
   1.738 +        range=> \&Range,
   1.739 +        items=> \&Items, # same thing
   1.740 +    );
   1.741 +}
   1.742 +
   1.743 +sub Get
   1.744 +{
   1.745 +    my $me= shift @_;
   1.746 +    $me->_ChkPos();
   1.747 +    my @value;
   1.748 +    for my $arg (  @_  ) {
   1.749 +        for my $word (  split ' ', $arg  ) {
   1.750 +            my $meth;
   1.751 +            if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
   1.752 +                ||  not  $meth= $getName{ lc $2 }
   1.753 +            ) {
   1.754 +                Die( $Root, ", Get: Invalid request ($word)" );
   1.755 +            }
   1.756 +            my( $base, $name, $seq )= ( $1, $2, $3 );
   1.757 +            push @value, scalar(
   1.758 +                4 == length($name)
   1.759 +                    ? $meth->( $me )
   1.760 +                    : $meth->( $me, $seq, $base )
   1.761 +            );
   1.762 +        }
   1.763 +    }
   1.764 +    if(  wantarray  ) {
   1.765 +        return @value;
   1.766 +    } elsif(  1 == @value  ) {
   1.767 +        return $value[0];
   1.768 +    }
   1.769 +    Die( 0+@value, " values requested from ",
   1.770 +        $Root, "'s Get in scalar context" );
   1.771 +}
   1.772 +
   1.773 +
   1.774 +my $Obj= getObjPkg($Root);
   1.775 +no strict 'refs';
   1.776 +
   1.777 +for my $meth (  qw( new getObjPkg )  ) {
   1.778 +    *{$Root."::".$meth} = \&{$meth};
   1.779 +    *{$Obj ."::".$meth} = \&{$meth};
   1.780 +}
   1.781 +for my $meth (  qw(
   1.782 +    Next Prev Reset Copy Base Diff
   1.783 +    Same Items Range Min Max Get
   1.784 +    _ChkPos _ChkSeq
   1.785 +)  ) {
   1.786 +    *{$Obj."::".$meth} = \&{$meth};
   1.787 +}
   1.788 +
   1.789 +1;
   1.790 +__END__
   1.791 +
   1.792 +=head1 NAME
   1.793 +
   1.794 +Algorithm::Diff - Compute `intelligent' differences between two files / lists
   1.795 +
   1.796 +=head1 SYNOPSIS
   1.797 +
   1.798 +    require Algorithm::Diff;
   1.799 +
   1.800 +    # This example produces traditional 'diff' output:
   1.801 +
   1.802 +    my $diff = Algorithm::Diff->new( \@seq1, \@seq2 );
   1.803 +
   1.804 +    $diff->Base( 1 );   # Return line numbers, not indices
   1.805 +    while(  $diff->Next()  ) {
   1.806 +        next   if  $diff->Same();
   1.807 +        my $sep = '';
   1.808 +        if(  ! $diff->Items(2)  ) {
   1.809 +            sprintf "%d,%dd%d\n",
   1.810 +                $diff->Get(qw( Min1 Max1 Max2 ));
   1.811 +        } elsif(  ! $diff->Items(1)  ) {
   1.812 +            sprint "%da%d,%d\n",
   1.813 +                $diff->Get(qw( Max1 Min2 Max2 ));
   1.814 +        } else {
   1.815 +            $sep = "---\n";
   1.816 +            sprintf "%d,%dc%d,%d\n",
   1.817 +                $diff->Get(qw( Min1 Max1 Min2 Max2 ));
   1.818 +        }
   1.819 +        print "< $_"   for  $diff->Items(1);
   1.820 +        print $sep;
   1.821 +        print "> $_"   for  $diff->Items(2);
   1.822 +    }
   1.823 +
   1.824 +
   1.825 +    # Alternate interfaces:
   1.826 +
   1.827 +    use Algorithm::Diff qw(
   1.828 +        LCS LCS_length LCSidx
   1.829 +        diff sdiff compact_diff
   1.830 +        traverse_sequences traverse_balanced );
   1.831 +
   1.832 +    @lcs    = LCS( \@seq1, \@seq2 );
   1.833 +    $lcsref = LCS( \@seq1, \@seq2 );
   1.834 +    $count  = LCS_length( \@seq1, \@seq2 );
   1.835 +
   1.836 +    ( $seq1idxref, $seq2idxref ) = LCSidx( \@seq1, \@seq2 );
   1.837 +
   1.838 +
   1.839 +    # Complicated interfaces:
   1.840 +
   1.841 +    @diffs  = diff( \@seq1, \@seq2 );
   1.842 +
   1.843 +    @sdiffs = sdiff( \@seq1, \@seq2 );
   1.844 +
   1.845 +    @cdiffs = compact_diff( \@seq1, \@seq2 );
   1.846 +
   1.847 +    traverse_sequences(
   1.848 +        \@seq1,
   1.849 +        \@seq2,
   1.850 +        {   MATCH     => \&callback1,
   1.851 +            DISCARD_A => \&callback2,
   1.852 +            DISCARD_B => \&callback3,
   1.853 +        },
   1.854 +        \&key_generator,
   1.855 +        @extra_args,
   1.856 +    );
   1.857 +
   1.858 +    traverse_balanced(
   1.859 +        \@seq1,
   1.860 +        \@seq2,
   1.861 +        {   MATCH     => \&callback1,
   1.862 +            DISCARD_A => \&callback2,
   1.863 +            DISCARD_B => \&callback3,
   1.864 +            CHANGE    => \&callback4,
   1.865 +        },
   1.866 +        \&key_generator,
   1.867 +        @extra_args,
   1.868 +    );
   1.869 +
   1.870 +
   1.871 +=head1 INTRODUCTION
   1.872 +
   1.873 +(by Mark-Jason Dominus)
   1.874 +
   1.875 +I once read an article written by the authors of C<diff>; they said
   1.876 +that they worked very hard on the algorithm until they found the
   1.877 +right one.
   1.878 +
   1.879 +I think what they ended up using (and I hope someone will correct me,
   1.880 +because I am not very confident about this) was the `longest common
   1.881 +subsequence' method.  In the LCS problem, you have two sequences of
   1.882 +items:
   1.883 +
   1.884 +    a b c d f g h j q z
   1.885 +
   1.886 +    a b c d e f g i j k r x y z
   1.887 +
   1.888 +and you want to find the longest sequence of items that is present in
   1.889 +both original sequences in the same order.  That is, you want to find
   1.890 +a new sequence I<S> which can be obtained from the first sequence by
   1.891 +deleting some items, and from the secend sequence by deleting other
   1.892 +items.  You also want I<S> to be as long as possible.  In this case I<S>
   1.893 +is
   1.894 +
   1.895 +    a b c d f g j z
   1.896 +
   1.897 +From there it's only a small step to get diff-like output:
   1.898 +
   1.899 +    e   h i   k   q r x y
   1.900 +    +   - +   +   - + + +
   1.901 +
   1.902 +This module solves the LCS problem.  It also includes a canned function
   1.903 +to generate C<diff>-like output.
   1.904 +
   1.905 +It might seem from the example above that the LCS of two sequences is
   1.906 +always pretty obvious, but that's not always the case, especially when
   1.907 +the two sequences have many repeated elements.  For example, consider
   1.908 +
   1.909 +    a x b y c z p d q
   1.910 +    a b c a x b y c z
   1.911 +
   1.912 +A naive approach might start by matching up the C<a> and C<b> that
   1.913 +appear at the beginning of each sequence, like this:
   1.914 +
   1.915 +    a x b y c         z p d q
   1.916 +    a   b   c a b y c z
   1.917 +
   1.918 +This finds the common subsequence C<a b c z>.  But actually, the LCS
   1.919 +is C<a x b y c z>:
   1.920 +
   1.921 +          a x b y c z p d q
   1.922 +    a b c a x b y c z
   1.923 +
   1.924 +or
   1.925 +
   1.926 +    a       x b y c z p d q
   1.927 +    a b c a x b y c z
   1.928 +
   1.929 +=head1 USAGE
   1.930 +
   1.931 +(See also the README file and several example
   1.932 +scripts include with this module.)
   1.933 +
   1.934 +This module now provides an object-oriented interface that uses less
   1.935 +memory and is easier to use than most of the previous procedural
   1.936 +interfaces.  It also still provides several exportable functions.  We'll
   1.937 +deal with these in ascending order of difficulty:  C<LCS>,
   1.938 +C<LCS_length>, C<LCSidx>, OO interface, C<prepare>, C<diff>, C<sdiff>,
   1.939 +C<traverse_sequences>, and C<traverse_balanced>.
   1.940 +
   1.941 +=head2 C<LCS>
   1.942 +
   1.943 +Given references to two lists of items, LCS returns an array containing
   1.944 +their longest common subsequence.  In scalar context, it returns a
   1.945 +reference to such a list.
   1.946 +
   1.947 +    @lcs    = LCS( \@seq1, \@seq2 );
   1.948 +    $lcsref = LCS( \@seq1, \@seq2 );
   1.949 +
   1.950 +C<LCS> may be passed an optional third parameter; this is a CODE
   1.951 +reference to a key generation function.  See L</KEY GENERATION
   1.952 +FUNCTIONS>.
   1.953 +
   1.954 +    @lcs    = LCS( \@seq1, \@seq2, \&keyGen, @args );
   1.955 +    $lcsref = LCS( \@seq1, \@seq2, \&keyGen, @args );
   1.956 +
   1.957 +Additional parameters, if any, will be passed to the key generation
   1.958 +routine.
   1.959 +
   1.960 +=head2 C<LCS_length>
   1.961 +
   1.962 +This is just like C<LCS> except it only returns the length of the
   1.963 +longest common subsequence.  This provides a performance gain of about
   1.964 +9% compared to C<LCS>.
   1.965 +
   1.966 +=head2 C<LCSidx>
   1.967 +
   1.968 +Like C<LCS> except it returns references to two arrays.  The first array
   1.969 +contains the indices into @seq1 where the LCS items are located.  The
   1.970 +second array contains the indices into @seq2 where the LCS items are located.
   1.971 +
   1.972 +Therefore, the following three lists will contain the same values:
   1.973 +
   1.974 +    my( $idx1, $idx2 ) = LCSidx( \@seq1, \@seq2 );
   1.975 +    my @list1 = @seq1[ @$idx1 ];
   1.976 +    my @list2 = @seq2[ @$idx2 ];
   1.977 +    my @list3 = LCS( \@seq1, \@seq2 );
   1.978 +
   1.979 +=head2 C<new>
   1.980 +
   1.981 +    $diff = Algorithm::Diffs->new( \@seq1, \@seq2 );
   1.982 +    $diff = Algorithm::Diffs->new( \@seq1, \@seq2, \%opts );
   1.983 +
   1.984 +C<new> computes the smallest set of additions and deletions necessary
   1.985 +to turn the first sequence into the second and compactly records them
   1.986 +in the object.
   1.987 +
   1.988 +You use the object to iterate over I<hunks>, where each hunk represents
   1.989 +a contiguous section of items which should be added, deleted, replaced,
   1.990 +or left unchanged.
   1.991 +
   1.992 +=over 4
   1.993 +
   1.994 +The following summary of all of the methods looks a lot like Perl code
   1.995 +but some of the symbols have different meanings:
   1.996 +
   1.997 +    [ ]     Encloses optional arguments
   1.998 +    :       Is followed by the default value for an optional argument
   1.999 +    |       Separates alternate return results
  1.1000 +
  1.1001 +Method summary:
  1.1002 +
  1.1003 +    $obj        = Algorithm::Diff->new( \@seq1, \@seq2, [ \%opts ] );
  1.1004 +    $pos        = $obj->Next(  [ $count : 1 ] );
  1.1005 +    $revPos     = $obj->Prev(  [ $count : 1 ] );
  1.1006 +    $obj        = $obj->Reset( [ $pos : 0 ] );
  1.1007 +    $copy       = $obj->Copy(  [ $pos, [ $newBase ] ] );
  1.1008 +    $oldBase    = $obj->Base(  [ $newBase ] );
  1.1009 +
  1.1010 +Note that all of the following methods C<die> if used on an object that
  1.1011 +is "reset" (not currently pointing at any hunk).
  1.1012 +
  1.1013 +    $bits       = $obj->Diff(  );
  1.1014 +    @items|$cnt = $obj->Same(  );
  1.1015 +    @items|$cnt = $obj->Items( $seqNum );
  1.1016 +    @idxs |$cnt = $obj->Range( $seqNum, [ $base ] );
  1.1017 +    $minIdx     = $obj->Min(   $seqNum, [ $base ] );
  1.1018 +    $maxIdx     = $obj->Max(   $seqNum, [ $base ] );
  1.1019 +    @values     = $obj->Get(   @names );
  1.1020 +
  1.1021 +Passing in C<undef> for an optional argument is always treated the same
  1.1022 +as if no argument were passed in.
  1.1023 +
  1.1024 +=item C<Next>
  1.1025 +
  1.1026 +    $pos = $diff->Next();    # Move forward 1 hunk
  1.1027 +    $pos = $diff->Next( 2 ); # Move forward 2 hunks
  1.1028 +    $pos = $diff->Next(-5);  # Move backward 5 hunks
  1.1029 +
  1.1030 +C<Next> moves the object to point at the next hunk.  The object starts
  1.1031 +out "reset", which means it isn't pointing at any hunk.  If the object
  1.1032 +is reset, then C<Next()> moves to the first hunk.
  1.1033 +
  1.1034 +C<Next> returns a true value iff the move didn't go past the last hunk.
  1.1035 +So C<Next(0)> will return true iff the object is not reset.
  1.1036 +
  1.1037 +Actually, C<Next> returns the object's new position, which is a number
  1.1038 +between 1 and the number of hunks (inclusive), or returns a false value.
  1.1039 +
  1.1040 +=item C<Prev>
  1.1041 +
  1.1042 +C<Prev($N)> is almost identical to C<Next(-$N)>; it moves to the $Nth
  1.1043 +previous hunk.  On a 'reset' object, C<Prev()> [and C<Next(-1)>] move
  1.1044 +to the last hunk.
  1.1045 +
  1.1046 +The position returned by C<Prev> is relative to the I<end> of the
  1.1047 +hunks; -1 for the last hunk, -2 for the second-to-last, etc.
  1.1048 +
  1.1049 +=item C<Reset>
  1.1050 +
  1.1051 +    $diff->Reset();     # Reset the object's position
  1.1052 +    $diff->Reset($pos); # Move to the specified hunk
  1.1053 +    $diff->Reset(1);    # Move to the first hunk
  1.1054 +    $diff->Reset(-1);   # Move to the last hunk
  1.1055 +
  1.1056 +C<Reset> returns the object, so, for example, you could use
  1.1057 +C<< $diff->Reset()->Next(-1) >> to get the number of hunks.
  1.1058 +
  1.1059 +=item C<Copy>
  1.1060 +
  1.1061 +    $copy = $diff->Copy( $newPos, $newBase );
  1.1062 +
  1.1063 +C<Copy> returns a copy of the object.  The copy and the orignal object
  1.1064 +share most of their data, so making copies takes very little memory.
  1.1065 +The copy maintains its own position (separate from the original), which
  1.1066 +is the main purpose of copies.  It also maintains its own base.
  1.1067 +
  1.1068 +By default, the copy's position starts out the same as the original
  1.1069 +object's position.  But C<Copy> takes an optional first argument to set the
  1.1070 +new position, so the following three snippets are equivalent:
  1.1071 +
  1.1072 +    $copy = $diff->Copy($pos);
  1.1073 +
  1.1074 +    $copy = $diff->Copy();
  1.1075 +    $copy->Reset($pos);
  1.1076 +
  1.1077 +    $copy = $diff->Copy()->Reset($pos);
  1.1078 +
  1.1079 +C<Copy> takes an optional second argument to set the base for
  1.1080 +the copy.  If you wish to change the base of the copy but leave
  1.1081 +the position the same as in the original, here are two
  1.1082 +equivalent ways:
  1.1083 +
  1.1084 +    $copy = $diff->Copy();
  1.1085 +    $copy->Base( 0 );
  1.1086 +
  1.1087 +    $copy = $diff->Copy(undef,0);
  1.1088 +
  1.1089 +Here are two equivalent way to get a "reset" copy:
  1.1090 +
  1.1091 +    $copy = $diff->Copy(0);
  1.1092 +
  1.1093 +    $copy = $diff->Copy()->Reset();
  1.1094 +
  1.1095 +=item C<Diff>
  1.1096 +
  1.1097 +    $bits = $obj->Diff();
  1.1098 +
  1.1099 +C<Diff> returns a true value iff the current hunk contains items that are
  1.1100 +different between the two sequences.  It actually returns one of the
  1.1101 +follow 4 values:
  1.1102 +
  1.1103 +=over 4
  1.1104 +
  1.1105 +=item 3
  1.1106 +
  1.1107 +C<3==(1|2)>.  This hunk contains items from @seq1 and the items
  1.1108 +from @seq2 that should replace them.  Both sequence 1 and 2
  1.1109 +contain changed items so both the 1 and 2 bits are set.
  1.1110 +
  1.1111 +=item 2
  1.1112 +
  1.1113 +This hunk only contains items from @seq2 that should be inserted (not
  1.1114 +items from @seq1).  Only sequence 2 contains changed items so only the 2
  1.1115 +bit is set.
  1.1116 +
  1.1117 +=item 1
  1.1118 +
  1.1119 +This hunk only contains items from @seq1 that should be deleted (not
  1.1120 +items from @seq2).  Only sequence 1 contains changed items so only the 1
  1.1121 +bit is set.
  1.1122 +
  1.1123 +=item 0
  1.1124 +
  1.1125 +This means that the items in this hunk are the same in both sequences.
  1.1126 +Neither sequence 1 nor 2 contain changed items so neither the 1 nor the
  1.1127 +2 bits are set.
  1.1128 +
  1.1129 +=back
  1.1130 +
  1.1131 +=item C<Same>
  1.1132 +
  1.1133 +C<Same> returns a true value iff the current hunk contains items that
  1.1134 +are the same in both sequences.  It actually returns the list of items
  1.1135 +if they are the same or an emty list if they aren't.  In a scalar
  1.1136 +context, it returns the size of the list.
  1.1137 +
  1.1138 +=item C<Items>
  1.1139 +
  1.1140 +    $count = $diff->Items(2);
  1.1141 +    @items = $diff->Items($seqNum);
  1.1142 +
  1.1143 +C<Items> returns the (number of) items from the specified sequence that
  1.1144 +are part of the current hunk.
  1.1145 +
  1.1146 +If the current hunk contains only insertions, then
  1.1147 +C<< $diff->Items(1) >> will return an empty list (0 in a scalar conext).
  1.1148 +If the current hunk contains only deletions, then C<< $diff->Items(2) >>
  1.1149 +will return an empty list (0 in a scalar conext).
  1.1150 +
  1.1151 +If the hunk contains replacements, then both C<< $diff->Items(1) >> and
  1.1152 +C<< $diff->Items(2) >> will return different, non-empty lists.
  1.1153 +
  1.1154 +Otherwise, the hunk contains identical items and all of the following
  1.1155 +will return the same lists:
  1.1156 +
  1.1157 +    @items = $diff->Items(1);
  1.1158 +    @items = $diff->Items(2);
  1.1159 +    @items = $diff->Same();
  1.1160 +
  1.1161 +=item C<Range>
  1.1162 +
  1.1163 +    $count = $diff->Range( $seqNum );
  1.1164 +    @indices = $diff->Range( $seqNum );
  1.1165 +    @indices = $diff->Range( $seqNum, $base );
  1.1166 +
  1.1167 +C<Range> is like C<Items> except that it returns a list of I<indices> to
  1.1168 +the items rather than the items themselves.  By default, the index of
  1.1169 +the first item (in each sequence) is 0 but this can be changed by
  1.1170 +calling the C<Base> method.  So, by default, the following two snippets
  1.1171 +return the same lists:
  1.1172 +
  1.1173 +    @list = $diff->Items(2);
  1.1174 +    @list = @seq2[ $diff->Range(2) ];
  1.1175 +
  1.1176 +You can also specify the base to use as the second argument.  So the
  1.1177 +following two snippets I<always> return the same lists:
  1.1178 +
  1.1179 +    @list = $diff->Items(1);
  1.1180 +    @list = @seq1[ $diff->Range(1,0) ];
  1.1181 +
  1.1182 +=item C<Base>
  1.1183 +
  1.1184 +    $curBase = $diff->Base();
  1.1185 +    $oldBase = $diff->Base($newBase);
  1.1186 +
  1.1187 +C<Base> sets and/or returns the current base (usually 0 or 1) that is
  1.1188 +used when you request range information.  The base defaults to 0 so
  1.1189 +that range information is returned as array indices.  You can set the
  1.1190 +base to 1 if you want to report traditional line numbers instead.
  1.1191 +
  1.1192 +=item C<Min>
  1.1193 +
  1.1194 +    $min1 = $diff->Min(1);
  1.1195 +    $min = $diff->Min( $seqNum, $base );
  1.1196 +
  1.1197 +C<Min> returns the first value that C<Range> would return (given the
  1.1198 +same arguments) or returns C<undef> if C<Range> would return an empty
  1.1199 +list.
  1.1200 +
  1.1201 +=item C<Max>
  1.1202 +
  1.1203 +C<Max> returns the last value that C<Range> would return or C<undef>.
  1.1204 +
  1.1205 +=item C<Get>
  1.1206 +
  1.1207 +    ( $n, $x, $r ) = $diff->Get(qw( min1 max1 range1 ));
  1.1208 +    @values = $diff->Get(qw( 0min2 1max2 range2 same base ));
  1.1209 +
  1.1210 +C<Get> returns one or more scalar values.  You pass in a list of the
  1.1211 +names of the values you want returned.  Each name must match one of the
  1.1212 +following regexes:
  1.1213 +
  1.1214 +    /^(-?\d+)?(min|max)[12]$/i
  1.1215 +    /^(range[12]|same|diff|base)$/i
  1.1216 +
  1.1217 +The 1 or 2 after a name says which sequence you want the information
  1.1218 +for (and where allowed, it is required).  The optional number before
  1.1219 +"min" or "max" is the base to use.  So the following equalities hold:
  1.1220 +
  1.1221 +    $diff->Get('min1') == $diff->Min(1)
  1.1222 +    $diff->Get('0min2') == $diff->Min(2,0)
  1.1223 +
  1.1224 +Using C<Get> in a scalar context when you've passed in more than one
  1.1225 +name is a fatal error (C<die> is called).
  1.1226 +
  1.1227 +=back
  1.1228 +
  1.1229 +=head2 C<prepare>
  1.1230 +
  1.1231 +Given a reference to a list of items, C<prepare> returns a reference
  1.1232 +to a hash which can be used when comparing this sequence to other
  1.1233 +sequences with C<LCS> or C<LCS_length>.
  1.1234 +
  1.1235 +    $prep = prepare( \@seq1 );
  1.1236 +    for $i ( 0 .. 10_000 )
  1.1237 +    {
  1.1238 +        @lcs = LCS( $prep, $seq[$i] );
  1.1239 +        # do something useful with @lcs
  1.1240 +    }
  1.1241 +
  1.1242 +C<prepare> may be passed an optional third parameter; this is a CODE
  1.1243 +reference to a key generation function.  See L</KEY GENERATION
  1.1244 +FUNCTIONS>.
  1.1245 +
  1.1246 +    $prep = prepare( \@seq1, \&keyGen );
  1.1247 +    for $i ( 0 .. 10_000 )
  1.1248 +    {
  1.1249 +        @lcs = LCS( $seq[$i], $prep, \&keyGen );
  1.1250 +        # do something useful with @lcs
  1.1251 +    }
  1.1252 +
  1.1253 +Using C<prepare> provides a performance gain of about 50% when calling LCS
  1.1254 +many times compared with not preparing.
  1.1255 +
  1.1256 +=head2 C<diff>
  1.1257 +
  1.1258 +    @diffs     = diff( \@seq1, \@seq2 );
  1.1259 +    $diffs_ref = diff( \@seq1, \@seq2 );
  1.1260 +
  1.1261 +C<diff> computes the smallest set of additions and deletions necessary
  1.1262 +to turn the first sequence into the second, and returns a description
  1.1263 +of these changes.  The description is a list of I<hunks>; each hunk
  1.1264 +represents a contiguous section of items which should be added,
  1.1265 +deleted, or replaced.  (Hunks containing unchanged items are not
  1.1266 +included.)
  1.1267 +
  1.1268 +The return value of C<diff> is a list of hunks, or, in scalar context, a
  1.1269 +reference to such a list.  If there are no differences, the list will be
  1.1270 +empty.
  1.1271 +
  1.1272 +Here is an example.  Calling C<diff> for the following two sequences:
  1.1273 +
  1.1274 +    a b c e h j l m n p
  1.1275 +    b c d e f j k l m r s t
  1.1276 +
  1.1277 +would produce the following list:
  1.1278 +
  1.1279 +    (
  1.1280 +      [ [ '-', 0, 'a' ] ],
  1.1281 +
  1.1282 +      [ [ '+', 2, 'd' ] ],
  1.1283 +
  1.1284 +      [ [ '-', 4, 'h' ],
  1.1285 +        [ '+', 4, 'f' ] ],
  1.1286 +
  1.1287 +      [ [ '+', 6, 'k' ] ],
  1.1288 +
  1.1289 +      [ [ '-',  8, 'n' ],
  1.1290 +        [ '-',  9, 'p' ],
  1.1291 +        [ '+',  9, 'r' ],
  1.1292 +        [ '+', 10, 's' ],
  1.1293 +        [ '+', 11, 't' ] ],
  1.1294 +    )
  1.1295 +
  1.1296 +There are five hunks here.  The first hunk says that the C<a> at
  1.1297 +position 0 of the first sequence should be deleted (C<->).  The second
  1.1298 +hunk says that the C<d> at position 2 of the second sequence should
  1.1299 +be inserted (C<+>).  The third hunk says that the C<h> at position 4
  1.1300 +of the first sequence should be removed and replaced with the C<f>
  1.1301 +from position 4 of the second sequence.  And so on.
  1.1302 +
  1.1303 +C<diff> may be passed an optional third parameter; this is a CODE
  1.1304 +reference to a key generation function.  See L</KEY GENERATION
  1.1305 +FUNCTIONS>.
  1.1306 +
  1.1307 +Additional parameters, if any, will be passed to the key generation
  1.1308 +routine.
  1.1309 +
  1.1310 +=head2 C<sdiff>
  1.1311 +
  1.1312 +    @sdiffs     = sdiff( \@seq1, \@seq2 );
  1.1313 +    $sdiffs_ref = sdiff( \@seq1, \@seq2 );
  1.1314 +
  1.1315 +C<sdiff> computes all necessary components to show two sequences
  1.1316 +and their minimized differences side by side, just like the
  1.1317 +Unix-utility I<sdiff> does:
  1.1318 +
  1.1319 +    same             same
  1.1320 +    before     |     after
  1.1321 +    old        <     -
  1.1322 +    -          >     new
  1.1323 +
  1.1324 +It returns a list of array refs, each pointing to an array of
  1.1325 +display instructions. In scalar context it returns a reference
  1.1326 +to such a list. If there are no differences, the list will have one
  1.1327 +entry per item, each indicating that the item was unchanged.
  1.1328 +
  1.1329 +Display instructions consist of three elements: A modifier indicator
  1.1330 +(C<+>: Element added, C<->: Element removed, C<u>: Element unmodified,
  1.1331 +C<c>: Element changed) and the value of the old and new elements, to
  1.1332 +be displayed side-by-side.
  1.1333 +
  1.1334 +An C<sdiff> of the following two sequences:
  1.1335 +
  1.1336 +    a b c e h j l m n p
  1.1337 +    b c d e f j k l m r s t
  1.1338 +
  1.1339 +results in
  1.1340 +
  1.1341 +    ( [ '-', 'a', ''  ],
  1.1342 +      [ 'u', 'b', 'b' ],
  1.1343 +      [ 'u', 'c', 'c' ],
  1.1344 +      [ '+', '',  'd' ],
  1.1345 +      [ 'u', 'e', 'e' ],
  1.1346 +      [ 'c', 'h', 'f' ],
  1.1347 +      [ 'u', 'j', 'j' ],
  1.1348 +      [ '+', '',  'k' ],
  1.1349 +      [ 'u', 'l', 'l' ],
  1.1350 +      [ 'u', 'm', 'm' ],
  1.1351 +      [ 'c', 'n', 'r' ],
  1.1352 +      [ 'c', 'p', 's' ],
  1.1353 +      [ '+', '',  't' ],
  1.1354 +    )
  1.1355 +
  1.1356 +C<sdiff> may be passed an optional third parameter; this is a CODE
  1.1357 +reference to a key generation function.  See L</KEY GENERATION
  1.1358 +FUNCTIONS>.
  1.1359 +
  1.1360 +Additional parameters, if any, will be passed to the key generation
  1.1361 +routine.
  1.1362 +
  1.1363 +=head2 C<compact_diff>
  1.1364 +
  1.1365 +C<compact_diff> is much like C<sdiff> except it returns a much more
  1.1366 +compact description consisting of just one flat list of indices.  An
  1.1367 +example helps explain the format:
  1.1368 +
  1.1369 +    my @a = qw( a b c   e  h j   l m n p      );
  1.1370 +    my @b = qw(   b c d e f  j k l m    r s t );
  1.1371 +    @cdiff = compact_diff( \@a, \@b );
  1.1372 +    # Returns:
  1.1373 +    #   @a      @b       @a       @b
  1.1374 +    #  start   start   values   values
  1.1375 +    (    0,      0,   #       =
  1.1376 +         0,      0,   #    a  !
  1.1377 +         1,      0,   #  b c  =  b c
  1.1378 +         3,      2,   #       !  d
  1.1379 +         3,      3,   #    e  =  e
  1.1380 +         4,      4,   #    f  !  h
  1.1381 +         5,      5,   #    j  =  j
  1.1382 +         6,      6,   #       !  k
  1.1383 +         6,      7,   #  l m  =  l m
  1.1384 +         8,      9,   #  n p  !  r s t
  1.1385 +        10,     12,   #
  1.1386 +    );
  1.1387 +
  1.1388 +The 0th, 2nd, 4th, etc. entries are all indices into @seq1 (@a in the
  1.1389 +above example) indicating where a hunk begins.  The 1st, 3rd, 5th, etc.
  1.1390 +entries are all indices into @seq2 (@b in the above example) indicating
  1.1391 +where the same hunk begins.
  1.1392 +
  1.1393 +So each pair of indices (except the last pair) describes where a hunk
  1.1394 +begins (in each sequence).  Since each hunk must end at the item just
  1.1395 +before the item that starts the next hunk, the next pair of indices can
  1.1396 +be used to determine where the hunk ends.
  1.1397 +
  1.1398 +So, the first 4 entries (0..3) describe the first hunk.  Entries 0 and 1
  1.1399 +describe where the first hunk begins (and so are always both 0).
  1.1400 +Entries 2 and 3 describe where the next hunk begins, so subtracting 1
  1.1401 +from each tells us where the first hunk ends.  That is, the first hunk
  1.1402 +contains items C<$diff[0]> through C<$diff[2] - 1> of the first sequence
  1.1403 +and contains items C<$diff[1]> through C<$diff[3] - 1> of the second
  1.1404 +sequence.
  1.1405 +
  1.1406 +In other words, the first hunk consists of the following two lists of items:
  1.1407 +
  1.1408 +               #  1st pair     2nd pair
  1.1409 +               # of indices   of indices
  1.1410 +    @list1 = @a[ $cdiff[0] .. $cdiff[2]-1 ];
  1.1411 +    @list2 = @b[ $cdiff[1] .. $cdiff[3]-1 ];
  1.1412 +               # Hunk start   Hunk end
  1.1413 +
  1.1414 +Note that the hunks will always alternate between those that are part of
  1.1415 +the LCS (those that contain unchanged items) and those that contain
  1.1416 +changes.  This means that all we need to be told is whether the first
  1.1417 +hunk is a 'same' or 'diff' hunk and we can determine which of the other
  1.1418 +hunks contain 'same' items or 'diff' items.
  1.1419 +
  1.1420 +By convention, we always make the first hunk contain unchanged items.
  1.1421 +So the 1st, 3rd, 5th, etc. hunks (all odd-numbered hunks if you start
  1.1422 +counting from 1) all contain unchanged items.  And the 2nd, 4th, 6th,
  1.1423 +etc. hunks (all even-numbered hunks if you start counting from 1) all
  1.1424 +contain changed items.
  1.1425 +
  1.1426 +Since @a and @b don't begin with the same value, the first hunk in our
  1.1427 +example is empty (otherwise we'd violate the above convention).  Note
  1.1428 +that the first 4 index values in our example are all zero.  Plug these
  1.1429 +values into our previous code block and we get:
  1.1430 +
  1.1431 +    @hunk1a = @a[ 0 .. 0-1 ];
  1.1432 +    @hunk1b = @b[ 0 .. 0-1 ];
  1.1433 +
  1.1434 +And C<0..-1> returns the empty list.
  1.1435 +
  1.1436 +Move down one pair of indices (2..5) and we get the offset ranges for
  1.1437 +the second hunk, which contains changed items.
  1.1438 +
  1.1439 +Since C<@diff[2..5]> contains (0,0,1,0) in our example, the second hunk
  1.1440 +consists of these two lists of items:
  1.1441 +
  1.1442 +        @hunk2a = @a[ $cdiff[2] .. $cdiff[4]-1 ];
  1.1443 +        @hunk2b = @b[ $cdiff[3] .. $cdiff[5]-1 ];
  1.1444 +    # or
  1.1445 +        @hunk2a = @a[ 0 .. 1-1 ];
  1.1446 +        @hunk2b = @b[ 0 .. 0-1 ];
  1.1447 +    # or
  1.1448 +        @hunk2a = @a[ 0 .. 0 ];
  1.1449 +        @hunk2b = @b[ 0 .. -1 ];
  1.1450 +    # or
  1.1451 +        @hunk2a = ( 'a' );
  1.1452 +        @hunk2b = ( );
  1.1453 +
  1.1454 +That is, we would delete item 0 ('a') from @a.
  1.1455 +
  1.1456 +Since C<@diff[4..7]> contains (1,0,3,2) in our example, the third hunk
  1.1457 +consists of these two lists of items:
  1.1458 +
  1.1459 +        @hunk3a = @a[ $cdiff[4] .. $cdiff[6]-1 ];
  1.1460 +        @hunk3a = @b[ $cdiff[5] .. $cdiff[7]-1 ];
  1.1461 +    # or
  1.1462 +        @hunk3a = @a[ 1 .. 3-1 ];
  1.1463 +        @hunk3a = @b[ 0 .. 2-1 ];
  1.1464 +    # or
  1.1465 +        @hunk3a = @a[ 1 .. 2 ];
  1.1466 +        @hunk3a = @b[ 0 .. 1 ];
  1.1467 +    # or
  1.1468 +        @hunk3a = qw( b c );
  1.1469 +        @hunk3a = qw( b c );
  1.1470 +
  1.1471 +Note that this third hunk contains unchanged items as our convention demands.
  1.1472 +
  1.1473 +You can continue this process until you reach the last two indices,
  1.1474 +which will always be the number of items in each sequence.  This is
  1.1475 +required so that subtracting one from each will give you the indices to
  1.1476 +the last items in each sequence.
  1.1477 +
  1.1478 +=head2 C<traverse_sequences>
  1.1479 +
  1.1480 +C<traverse_sequences> used to be the most general facility provided by
  1.1481 +this module (the new OO interface is more powerful and much easier to
  1.1482 +use).
  1.1483 +
  1.1484 +Imagine that there are two arrows.  Arrow A points to an element of
  1.1485 +sequence A, and arrow B points to an element of the sequence B. 
  1.1486 +Initially, the arrows point to the first elements of the respective
  1.1487 +sequences.  C<traverse_sequences> will advance the arrows through the
  1.1488 +sequences one element at a time, calling an appropriate user-specified
  1.1489 +callback function before each advance.  It willadvance the arrows in
  1.1490 +such a way that if there are equal elements C<$A[$i]> and C<$B[$j]>
  1.1491 +which are equal and which are part of the LCS, there will be some moment
  1.1492 +during the execution of C<traverse_sequences> when arrow A is pointing
  1.1493 +to C<$A[$i]> and arrow B is pointing to C<$B[$j]>.  When this happens,
  1.1494 +C<traverse_sequences> will call the C<MATCH> callback function and then
  1.1495 +it will advance both arrows.
  1.1496 +
  1.1497 +Otherwise, one of the arrows is pointing to an element of its sequence
  1.1498 +that is not part of the LCS.  C<traverse_sequences> will advance that
  1.1499 +arrow and will call the C<DISCARD_A> or the C<DISCARD_B> callback,
  1.1500 +depending on which arrow it advanced.  If both arrows point to elements
  1.1501 +that are not part of the LCS, then C<traverse_sequences> will advance
  1.1502 +one of them and call the appropriate callback, but it is not specified
  1.1503 +which it will call.
  1.1504 +
  1.1505 +The arguments to C<traverse_sequences> are the two sequences to
  1.1506 +traverse, and a hash which specifies the callback functions, like this:
  1.1507 +
  1.1508 +    traverse_sequences(
  1.1509 +        \@seq1, \@seq2,
  1.1510 +        {   MATCH => $callback_1,
  1.1511 +            DISCARD_A => $callback_2,
  1.1512 +            DISCARD_B => $callback_3,
  1.1513 +        }
  1.1514 +    );
  1.1515 +
  1.1516 +Callbacks for MATCH, DISCARD_A, and DISCARD_B are invoked with at least
  1.1517 +the indices of the two arrows as their arguments.  They are not expected
  1.1518 +to return any values.  If a callback is omitted from the table, it is
  1.1519 +not called.
  1.1520 +
  1.1521 +Callbacks for A_FINISHED and B_FINISHED are invoked with at least the
  1.1522 +corresponding index in A or B.
  1.1523 +
  1.1524 +If arrow A reaches the end of its sequence, before arrow B does,
  1.1525 +C<traverse_sequences> will call the C<A_FINISHED> callback when it
  1.1526 +advances arrow B, if there is such a function; if not it will call
  1.1527 +C<DISCARD_B> instead.  Similarly if arrow B finishes first. 
  1.1528 +C<traverse_sequences> returns when both arrows are at the ends of their
  1.1529 +respective sequences.  It returns true on success and false on failure. 
  1.1530 +At present there is no way to fail.
  1.1531 +
  1.1532 +C<traverse_sequences> may be passed an optional fourth parameter; this
  1.1533 +is a CODE reference to a key generation function.  See L</KEY GENERATION
  1.1534 +FUNCTIONS>.
  1.1535 +
  1.1536 +Additional parameters, if any, will be passed to the key generation function.
  1.1537 +
  1.1538 +If you want to pass additional parameters to your callbacks, but don't
  1.1539 +need a custom key generation function, you can get the default by
  1.1540 +passing undef:
  1.1541 +
  1.1542 +    traverse_sequences(
  1.1543 +        \@seq1, \@seq2,
  1.1544 +        {   MATCH => $callback_1,
  1.1545 +            DISCARD_A => $callback_2,
  1.1546 +            DISCARD_B => $callback_3,
  1.1547 +        },
  1.1548 +        undef,     # default key-gen
  1.1549 +        $myArgument1,
  1.1550 +        $myArgument2,
  1.1551 +        $myArgument3,
  1.1552 +    );
  1.1553 +
  1.1554 +C<traverse_sequences> does not have a useful return value; you are
  1.1555 +expected to plug in the appropriate behavior with the callback
  1.1556 +functions.
  1.1557 +
  1.1558 +=head2 C<traverse_balanced>
  1.1559 +
  1.1560 +C<traverse_balanced> is an alternative to C<traverse_sequences>. It
  1.1561 +uses a different algorithm to iterate through the entries in the
  1.1562 +computed LCS. Instead of sticking to one side and showing element changes
  1.1563 +as insertions and deletions only, it will jump back and forth between
  1.1564 +the two sequences and report I<changes> occurring as deletions on one
  1.1565 +side followed immediatly by an insertion on the other side.
  1.1566 +
  1.1567 +In addition to the C<DISCARD_A>, C<DISCARD_B>, and C<MATCH> callbacks
  1.1568 +supported by C<traverse_sequences>, C<traverse_balanced> supports
  1.1569 +a C<CHANGE> callback indicating that one element got C<replaced> by another:
  1.1570 +
  1.1571 +    traverse_balanced(
  1.1572 +        \@seq1, \@seq2,
  1.1573 +        {   MATCH => $callback_1,
  1.1574 +            DISCARD_A => $callback_2,
  1.1575 +            DISCARD_B => $callback_3,
  1.1576 +            CHANGE    => $callback_4,
  1.1577 +        }
  1.1578 +    );
  1.1579 +
  1.1580 +If no C<CHANGE> callback is specified, C<traverse_balanced>
  1.1581 +will map C<CHANGE> events to C<DISCARD_A> and C<DISCARD_B> actions,
  1.1582 +therefore resulting in a similar behaviour as C<traverse_sequences>
  1.1583 +with different order of events.
  1.1584 +
  1.1585 +C<traverse_balanced> might be a bit slower than C<traverse_sequences>,
  1.1586 +noticable only while processing huge amounts of data.
  1.1587 +
  1.1588 +The C<sdiff> function of this module 
  1.1589 +is implemented as call to C<traverse_balanced>.
  1.1590 +
  1.1591 +C<traverse_balanced> does not have a useful return value; you are expected to
  1.1592 +plug in the appropriate behavior with the callback functions.
  1.1593 +
  1.1594 +=head1 KEY GENERATION FUNCTIONS
  1.1595 +
  1.1596 +Most of the functions accept an optional extra parameter.  This is a
  1.1597 +CODE reference to a key generating (hashing) function that should return
  1.1598 +a string that uniquely identifies a given element.  It should be the
  1.1599 +case that if two elements are to be considered equal, their keys should
  1.1600 +be the same (and the other way around).  If no key generation function
  1.1601 +is provided, the key will be the element as a string.
  1.1602 +
  1.1603 +By default, comparisons will use "eq" and elements will be turned into keys
  1.1604 +using the default stringizing operator '""'.
  1.1605 +
  1.1606 +Where this is important is when you're comparing something other than
  1.1607 +strings.  If it is the case that you have multiple different objects
  1.1608 +that should be considered to be equal, you should supply a key
  1.1609 +generation function. Otherwise, you have to make sure that your arrays
  1.1610 +contain unique references.
  1.1611 +
  1.1612 +For instance, consider this example:
  1.1613 +
  1.1614 +    package Person;
  1.1615 +
  1.1616 +    sub new
  1.1617 +    {
  1.1618 +        my $package = shift;
  1.1619 +        return bless { name => '', ssn => '', @_ }, $package;
  1.1620 +    }
  1.1621 +
  1.1622 +    sub clone
  1.1623 +    {
  1.1624 +        my $old = shift;
  1.1625 +        my $new = bless { %$old }, ref($old);
  1.1626 +    }
  1.1627 +
  1.1628 +    sub hash
  1.1629 +    {
  1.1630 +        return shift()->{'ssn'};
  1.1631 +    }
  1.1632 +
  1.1633 +    my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' );
  1.1634 +    my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' );
  1.1635 +    my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' );
  1.1636 +    my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' );
  1.1637 +    my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' );
  1.1638 +
  1.1639 +If you did this:
  1.1640 +
  1.1641 +    my $array1 = [ $person1, $person2, $person4 ];
  1.1642 +    my $array2 = [ $person1, $person3, $person4, $person5 ];
  1.1643 +    Algorithm::Diff::diff( $array1, $array2 );
  1.1644 +
  1.1645 +everything would work out OK (each of the objects would be converted
  1.1646 +into a string like "Person=HASH(0x82425b0)" for comparison).
  1.1647 +
  1.1648 +But if you did this:
  1.1649 +
  1.1650 +    my $array1 = [ $person1, $person2, $person4 ];
  1.1651 +    my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
  1.1652 +    Algorithm::Diff::diff( $array1, $array2 );
  1.1653 +
  1.1654 +$person4 and $person4->clone() (which have the same name and SSN)
  1.1655 +would be seen as different objects. If you wanted them to be considered
  1.1656 +equivalent, you would have to pass in a key generation function:
  1.1657 +
  1.1658 +    my $array1 = [ $person1, $person2, $person4 ];
  1.1659 +    my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
  1.1660 +    Algorithm::Diff::diff( $array1, $array2, \&Person::hash );
  1.1661 +
  1.1662 +This would use the 'ssn' field in each Person as a comparison key, and
  1.1663 +so would consider $person4 and $person4->clone() as equal.
  1.1664 +
  1.1665 +You may also pass additional parameters to the key generation function
  1.1666 +if you wish.
  1.1667 +
  1.1668 +=head1 ERROR CHECKING
  1.1669 +
  1.1670 +If you pass these routines a non-reference and they expect a reference,
  1.1671 +they will die with a message.
  1.1672 +
  1.1673 +=head1 AUTHOR
  1.1674 +
  1.1675 +This version released by Tye McQueen (http://perlmonks.org/?node=tye).
  1.1676 +
  1.1677 +=head1 LICENSE
  1.1678 +
  1.1679 +Parts Copyright (c) 2000-2004 Ned Konz.  All rights reserved.
  1.1680 +Parts by Tye McQueen.
  1.1681 +
  1.1682 +This program is free software; you can redistribute it and/or modify it
  1.1683 +under the same terms as Perl.
  1.1684 +
  1.1685 +=head1 MAILING LIST
  1.1686 +
  1.1687 +Mark-Jason still maintains a mailing list.  To join a low-volume mailing
  1.1688 +list for announcements related to diff and Algorithm::Diff, send an
  1.1689 +empty mail message to mjd-perl-diff-request@plover.com.
  1.1690 +
  1.1691 +=head1 CREDITS
  1.1692 +
  1.1693 +Versions through 0.59 (and much of this documentation) were written by:
  1.1694 +
  1.1695 +Mark-Jason Dominus, mjd-perl-diff@plover.com
  1.1696 +
  1.1697 +This version borrows some documentation and routine names from
  1.1698 +Mark-Jason's, but Diff.pm's code was completely replaced.
  1.1699 +
  1.1700 +This code was adapted from the Smalltalk code of Mario Wolczko
  1.1701 +<mario@wolczko.com>, which is available at
  1.1702 +ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
  1.1703 +
  1.1704 +C<sdiff> and C<traverse_balanced> were written by Mike Schilli
  1.1705 +<m@perlmeister.com>.
  1.1706 +
  1.1707 +The algorithm is that described in
  1.1708 +I<A Fast Algorithm for Computing Longest Common Subsequences>,
  1.1709 +CACM, vol.20, no.5, pp.350-353, May 1977, with a few
  1.1710 +minor improvements to improve the speed.
  1.1711 +
  1.1712 +Much work was done by Ned Konz (perl@bike-nomad.com).
  1.1713 +
  1.1714 +The OO interface and some other changes are by Tye McQueen.
  1.1715 +
  1.1716 +=cut