lib/TWiki/Query/HoistREs.pm
changeset 0 414e01d06fd5
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lib/TWiki/Query/HoistREs.pm	Sat Jan 26 15:50:53 2008 +0100
     1.3 @@ -0,0 +1,233 @@
     1.4 +# See bottom of file for copyright and license details
     1.5 +
     1.6 +=pod
     1.7 +
     1.8 +---+ package TWiki::Query::HoistREs
     1.9 +
    1.10 +Static functions to extract regular expressions from queries. The REs can
    1.11 +be used in caching stores that use the TWiki standard inline meta-data
    1.12 +representation to pre-filter topic lists for more efficient query matching.
    1.13 +
    1.14 +See =Store/RcsFile.pm= for an example of usage.
    1.15 +
    1.16 +=cut
    1.17 +
    1.18 +package TWiki::Query::HoistREs;
    1.19 +
    1.20 +use strict;
    1.21 +
    1.22 +use TWiki::Infix::Node;
    1.23 +use TWiki::Query::Node;
    1.24 +
    1.25 +# Try to optimise a query by hoisting regular expression searches
    1.26 +# out of the query
    1.27 +#
    1.28 +# patterns we need to look for:
    1.29 +#
    1.30 +# top level is defined by a sequence of AND and OR conjunctions
    1.31 +# second level, = and ~
    1.32 +# second level LHS is a field access
    1.33 +# second level RHS is a static string or number
    1.34 +
    1.35 +sub MONITOR_HOIST { 0 }
    1.36 +
    1.37 +=pod
    1.38 +
    1.39 +---++ ObjectMethod hoist($query) -> @res
    1.40 +
    1.41 +Extract useful filter REs from the given query. The list returned is a list
    1.42 +of filter expressions that can be used with a cache search to refine the
    1.43 +list of topics. The full query should still be applied to topics that remain
    1.44 +after the filter match has been applied; this is purely an optimisation.
    1.45 +
    1.46 +=cut
    1.47 +
    1.48 +sub hoist {
    1.49 +    my $node = shift;
    1.50 +
    1.51 +    return () unless ref($node->{op});
    1.52 +
    1.53 +    if ($node->{op}->{name} eq '(') {
    1.54 +        return hoist($node->{params}[0]);
    1.55 +    }
    1.56 +
    1.57 +    print STDERR "hoist ",$node->stringify(),"\n" if MONITOR_HOIST;
    1.58 +    if ($node->{op}->{name} eq 'and') {
    1.59 +        my @lhs = hoist($node->{params}[0]);
    1.60 +        my $rhs = _hoistOR($node->{params}[1]);
    1.61 +        if (scalar(@lhs) && $rhs) {
    1.62 +            return ( @lhs, $rhs );
    1.63 +        } elsif (scalar(@lhs)) {
    1.64 +            return @lhs;
    1.65 +        } elsif ($rhs) {
    1.66 +            return ( $rhs );
    1.67 +        }
    1.68 +    } else {
    1.69 +        my $or = _hoistOR($node);
    1.70 +        return ( $or ) if $or;
    1.71 +    }
    1.72 +
    1.73 +    print STDERR "\tFAILED\n" if MONITOR_HOIST;
    1.74 +    return ();
    1.75 +}
    1.76 +
    1.77 +# depth 1; we can handle a sequence of ORs
    1.78 +sub _hoistOR {
    1.79 +    my $node = shift;
    1.80 +
    1.81 +    return undef unless ref($node->{op});
    1.82 +
    1.83 +    if ($node->{op}->{name} eq '(') {
    1.84 +        return _hoistOR($node->{params}[0]);
    1.85 +    }
    1.86 +
    1.87 +    print STDERR "hoistOR ",$node->stringify(),"\n" if MONITOR_HOIST;
    1.88 +
    1.89 +    if ($node->{op}->{name} eq 'or') {
    1.90 +        my $lhs = _hoistOR($node->{params}[0]);
    1.91 +        my $rhs = _hoistEQ($node->{params}[1]);
    1.92 +        if ($lhs && $rhs) {
    1.93 +            return "$lhs|$rhs";
    1.94 +        }
    1.95 +    } else {
    1.96 +        return _hoistEQ($node);
    1.97 +    }
    1.98 +
    1.99 +    print STDERR "\tFAILED\n" if MONITOR_HOIST;
   1.100 +    return undef;
   1.101 +}
   1.102 +
   1.103 +# depth 2: can handle = and ~ expressions
   1.104 +sub _hoistEQ {
   1.105 +    my $node = shift;
   1.106 +
   1.107 +    return undef unless ref($node->{op});
   1.108 +
   1.109 +    if ($node->{op}->{name} eq '(') {
   1.110 +        return _hoistEQ($node->{params}[0]);
   1.111 +    }
   1.112 +
   1.113 +    print STDERR "hoistEQ ",$node->stringify(),"\n" if MONITOR_HOIST;
   1.114 +    # \000RHS\001 is a placholder for the RHS term
   1.115 +    if ($node->{op}->{name} eq '=') {
   1.116 +        my $lhs = _hoistDOT($node->{params}[0]);
   1.117 +        my $rhs = _hoistConstant($node->{params}[1]);
   1.118 +        if ($lhs && $rhs) {
   1.119 +            $rhs = quotemeta($rhs);
   1.120 +            $lhs =~ s/\000RHS\001/$rhs/g;
   1.121 +            return $lhs;
   1.122 +        }
   1.123 +        # = is symmetric, so try the other order
   1.124 +        $lhs = _hoistDOT($node->{params}[1]);
   1.125 +        $rhs = _hoistConstant($node->{params}[0]);
   1.126 +        if ($lhs && $rhs) {
   1.127 +            $rhs = quotemeta($rhs);
   1.128 +            $lhs =~ s/\000RHS\001/$rhs/g;
   1.129 +            return $lhs;
   1.130 +        }
   1.131 +    } elsif ($node->{op}->{name} eq '~') {
   1.132 +        my $lhs = _hoistDOT($node->{params}[0]);
   1.133 +        my $rhs = _hoistConstant($node->{params}[1]);
   1.134 +        if ($lhs && $rhs) {
   1.135 +            $rhs = quotemeta($rhs);
   1.136 +            $rhs =~ s/\\\?/./g;
   1.137 +            $rhs =~ s/\\\*/.*/g;
   1.138 +            $lhs =~ s/\000RHS\001/$rhs/g;
   1.139 +            return $lhs;
   1.140 +        }
   1.141 +    }
   1.142 +
   1.143 +    print STDERR "\tFAILED\n" if MONITOR_HOIST;
   1.144 +    return undef;
   1.145 +}
   1.146 +
   1.147 +# Expecting a (root level) field access expression. This must be of the form
   1.148 +# <name>
   1.149 +# or
   1.150 +# <rootfield>.<name>
   1.151 +# <rootfield> may be aliased
   1.152 +sub _hoistDOT {
   1.153 +    my $node = shift;
   1.154 +
   1.155 +    if (ref($node->{op}) && $node->{op}->{name} eq '(') {
   1.156 +        return _hoistDOT($node->{params}[0]);
   1.157 +    }
   1.158 +
   1.159 +    print STDERR "hoistDOT ",$node->stringify(),"\n" if MONITOR_HOIST;
   1.160 +    if (ref($node->{op}) && $node->{op}->{name} eq '.') {
   1.161 +        my $lhs = $node->{params}[0];
   1.162 +        my $rhs = $node->{params}[1];
   1.163 +        if (!ref($lhs->{op}) && !ref($rhs->{op}) &&
   1.164 +              $lhs->{op} eq $TWiki::Infix::Node::NAME &&
   1.165 +                $rhs->{op} eq $TWiki::Infix::Node::NAME) {
   1.166 +            $lhs = $lhs->{params}[0];
   1.167 +            $rhs = $rhs->{params}[0];
   1.168 +            if ($TWiki::Query::Node::aliases{$lhs}) {
   1.169 +                $lhs = $TWiki::Query::Node::aliases{$lhs};
   1.170 +            }
   1.171 +            if ($lhs =~ /^META:/) {
   1.172 +                # \000RHS\001 is a placholder for the RHS term
   1.173 +                return '^%'.$lhs.'{.*\\b'.$rhs."=\\\"\000RHS\001\\\"";
   1.174 +            }
   1.175 +            # Otherwise assume the term before the dot is the form name
   1.176 +            if ($rhs eq 'text') {
   1.177 +                # Special case for the text body
   1.178 +                return "\000RHS\001";
   1.179 +            } else {
   1.180 +                return "^%META:FIELD{name=\\\"$rhs\\\".*\\bvalue=\\\"\000RHS\001\\\"";
   1.181 +            }
   1.182 +
   1.183 +        }
   1.184 +    } elsif (!ref($node->{op}) && $node->{op} eq $TWiki::Infix::Node::NAME) {
   1.185 +        if ($node->{params}[0] eq 'name') {
   1.186 +            # Special case for the topic name
   1.187 +	    return undef;
   1.188 +        } elsif ($node->{params}[0] eq 'web') {
   1.189 +            # Special case for the web name
   1.190 +	    return undef;
   1.191 +        } elsif ($node->{params}[0] eq 'text') {
   1.192 +            # Special case for the text body
   1.193 +            return "\000RHS\001";
   1.194 +        } else {
   1.195 +            return "^%META:FIELD{name=\\\"$node->{params}[0]\\\".*\\bvalue=\\\"\0RHS\1\\\"";
   1.196 +        }
   1.197 +    }
   1.198 +
   1.199 +    print STDERR "\tFAILED\n" if MONITOR_HOIST;
   1.200 +    return undef;
   1.201 +}
   1.202 +
   1.203 +# Expecting a constant
   1.204 +sub _hoistConstant {
   1.205 +    my $node = shift;
   1.206 +
   1.207 +    if (!ref($node->{op}) &&
   1.208 +          ($node->{op} eq $TWiki::Infix::Node::STRING ||
   1.209 +             $node->{op} eq $TWiki::Infix::Node::NUMBER)) {
   1.210 +        return $node->{params}[0];
   1.211 +    }
   1.212 +    return undef;
   1.213 +}
   1.214 +
   1.215 +1;
   1.216 +__DATA__
   1.217 +
   1.218 +Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/
   1.219 +
   1.220 +Copyright (C) 2005-2007 TWiki Contributors. All Rights Reserved.
   1.221 +TWiki Contributors are listed in the AUTHORS file in the root of
   1.222 +this distribution. NOTE: Please extend that file, not this notice.
   1.223 +
   1.224 +This program is free software; you can redistribute it and/or
   1.225 +modify it under the terms of the GNU General Public License
   1.226 +as published by the Free Software Foundation; either version 2
   1.227 +of the License, or (at your option) any later version. For
   1.228 +more details read LICENSE in the root of this distribution.
   1.229 +
   1.230 +This program is distributed in the hope that it will be useful,
   1.231 +but WITHOUT ANY WARRANTY; without even the implied warranty of
   1.232 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   1.233 +
   1.234 +As per the GPL, removal of this notice is prohibited.
   1.235 +
   1.236 +Author: Crawford Currie http://c-dot.co.uk