lib/TWiki/Query/HoistREs.pm
changeset 0 414e01d06fd5
equal deleted inserted replaced
-1:000000000000 0:414e01d06fd5
       
     1 # See bottom of file for copyright and license details
       
     2 
       
     3 =pod
       
     4 
       
     5 ---+ package TWiki::Query::HoistREs
       
     6 
       
     7 Static functions to extract regular expressions from queries. The REs can
       
     8 be used in caching stores that use the TWiki standard inline meta-data
       
     9 representation to pre-filter topic lists for more efficient query matching.
       
    10 
       
    11 See =Store/RcsFile.pm= for an example of usage.
       
    12 
       
    13 =cut
       
    14 
       
    15 package TWiki::Query::HoistREs;
       
    16 
       
    17 use strict;
       
    18 
       
    19 use TWiki::Infix::Node;
       
    20 use TWiki::Query::Node;
       
    21 
       
    22 # Try to optimise a query by hoisting regular expression searches
       
    23 # out of the query
       
    24 #
       
    25 # patterns we need to look for:
       
    26 #
       
    27 # top level is defined by a sequence of AND and OR conjunctions
       
    28 # second level, = and ~
       
    29 # second level LHS is a field access
       
    30 # second level RHS is a static string or number
       
    31 
       
    32 sub MONITOR_HOIST { 0 }
       
    33 
       
    34 =pod
       
    35 
       
    36 ---++ ObjectMethod hoist($query) -> @res
       
    37 
       
    38 Extract useful filter REs from the given query. The list returned is a list
       
    39 of filter expressions that can be used with a cache search to refine the
       
    40 list of topics. The full query should still be applied to topics that remain
       
    41 after the filter match has been applied; this is purely an optimisation.
       
    42 
       
    43 =cut
       
    44 
       
    45 sub hoist {
       
    46     my $node = shift;
       
    47 
       
    48     return () unless ref($node->{op});
       
    49 
       
    50     if ($node->{op}->{name} eq '(') {
       
    51         return hoist($node->{params}[0]);
       
    52     }
       
    53 
       
    54     print STDERR "hoist ",$node->stringify(),"\n" if MONITOR_HOIST;
       
    55     if ($node->{op}->{name} eq 'and') {
       
    56         my @lhs = hoist($node->{params}[0]);
       
    57         my $rhs = _hoistOR($node->{params}[1]);
       
    58         if (scalar(@lhs) && $rhs) {
       
    59             return ( @lhs, $rhs );
       
    60         } elsif (scalar(@lhs)) {
       
    61             return @lhs;
       
    62         } elsif ($rhs) {
       
    63             return ( $rhs );
       
    64         }
       
    65     } else {
       
    66         my $or = _hoistOR($node);
       
    67         return ( $or ) if $or;
       
    68     }
       
    69 
       
    70     print STDERR "\tFAILED\n" if MONITOR_HOIST;
       
    71     return ();
       
    72 }
       
    73 
       
    74 # depth 1; we can handle a sequence of ORs
       
    75 sub _hoistOR {
       
    76     my $node = shift;
       
    77 
       
    78     return undef unless ref($node->{op});
       
    79 
       
    80     if ($node->{op}->{name} eq '(') {
       
    81         return _hoistOR($node->{params}[0]);
       
    82     }
       
    83 
       
    84     print STDERR "hoistOR ",$node->stringify(),"\n" if MONITOR_HOIST;
       
    85 
       
    86     if ($node->{op}->{name} eq 'or') {
       
    87         my $lhs = _hoistOR($node->{params}[0]);
       
    88         my $rhs = _hoistEQ($node->{params}[1]);
       
    89         if ($lhs && $rhs) {
       
    90             return "$lhs|$rhs";
       
    91         }
       
    92     } else {
       
    93         return _hoistEQ($node);
       
    94     }
       
    95 
       
    96     print STDERR "\tFAILED\n" if MONITOR_HOIST;
       
    97     return undef;
       
    98 }
       
    99 
       
   100 # depth 2: can handle = and ~ expressions
       
   101 sub _hoistEQ {
       
   102     my $node = shift;
       
   103 
       
   104     return undef unless ref($node->{op});
       
   105 
       
   106     if ($node->{op}->{name} eq '(') {
       
   107         return _hoistEQ($node->{params}[0]);
       
   108     }
       
   109 
       
   110     print STDERR "hoistEQ ",$node->stringify(),"\n" if MONITOR_HOIST;
       
   111     # \000RHS\001 is a placholder for the RHS term
       
   112     if ($node->{op}->{name} eq '=') {
       
   113         my $lhs = _hoistDOT($node->{params}[0]);
       
   114         my $rhs = _hoistConstant($node->{params}[1]);
       
   115         if ($lhs && $rhs) {
       
   116             $rhs = quotemeta($rhs);
       
   117             $lhs =~ s/\000RHS\001/$rhs/g;
       
   118             return $lhs;
       
   119         }
       
   120         # = is symmetric, so try the other order
       
   121         $lhs = _hoistDOT($node->{params}[1]);
       
   122         $rhs = _hoistConstant($node->{params}[0]);
       
   123         if ($lhs && $rhs) {
       
   124             $rhs = quotemeta($rhs);
       
   125             $lhs =~ s/\000RHS\001/$rhs/g;
       
   126             return $lhs;
       
   127         }
       
   128     } elsif ($node->{op}->{name} eq '~') {
       
   129         my $lhs = _hoistDOT($node->{params}[0]);
       
   130         my $rhs = _hoistConstant($node->{params}[1]);
       
   131         if ($lhs && $rhs) {
       
   132             $rhs = quotemeta($rhs);
       
   133             $rhs =~ s/\\\?/./g;
       
   134             $rhs =~ s/\\\*/.*/g;
       
   135             $lhs =~ s/\000RHS\001/$rhs/g;
       
   136             return $lhs;
       
   137         }
       
   138     }
       
   139 
       
   140     print STDERR "\tFAILED\n" if MONITOR_HOIST;
       
   141     return undef;
       
   142 }
       
   143 
       
   144 # Expecting a (root level) field access expression. This must be of the form
       
   145 # <name>
       
   146 # or
       
   147 # <rootfield>.<name>
       
   148 # <rootfield> may be aliased
       
   149 sub _hoistDOT {
       
   150     my $node = shift;
       
   151 
       
   152     if (ref($node->{op}) && $node->{op}->{name} eq '(') {
       
   153         return _hoistDOT($node->{params}[0]);
       
   154     }
       
   155 
       
   156     print STDERR "hoistDOT ",$node->stringify(),"\n" if MONITOR_HOIST;
       
   157     if (ref($node->{op}) && $node->{op}->{name} eq '.') {
       
   158         my $lhs = $node->{params}[0];
       
   159         my $rhs = $node->{params}[1];
       
   160         if (!ref($lhs->{op}) && !ref($rhs->{op}) &&
       
   161               $lhs->{op} eq $TWiki::Infix::Node::NAME &&
       
   162                 $rhs->{op} eq $TWiki::Infix::Node::NAME) {
       
   163             $lhs = $lhs->{params}[0];
       
   164             $rhs = $rhs->{params}[0];
       
   165             if ($TWiki::Query::Node::aliases{$lhs}) {
       
   166                 $lhs = $TWiki::Query::Node::aliases{$lhs};
       
   167             }
       
   168             if ($lhs =~ /^META:/) {
       
   169                 # \000RHS\001 is a placholder for the RHS term
       
   170                 return '^%'.$lhs.'{.*\\b'.$rhs."=\\\"\000RHS\001\\\"";
       
   171             }
       
   172             # Otherwise assume the term before the dot is the form name
       
   173             if ($rhs eq 'text') {
       
   174                 # Special case for the text body
       
   175                 return "\000RHS\001";
       
   176             } else {
       
   177                 return "^%META:FIELD{name=\\\"$rhs\\\".*\\bvalue=\\\"\000RHS\001\\\"";
       
   178             }
       
   179 
       
   180         }
       
   181     } elsif (!ref($node->{op}) && $node->{op} eq $TWiki::Infix::Node::NAME) {
       
   182         if ($node->{params}[0] eq 'name') {
       
   183             # Special case for the topic name
       
   184 	    return undef;
       
   185         } elsif ($node->{params}[0] eq 'web') {
       
   186             # Special case for the web name
       
   187 	    return undef;
       
   188         } elsif ($node->{params}[0] eq 'text') {
       
   189             # Special case for the text body
       
   190             return "\000RHS\001";
       
   191         } else {
       
   192             return "^%META:FIELD{name=\\\"$node->{params}[0]\\\".*\\bvalue=\\\"\0RHS\1\\\"";
       
   193         }
       
   194     }
       
   195 
       
   196     print STDERR "\tFAILED\n" if MONITOR_HOIST;
       
   197     return undef;
       
   198 }
       
   199 
       
   200 # Expecting a constant
       
   201 sub _hoistConstant {
       
   202     my $node = shift;
       
   203 
       
   204     if (!ref($node->{op}) &&
       
   205           ($node->{op} eq $TWiki::Infix::Node::STRING ||
       
   206              $node->{op} eq $TWiki::Infix::Node::NUMBER)) {
       
   207         return $node->{params}[0];
       
   208     }
       
   209     return undef;
       
   210 }
       
   211 
       
   212 1;
       
   213 __DATA__
       
   214 
       
   215 Module of TWiki Enterprise Collaboration Platform, http://TWiki.org/
       
   216 
       
   217 Copyright (C) 2005-2007 TWiki Contributors. All Rights Reserved.
       
   218 TWiki Contributors are listed in the AUTHORS file in the root of
       
   219 this distribution. NOTE: Please extend that file, not this notice.
       
   220 
       
   221 This program is free software; you can redistribute it and/or
       
   222 modify it under the terms of the GNU General Public License
       
   223 as published by the Free Software Foundation; either version 2
       
   224 of the License, or (at your option) any later version. For
       
   225 more details read LICENSE in the root of this distribution.
       
   226 
       
   227 This program is distributed in the hope that it will be useful,
       
   228 but WITHOUT ANY WARRANTY; without even the implied warranty of
       
   229 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
       
   230 
       
   231 As per the GPL, removal of this notice is prohibited.
       
   232 
       
   233 Author: Crawford Currie http://c-dot.co.uk