LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter to correctly respect deletions on reopened SegmentReaders. Factored out FieldCacheDocIdSet to be a top-level class

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206033 13f79535-47bb-0310-9956-ffa450edef68
2011-11-25 01:21:53 +00:00 · 2011-11-25 01:21:53 +00:00 · 2cf0c05869
parent d03c59d685
commit 2cf0c05869
6 changed files with 175 additions and 147 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -663,6 +663,12 @@ New Features
  have at least one or no value at all in a specific field. (Simon Willnauer,
  Uwe Schindler, Robert Muir)
  
+Bug fixes
+
+* LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter
+  to correctly respect deletions on reopened SegmentReaders. Factored out
+  FieldCacheDocIdSet to be a top-level class.  (Uwe Schindler, Simon Willnauer)
+
 ======================= Lucene 3.5.0 =======================

 Changes in backwards compatibility policy
--- a/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java
@ -0,0 +1,153 @@
+package org.apache.lucene.search;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Base class for DocIdSet to be used with FieldCache. The implementation
+ * of its iterator is very stupid and slow if the implementation of the
+ * {@link #matchDoc} method is not optimized, as iterators simply increment
+ * the document id until {@code matchDoc(int)} returns true. Because of this
+ * {@code matchDoc(int)} must be as fast as possible and in no case do any
+ * I/O.
+ * @lucene.internal
+ */
+public abstract class FieldCacheDocIdSet extends DocIdSet {
+
+  protected final int maxDoc;
+  protected final Bits acceptDocs;
+
+  public FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
+    this.maxDoc = maxDoc;
+    this.acceptDocs = acceptDocs;
+  }
+
+  /**
+   * this method checks, if a doc is a hit
+   */
+  protected abstract boolean matchDoc(int doc);
+
+  /**
+   * this DocIdSet is always cacheable (does not go back
+   * to the reader for iteration)
+   */
+  @Override
+  public final boolean isCacheable() {
+    return true;
+  }
+
+  @Override
+  public final Bits bits() {
+    return (acceptDocs == null) ? new Bits() {
+      public boolean get(int docid) {
+        return matchDoc(docid);
+      }
+
+      public int length() {
+        return maxDoc;
+      }
+    } : new Bits() {
+      public boolean get(int docid) {
+        return acceptDocs.get(docid) && matchDoc(docid);
+      }
+
+      public int length() {
+        return maxDoc;
+      }
+    };
+  }
+
+  @Override
+  public final DocIdSetIterator iterator() throws IOException {
+    if (acceptDocs == null) {
+      // Specialization optimization disregard acceptDocs
+      return new DocIdSetIterator() {
+        private int doc = -1;
+        
+        @Override
+        public int docID() {
+          return doc;
+        }
+      
+        @Override
+        public int nextDoc() {
+          do {
+            doc++;
+            if (doc >= maxDoc) {
+              return doc = NO_MORE_DOCS;
+            }
+          } while (!matchDoc(doc));
+          return doc;
+        }
+      
+        @Override
+        public int advance(int target) {
+          for(doc=target; doc<maxDoc; doc++) {
+            if (matchDoc(doc)) {
+              return doc;
+            }
+          }
+          return doc = NO_MORE_DOCS;
+        }
+      };
+    } else if (acceptDocs instanceof FixedBitSet || acceptDocs instanceof OpenBitSet) {
+      // special case for FixedBitSet / OpenBitSet: use the iterator and filter it
+      // (used e.g. when Filters are chained by FilteredQuery)
+      return new FilteredDocIdSetIterator(((DocIdSet) acceptDocs).iterator()) {
+        @Override
+        protected boolean match(int doc) {
+          return FieldCacheDocIdSet.this.matchDoc(doc);
+        }
+      };
+    } else {
+      // Stupid consultation of acceptDocs and matchDoc()
+      return new DocIdSetIterator() {
+        private int doc = -1;
+        
+        @Override
+        public int docID() {
+          return doc;
+        }
+      
+        @Override
+        public int nextDoc() {
+          do {
+            doc++;
+            if (doc >= maxDoc) {
+              return doc = NO_MORE_DOCS;
+            }
+          } while (!acceptDocs.get(doc) || !matchDoc(doc));
+          return doc;
+        }
+      
+        @Override
+        public int advance(int target) {
+          for(doc=target; doc<maxDoc; doc++) {
+            if (acceptDocs.get(doc) && matchDoc(doc)) {
+              return doc;
+            }
+          }
+          return doc = NO_MORE_DOCS;
+        }
+      };
+    }
+  }
+}
--- a/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
@ -124,7 +124,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          final boolean matchDoc(int doc) {
+          protected final boolean matchDoc(int doc) {
            final int docOrd = fcsi.getOrd(doc);
            return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
          }
@ -175,7 +175,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser, false);
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
          }
        };
@ -225,7 +225,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser, false);
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
          }
        };
@ -275,7 +275,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser, false);
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
          }
        };
@ -325,7 +325,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser, false);
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
          }
        };
@ -379,7 +379,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser, false);
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
          }
        };
@ -434,7 +434,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
        // ignore deleted docs if range doesn't contain 0
        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
          }
        };
@ -497,122 +497,4 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
  
  /** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
  public FieldCache.Parser getParser() { return parser; }
-  
-  static abstract class FieldCacheDocIdSet extends DocIdSet {
-    private final int maxDoc;
-    private final Bits acceptDocs;
-
-    FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
-      this.maxDoc = maxDoc;
-      this.acceptDocs = acceptDocs;
-    }
-
-    /**
-     * this method checks, if a doc is a hit, should throw AIOBE, when position
-     * invalid
-     */
-    abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
-
-    /**
-     * this DocIdSet is always cacheable (does not go back
-     * to the reader for iteration)
-     */
-    @Override
-    public boolean isCacheable() {
-      return true;
-    }
-
-    @Override
-    public Bits bits() {
-      return (acceptDocs == null) ? new Bits() {
-        public boolean get(int docid) {
-          return FieldCacheDocIdSet.this.matchDoc(docid);
-        }
-
-        public int length() {
-          return FieldCacheDocIdSet.this.maxDoc;
-        }
-      } : new Bits() {
-        public boolean get(int docid) {
-          return acceptDocs.get(docid) && FieldCacheDocIdSet.this.matchDoc(docid);
-        }
-
-        public int length() {
-          return FieldCacheDocIdSet.this.maxDoc;
-        }
-      };
-    }
-
-    @Override
-    public DocIdSetIterator iterator() throws IOException {
-      if (acceptDocs == null) {
-        // Specialization optimization disregard deletions
-        return new DocIdSetIterator() {
-          private int doc = -1;
-          @Override
-            public int docID() {
-            return doc;
-          }
-        
-          @Override
-          public int nextDoc() {
-            try {
-              do {
-                doc++;
-              } while (!matchDoc(doc));
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-        
-          @Override
-          public int advance(int target) {
-            try {
-              doc = target;
-              while (!matchDoc(doc)) {
-                doc++;
-              }
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-        };
-      } else {
-        // Must consult acceptDocs
-
-        // a DocIdSetIterator generating docIds by
-        // incrementing a variable & checking acceptDocs -
-        return new DocIdSetIterator() {
-          private int doc = -1;
-          @Override
-            public int docID() {
-            return doc;
-          }
-        
-          @Override
-          public int nextDoc() {
-            do {
-              doc++;
-              if (doc >= maxDoc) {
-                return doc = NO_MORE_DOCS;
-              }
-            } while (!acceptDocs.get(doc) || !matchDoc(doc));
-            return doc;
-          }
-        
-          @Override
-          public int advance(int target) {
-            for(doc=target;doc<maxDoc;doc++) {
-              if (acceptDocs.get(doc) && matchDoc(doc)) {
-                return doc;
-              }
-            }
-            return doc = NO_MORE_DOCS;
-          }
-        };
-      }
-    }
-  }
 }
--- a/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
@ -127,10 +127,9 @@ public class FieldCacheTermsFilter extends Filter {
        bits.set(termNumber);
      }
    }
-    final int maxDoc = context.reader.maxDoc();
-    return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
+    return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
      @Override
-      boolean matchDoc(int doc) {
+      protected final boolean matchDoc(int doc) {
        return bits.get(fcsi.getOrd(doc));
      }
    };
--- a/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java
@ -19,7 +19,6 @@ package org.apache.lucene.search;
 import java.io.IOException;

 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Bits.MatchAllBits;
 import org.apache.lucene.util.Bits.MatchNoBits;
@ -67,14 +66,9 @@ public class FieldValueFilter extends Filter {
      if (docsWithField instanceof MatchAllBits) {
        return null;
      }
-      final int maxDoc = context.reader.maxDoc();
-      return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
+      return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
        @Override
-        final boolean matchDoc(int doc) {
-          if (doc >= maxDoc) {
-            // TODO: this makes no sense we should check this on the caller level
-            throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
-          }
+        protected final boolean matchDoc(int doc) {
          return !docsWithField.get(doc);
        }
      };
@ -87,14 +81,9 @@ public class FieldValueFilter extends Filter {
        // :-)
        return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
      }
-      final int maxDoc = context.reader.maxDoc();
-      return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
+      return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
        @Override
-        final boolean matchDoc(int doc) {
-          if (doc >= maxDoc) {
-            // TODO: this makes no sense we should check this on the caller level
-            throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
-          }
+        protected final boolean matchDoc(int doc) {
          return docsWithField.get(doc);
        }
      };
@ -131,7 +120,7 @@ public class FieldValueFilter extends Filter {

  @Override
  public String toString() {
-    return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]";
+    return "FieldValueFilter [field=" + field + ", negate=" + negate + "]";
  }

 }
--- a/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
+++ b/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
@ -137,10 +137,9 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
        return DocIdSet.EMPTY_DOCIDSET;
      }
      
-      final int maxDoc = context.reader.maxDoc();
-      return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
+      return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
        @Override
-        boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+        protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
          return termSet.get(fcsi.getOrd(doc));
        }
      };