LUCENE-2891: merge to trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1064285 13f79535-47bb-0310-9956-ffa450edef68
2011-01-27 20:07:43 +00:00 · 2011-01-27 20:07:43 +00:00 · 4c62240087
parent 4aa8a1f179
commit 4c62240087
5 changed files with 65 additions and 4 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -701,6 +701,9 @@ Bug fixes
 * LUCENE-1846: DateTools now uses the US locale everywhere, so DateTools.round()
  is safe also in strange locales.  (Uwe Schindler)

+* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
+  which can be used to prevent loading the terms index into memory. (Shai Erera)
+
 New features

 * LUCENE-2128: Parallelized fetching document frequencies during weight
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@ -415,7 +415,10 @@ public abstract class IndexReader implements Cloneable,Closeable {
   *  memory.  By setting this to a value > 1 you can reduce
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
+   *  to -1 to skip loading the terms index entirely. This is only useful in 
+   *  advanced situations when you will only .next() through all terms; 
+   *  attempts to seek will hit an exception.
+   *  
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
--- a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
@ -552,10 +552,13 @@ public final class IndexWriterConfig implements Cloneable {
  /** Sets the termsIndexDivisor passed to any readers that
   *  IndexWriter opens, for example when applying deletes
   *  or creating a near-real-time reader in {@link
-   *  IndexWriter#getReader}. */
+   *  IndexWriter#getReader}. If you pass -1, the terms index 
+   *  won't be loaded by the readers. This is only useful in 
+   *  advanced situations when you will only .next() through 
+   *  all terms; attempts to seek will hit an exception. */
  public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
-    if (divisor <= 0) {
-      throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")");
+    if (divisor <= 0 && divisor != -1) {
+      throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
    }
    readerTermsIndexDivisor = divisor;
    return this;
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
@ -222,6 +222,23 @@ public class TestIndexWriterConfig extends LuceneTestCase {
      // this is expected
    }

+    // Test setReaderTermsIndexDivisor
+    try {
+      conf.setReaderTermsIndexDivisor(0);
+      fail("should not have succeeded to set termsIndexDivisor to 0");
+    } catch (IllegalArgumentException e) {
+      // this is expected
+    }
+    
+    // Setting to -1 is ok
+    conf.setReaderTermsIndexDivisor(-1);
+    try {
+      conf.setReaderTermsIndexDivisor(-2);
+      fail("should not have succeeded to set termsIndexDivisor to < -1");
+    } catch (IllegalArgumentException e) {
+      // this is expected
+    }
+    
    assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
    conf.setMaxThreadStates(5);
    assertEquals(5, conf.getMaxThreadStates());
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
@ -20,6 +20,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicBoolean;
@ -30,6 +31,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@ -990,4 +992,37 @@ public class TestIndexWriterReader extends LuceneTestCase {
    dir.close();
    assertTrue(didWarm.get());
  }
+  
+  public void testNoTermsIndex() throws Exception {
+    // Some Codecs don't honor the ReaderTermsIndexDiviso, so skip the test if
+    // they're picked.
+    HashSet<String> illegalCodecs = new HashSet<String>();
+    illegalCodecs.add("PreFlex");
+    illegalCodecs.add("MockRandom");
+    illegalCodecs.add("SimpleText");
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer()).setReaderTermsIndexDivisor(-1);
+    
+    // Don't proceed if picked Codec is in the list of illegal ones.
+    if (illegalCodecs.contains(conf.getCodecProvider().getFieldCodec("f"))) return;
+
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, conf);
+    Document doc = new Document();
+    doc.add(new Field("f", "val", Store.NO, Index.ANALYZED));
+    w.addDocument(doc);
+    IndexReader r = IndexReader.open(w).getSequentialSubReaders()[0];
+    try {
+      r.termDocsEnum(null, "f", new BytesRef("val"));
+      fail("should have failed to seek since terms index was not loaded. Codec used " + conf.getCodecProvider().getFieldCodec("f"));
+    } catch (IllegalStateException e) {
+      // expected - we didn't load the term index
+    } finally {
+      r.close();
+      w.close();
+      dir.close();
+    }
+  }
+  
 }