mirror of https://github.com/apache/lucene.git
LUCENE-5969: add changes and test
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1629408 13f79535-47bb-0310-9956-ffa450edef68
parent 206fb874bc
commit f97e873d8b
lucene/CHANGES.txt

@@ -73,6 +73,10 @@ New Features
 * LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a
   MemoryIndex. (Alan Woodward)
 
+* LUCENE-5969: Lucene 5.0 has a new index format with mismatched file detection,
+  improved exception handling, and indirect norms encoding for sparse fields.
+  (Mike McCandless, Robert Muir)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
@@ -142,6 +146,11 @@ API Changes
 * LUCENE-5924: Rename CheckIndex -fix option to -exorcise. This option does not
   actually fix the index, it just drops data. (Robert Muir)
 
+* LUCENE-5969: Add Codec.compoundFormat, which handles the encoding of compound
+  files. Add getMergeInstance() to codec producer APIs, which can be overridden
+  to return an instance optimized for merging instead of searching.
+  (Mike McCandless, Robert Muir)
+
 Bug Fixes
 
 * LUCENE-5650: Enforce read-only access to any path outside the temporary
@@ -193,6 +202,10 @@ Optimizations
   queries that match few documents by using a sparse bit set implementation.
   (Adrien Grand)
 
+* LUCENE-5969: Refactor merging to be more efficient, checksum calculation is
+  per-segment/per-producer, and norms and doc values merging no longer cause
+  RAM spikes for latent fields. (Mike McCandless, Robert Muir)
+
 Build
 
 * LUCENE-5909: Smoke tester now has better command line parsing and
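
One of the LUCENE-5969 entries above (under API Changes) adds getMergeInstance() to the codec producer APIs, to be overridden when a producer can offer a cheaper, merge-only view of itself. Below is a minimal sketch of that pattern; ValuesProducer, CachingValuesProducer and GetMergeInstanceSketch are hypothetical names for illustration, not Lucene's actual DocValuesProducer/FieldsProducer classes.

// Illustrative sketch only: these are made-up classes, not Lucene's codec APIs.
// The point is the getMergeInstance() pattern from LUCENE-5969: the default
// instance is optimized for searching, and merge code may ask for a cheaper,
// merge-only view.
import java.util.HashMap;
import java.util.Map;

abstract class ValuesProducer {
  /** Returns the value indexed for this document. */
  public abstract long get(int docID);

  /** Default: merging simply reuses the search-time instance. */
  public ValuesProducer getMergeInstance() {
    return this;
  }
}

/** Search-optimized producer: caches decoded values for random access. */
class CachingValuesProducer extends ValuesProducer {
  private final long[] onDisk;                      // stand-in for an on-disk file
  private final Map<Integer, Long> cache = new HashMap<>();

  CachingValuesProducer(long[] onDisk) {
    this.onDisk = onDisk;
  }

  @Override
  public long get(int docID) {
    // Random-access lookups during searching benefit from the cache.
    Long cached = cache.get(docID);
    if (cached == null) {
      cached = onDisk[docID];
      cache.put(docID, cached);
    }
    return cached;
  }

  @Override
  public ValuesProducer getMergeInstance() {
    // A merge reads every value exactly once, in order, so hand back a view
    // that bypasses the cache rather than filling it with soon-useless entries.
    return new ValuesProducer() {
      @Override
      public long get(int docID) {
        return onDisk[docID];
      }
    };
  }
}

public class GetMergeInstanceSketch {
  public static void main(String[] args) {
    CachingValuesProducer producer = new CachingValuesProducer(new long[] {7, 11, 13});

    System.out.println("search-time lookup: " + producer.get(1));  // fills the cache

    // Merge code asks for the merge-optimized instance instead.
    ValuesProducer forMerge = producer.getMergeInstance();
    long sum = 0;
    for (int docID = 0; docID < 3; docID++) {
      sum += forMerge.get(docID);
    }
    System.out.println("sum over merge view: " + sum);
  }
}

Since the CHANGES entry says getMergeInstance() "can be overridden", the real producer base classes presumably ship a default that returns the producer itself, as the sketch does, so codecs with nothing to optimize for merging need no changes.
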
org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java

@@ -17,8 +17,17 @@ package org.apache.lucene.codecs.lucene41;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.blocktree.FieldReader;
+import org.apache.lucene.codecs.blocktree.Stats;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.TestUtil;
 
 /**
@@ -31,4 +40,28 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
   protected Codec getCodec() {
     return codec;
   }
+
+  /** Make sure the final sub-block(s) are not skipped. */
+  public void testFinalBlock() throws Exception {
+    Directory d = newDirectory();
+    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
+    for(int i=0;i<25;i++) {
+      Document doc = new Document();
+      doc.add(newStringField("field", Character.toString((char) (97+i)), Field.Store.NO));
+      doc.add(newStringField("field", "z" + Character.toString((char) (97+i)), Field.Store.NO));
+      w.addDocument(doc);
+    }
+    w.forceMerge(1);
+
+    DirectoryReader r = DirectoryReader.open(w, true);
+    assertEquals(1, r.leaves().size());
+    FieldReader field = (FieldReader) r.leaves().get(0).reader().fields().terms("field");
+    // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
+    Stats stats = field.computeStats();
+    assertEquals(0, stats.floorBlockCount);
+    assertEquals(2, stats.nonFloorBlockCount);
+    r.close();
+    w.close();
+    d.close();
+  }
 }
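
A note on the assertions in testFinalBlock() above: the loop indexes 25 documents, each adding one single-letter term (a..y) and one z-prefixed term (za..zy), so the field holds 50 terms after forceMerge(1). Assuming the BlockTree terms dictionary's default block sizes (roughly 25-48 terms per block), the 25 z* terms are just large enough to be written as their own sub-block under the prefix "z", while the remaining terms stay in the root block: hence the expected 2 non-floor blocks and 0 floor blocks, and the javadoc's concern that this final sub-block not be skipped. A variant that pins the block sizes rather than relying on the defaults might look like the line below (a sketch, not part of the commit):

// Hypothetical: fix the min/max term block sizes so the expected "z" sub-block
// does not depend on the writer's defaults.
Codec explicitCodec = TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat(25, 48));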