diff --git a/src/java/org/apache/lucene/index/DocumentsWriter.java b/src/java/org/apache/lucene/index/DocumentsWriter.java
index 8e584b00185..54e90dfe4c8 100644
--- a/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -1020,6 +1020,59 @@ final class DocumentsWriter {
       quickSort(postings, left + 1, hi);
     }
 
+    void quickSort(FieldData[] array, int lo, int hi) {
+      if (lo >= hi)
+        return;
+
+      int mid = (lo + hi) >>> 1;
+
+      if (array[lo].compareTo(array[mid]) > 0) {
+        FieldData tmp = array[lo];
+        array[lo] = array[mid];
+        array[mid] = tmp;
+      }
+
+      if (array[mid].compareTo(array[hi]) > 0) {
+        FieldData tmp = array[mid];
+        array[mid] = array[hi];
+        array[hi] = tmp;
+
+        if (array[lo].compareTo(array[mid]) > 0) {
+          FieldData tmp2 = array[lo];
+          array[lo] = array[mid];
+          array[mid] = tmp2;
+        }
+      }
+
+      int left = lo + 1;
+      int right = hi - 1;
+
+      if (left >= right)
+        return;
+
+      FieldData partition = array[mid];
+
+      for (; ;) {
+        while (array[right].compareTo(partition) > 0)
+          --right;
+
+        while (left < right && array[left].compareTo(partition) <= 0)
+          ++left;
+
+        if (left < right) {
+          FieldData tmp = array[left];
+          array[left] = array[right];
+          array[right] = tmp;
+          --right;
+        } else {
+          break;
+        }
+      }
+
+      quickSort(array, lo, left);
+      quickSort(array, left + 1, hi);
+    }
+
     /** If there are fields we've seen but did not see again
      *  in the last run, then free them up.  Also reduce
      *  postings hash size. */
@@ -1098,6 +1151,7 @@ final class DocumentsWriter {
       throws IOException, AbortException {
 
       final int numFields = numFieldData;
+      assert clearLastVectorFieldName();
 
       assert 0 == fdtLocal.length();
 
@@ -1108,7 +1162,7 @@
         // sort the subset of fields that have vectors
         // enabled; we could save [small amount of] CPU
         // here.
-        Arrays.sort(fieldDataArray, 0, numFields);
+        quickSort(fieldDataArray, 0, numFields-1);
 
       // We process the document one field at a time
       for(int i=0;i<numFields;i++)
@@ -1115,8 +1169,4 @@
         fieldDataArray[i].processField(analyzer);
-
-      if (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH
-          && numBytesUsed > 0.95 * ramBufferSize)
-        balanceRAM();
     }
 
     final ByteBlockPool postingsPool = new ByteBlockPool();
@@ -1295,6 +1345,26 @@ final class DocumentsWriter {
       pos[posUpto++] = b;
     }
 
+    String lastVectorFieldName;
+
+    // Called only by assert
+    final boolean clearLastVectorFieldName() {
+      lastVectorFieldName = null;
+      return true;
+    }
+
+    // Called only by assert
+    final boolean vectorFieldsInOrder(FieldInfo fi) {
+      try {
+        if (lastVectorFieldName != null)
+          return lastVectorFieldName.compareTo(fi.name) < 0;
+        else
+          return true;
+      } finally {
+        lastVectorFieldName = fi.name;
+      }
+    }
+
     PostingVector[] postingsVectors = new PostingVector[1];
     int maxPostingsVectors;
 
@@ -1360,7 +1430,6 @@ final class DocumentsWriter {
       postingsHash = new Posting[postingsHashSize];
     }
 
-    /** So Arrays.sort can sort us. */
     public int compareTo(Object o) {
       return fieldInfo.name.compareTo(((FieldData) o).fieldInfo.name);
     }
@@ -1535,9 +1604,9 @@
 
     /** Only called when term vectors are enabled.  This
      *  is called the first time we see a given term for
-     *  each * document, to allocate a PostingVector
-     *  instance that * is used to record data needed to
-     *  write the posting * vectors. */
+     *  each document, to allocate a PostingVector
+     *  instance that is used to record data needed to
+     *  write the posting vectors. */
     private PostingVector addNewVector() {
 
       if (postingsVectorsUpto == postingsVectors.length) {
@@ -1837,6 +1906,7 @@ final class DocumentsWriter {
     void writeVectors(FieldInfo fieldInfo) throws IOException {
 
       assert fieldInfo.storeTermVector;
+      assert vectorFieldsInOrder(fieldInfo);
 
       vectorFieldNumbers[numVectorFields] = fieldInfo.number;
       vectorFieldPointers[numVectorFields] = tvfLocal.getFilePointer();
@@ -2586,6 +2656,10 @@ final class DocumentsWriter {
        return;
     }
 
+    if (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH
+        && numBytesUsed >= ramBufferSize)
+      balanceRAM();
+
     // Now write the indexed document to the real files.
     if (nextWriteDocID == state.docID) {
       // It's my turn, so write everything now:
@@ -2649,7 +2723,7 @@ final class DocumentsWriter {
     out.writeByte(b);
   }
 
-  byte[] copyByteBuffer = new byte[4096];
+  final byte[] copyByteBuffer = new byte[4096];
 
   /** Copy numBytes from srcIn to destIn */
   void copyBytes(IndexInput srcIn, IndexOutput destIn, long numBytes) throws IOException {
@@ -3138,7 +3212,7 @@
    * the other two.  This method just frees allocations from
    * the pools once we are over-budget, which balances the
    * pools to match the current docs. */
-  private synchronized void balanceRAM() {
+  synchronized void balanceRAM() {
 
     if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH || bufferIsFull)
       return;
diff --git a/src/test/org/apache/lucene/search/TestTermVectors.java b/src/test/org/apache/lucene/search/TestTermVectors.java
index aee7fff7710..79a816f9548 100644
--- a/src/test/org/apache/lucene/search/TestTermVectors.java
+++ b/src/test/org/apache/lucene/search/TestTermVectors.java
@@ -23,7 +23,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.*;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.util.English;
 
 import java.io.IOException;
@@ -34,7 +34,7 @@ import java.util.SortedSet;
 
 public class TestTermVectors extends LuceneTestCase {
   private IndexSearcher searcher;
-  private RAMDirectory directory = new RAMDirectory();
+  private Directory directory = new MockRAMDirectory();
   public TestTermVectors(String s) {
     super(s);
   }
@@ -91,6 +91,37 @@ public class TestTermVectors extends LuceneTestCase {
     }
   }
 
+  public void testTermVectorsFieldOrder() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
+    Document doc = new Document();
+    doc.add(new Field("c", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("a", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("b", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("x", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    writer.addDocument(doc);
+    writer.close();
+    IndexReader reader = IndexReader.open(dir);
+    TermFreqVector[] v = reader.getTermFreqVectors(0);
+    assertEquals(4, v.length);
+    String[] expectedFields = new String[]{"a", "b", "c", "x"};
+    int[] expectedPositions = new int[]{1, 2, 0};
+    for(int i=0;i<v.length;i++) {
+      TermPositionVector posVec = (TermPositionVector) v[i];
+      assertEquals(expectedFields[i], posVec.getField());
+      String[] terms = posVec.getTerms();
+      assertEquals(3, terms.length);
+      assertEquals("content", terms[0]);
+      assertEquals("here", terms[1]);
+      assertEquals("some", terms[2]);
+      for(int j=0;j<3;j++) {
+        int[] positions = posVec.getTermPositions(j);
+        assertEquals(1, positions.length);
+        assertEquals(expectedPositions[j], positions[0]);
+      }
+    }
+  }
+
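--
Note, for reviewers (not part of the patch): the new quickSort treats hi as an
inclusive index, unlike Arrays.sort's exclusive upper bound, which is why the
call site changes from Arrays.sort(fieldDataArray, 0, numFields) to
quickSort(fieldDataArray, 0, numFields-1). A likely motivation for hand-rolling
the sort is that Arrays.sort on an Object[] is a mergesort that copies the
sorted range on every call, an allocation this once-per-document path is better
off without. Below is a minimal standalone sketch of the same median-of-three
quicksort, generalized to raw Comparable so it runs outside DocumentsWriter;
the class name QuickSortDemo is an illustrative assumption, not code from the
patch.

// QuickSortDemo.java -- hypothetical standalone sketch, not part of the patch.
public class QuickSortDemo {

  // Same algorithm as the patch: order lo/mid/hi (median-of-three), then
  // partition in place around the middle element and recurse on both halves.
  // hi is INCLUSIVE.
  static void quickSort(Comparable[] array, int lo, int hi) {
    if (lo >= hi)
      return;

    int mid = (lo + hi) >>> 1;

    // After these swaps: array[lo] <= array[mid] <= array[hi].
    if (array[lo].compareTo(array[mid]) > 0) {
      Comparable tmp = array[lo]; array[lo] = array[mid]; array[mid] = tmp;
    }
    if (array[mid].compareTo(array[hi]) > 0) {
      Comparable tmp = array[mid]; array[mid] = array[hi]; array[hi] = tmp;
      if (array[lo].compareTo(array[mid]) > 0) {
        Comparable tmp2 = array[lo]; array[lo] = array[mid]; array[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    // Three or fewer elements are already fully sorted by the swaps above.
    if (left >= right)
      return;

    Comparable partition = array[mid];

    for (;;) {
      // array[lo] <= partition acts as a sentinel for this scan.
      while (array[right].compareTo(partition) > 0)
        --right;

      while (left < right && array[left].compareTo(partition) <= 0)
        ++left;

      if (left < right) {
        Comparable tmp = array[left]; array[left] = array[right]; array[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(array, lo, left);
    quickSort(array, left + 1, hi);
  }

  public static void main(String[] args) {
    // Field names added out of order, as in testTermVectorsFieldOrder.
    String[] fields = {"c", "a", "b", "x"};
    quickSort(fields, 0, fields.length - 1);                // inclusive bound
    System.out.println(java.util.Arrays.toString(fields));  // [a, b, c, x]
  }
}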