mirror of https://github.com/apache/lucene.git

commit 0dd65f6130 (parent 4740056f09)

    LUCENE-6766: more IW.infoStream logging around sorting; fix test bug
@@ -831,7 +831,7 @@ public final class CheckIndex implements Closeable {
     if (sort != null) {
       if (infoStream != null) {
-        infoStream.print("    test: check index sort.....");
+        infoStream.print("    test: index sort..........");
       }

       SortField fields[] = sort.getSort();

@@ -57,7 +57,8 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
   public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
     this.subs = subs;

-    if (indexIsSorted) {
+    // nocommit safe?
+    if (indexIsSorted && maxCount > 1) {
       queue = new PriorityQueue<T>(maxCount) {
         @Override
         protected boolean lessThan(Sub a, Sub b) {
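The new maxCount > 1 guard skips the PriorityQueue when there is only a single sub to merge: one already-sorted sub needs no interleaving, so the heap is pure overhead. A minimal sketch of that idea, written as a generic k-way merge over sorted int arrays rather than the actual DocIDMerger internals (the class and method names below are illustrative only):

import java.util.List;
import java.util.PriorityQueue;

// Illustrative k-way merge: the heap is only worth building for two or more inputs.
final class KWayMergeSketch {

  static int[] merge(List<int[]> sortedInputs) {
    // Single input: already in order, just copy it (the analogue of maxCount > 1 being false).
    if (sortedInputs.size() == 1) {
      return sortedInputs.get(0).clone();
    }

    // Two or more inputs: classic heap-based merge; entries are {value, inputIndex, offset}.
    PriorityQueue<int[]> pq = new PriorityQueue<>((a, b) -> Integer.compare(a[0], b[0]));
    int total = 0;
    for (int i = 0; i < sortedInputs.size(); i++) {
      int[] input = sortedInputs.get(i);
      total += input.length;
      if (input.length > 0) {
        pq.add(new int[] {input[0], i, 0});
      }
    }

    int[] merged = new int[total];
    int upto = 0;
    while (pq.isEmpty() == false) {
      int[] top = pq.poll();
      merged[upto++] = top[0];
      int[] input = sortedInputs.get(top[1]);
      int next = top[2] + 1;
      if (next < input.length) {
        pq.add(new int[] {input[next], top[1], next});
      }
    }
    return merged;
  }
}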
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Locale;

 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.FieldsProducer;
@@ -185,7 +186,13 @@ public class MergeState {

     } else {
       // do a merge sort of the incoming leaves:
-      return MultiSorter.sort(indexSort, readers);
+      long t0 = System.nanoTime();
+      DocMap[] result = MultiSorter.sort(indexSort, readers);
+      long t1 = System.nanoTime();
+      if (infoStream.isEnabled("SM")) {
+        infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
+      }
+      return result;
     }
   }

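Both timing additions in this commit follow the same pattern: capture System.nanoTime() around the expensive step, then report the elapsed milliseconds through the merge InfoStream under the "SM" component. A hedged sketch of that pattern as a reusable helper (TimedStep is not part of Lucene; the logging callback stands in for infoStream.message):

import java.util.Locale;
import java.util.function.Consumer;
import java.util.function.Supplier;

// Hypothetical helper, not in Lucene: times one step and reports the elapsed milliseconds.
final class TimedStep {

  static <T> T run(String what, Supplier<T> step, Consumer<String> log) {
    long t0 = System.nanoTime();
    T result = step.get();
    long t1 = System.nanoTime();
    // Locale.ROOT keeps the decimal separator stable regardless of the JVM default locale.
    log.accept(String.format(Locale.ROOT, "%.2f msec to %s", (t1 - t0) / 1000000.0, what));
    return result;
  }
}

Ignoring checked exceptions, the hunk above would read roughly as TimedStep.run("build merge sorted DocMaps", () -> MultiSorter.sort(indexSort, readers), msg -> infoStream.message("SM", msg)); the commit keeps the t0/t1 pattern inline instead.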
@@ -218,10 +225,14 @@ public class MergeState {
         // to their index files on each indexed document:

         // This segment was written by flush, so documents are not yet sorted, so we sort them now:
+        long t0 = System.nanoTime();
         Sorter.DocMap sortDocMap = sorter.sort(leaf);
+        long t1 = System.nanoTime();
+        double msec = (t1-t0)/1000000.0;
+
         if (sortDocMap != null) {
           if (infoStream.isEnabled("SM")) {
-            infoStream.message("SM", "segment " + leaf + " is not sorted; wrapping for sort " + indexSort + " now");
+            infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec));
           }
           leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
           leafDocMaps[readers.size()] = new DocMap() {
@@ -232,7 +243,7 @@ public class MergeState {
           };
         } else {
           if (infoStream.isEnabled("SM")) {
-            infoStream.message("SM", "segment " + leaf + " is not sorted, but is already accidentally in sort " + indexSort + " order");
+            infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted, but is already accidentally in sort %s order (%.2f msec to sort)", leaf, indexSort, msec));
           }
         }

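Both reworked messages format through String.format(Locale.ROOT, ...) rather than string concatenation, presumably so the numbers render the same way on every JVM. A tiny standalone demonstration of why the explicit locale matters for infoStream output:

import java.util.Locale;

// Shows why Locale.ROOT is used for log output: the default locale may render
// decimals with a comma, which makes the messages harder to grep and parse.
public class LocaleFormatDemo {
  public static void main(String[] args) {
    double msec = 12.3456;
    System.out.println(String.format(Locale.GERMANY, "%.2f msec to sort", msec)); // 12,35 msec to sort
    System.out.println(String.format(Locale.ROOT, "%.2f msec to sort", msec));    // 12.35 msec to sort
  }
}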
@@ -30,8 +30,11 @@ import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicInteger;

+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -1053,6 +1056,14 @@ public class TestIndexSorting extends LuceneTestCase {
     TERM_VECTORS_TYPE.setStoreTermVectors(true);
     TERM_VECTORS_TYPE.freeze();

+    Analyzer a = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName) {
+          Tokenizer tokenizer = new MockTokenizer();
+          return new TokenStreamComponents(tokenizer, tokenizer);
+        }
+      };
+
     List<Document> docs = new ArrayList<>();
     for (int i=0;i<numDocs;i++) {
       int id = i * 10;
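This is the test-bug half of the commit: the method builds two indexes that must compare equal reader-for-reader, so both writers now share this single fixed Analyzer instead of each constructing its own MockAnalyzer(random()), whose randomized settings could differ between the two writers. A minimal sketch of the shared-analyzer setup using the plain IndexWriterConfig constructor (the test itself uses the LuceneTestCase helpers instead):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.index.IndexWriterConfig;

// Sketch only: one deterministic Analyzer backs both writer configs, so both
// indexes tokenize every document identically.
final class SharedAnalyzerSketch {

  static IndexWriterConfig[] twoConfigsWithSameAnalysis() {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer();
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };
    return new IndexWriterConfig[] {new IndexWriterConfig(a), new IndexWriterConfig(a)};
  }
}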
@@ -1085,14 +1096,15 @@
     // We add document alread in ID order for the first writer:
     Directory dir1 = newFSDirectory(createTempDir());

-    IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+    Random random1 = new Random(seed);
+    IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
     iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field
     // preserve docIDs
     iwc1.setMergePolicy(newLogMergePolicy());
     if (VERBOSE) {
       System.out.println("TEST: now index pre-sorted");
     }
-    RandomIndexWriter w1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
+    RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
     for(Document doc : docs) {
       ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
       w1.addDocument(doc);
@@ -1101,7 +1113,8 @@
     // We shuffle documents, but set index sort, for the second writer:
     Directory dir2 = newFSDirectory(createTempDir());

-    IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+    Random random2 = new Random(seed);
+    IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
     iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field

     Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
@@ -1111,7 +1124,7 @@
     if (VERBOSE) {
       System.out.println("TEST: now index with index-time sorting");
     }
-    RandomIndexWriter w2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
+    RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
     int count = 0;
     int commitAtCount = TestUtil.nextInt(random(), 1, numDocs-1);
     for(Document doc : docs) {
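Both writers are now driven by Random instances created from the same seed (random1 and random2), so the randomized choices the test framework makes for each writer line up. The property this relies on is simply that equal seeds give equal sequences:

import java.util.Random;

// Two Randoms seeded identically produce identical sequences, which is what lets
// the two RandomIndexWriters in the test take the same "random" decisions.
public class SameSeedDemo {
  public static void main(String[] args) {
    long seed = 42L; // any fixed seed
    Random random1 = new Random(seed);
    Random random2 = new Random(seed);
    for (int i = 0; i < 5; i++) {
      int a = random1.nextInt(100);
      int b = random2.nextInt(100);
      System.out.println(a + " == " + b); // always equal
    }
  }
}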
@@ -1122,10 +1135,16 @@
       }
       w2.addDocument(doc);
     }
+    if (VERBOSE) {
+      System.out.println("TEST: now force merge");
+    }
     w2.forceMerge(1);

     DirectoryReader r1 = w1.getReader();
     DirectoryReader r2 = w2.getReader();
+    if (VERBOSE) {
+      System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
+    }
     assertEquals(sort, getOnlyLeafReader(r2).getIndexSort());
     assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
     IOUtils.close(w1, w2, r1, r2, dir1, dir2);
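assertReaderEquals then walks both readers and checks that they match; that only makes sense because the hand-sorted index and the index-time-sorted index end up with the same documents in the same docID order after forceMerge(1). A conceptual sketch of that kind of doc-by-doc comparison, using the test's "id" stored field (this is not the actual LuceneTestCase.assertReaderEquals implementation):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;

// Conceptual only: compares the "id" stored field docID-by-docID across two readers.
final class ReaderCompareSketch {

  static void assertSameIds(IndexReader r1, IndexReader r2) throws IOException {
    if (r1.maxDoc() != r2.maxDoc()) {
      throw new AssertionError("different maxDoc: " + r1.maxDoc() + " vs " + r2.maxDoc());
    }
    for (int docID = 0; docID < r1.maxDoc(); docID++) {
      String id1 = r1.document(docID).get("id");
      String id2 = r2.document(docID).get("id");
      if (id1 == null ? id2 != null : id1.equals(id2) == false) {
        throw new AssertionError("docID=" + docID + ": id mismatch " + id1 + " vs " + id2);
      }
    }
  }
}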