mirror of https://github.com/apache/lucene.git
LUCENE-3829: improve test coverage of DocValues
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294286 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dc0c24053e
commit
29ab4d99d6
|
@ -23,16 +23,17 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DocValues.SortedSource;
|
||||
|
@ -817,6 +818,103 @@ public class TestDocValuesIndexing extends LuceneTestCase {
|
|||
int nextDoc = termDocsEnum.nextDoc();
|
||||
assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
|
||||
return nextDoc;
|
||||
}
|
||||
|
||||
public void testWithThreads() throws Exception {
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||
final boolean allowDups = random.nextBoolean();
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
|
||||
}
|
||||
int numDocs = 0;
|
||||
final List<BytesRef> docValues = new ArrayList<BytesRef>();
|
||||
|
||||
// TODO: deletions
|
||||
while (numDocs < NUM_DOCS) {
|
||||
final String s;
|
||||
if (random.nextBoolean()) {
|
||||
s = _TestUtil.randomSimpleString(random);
|
||||
} else {
|
||||
s = _TestUtil.randomUnicodeString(random);
|
||||
}
|
||||
final BytesRef br = new BytesRef(s);
|
||||
|
||||
if (!allowDups) {
|
||||
if (seen.contains(s)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(s);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": s=" + s);
|
||||
}
|
||||
|
||||
final Document doc = new Document();
|
||||
doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED));
|
||||
doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS));
|
||||
docValues.add(br);
|
||||
writer.addDocument(doc);
|
||||
numDocs++;
|
||||
|
||||
if (random.nextInt(40) == 17) {
|
||||
// force flush
|
||||
writer.getReader().close();
|
||||
}
|
||||
}
|
||||
|
||||
writer.forceMerge(1);
|
||||
final DirectoryReader r = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
final AtomicReader sr = getOnlySegmentReader(r);
|
||||
final DocValues dv = sr.docValues("stringdv");
|
||||
final DocValues.Source stringDVSource = dv.getSource();
|
||||
assertNotNull(stringDVSource);
|
||||
final DocValues.Source stringDVDirectSource = dv.getDirectSource();
|
||||
assertNotNull(stringDVDirectSource);
|
||||
assertNotNull(dv);
|
||||
|
||||
final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1);
|
||||
|
||||
final DocValues.Source docIDToID = sr.docValues("id").getSource();
|
||||
|
||||
final int NUM_THREADS = _TestUtil.nextInt(random, 1, 10);
|
||||
Thread[] threads = new Thread[NUM_THREADS];
|
||||
for(int thread=0;thread<NUM_THREADS;thread++) {
|
||||
threads[thread] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
while(System.currentTimeMillis() < END_TIME) {
|
||||
final DocValues.Source source;
|
||||
// LUCENE-3829: remove this 'true ||' below
|
||||
// once we fix thread safety of DirectSource
|
||||
if (true || random.nextBoolean()) {
|
||||
source = stringDVSource;
|
||||
} else {
|
||||
source = stringDVDirectSource;
|
||||
}
|
||||
|
||||
final DocValues.SortedSource sortedSource = source.asSortedSource();
|
||||
assertNotNull(sortedSource);
|
||||
|
||||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
for(int iter=0;iter<100;iter++) {
|
||||
final int docID = random.nextInt(sr.maxDoc());
|
||||
final BytesRef br = sortedSource.getBytes(docID, scratch);
|
||||
assertEquals(docValues.get((int) docIDToID.getInt(docID)), br);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[thread].start();
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
lineDocFile = new LineFileDocs(random);
|
||||
lineDocFile = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
|
@ -323,6 +323,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
}
|
||||
writer.commit();
|
||||
} catch (Throwable ex) {
|
||||
System.out.println("FAILED exc:");
|
||||
ex.printStackTrace(System.out);
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -59,7 +59,7 @@ public class TestForceMergeForever extends LuceneTestCase {
|
|||
// Try to make an index that requires merging:
|
||||
w.getConfig().setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 11));
|
||||
final int numStartDocs = atLeast(20);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
for(int docIDX=0;docIDX<numStartDocs;docIDX++) {
|
||||
w.addDocument(docs.nextDoc());
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
|
|||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
@ -179,7 +178,7 @@ public class TestNorms extends LuceneTestCase {
|
|||
Similarity provider = new MySimProvider(writeNorms);
|
||||
config.setSimilarity(provider);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
Document doc = docs.nextDoc();
|
||||
|
|
|
@ -34,7 +34,7 @@ public class TestRollingUpdates extends LuceneTestCase {
|
|||
public void testRollingUpdates() throws Exception {
|
||||
final MockDirectoryWrapper dir = newDirectory();
|
||||
dir.setCheckIndexOnClose(false); // we use a custom codec provider
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
|
||||
//provider.register(new MemoryCodec());
|
||||
if ( (!"Lucene3x".equals(Codec.getDefault().getName())) && random.nextBoolean()) {
|
||||
|
|
|
@ -51,7 +51,7 @@ import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
|
|||
public class TestTermsEnum extends LuceneTestCase {
|
||||
|
||||
public void test() throws Exception {
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
final Directory d = newDirectory();
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random, d);
|
||||
final int numDocs = atLeast(10);
|
||||
|
|
|
@ -48,7 +48,8 @@ public class TestNRTCachingDirectory extends LuceneTestCase {
|
|||
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random,
|
||||
defaultCodecSupportsDocValues());
|
||||
final int numDocs = _TestUtil.nextInt(random, 100, 400);
|
||||
|
||||
if (VERBOSE) {
|
||||
|
|
|
@ -1094,7 +1094,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
|
||||
}
|
||||
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
final int RUN_TIME_MSEC = atLeast(500);
|
||||
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
|
||||
final File tempDir = _TestUtil.getTempDir("fstlines");
|
||||
|
|
|
@ -356,37 +356,29 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
|
|||
shift = 0;
|
||||
trigger = 1;
|
||||
} else {
|
||||
trigger = totTermCount.get()/10;
|
||||
trigger = totTermCount.get()/30;
|
||||
shift = random.nextInt(trigger);
|
||||
}
|
||||
BytesRef term = termsEnum.next();
|
||||
if (term == null) {
|
||||
if (seenTermCount == 0) {
|
||||
while (true) {
|
||||
BytesRef term = termsEnum.next();
|
||||
if (term == null) {
|
||||
if (seenTermCount == 0) {
|
||||
break;
|
||||
}
|
||||
totTermCount.set(seenTermCount);
|
||||
break;
|
||||
}
|
||||
totTermCount.set(seenTermCount);
|
||||
seenTermCount = 0;
|
||||
if (totTermCount.get() < 10) {
|
||||
shift = 0;
|
||||
seenTermCount++;
|
||||
// search 30 terms
|
||||
if (trigger == 0) {
|
||||
trigger = 1;
|
||||
} else {
|
||||
trigger = totTermCount.get()/10;
|
||||
//System.out.println("trigger " + trigger);
|
||||
shift = random.nextInt(trigger);
|
||||
}
|
||||
termsEnum.seekCeil(new BytesRef(""));
|
||||
continue;
|
||||
}
|
||||
seenTermCount++;
|
||||
// search 10 terms
|
||||
if (trigger == 0) {
|
||||
trigger = 1;
|
||||
}
|
||||
if ((seenTermCount + shift) % trigger == 0) {
|
||||
//if (VERBOSE) {
|
||||
//System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
|
||||
//}
|
||||
totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
|
||||
if ((seenTermCount + shift) % trigger == 0) {
|
||||
//if (VERBOSE) {
|
||||
//System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
|
||||
//}
|
||||
totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
|
||||
}
|
||||
}
|
||||
//if (VERBOSE) {
|
||||
//System.out.println(Thread.currentThread().getName() + ": search done");
|
||||
|
@ -432,7 +424,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
|
|||
|
||||
final long t0 = System.currentTimeMillis();
|
||||
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
final File tempDir = _TestUtil.getTempDir(testName);
|
||||
dir = newFSDirectory(tempDir);
|
||||
((MockDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
|
||||
|
@ -636,7 +628,14 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
|
|||
|
||||
private int runQuery(IndexSearcher s, Query q) throws Exception {
|
||||
s.search(q, 10);
|
||||
return s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
|
||||
int hitCount = s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
|
||||
if (defaultCodecSupportsDocValues()) {
|
||||
final Sort dvSort = new Sort(new SortField("title", SortField.Type.STRING));
|
||||
dvSort.getSort()[0].setUseIndexValues(true);
|
||||
int hitCount2 = s.search(q, null, 10, dvSort).totalHits;
|
||||
assertEquals(hitCount, hitCount2);
|
||||
}
|
||||
return hitCount;
|
||||
}
|
||||
|
||||
protected void smokeTestSearcher(IndexSearcher s) throws Exception {
|
||||
|
|
|
@ -518,7 +518,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
|||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
|
||||
int numDocs = 0;
|
||||
while (System.nanoTime() < endTimeNanos) {
|
||||
final int what = random.nextInt(3);
|
||||
|
|
|
@ -17,22 +17,24 @@ package org.apache.lucene.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
|
||||
/** Minimal port of contrib/benchmark's LneDocSource +
|
||||
* DocMaker, so tests can enum docs from a line file created
|
||||
|
@ -43,16 +45,22 @@ public class LineFileDocs implements Closeable {
|
|||
private final static int BUFFER_SIZE = 1 << 16; // 64K
|
||||
private final AtomicInteger id = new AtomicInteger();
|
||||
private final String path;
|
||||
private final boolean useDocValues;
|
||||
|
||||
/** If forever is true, we rewind the file at EOF (repeat
|
||||
* the docs over and over) */
|
||||
public LineFileDocs(Random random, String path) throws IOException {
|
||||
public LineFileDocs(Random random, String path, boolean useDocValues) throws IOException {
|
||||
this.path = path;
|
||||
this.useDocValues = useDocValues;
|
||||
open(random);
|
||||
}
|
||||
|
||||
public LineFileDocs(Random random) throws IOException {
|
||||
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
|
||||
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, true);
|
||||
}
|
||||
|
||||
public LineFileDocs(Random random, boolean useDocValues) throws IOException {
|
||||
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, useDocValues);
|
||||
}
|
||||
|
||||
public synchronized void close() throws IOException {
|
||||
|
@ -113,11 +121,12 @@ public class LineFileDocs implements Closeable {
|
|||
final Document doc;
|
||||
final Field titleTokenized;
|
||||
final Field title;
|
||||
final Field titleDV;
|
||||
final Field body;
|
||||
final Field id;
|
||||
final Field date;
|
||||
|
||||
public DocState() {
|
||||
public DocState(boolean useDocValues) {
|
||||
doc = new Document();
|
||||
|
||||
title = new StringField("title", "");
|
||||
|
@ -139,6 +148,13 @@ public class LineFileDocs implements Closeable {
|
|||
|
||||
date = new Field("date", "", StringField.TYPE_STORED);
|
||||
doc.add(date);
|
||||
|
||||
if (useDocValues) {
|
||||
titleDV = new DocValuesField("titleDV", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
|
||||
doc.add(titleDV);
|
||||
} else {
|
||||
titleDV = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -162,7 +178,7 @@ public class LineFileDocs implements Closeable {
|
|||
|
||||
DocState docState = threadDocs.get();
|
||||
if (docState == null) {
|
||||
docState = new DocState();
|
||||
docState = new DocState(useDocValues);
|
||||
threadDocs.set(docState);
|
||||
}
|
||||
|
||||
|
@ -178,6 +194,9 @@ public class LineFileDocs implements Closeable {
|
|||
docState.body.setStringValue(line.substring(1+spot2, line.length()));
|
||||
final String title = line.substring(0, spot);
|
||||
docState.title.setStringValue(title);
|
||||
if (docState.titleDV != null) {
|
||||
docState.titleDV.setBytesValue(new BytesRef(title));
|
||||
}
|
||||
docState.titleTokenized.setStringValue(title);
|
||||
docState.date.setStringValue(line.substring(1+spot, spot2));
|
||||
docState.id.setStringValue(Integer.toString(id.getAndIncrement()));
|
||||
|
|
|
@ -1573,4 +1573,8 @@ public abstract class LuceneTestCase extends Assert {
|
|||
|
||||
@Ignore("just a hack")
|
||||
public final void alwaysIgnoredTestMethod() {}
|
||||
|
||||
protected static boolean defaultCodecSupportsDocValues() {
|
||||
return !Codec.getDefault().getName().equals("Lucene3x");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,10 +38,12 @@ import org.apache.lucene.codecs.Codec;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
|
@ -677,9 +679,36 @@ public class _TestUtil {
|
|||
public static Document cloneDocument(Document doc1) {
|
||||
final Document doc2 = new Document();
|
||||
for(IndexableField f : doc1) {
|
||||
Field field1 = (Field) f;
|
||||
|
||||
Field field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
|
||||
final Field field1 = (Field) f;
|
||||
final Field field2;
|
||||
if (field1 instanceof DocValuesField) {
|
||||
final DocValues.Type dvType = field1.fieldType().docValueType();
|
||||
switch (dvType) {
|
||||
case VAR_INTS:
|
||||
case FIXED_INTS_8:
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
field2 = new DocValuesField(field1.name(), field1.numericValue().intValue(), dvType);
|
||||
break;
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_VAR_SORTED:
|
||||
field2 = new DocValuesField(field1.name(), BytesRef.deepCopyOf(field1.binaryValue()), dvType);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
field2 = new DocValuesField(field1.name(), field1.numericValue().doubleValue(), dvType);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("don't know how to clone DV field=" + field1);
|
||||
}
|
||||
} else {
|
||||
field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
|
||||
}
|
||||
doc2.add(field2);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue