LUCENE-3829: improve test coverage of DocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294286 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-02-27 19:20:08 +00:00
parent dc0c24053e
commit 29ab4d99d6
13 changed files with 199 additions and 48 deletions

View File

@ -23,16 +23,17 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.SortedSource;
@ -817,6 +818,103 @@ public class TestDocValuesIndexing extends LuceneTestCase {
int nextDoc = termDocsEnum.nextDoc();
assertEquals(DocsEnum.NO_MORE_DOCS, termDocsEnum.nextDoc());
return nextDoc;
}
/** LUCENE-3829: indexes random BYTES_VAR_SORTED doc values, then hammers
 *  the SortedSource concurrently from several threads, verifying each
 *  doc's stored value round-trips. */
public void testWithThreads() throws Exception {
  final int NUM_DOCS = atLeast(100);
  final Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  final boolean allowDups = random.nextBoolean();
  final Set<String> seen = new HashSet<String>();
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
  }
  int numDocs = 0;
  final List<BytesRef> docValues = new ArrayList<BytesRef>();
  // TODO: deletions
  while (numDocs < NUM_DOCS) {
    final String s;
    if (random.nextBoolean()) {
      s = _TestUtil.randomSimpleString(random);
    } else {
      s = _TestUtil.randomUnicodeString(random);
    }
    final BytesRef br = new BytesRef(s);
    if (!allowDups) {
      if (seen.contains(s)) {
        continue;
      }
      seen.add(s);
    }
    if (VERBOSE) {
      System.out.println(" " + numDocs + ": s=" + s);
    }
    final Document doc = new Document();
    doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED));
    // "id" DV maps docID -> insertion order so we can look up the
    // expected value even after merging reorders docs:
    doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS));
    docValues.add(br);
    writer.addDocument(doc);
    numDocs++;
    if (random.nextInt(40) == 17) {
      // force flush
      writer.getReader().close();
    }
  }
  writer.forceMerge(1);
  final DirectoryReader r = writer.getReader();
  writer.close();
  final AtomicReader sr = getOnlySegmentReader(r);
  final DocValues dv = sr.docValues("stringdv");
  // BUG FIX: assert dv != null BEFORE dereferencing it; previously the
  // assertNotNull came after dv.getSource() had already been called.
  assertNotNull(dv);
  final DocValues.Source stringDVSource = dv.getSource();
  assertNotNull(stringDVSource);
  final DocValues.Source stringDVDirectSource = dv.getDirectSource();
  assertNotNull(stringDVDirectSource);
  final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1);
  final DocValues.Source docIDToID = sr.docValues("id").getSource();
  final int NUM_THREADS = _TestUtil.nextInt(random, 1, 10);
  Thread[] threads = new Thread[NUM_THREADS];
  for(int thread=0;thread<NUM_THREADS;thread++) {
    threads[thread] = new Thread() {
        @Override
        public void run() {
          while(System.currentTimeMillis() < END_TIME) {
            final DocValues.Source source;
            // LUCENE-3829: remove this 'true ||' below
            // once we fix thread safety of DirectSource
            if (true || random.nextBoolean()) {
              source = stringDVSource;
            } else {
              source = stringDVDirectSource;
            }
            final DocValues.SortedSource sortedSource = source.asSortedSource();
            assertNotNull(sortedSource);
            final BytesRef scratch = new BytesRef();
            for(int iter=0;iter<100;iter++) {
              final int docID = random.nextInt(sr.maxDoc());
              final BytesRef br = sortedSource.getBytes(docID, scratch);
              assertEquals(docValues.get((int) docIDToID.getInt(docID)), br);
            }
          }
        }
      };
    threads[thread].start();
  }
  // BUG FIX: wait for all searcher threads to finish before closing the
  // reader/dir they are using; otherwise close() races with getBytes().
  for(Thread thread : threads) {
    thread.join();
  }
  r.close();
  dir.close();
}
}

View File

@ -39,7 +39,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
lineDocFile = new LineFileDocs(random);
lineDocFile = new LineFileDocs(random, defaultCodecSupportsDocValues());
}
@AfterClass
@ -323,6 +323,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
}
writer.commit();
} catch (Throwable ex) {
System.out.println("FAILED exc:");
ex.printStackTrace(System.out);
throw new RuntimeException(ex);
}
}

View File

@ -59,7 +59,7 @@ public class TestForceMergeForever extends LuceneTestCase {
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 11));
final int numStartDocs = atLeast(20);
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
for(int docIDX=0;docIDX<numStartDocs;docIDX++) {
w.addDocument(docs.nextDoc());
}

View File

@ -29,7 +29,6 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
@ -179,7 +178,7 @@ public class TestNorms extends LuceneTestCase {
Similarity provider = new MySimProvider(writeNorms);
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
int num = atLeast(100);
for (int i = 0; i < num; i++) {
Document doc = docs.nextDoc();

View File

@ -34,7 +34,7 @@ public class TestRollingUpdates extends LuceneTestCase {
public void testRollingUpdates() throws Exception {
final MockDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false); // we use a custom codec provider
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
//provider.register(new MemoryCodec());
if ( (!"Lucene3x".equals(Codec.getDefault().getName())) && random.nextBoolean()) {

View File

@ -51,7 +51,7 @@ import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
public class TestTermsEnum extends LuceneTestCase {
public void test() throws Exception {
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final Directory d = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random, d);
final int numDocs = atLeast(10);

View File

@ -48,7 +48,8 @@ public class TestNRTCachingDirectory extends LuceneTestCase {
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random,
defaultCodecSupportsDocValues());
final int numDocs = _TestUtil.nextInt(random, 100, 400);
if (VERBOSE) {

View File

@ -1094,7 +1094,7 @@ public class TestFSTs extends LuceneTestCase {
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
}
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");

View File

@ -356,37 +356,29 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
shift = 0;
trigger = 1;
} else {
trigger = totTermCount.get()/10;
trigger = totTermCount.get()/30;
shift = random.nextInt(trigger);
}
BytesRef term = termsEnum.next();
if (term == null) {
if (seenTermCount == 0) {
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
if (seenTermCount == 0) {
break;
}
totTermCount.set(seenTermCount);
break;
}
totTermCount.set(seenTermCount);
seenTermCount = 0;
if (totTermCount.get() < 10) {
shift = 0;
seenTermCount++;
// search 30 terms
if (trigger == 0) {
trigger = 1;
} else {
trigger = totTermCount.get()/10;
//System.out.println("trigger " + trigger);
shift = random.nextInt(trigger);
}
termsEnum.seekCeil(new BytesRef(""));
continue;
}
seenTermCount++;
// search 10 terms
if (trigger == 0) {
trigger = 1;
}
if ((seenTermCount + shift) % trigger == 0) {
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
//}
totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
if ((seenTermCount + shift) % trigger == 0) {
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
//}
totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
}
}
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + ": search done");
@ -432,7 +424,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
final long t0 = System.currentTimeMillis();
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final File tempDir = _TestUtil.getTempDir(testName);
dir = newFSDirectory(tempDir);
((MockDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
@ -636,7 +628,14 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
private int runQuery(IndexSearcher s, Query q) throws Exception {
s.search(q, 10);
return s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
int hitCount = s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
if (defaultCodecSupportsDocValues()) {
final Sort dvSort = new Sort(new SortField("title", SortField.Type.STRING));
dvSort.getSort()[0].setUseIndexValues(true);
int hitCount2 = s.search(q, null, 10, dvSort).totalHits;
assertEquals(hitCount, hitCount2);
}
return hitCount;
}
protected void smokeTestSearcher(IndexSearcher s) throws Exception {

View File

@ -518,7 +518,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
@Override
public void run() {
try {
final LineFileDocs docs = new LineFileDocs(random);
final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
int numDocs = 0;
while (System.nanoTime() < endTimeNanos) {
final int what = random.nextInt(3);

View File

@ -17,22 +17,24 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
import java.util.Random;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues;
/** Minimal port of contrib/benchmark's LineDocSource +
* DocMaker, so tests can enum docs from a line file created
@ -43,16 +45,22 @@ public class LineFileDocs implements Closeable {
private final static int BUFFER_SIZE = 1 << 16; // 64K
private final AtomicInteger id = new AtomicInteger();
private final String path;
private final boolean useDocValues;
/** If forever is true, we rewind the file at EOF (repeat
* the docs over and over) */
public LineFileDocs(Random random, String path) throws IOException {
public LineFileDocs(Random random, String path, boolean useDocValues) throws IOException {
this.path = path;
this.useDocValues = useDocValues;
open(random);
}
public LineFileDocs(Random random) throws IOException {
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, true);
}
public LineFileDocs(Random random, boolean useDocValues) throws IOException {
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, useDocValues);
}
public synchronized void close() throws IOException {
@ -113,11 +121,12 @@ public class LineFileDocs implements Closeable {
final Document doc;
final Field titleTokenized;
final Field title;
final Field titleDV;
final Field body;
final Field id;
final Field date;
public DocState() {
public DocState(boolean useDocValues) {
doc = new Document();
title = new StringField("title", "");
@ -139,6 +148,13 @@ public class LineFileDocs implements Closeable {
date = new Field("date", "", StringField.TYPE_STORED);
doc.add(date);
if (useDocValues) {
titleDV = new DocValuesField("titleDV", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
doc.add(titleDV);
} else {
titleDV = null;
}
}
}
@ -162,7 +178,7 @@ public class LineFileDocs implements Closeable {
DocState docState = threadDocs.get();
if (docState == null) {
docState = new DocState();
docState = new DocState(useDocValues);
threadDocs.set(docState);
}
@ -178,6 +194,9 @@ public class LineFileDocs implements Closeable {
docState.body.setStringValue(line.substring(1+spot2, line.length()));
final String title = line.substring(0, spot);
docState.title.setStringValue(title);
if (docState.titleDV != null) {
docState.titleDV.setBytesValue(new BytesRef(title));
}
docState.titleTokenized.setStringValue(title);
docState.date.setStringValue(line.substring(1+spot, spot2));
docState.id.setStringValue(Integer.toString(id.getAndIncrement()));

View File

@ -1573,4 +1573,8 @@ public abstract class LuceneTestCase extends Assert {
@Ignore("just a hack")
public final void alwaysIgnoredTestMethod() {}
/** Returns true unless the default codec is the read-only legacy
 *  "Lucene3x" (preflex) codec, which cannot write DocValues. */
protected static boolean defaultCodecSupportsDocValues() {
  // Constant-first equals is null-safe (matches the idiom used elsewhere
  // in this commit, e.g. TestRollingUpdates), avoiding an NPE if the
  // codec name were ever null.
  return !"Lucene3x".equals(Codec.getDefault().getName());
}
}

View File

@ -38,10 +38,12 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfos;
@ -677,9 +679,36 @@ public class _TestUtil {
public static Document cloneDocument(Document doc1) {
final Document doc2 = new Document();
for(IndexableField f : doc1) {
Field field1 = (Field) f;
Field field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
final Field field1 = (Field) f;
final Field field2;
if (field1 instanceof DocValuesField) {
final DocValues.Type dvType = field1.fieldType().docValueType();
switch (dvType) {
case VAR_INTS:
case FIXED_INTS_8:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
field2 = new DocValuesField(field1.name(), field1.numericValue().intValue(), dvType);
break;
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
field2 = new DocValuesField(field1.name(), BytesRef.deepCopyOf(field1.binaryValue()), dvType);
break;
case FLOAT_32:
case FLOAT_64:
field2 = new DocValuesField(field1.name(), field1.numericValue().doubleValue(), dvType);
break;
default:
throw new IllegalArgumentException("don't know how to clone DV field=" + field1);
}
} else {
field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
}
doc2.add(field2);
}