improve testing for sparse points

This commit is contained in:
Mike McCandless 2016-03-08 15:21:37 -05:00
parent 56ad6e5d8a
commit 62b3aaa526
13 changed files with 95 additions and 31 deletions

View File

@ -148,7 +148,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
IndexWriterConfig conf = new IndexWriterConfig(analyzer)
.setMergePolicy(mp).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(dir, conf);
LineFileDocs docs = new LineFileDocs(null, true);
LineFileDocs docs = new LineFileDocs(null);
for(int i=0;i<50;i++) {
writer.addDocument(docs.nextDoc());
}

View File

@ -39,7 +39,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
lineDocFile = new LineFileDocs(random(), true);
lineDocFile = new LineFileDocs(random());
}
@AfterClass

View File

@ -62,7 +62,7 @@ public class TestForceMergeForever extends LuceneTestCase {
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
final int numStartDocs = atLeast(20);
final LineFileDocs docs = new LineFileDocs(random(), true);
final LineFileDocs docs = new LineFileDocs(random());
for(int docIDX=0;docIDX<numStartDocs;docIDX++) {
w.addDocument(docs.nextDoc());
}

View File

@ -133,7 +133,7 @@ public class TestNorms extends LuceneTestCase {
Similarity provider = new MySimProvider();
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
final LineFileDocs docs = new LineFileDocs(random, true);
final LineFileDocs docs = new LineFileDocs(random);
int num = atLeast(100);
for (int i = 0; i < num; i++) {
Document doc = docs.nextDoc();

View File

@ -582,4 +582,50 @@ public class TestPointValues extends LuceneTestCase {
w.close();
dir.close();
}
/**
 * Indexes documents in which each int point field occurs only rarely, then checks that the
 * doc count and point count reported by the reader, summed over all leaves, match what was
 * indexed for every field.
 *
 * <p>For each document and each of the {@code numFields} fields, a point is added when
 * {@code random().nextInt(100) == 17}; when it is, a second point for the same field is added
 * when {@code random().nextInt(10) == 5}. The expected point count of a field can therefore
 * exceed its expected doc count.
 *
 * <p>Directory, writer and reader are managed with try-with-resources so they are closed
 * (reader first, then writer, then directory) even when an assertion fails.
 */
public void testSparsePoints() throws Exception {
  try (Directory dir = newDirectory()) {
    int numDocs = atLeast(1000);
    int numFields = TestUtil.nextInt(random(), 1, 10);
    // Expected number of documents containing each field at least once:
    int[] fieldDocCounts = new int[numFields];
    // Expected total number of points indexed for each field:
    int[] fieldSizes = new int[numFields];

    try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        for (int field = 0; field < numFields; field++) {
          String fieldName = "int" + field;
          if (random().nextInt(100) == 17) {
            doc.add(new IntPoint(fieldName, random().nextInt()));
            fieldDocCounts[field]++;
            fieldSizes[field]++;

            if (random().nextInt(10) == 5) {
              // add same field again!
              doc.add(new IntPoint(fieldName, random().nextInt()));
              fieldSizes[field]++;
            }
          }
        }
        // Documents that drew no fields are still indexed (as empty documents).
        w.addDocument(doc);
      }

      try (IndexReader r = w.getReader()) {
        for (int field = 0; field < numFields; field++) {
          int docCount = 0;
          // size(String) is accumulated in a long to avoid a silent narrowing conversion.
          long size = 0;
          String fieldName = "int" + field;
          for (LeafReaderContext ctx : r.leaves()) {
            PointValues points = ctx.reader().getPointValues();
            // Only consult the point values in leaves that actually know this field.
            if (ctx.reader().getFieldInfos().fieldInfo(fieldName) != null) {
              docCount += points.getDocCount(fieldName);
              size += points.size(fieldName);
            }
          }
          assertEquals("doc count for field " + fieldName, fieldDocCounts[field], docCount);
          assertEquals("point count for field " + fieldName, fieldSizes[field], size);
        }
      }
    }
  }
}
}

View File

@ -40,7 +40,7 @@ public class TestRollingUpdates extends LuceneTestCase {
Random random = new Random(random().nextLong());
final BaseDirectoryWrapper dir = newDirectory();
final LineFileDocs docs = new LineFileDocs(random, true);
final LineFileDocs docs = new LineFileDocs(random);
//provider.register(new MemoryCodec());
if (random().nextBoolean()) {

View File

@ -41,7 +41,7 @@ public class TestTermsEnum extends LuceneTestCase {
public void test() throws Exception {
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final LineFileDocs docs = new LineFileDocs(random);
final Directory d = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

View File

@ -56,7 +56,7 @@ public class TestNRTCachingDirectory extends BaseDirectoryTestCase {
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random(), true);
final LineFileDocs docs = new LineFileDocs(random());
final int numDocs = TestUtil.nextInt(random(), 100, 400);
if (VERBOSE) {

View File

@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTestCase {
// file, up until a doc limit
public void testRealTerms() throws Exception {
final LineFileDocs docs = new LineFileDocs(random(), true);
final LineFileDocs docs = new LineFileDocs(random());
final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

View File

@ -131,7 +131,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
@Ignore
public void testWiki() throws Exception {
final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt", false);
final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt");
// Skip header:
lfd.nextDoc();
Analyzer analyzer = new MockAnalyzer(random());

View File

@ -429,7 +429,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
final long t0 = System.currentTimeMillis();
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final LineFileDocs docs = new LineFileDocs(random);
final Path tempDir = createTempDir(testName);
dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
if (dir instanceof BaseDirectoryWrapper) {

View File

@ -552,7 +552,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
@Override
public void run() {
try {
final LineFileDocs docs = new LineFileDocs(random(), true);
final LineFileDocs docs = new LineFileDocs(random());
int numDocs = 0;
while (System.nanoTime() < endTimeNanos) {
final int what = random().nextInt(3);

View File

@ -33,16 +33,17 @@ import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
/** Minimal port of benchmark's LineDocSource +
* DocMaker, so tests can enum docs from a line file created
@ -53,22 +54,18 @@ public class LineFileDocs implements Closeable {
private final static int BUFFER_SIZE = 1 << 16; // 64K
private final AtomicInteger id = new AtomicInteger();
private final String path;
private final boolean useDocValues;
private final Random random;
/** If forever is true, we rewind the file at EOF (repeat
* the docs over and over) */
public LineFileDocs(Random random, String path, boolean useDocValues) throws IOException {
public LineFileDocs(Random random, String path) throws IOException {
this.path = path;
this.useDocValues = useDocValues;
this.random = new Random(random.nextLong());
open(random);
}
public LineFileDocs(Random random) throws IOException {
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, true);
}
public LineFileDocs(Random random, boolean useDocValues) throws IOException {
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, useDocValues);
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
}
@Override
@ -165,7 +162,7 @@ public class LineFileDocs implements Closeable {
final Field idNumDV;
final Field date;
public DocState(boolean useDocValues) {
public DocState() {
doc = new Document();
title = new StringField("title", "", Field.Store.NO);
@ -192,15 +189,10 @@ public class LineFileDocs implements Closeable {
date = new StringField("date", "", Field.Store.YES);
doc.add(date);
if (useDocValues) {
titleDV = new SortedDocValuesField("titleDV", new BytesRef());
idNumDV = new NumericDocValuesField("docid_intDV", 0);
doc.add(titleDV);
doc.add(idNumDV);
} else {
titleDV = null;
idNumDV = null;
}
titleDV = new SortedDocValuesField("titleDV", new BytesRef());
idNumDV = new NumericDocValuesField("docid_intDV", 0);
doc.add(titleDV);
doc.add(idNumDV);
}
}
@ -225,7 +217,7 @@ public class LineFileDocs implements Closeable {
DocState docState = threadDocs.get();
if (docState == null) {
docState = new DocState(useDocValues);
docState = new DocState();
threadDocs.set(docState);
}
@ -252,6 +244,32 @@ public class LineFileDocs implements Closeable {
if (docState.idNumDV != null) {
docState.idNumDV.setLongValue(i);
}
// Roughly one call in five (random.nextInt(5) == 4) builds a second Document that
// holds every field of docState.doc plus a random selection of extra fields.
if (random.nextInt(5) == 4) {
// Make some sparse fields
Document doc = new Document();
// Copy over the same field instances from docState.doc (the fields are shared, not cloned).
for(IndexableField field : docState.doc) {
doc.add(field);
}
// Each extra field below is added independently when random.nextInt(3) == 1, and its
// name gets a random suffix 0..3, so any single field name occurs only occasionally.
if (random.nextInt(3) == 1) {
int x = random.nextInt(4);
doc.add(new IntPoint("docLength" + x, line.length()));
}
if (random.nextInt(3) == 1) {
int x = random.nextInt(4);
doc.add(new IntPoint("docTitleLength" + x, title.length()));
}
if (random.nextInt(3) == 1) {
int x = random.nextInt(4);
// Same "docLength" + x name pattern as the IntPoint above, but as doc values.
doc.add(new NumericDocValuesField("docLength" + x, line.length()));
}
// TODO: more random sparse fields here too
}
// NOTE(review): in the lines visible here the local `doc` built above is never returned
// or stored; docState.doc is returned instead, so the sparse fields appear to be
// discarded. Presumably `doc` was meant to be returned in that case -- confirm intent.
return docState.doc;
}
}