mirror of https://github.com/apache/lucene.git
improve testing for sparse points
This commit is contained in:
parent
56ad6e5d8a
commit
62b3aaa526
lucene
backward-codecs/src/test/org/apache/lucene/index
core/src/test/org/apache/lucene
index
TestFlushByRamOrCountsPolicy.java TestForceMergeForever.java TestNorms.java TestPointValues.java TestRollingUpdates.java TestTermsEnum.java
store
util/fst
suggest/src/test/org/apache/lucene/search/suggest/analyzing
test-framework/src/java/org/apache/lucene
|
@ -148,7 +148,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
IndexWriterConfig conf = new IndexWriterConfig(analyzer)
|
IndexWriterConfig conf = new IndexWriterConfig(analyzer)
|
||||||
.setMergePolicy(mp).setUseCompoundFile(false);
|
.setMergePolicy(mp).setUseCompoundFile(false);
|
||||||
IndexWriter writer = new IndexWriter(dir, conf);
|
IndexWriter writer = new IndexWriter(dir, conf);
|
||||||
LineFileDocs docs = new LineFileDocs(null, true);
|
LineFileDocs docs = new LineFileDocs(null);
|
||||||
for(int i=0;i<50;i++) {
|
for(int i=0;i<50;i++) {
|
||||||
writer.addDocument(docs.nextDoc());
|
writer.addDocument(docs.nextDoc());
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClass() throws Exception {
|
public static void beforeClass() throws Exception {
|
||||||
lineDocFile = new LineFileDocs(random(), true);
|
lineDocFile = new LineFileDocs(random());
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass
|
@AfterClass
|
||||||
|
|
|
@ -62,7 +62,7 @@ public class TestForceMergeForever extends LuceneTestCase {
|
||||||
// Try to make an index that requires merging:
|
// Try to make an index that requires merging:
|
||||||
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
|
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
|
||||||
final int numStartDocs = atLeast(20);
|
final int numStartDocs = atLeast(20);
|
||||||
final LineFileDocs docs = new LineFileDocs(random(), true);
|
final LineFileDocs docs = new LineFileDocs(random());
|
||||||
for(int docIDX=0;docIDX<numStartDocs;docIDX++) {
|
for(int docIDX=0;docIDX<numStartDocs;docIDX++) {
|
||||||
w.addDocument(docs.nextDoc());
|
w.addDocument(docs.nextDoc());
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,7 +133,7 @@ public class TestNorms extends LuceneTestCase {
|
||||||
Similarity provider = new MySimProvider();
|
Similarity provider = new MySimProvider();
|
||||||
config.setSimilarity(provider);
|
config.setSimilarity(provider);
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||||
final LineFileDocs docs = new LineFileDocs(random, true);
|
final LineFileDocs docs = new LineFileDocs(random);
|
||||||
int num = atLeast(100);
|
int num = atLeast(100);
|
||||||
for (int i = 0; i < num; i++) {
|
for (int i = 0; i < num; i++) {
|
||||||
Document doc = docs.nextDoc();
|
Document doc = docs.nextDoc();
|
||||||
|
|
|
@ -582,4 +582,50 @@ public class TestPointValues extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSparsePoints() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
int numDocs = atLeast(1000);
|
||||||
|
int numFields = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
int[] fieldDocCounts = new int[numFields];
|
||||||
|
int[] fieldSizes = new int[numFields];
|
||||||
|
for(int i=0;i<numDocs;i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
for(int field=0;field<numFields;field++) {
|
||||||
|
String fieldName = "int" + field;
|
||||||
|
if (random().nextInt(100) == 17) {
|
||||||
|
doc.add(new IntPoint(fieldName, random().nextInt()));
|
||||||
|
fieldDocCounts[field]++;
|
||||||
|
fieldSizes[field]++;
|
||||||
|
|
||||||
|
if (random().nextInt(10) == 5) {
|
||||||
|
// add same field again!
|
||||||
|
doc.add(new IntPoint(fieldName, random().nextInt()));
|
||||||
|
fieldSizes[field]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader r = w.getReader();
|
||||||
|
for(int field=0;field<numFields;field++) {
|
||||||
|
int docCount = 0;
|
||||||
|
int size = 0;
|
||||||
|
String fieldName = "int" + field;
|
||||||
|
for(LeafReaderContext ctx : r.leaves()) {
|
||||||
|
PointValues points = ctx.reader().getPointValues();
|
||||||
|
if (ctx.reader().getFieldInfos().fieldInfo(fieldName) != null) {
|
||||||
|
docCount += points.getDocCount(fieldName);
|
||||||
|
size += points.size(fieldName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals(fieldDocCounts[field], docCount);
|
||||||
|
assertEquals(fieldSizes[field], size);
|
||||||
|
}
|
||||||
|
r.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@ public class TestRollingUpdates extends LuceneTestCase {
|
||||||
Random random = new Random(random().nextLong());
|
Random random = new Random(random().nextLong());
|
||||||
final BaseDirectoryWrapper dir = newDirectory();
|
final BaseDirectoryWrapper dir = newDirectory();
|
||||||
|
|
||||||
final LineFileDocs docs = new LineFileDocs(random, true);
|
final LineFileDocs docs = new LineFileDocs(random);
|
||||||
|
|
||||||
//provider.register(new MemoryCodec());
|
//provider.register(new MemoryCodec());
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
|
|
|
@ -41,7 +41,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
||||||
|
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
Random random = new Random(random().nextLong());
|
Random random = new Random(random().nextLong());
|
||||||
final LineFileDocs docs = new LineFileDocs(random, true);
|
final LineFileDocs docs = new LineFileDocs(random);
|
||||||
final Directory d = newDirectory();
|
final Directory d = newDirectory();
|
||||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||||
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||||
|
|
|
@ -56,7 +56,7 @@ public class TestNRTCachingDirectory extends BaseDirectoryTestCase {
|
||||||
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||||
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
|
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
|
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
|
||||||
final LineFileDocs docs = new LineFileDocs(random(), true);
|
final LineFileDocs docs = new LineFileDocs(random());
|
||||||
final int numDocs = TestUtil.nextInt(random(), 100, 400);
|
final int numDocs = TestUtil.nextInt(random(), 100, 400);
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
|
|
|
@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
// file, up until a doc limit
|
// file, up until a doc limit
|
||||||
public void testRealTerms() throws Exception {
|
public void testRealTerms() throws Exception {
|
||||||
|
|
||||||
final LineFileDocs docs = new LineFileDocs(random(), true);
|
final LineFileDocs docs = new LineFileDocs(random());
|
||||||
final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
|
final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
|
||||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||||
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
|
||||||
|
|
|
@ -131,7 +131,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
|
||||||
|
|
||||||
@Ignore
|
@Ignore
|
||||||
public void testWiki() throws Exception {
|
public void testWiki() throws Exception {
|
||||||
final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt", false);
|
final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt");
|
||||||
// Skip header:
|
// Skip header:
|
||||||
lfd.nextDoc();
|
lfd.nextDoc();
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
|
@ -429,7 +429,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
|
||||||
final long t0 = System.currentTimeMillis();
|
final long t0 = System.currentTimeMillis();
|
||||||
|
|
||||||
Random random = new Random(random().nextLong());
|
Random random = new Random(random().nextLong());
|
||||||
final LineFileDocs docs = new LineFileDocs(random, true);
|
final LineFileDocs docs = new LineFileDocs(random);
|
||||||
final Path tempDir = createTempDir(testName);
|
final Path tempDir = createTempDir(testName);
|
||||||
dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
|
dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
|
||||||
if (dir instanceof BaseDirectoryWrapper) {
|
if (dir instanceof BaseDirectoryWrapper) {
|
||||||
|
|
|
@ -552,7 +552,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
try {
|
||||||
final LineFileDocs docs = new LineFileDocs(random(), true);
|
final LineFileDocs docs = new LineFileDocs(random());
|
||||||
int numDocs = 0;
|
int numDocs = 0;
|
||||||
while (System.nanoTime() < endTimeNanos) {
|
while (System.nanoTime() < endTimeNanos) {
|
||||||
final int what = random().nextInt(3);
|
final int what = random().nextInt(3);
|
||||||
|
|
|
@ -33,16 +33,17 @@ import java.util.Random;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
import org.apache.lucene.document.IntPoint;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.SortedDocValuesField;
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
|
||||||
/** Minimal port of benchmark's LineDocSource +
|
/** Minimal port of benchmark's LineDocSource +
|
||||||
* DocMaker, so tests can enum docs from a line file created
|
* DocMaker, so tests can enum docs from a line file created
|
||||||
|
@ -53,22 +54,18 @@ public class LineFileDocs implements Closeable {
|
||||||
private final static int BUFFER_SIZE = 1 << 16; // 64K
|
private final static int BUFFER_SIZE = 1 << 16; // 64K
|
||||||
private final AtomicInteger id = new AtomicInteger();
|
private final AtomicInteger id = new AtomicInteger();
|
||||||
private final String path;
|
private final String path;
|
||||||
private final boolean useDocValues;
|
private final Random random;
|
||||||
|
|
||||||
/** If forever is true, we rewind the file at EOF (repeat
|
/** If forever is true, we rewind the file at EOF (repeat
|
||||||
* the docs over and over) */
|
* the docs over and over) */
|
||||||
public LineFileDocs(Random random, String path, boolean useDocValues) throws IOException {
|
public LineFileDocs(Random random, String path) throws IOException {
|
||||||
this.path = path;
|
this.path = path;
|
||||||
this.useDocValues = useDocValues;
|
this.random = new Random(random.nextLong());
|
||||||
open(random);
|
open(random);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LineFileDocs(Random random) throws IOException {
|
public LineFileDocs(Random random) throws IOException {
|
||||||
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, true);
|
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
|
||||||
}
|
|
||||||
|
|
||||||
public LineFileDocs(Random random, boolean useDocValues) throws IOException {
|
|
||||||
this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, useDocValues);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -165,7 +162,7 @@ public class LineFileDocs implements Closeable {
|
||||||
final Field idNumDV;
|
final Field idNumDV;
|
||||||
final Field date;
|
final Field date;
|
||||||
|
|
||||||
public DocState(boolean useDocValues) {
|
public DocState() {
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
|
|
||||||
title = new StringField("title", "", Field.Store.NO);
|
title = new StringField("title", "", Field.Store.NO);
|
||||||
|
@ -192,15 +189,10 @@ public class LineFileDocs implements Closeable {
|
||||||
date = new StringField("date", "", Field.Store.YES);
|
date = new StringField("date", "", Field.Store.YES);
|
||||||
doc.add(date);
|
doc.add(date);
|
||||||
|
|
||||||
if (useDocValues) {
|
|
||||||
titleDV = new SortedDocValuesField("titleDV", new BytesRef());
|
titleDV = new SortedDocValuesField("titleDV", new BytesRef());
|
||||||
idNumDV = new NumericDocValuesField("docid_intDV", 0);
|
idNumDV = new NumericDocValuesField("docid_intDV", 0);
|
||||||
doc.add(titleDV);
|
doc.add(titleDV);
|
||||||
doc.add(idNumDV);
|
doc.add(idNumDV);
|
||||||
} else {
|
|
||||||
titleDV = null;
|
|
||||||
idNumDV = null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,7 +217,7 @@ public class LineFileDocs implements Closeable {
|
||||||
|
|
||||||
DocState docState = threadDocs.get();
|
DocState docState = threadDocs.get();
|
||||||
if (docState == null) {
|
if (docState == null) {
|
||||||
docState = new DocState(useDocValues);
|
docState = new DocState();
|
||||||
threadDocs.set(docState);
|
threadDocs.set(docState);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -252,6 +244,32 @@ public class LineFileDocs implements Closeable {
|
||||||
if (docState.idNumDV != null) {
|
if (docState.idNumDV != null) {
|
||||||
docState.idNumDV.setLongValue(i);
|
docState.idNumDV.setLongValue(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (random.nextInt(5) == 4) {
|
||||||
|
// Make some sparse fields
|
||||||
|
Document doc = new Document();
|
||||||
|
for(IndexableField field : docState.doc) {
|
||||||
|
doc.add(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random.nextInt(3) == 1) {
|
||||||
|
int x = random.nextInt(4);
|
||||||
|
doc.add(new IntPoint("docLength" + x, line.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random.nextInt(3) == 1) {
|
||||||
|
int x = random.nextInt(4);
|
||||||
|
doc.add(new IntPoint("docTitleLength" + x, title.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random.nextInt(3) == 1) {
|
||||||
|
int x = random.nextInt(4);
|
||||||
|
doc.add(new NumericDocValuesField("docLength" + x, line.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: more random sparse fields here too
|
||||||
|
}
|
||||||
|
|
||||||
return docState.doc;
|
return docState.doc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue