mirror of https://github.com/apache/lucene.git
LUCENE-3345: add another test
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1151346 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
58066af99d
commit
0fbb554886
|
@ -66,6 +66,11 @@ public class PulsingCodec extends Codec {
|
||||||
this(1);
|
this(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @lucene.internal */
|
||||||
|
public int getFreqCutoff() {
|
||||||
|
return freqCutoff;
|
||||||
|
}
|
||||||
|
|
||||||
/** Terms with freq <= freqCutoff are inlined into terms
|
/** Terms with freq <= freqCutoff are inlined into terms
|
||||||
* dict. */
|
* dict. */
|
||||||
public PulsingCodec(int freqCutoff) {
|
public PulsingCodec(int freqCutoff) {
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.index.codecs.Codec;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
|
@ -92,4 +93,65 @@ public class Test10KPulsings extends LuceneTestCase {
|
||||||
_TestUtil.checkIndex(dir);
|
_TestUtil.checkIndex(dir);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** a variant, that uses pulsing, but uses a high TF to force pass thru to the underlying codec
|
||||||
|
* creates a broken index (triggers a different assert) than test10kPulsed, with this:
|
||||||
|
* ant test -Dtestcase=Test10KPulsings -Dtestmethod=test10kNotPulsed -Dtests.seed=7065174228571869719:2545882165086224608!!!!
|
||||||
|
*/
|
||||||
|
public void test10kNotPulsed() throws Exception {
|
||||||
|
// we always run this test with pulsing codec.
|
||||||
|
CodecProvider cp = _TestUtil.alwaysCodec(new PulsingCodec(1));
|
||||||
|
|
||||||
|
File f = _TestUtil.getTempDir("10kpulsings");
|
||||||
|
MockDirectoryWrapper dir = newFSDirectory(f);
|
||||||
|
dir.setCheckIndexOnClose(false); // we do this ourselves explicitly
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random, dir,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(cp));
|
||||||
|
|
||||||
|
Document document = new Document();
|
||||||
|
Field field = newField("field", "", Field.Store.YES, Field.Index.ANALYZED);
|
||||||
|
|
||||||
|
switch(_TestUtil.nextInt(random, 0, 2)) {
|
||||||
|
case 0: field.setIndexOptions(IndexOptions.DOCS_ONLY); break;
|
||||||
|
case 1: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
|
||||||
|
default: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
|
||||||
|
}
|
||||||
|
|
||||||
|
document.add(field);
|
||||||
|
|
||||||
|
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
|
||||||
|
|
||||||
|
Codec codec = cp.lookup(cp.getFieldCodec("field"));
|
||||||
|
assertTrue(codec instanceof PulsingCodec);
|
||||||
|
PulsingCodec pulsing = (PulsingCodec) codec;
|
||||||
|
final int freq = pulsing.getFreqCutoff() + 1;
|
||||||
|
|
||||||
|
for (int i = 0; i < 10050; i++) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int j = 0; j < freq; j++) {
|
||||||
|
sb.append(df.format(i));
|
||||||
|
sb.append(' '); // whitespace
|
||||||
|
}
|
||||||
|
field.setValue(sb.toString());
|
||||||
|
iw.addDocument(document);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
TermsEnum te = MultiFields.getTerms(ir, "field").iterator();
|
||||||
|
DocsEnum de = null;
|
||||||
|
|
||||||
|
for (int i = 0; i < 10050; i++) {
|
||||||
|
String expected = df.format(i);
|
||||||
|
assertEquals(expected, te.next().utf8ToString());
|
||||||
|
de = te.docs(null, de);
|
||||||
|
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
|
||||||
|
}
|
||||||
|
ir.close();
|
||||||
|
|
||||||
|
_TestUtil.checkIndex(dir);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue