mirror of https://github.com/apache/lucene.git
LUCENE-3345: add another test
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1151346 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
58066af99d
commit
0fbb554886
|
@ -66,6 +66,11 @@ public class PulsingCodec extends Codec {
|
|||
this(1);
|
||||
}
|
||||
|
||||
/**
 * Returns this codec's {@code freqCutoff}. Per the constructor documentation,
 * terms with freq <= freqCutoff are inlined into the terms dict.
 * @lucene.internal
 */
|
||||
public int getFreqCutoff() {
|
||||
return freqCutoff;
|
||||
}
|
||||
|
||||
/** Terms with freq <= freqCutoff are inlined into terms
|
||||
* dict. */
|
||||
public PulsingCodec(int freqCutoff) {
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
|
@ -92,4 +93,65 @@ public class Test10KPulsings extends LuceneTestCase {
|
|||
_TestUtil.checkIndex(dir);
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
 * A variant of test10kPulsed that still uses the pulsing codec, but gives every term a
 * term frequency above the codec's freqCutoff, to force pass-through to the underlying codec.
|
||||
 * <p>
 * It creates a broken index (triggering a different assert than test10kPulsed) when run with:
|
||||
 * <pre>
 * ant test -Dtestcase=Test10KPulsings -Dtestmethod=test10kNotPulsed -Dtests.seed=7065174228571869719:2545882165086224608
 * </pre>
|
||||
 */
|
||||
public void test10kNotPulsed() throws Exception {
|
||||
// We always run this test with the pulsing codec, constructed here with freqCutoff = 1.
|
||||
CodecProvider cp = _TestUtil.alwaysCodec(new PulsingCodec(1));
|
||||
|
||||
File f = _TestUtil.getTempDir("10kpulsings");
|
||||
MockDirectoryWrapper dir = newFSDirectory(f);
|
||||
dir.setCheckIndexOnClose(false); // we do this ourselves explicitly (see the checkIndex call at the end)
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random, dir,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(cp));
|
||||
|
||||
// A single Document/Field pair is reused for every added document; only the field's
// value is replaced on each iteration of the indexing loop below.
Document document = new Document();
|
||||
Field field = newField("field", "", Field.Store.YES, Field.Index.ANALYZED);
|
||||
|
||||
// Randomly pick one of the three index options for the field.
switch(_TestUtil.nextInt(random, 0, 2)) {
|
||||
case 0: field.setIndexOptions(IndexOptions.DOCS_ONLY); break;
|
||||
case 1: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
|
||||
default: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
|
||||
}
|
||||
|
||||
document.add(field);
|
||||
|
||||
// Terms are zero-padded five-digit numbers (e.g. 00042); the verification loop below
// expects the terms enum to return them in increasing-i order.
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
|
||||
|
||||
// Confirm the codec used for "field" really is the pulsing codec, then choose a
// per-document term frequency one above its cutoff.
Codec codec = cp.lookup(cp.getFieldCodec("field"));
|
||||
assertTrue(codec instanceof PulsingCodec);
|
||||
PulsingCodec pulsing = (PulsingCodec) codec;
|
||||
final int freq = pulsing.getFreqCutoff() + 1;
|
||||
|
||||
// Index 10050 documents; document i contains only the term for i, repeated freq times.
for (int i = 0; i < 10050; i++) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j < freq; j++) {
|
||||
sb.append(df.format(i));
|
||||
sb.append(' '); // whitespace separating the repeated occurrences
|
||||
}
|
||||
field.setValue(sb.toString());
|
||||
iw.addDocument(document);
|
||||
}
|
||||
|
||||
// Obtain a reader from the writer before closing the writer.
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
TermsEnum te = MultiFields.getTerms(ir, "field").iterator();
|
||||
DocsEnum de = null;
|
||||
|
||||
// Walk the terms in order and check that each expected term is present and matches
// exactly one document.
for (int i = 0; i < 10050; i++) {
|
||||
String expected = df.format(i);
|
||||
assertEquals(expected, te.next().utf8ToString());
|
||||
// The previous DocsEnum is handed back in -- presumably so it can be reused; confirm against TermsEnum.docs.
de = te.docs(null, de);
|
||||
// First nextDoc() must yield a document ...
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
// ... and the second must report exhaustion, i.e. the term occurs in exactly one doc.
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
|
||||
}
|
||||
ir.close();
|
||||
|
||||
// Explicit index check, since check-on-close was disabled on the directory above.
_TestUtil.checkIndex(dir);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue