LUCENE-3345: add another test

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1151346 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-07-27 06:10:40 +00:00
parent 58066af99d
commit 0fbb554886
2 changed files with 67 additions and 0 deletions

View File

@ -66,6 +66,11 @@ public class PulsingCodec extends Codec {
this(1);
}
/** @lucene.internal */
public int getFreqCutoff() {
return freqCutoff;
}
/** Terms with freq <= freqCutoff are inlined into terms
* dict. */
public PulsingCodec(int freqCutoff) {

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.MockDirectoryWrapper;
@ -92,4 +93,65 @@ public class Test10KPulsings extends LuceneTestCase {
_TestUtil.checkIndex(dir);
dir.close();
}
/** a variant, that uses pulsing, but uses a high TF to force pass thru to the underlying codec
* creates a broken index (triggers a different assert) than test10kPulsed, with this:
* ant test -Dtestcase=Test10KPulsings -Dtestmethod=test10kNotPulsed -Dtests.seed=7065174228571869719:2545882165086224608!!!!
*/
public void test10kNotPulsed() throws Exception {
// we always run this test with pulsing codec.
CodecProvider cp = _TestUtil.alwaysCodec(new PulsingCodec(1));
File f = _TestUtil.getTempDir("10kpulsings");
MockDirectoryWrapper dir = newFSDirectory(f);
dir.setCheckIndexOnClose(false); // we do this ourselves explicitly
RandomIndexWriter iw = new RandomIndexWriter(random, dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(cp));
Document document = new Document();
Field field = newField("field", "", Field.Store.YES, Field.Index.ANALYZED);
switch(_TestUtil.nextInt(random, 0, 2)) {
case 0: field.setIndexOptions(IndexOptions.DOCS_ONLY); break;
case 1: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
default: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
}
document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
Codec codec = cp.lookup(cp.getFieldCodec("field"));
assertTrue(codec instanceof PulsingCodec);
PulsingCodec pulsing = (PulsingCodec) codec;
final int freq = pulsing.getFreqCutoff() + 1;
for (int i = 0; i < 10050; i++) {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < freq; j++) {
sb.append(df.format(i));
sb.append(' '); // whitespace
}
field.setValue(sb.toString());
iw.addDocument(document);
}
IndexReader ir = iw.getReader();
iw.close();
TermsEnum te = MultiFields.getTerms(ir, "field").iterator();
DocsEnum de = null;
for (int i = 0; i < 10050; i++) {
String expected = df.format(i);
assertEquals(expected, te.next().utf8ToString());
de = te.docs(null, de);
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
}
ir.close();
_TestUtil.checkIndex(dir);
dir.close();
}
}