From 0fbb5548864f60336a85f65654ed110466316f1c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 27 Jul 2011 06:10:40 +0000 Subject: [PATCH] LUCENE-3345: add another test git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1151346 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/codecs/pulsing/PulsingCodec.java | 5 ++ .../index/codecs/pulsing/Test10KPulsings.java | 62 +++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java index 2bb6d97dfe3..3c05f495914 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java @@ -66,6 +66,11 @@ public class PulsingCodec extends Codec { this(1); } + /** @lucene.internal */ + public int getFreqCutoff() { + return freqCutoff; + } + /** Terms with freq <= freqCutoff are inlined into terms * dict. */ public PulsingCodec(int freqCutoff) { diff --git a/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java b/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java index 447bcdb42e2..c2015663a55 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java +++ b/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.MockDirectoryWrapper; @@ -92,4 +93,65 @@ public class Test10KPulsings extends LuceneTestCase { _TestUtil.checkIndex(dir); dir.close(); } + + /** a variant, that uses pulsing, but uses a high TF to force pass thru to the underlying codec + * creates a broken index (triggers a different assert) than test10kPulsed, with this: + * ant test -Dtestcase=Test10KPulsings -Dtestmethod=test10kNotPulsed -Dtests.seed=7065174228571869719:2545882165086224608!!!! + */ + public void test10kNotPulsed() throws Exception { + // we always run this test with pulsing codec. + CodecProvider cp = _TestUtil.alwaysCodec(new PulsingCodec(1)); + + File f = _TestUtil.getTempDir("10kpulsings"); + MockDirectoryWrapper dir = newFSDirectory(f); + dir.setCheckIndexOnClose(false); // we do this ourselves explicitly + RandomIndexWriter iw = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(cp)); + + Document document = new Document(); + Field field = newField("field", "", Field.Store.YES, Field.Index.ANALYZED); + + switch(_TestUtil.nextInt(random, 0, 2)) { + case 0: field.setIndexOptions(IndexOptions.DOCS_ONLY); break; + case 1: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break; + default: field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break; + } + + document.add(field); + + NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH)); + + Codec codec = cp.lookup(cp.getFieldCodec("field")); + assertTrue(codec instanceof PulsingCodec); + PulsingCodec pulsing = (PulsingCodec) codec; + final int freq = pulsing.getFreqCutoff() + 1; + + for (int i = 0; i < 10050; i++) { + StringBuilder sb = new StringBuilder(); + for (int j = 0; j < freq; j++) { + sb.append(df.format(i)); + sb.append(' '); // whitespace + } + field.setValue(sb.toString()); + iw.addDocument(document); + } + + IndexReader ir = iw.getReader(); + iw.close(); + + TermsEnum te = MultiFields.getTerms(ir, "field").iterator(); + DocsEnum de = null; + + for (int i = 0; i < 10050; i++) { + String expected = df.format(i); + assertEquals(expected, te.next().utf8ToString()); + de = te.docs(null, de); + assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc()); + } + ir.close(); + + _TestUtil.checkIndex(dir); + dir.close(); + } }