LUCENE-3526: preflex codec returned ghost terms if you used empty field name

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1188232 13f79535-47bb-0310-9956-ffa450edef68

Author: Robert Muir
Date:   2011-10-24 16:59:59 +00:00
Commit: 8602dd54fd
Parent: 55eebdfdbf
7 changed files with 48 additions and 14 deletions
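
For context, a minimal sketch of the scenario this commit fixes (it mirrors the new testEmptyFieldNameTerms added to TestIndexWriter further down; newDirectory(), newField() and the other helpers come from Lucene's test framework). A document is indexed under the empty field name and that field's terms are then enumerated; before the fix, seeking the preflex (3.x-format) term dictionary to Term("", "") could leave the enum un-positioned, so the enumeration surfaced stale "ghost" terms instead of exactly a, b, c.

// Reproduction sketch, not part of the commit: index one document under the
// empty field name, then enumerate that field's terms.
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir,
    newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document doc = new Document();
doc.add(newField("", "a b c", TextField.TYPE_UNSTORED)); // field name is ""
writer.addDocument(doc);
writer.close();

IndexReader reader = IndexReader.open(dir, true);
TermsEnum te = getOnlySegmentReader(reader).fields().terms("").iterator();
// With the fix this prints exactly a, b, c; before it, the preflex codec
// could report terms that were never indexed under "".
for (BytesRef term = te.next(); term != null; term = te.next()) {
  System.out.println(term.utf8ToString());
}
reader.close();
dir.close();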

@@ -726,6 +726,9 @@ public class CheckIndex {
       }
       final int docFreq = terms.docFreq();
+      if (docFreq <= 0) {
+        throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
+      }
       status.totFreq += docFreq;
       sumDocFreq += docFreq;
@@ -823,6 +826,9 @@ public class CheckIndex {
         throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
       }
       if (hasTotalTermFreq) {
+        if (totalTermFreq2 <= 0) {
+          throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
+        }
         sumTotalTermFreq += totalTermFreq;
         if (totalTermFreq != totalTermFreq2) {
           throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);

@@ -61,6 +61,7 @@ public final class SegmentTermEnum implements Cloneable {
   int skipInterval;
   int newSuffixStart;
   int maxSkipLevels;
+  private boolean first = true;
   SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
           throws CorruptIndexException, IOException {
@@ -123,6 +124,7 @@ public final class SegmentTermEnum implements Cloneable {
     prevBuffer.reset();
     //System.out.println(" ste doSeek prev=" + prevBuffer.toTerm() + " this=" + this);
     termInfo.set(ti);
+    first = p == -1;
   }
   /** Increments the enumeration to the next element. True if one exists.*/
@@ -162,6 +164,13 @@ public final class SegmentTermEnum implements Cloneable {
   final int scanTo(Term term) throws IOException {
     scanBuffer.set(term);
     int count = 0;
+    if (first) {
+      // Always force initial next() in case term is
+      // Term("", "")
+      next();
+      first = false;
+      count++;
+    }
     while (scanBuffer.compareTo(termBuffer) > 0 && next()) {
       count++;
     }
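
Why the forced next() above is needed: scanTo() only advances while the target compares greater than the current term buffer, and after a seek to the synthetic start position (p == -1 in doSeek above) the enum holds an empty buffer and sits before the first real term. A target of Term("", "") therefore never satisfies the loop condition, next() is never called, and the enum stays un-positioned; callers then see the "ghost terms" from the commit message. A self-contained illustration of the same pattern, with hypothetical types rather than the real SegmentTermEnum:

import java.util.List;

// Simplified model of the scanTo() pitfall: a cursor that starts *before*
// the first entry and a scan loop that only advances while target > current.
// Without the forced first step, an empty-string target never triggers
// next() and the cursor stays at -1.
final class ScanCursor {
  private final List<String> sortedTerms;
  private int position = -1;   // -1 == un-positioned, before the first term
  private String current = ""; // empty initial buffer
  private boolean first = true;

  ScanCursor(List<String> sortedTerms) {
    this.sortedTerms = sortedTerms;
  }

  private boolean next() {
    if (position + 1 >= sortedTerms.size()) {
      return false;
    }
    current = sortedTerms.get(++position);
    return true;
  }

  int scanTo(String target) {
    int count = 0;
    if (first) {
      // Mirrors the fix above: always take the initial step, even when the
      // target ("" in the bug) already compares <= the empty buffer.
      next();
      first = false;
      count++;
    }
    while (target.compareTo(current) > 0 && next()) {
      count++;
    }
    return count;
  }
}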

@@ -320,13 +320,7 @@ public final class TermInfosReader {
       ti = enumerator.termInfo;
       if (tiOrd == null) {
         if (useCache) {
-          // LUCENE-3183: it's possible, if term is Term("",
-          // ""), for the STE to be incorrectly un-positioned
-          // after scan-to; work around this by not caching in
-          // this case:
-          if (enumerator.position >= 0) {
-            termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
-          }
+          termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
         }
       } else {
         assert sameTermInfo(ti, tiOrd, enumerator);
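
The guard deleted above was the LUCENE-3183 band-aid for the same symptom: a lookup of Term("", "") could finish with the SegmentTermEnum still un-positioned (position == -1), and caching that ord would have poisoned the terms cache. With scanTo() now forcing the initial next(), the enum is always positioned after a lookup, so the put can be unconditional. A toy model of the hazard the old guard protected against (hypothetical types, not TermInfosReader's real cache):

import java.util.HashMap;
import java.util.Map;

// Toy ord cache: if a lookup can end with the cursor still at -1, caching
// that ord hands back a bogus position on the next lookup.  The pre-fix
// guard skipped the put in that case; post-fix the ord is always >= 0 and
// the guard becomes redundant.
final class TermOrdCache {
  private final Map<String, Integer> cache = new HashMap<String, Integer>();

  void put(String term, int ord) {
    if (ord >= 0) { // pre-fix guard; unnecessary once the ord is always valid
      cache.put(term, ord);
    }
  }

  Integer get(String term) {
    return cache.get(term);
  }
}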

@@ -199,6 +199,11 @@ final class TermInfosWriter implements Closeable {
       if (ch1 != ch2)
         return ch1-ch2;
     }
+    if (utf16Result1.length == 0 && lastFieldNumber == -1) {
+      // If there is a field named "" (empty string) with a term text of "" (empty string) then we
+      // will get 0 on this comparison, yet, it's "OK".
+      return -1;
+    }
     return utf16Result1.length - utf16Result2.length;
   }
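
The special case added above reflects how the 3.x term dictionary orders entries: terms sort by field first, then by text, and the writer verifies that every incoming term sorts strictly after the last term it wrote. Before anything has been written, lastFieldNumber is -1 and the last-term buffer is empty, so a legitimate first term of Term("", "") would compare equal to that sentinel and be rejected as out of order; returning -1 makes the sentinel sort strictly before everything. A simplified model of that comparison (hypothetical signature, without the UTF-16 buffer handling the real writer does):

// Simplified "last term vs. current term" ordering check.  lastFieldNumber
// == -1 marks the state before any term has been written; in that state the
// empty last-term buffer must sort strictly before every real term,
// including Term("", "").
static int compareLastToCurrent(int lastFieldNumber,
                                String lastField, String lastText,
                                String field, String text) {
  int cmp = lastField.compareTo(field);
  if (cmp != 0) {
    return cmp;
  }
  if (lastText.length() == 0 && lastFieldNumber == -1) {
    // Nothing written yet: accept even Term("", "") as "in order".
    return -1;
  }
  return lastText.compareTo(text);
}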

@@ -869,6 +869,24 @@ public class TestIndexWriter extends LuceneTestCase {
     writer.close();
     dir.close();
   }
+  public void testEmptyFieldNameTerms() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    Document doc = new Document();
+    doc.add(newField("", "a b c", TextField.TYPE_UNSTORED));
+    writer.addDocument(doc);
+    writer.close();
+    IndexReader reader = IndexReader.open(dir, true);
+    IndexReader subreader = getOnlySegmentReader(reader);
+    TermsEnum te = subreader.fields().terms("").iterator();
+    assertEquals(new BytesRef("a"), te.next());
+    assertEquals(new BytesRef("b"), te.next());
+    assertEquals(new BytesRef("c"), te.next());
+    assertNull(te.next());
+    reader.close();
+    dir.close();
+  }

@@ -30,10 +30,10 @@ public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
   /** Test fieldcache rewrite against filter rewrite */
   @Override
   protected void assertSame(String regexp) throws IOException {
-    RegexpQuery fieldCache = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
+    RegexpQuery fieldCache = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
     fieldCache.setRewriteMethod(new FieldCacheRewriteMethod());
-    RegexpQuery filter = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
+    RegexpQuery filter = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
     filter.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
     TopDocs fieldCacheDocs = searcher1.search(fieldCache, 25);

@@ -55,16 +55,18 @@ public class TestRegexpRandom2 extends LuceneTestCase {
   protected IndexSearcher searcher2;
   private IndexReader reader;
   private Directory dir;
+  protected String fieldName;
   @Override
   public void setUp() throws Exception {
     super.setUp();
     dir = newDirectory();
+    fieldName = random.nextBoolean() ? "field" : ""; // sometimes use an empty string as field name
     RandomIndexWriter writer = new RandomIndexWriter(random, dir,
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false))
         .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)));
     Document doc = new Document();
-    Field field = newField("field", "", StringField.TYPE_UNSTORED);
+    Field field = newField(fieldName, "", StringField.TYPE_UNSTORED);
     doc.add(field);
     List<String> terms = new ArrayList<String>();
     int num = atLeast(200);
@@ -141,7 +143,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
   public void testRegexps() throws Exception {
     // we generate aweful regexps: good for testing.
     // but for preflex codec, the test can be very slow, so use less iterations.
-    int num = CodecProvider.getDefault().getFieldCodec("field").equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : atLeast(1000);
+    int num = CodecProvider.getDefault().getFieldCodec(fieldName).equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : atLeast(1000);
     for (int i = 0; i < num; i++) {
       String reg = AutomatonTestUtil.randomRegexp(random);
       if (VERBOSE) {
@@ -155,8 +157,8 @@ public class TestRegexpRandom2 extends LuceneTestCase {
    * simple regexpquery implementation.
    */
   protected void assertSame(String regexp) throws IOException {
-    RegexpQuery smart = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
-    DumbRegexpQuery dumb = new DumbRegexpQuery(new Term("field", regexp), RegExp.NONE);
+    RegexpQuery smart = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
+    DumbRegexpQuery dumb = new DumbRegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
     TopDocs smartDocs = searcher1.search(smart, 25);
     TopDocs dumbDocs = searcher2.search(dumb, 25);