mirror of https://github.com/apache/lucene.git
LUCENE-3526: preflex codec returned ghost terms if you used empty field name
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1188232 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55eebdfdbf
commit
8602dd54fd
|
@ -726,6 +726,9 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
final int docFreq = terms.docFreq();
|
||||
if (docFreq <= 0) {
|
||||
throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
|
||||
}
|
||||
status.totFreq += docFreq;
|
||||
sumDocFreq += docFreq;
|
||||
|
||||
|
@ -823,6 +826,9 @@ public class CheckIndex {
|
|||
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
|
||||
}
|
||||
if (hasTotalTermFreq) {
|
||||
if (totalTermFreq2 <= 0) {
|
||||
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
|
||||
}
|
||||
sumTotalTermFreq += totalTermFreq;
|
||||
if (totalTermFreq != totalTermFreq2) {
|
||||
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
|
||||
|
|
|
@ -61,6 +61,7 @@ public final class SegmentTermEnum implements Cloneable {
|
|||
int skipInterval;
|
||||
int newSuffixStart;
|
||||
int maxSkipLevels;
|
||||
private boolean first = true;
|
||||
|
||||
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
@ -123,6 +124,7 @@ public final class SegmentTermEnum implements Cloneable {
|
|||
prevBuffer.reset();
|
||||
//System.out.println(" ste doSeek prev=" + prevBuffer.toTerm() + " this=" + this);
|
||||
termInfo.set(ti);
|
||||
first = p == -1;
|
||||
}
|
||||
|
||||
/** Increments the enumeration to the next element. True if one exists.*/
|
||||
|
@ -162,6 +164,13 @@ public final class SegmentTermEnum implements Cloneable {
|
|||
final int scanTo(Term term) throws IOException {
|
||||
scanBuffer.set(term);
|
||||
int count = 0;
|
||||
if (first) {
|
||||
// Always force initial next() in case term is
|
||||
// Term("", "")
|
||||
next();
|
||||
first = false;
|
||||
count++;
|
||||
}
|
||||
while (scanBuffer.compareTo(termBuffer) > 0 && next()) {
|
||||
count++;
|
||||
}
|
||||
|
|
|
@ -320,13 +320,7 @@ public final class TermInfosReader {
|
|||
ti = enumerator.termInfo;
|
||||
if (tiOrd == null) {
|
||||
if (useCache) {
|
||||
// LUCENE-3183: it's possible, if term is Term("",
|
||||
// ""), for the STE to be incorrectly un-positioned
|
||||
// after scan-to; work around this by not caching in
|
||||
// this case:
|
||||
if (enumerator.position >= 0) {
|
||||
termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
|
||||
}
|
||||
termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
|
||||
}
|
||||
} else {
|
||||
assert sameTermInfo(ti, tiOrd, enumerator);
|
||||
|
|
|
@ -199,6 +199,11 @@ final class TermInfosWriter implements Closeable {
|
|||
if (ch1 != ch2)
|
||||
return ch1-ch2;
|
||||
}
|
||||
if (utf16Result1.length == 0 && lastFieldNumber == -1) {
|
||||
// If there is a field named "" (empty string) with a term text of "" (empty string) then we
|
||||
// will get 0 on this comparison, yet, it's "OK".
|
||||
return -1;
|
||||
}
|
||||
return utf16Result1.length - utf16Result2.length;
|
||||
}
|
||||
|
||||
|
|
|
@ -869,6 +869,24 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testEmptyFieldNameTerms() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||
Document doc = new Document();
|
||||
doc.add(newField("", "a b c", TextField.TYPE_UNSTORED));
|
||||
writer.addDocument(doc);
|
||||
writer.close();
|
||||
IndexReader reader = IndexReader.open(dir, true);
|
||||
IndexReader subreader = getOnlySegmentReader(reader);
|
||||
TermsEnum te = subreader.fields().terms("").iterator();
|
||||
assertEquals(new BytesRef("a"), te.next());
|
||||
assertEquals(new BytesRef("b"), te.next());
|
||||
assertEquals(new BytesRef("c"), te.next());
|
||||
assertNull(te.next());
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -30,10 +30,10 @@ public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
|
|||
/** Test fieldcache rewrite against filter rewrite */
|
||||
@Override
|
||||
protected void assertSame(String regexp) throws IOException {
|
||||
RegexpQuery fieldCache = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
|
||||
RegexpQuery fieldCache = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
|
||||
fieldCache.setRewriteMethod(new FieldCacheRewriteMethod());
|
||||
|
||||
RegexpQuery filter = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
|
||||
RegexpQuery filter = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
|
||||
filter.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
|
||||
|
||||
TopDocs fieldCacheDocs = searcher1.search(fieldCache, 25);
|
||||
|
|
|
@ -55,16 +55,18 @@ public class TestRegexpRandom2 extends LuceneTestCase {
|
|||
protected IndexSearcher searcher2;
|
||||
private IndexReader reader;
|
||||
private Directory dir;
|
||||
|
||||
protected String fieldName;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
dir = newDirectory();
|
||||
fieldName = random.nextBoolean() ? "field" : ""; // sometimes use an empty string as field name
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false))
|
||||
.setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)));
|
||||
Document doc = new Document();
|
||||
Field field = newField("field", "", StringField.TYPE_UNSTORED);
|
||||
Field field = newField(fieldName, "", StringField.TYPE_UNSTORED);
|
||||
doc.add(field);
|
||||
List<String> terms = new ArrayList<String>();
|
||||
int num = atLeast(200);
|
||||
|
@ -141,7 +143,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
|
|||
public void testRegexps() throws Exception {
|
||||
// we generate aweful regexps: good for testing.
|
||||
// but for preflex codec, the test can be very slow, so use less iterations.
|
||||
int num = CodecProvider.getDefault().getFieldCodec("field").equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : atLeast(1000);
|
||||
int num = CodecProvider.getDefault().getFieldCodec(fieldName).equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : atLeast(1000);
|
||||
for (int i = 0; i < num; i++) {
|
||||
String reg = AutomatonTestUtil.randomRegexp(random);
|
||||
if (VERBOSE) {
|
||||
|
@ -155,8 +157,8 @@ public class TestRegexpRandom2 extends LuceneTestCase {
|
|||
* simple regexpquery implementation.
|
||||
*/
|
||||
protected void assertSame(String regexp) throws IOException {
|
||||
RegexpQuery smart = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
|
||||
DumbRegexpQuery dumb = new DumbRegexpQuery(new Term("field", regexp), RegExp.NONE);
|
||||
RegexpQuery smart = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
|
||||
DumbRegexpQuery dumb = new DumbRegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
|
||||
|
||||
TopDocs smartDocs = searcher1.search(smart, 25);
|
||||
TopDocs dumbDocs = searcher2.search(dumb, 25);
|
||||
|
|
Loading…
Reference in New Issue