LUCENE-4127: don't allow 0 posInc for first token of indexed field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1348606 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-06-10 14:42:34 +00:00
parent 9a6fe90dd2
commit cbbf7bfc6a
6 changed files with 33 additions and 35 deletions


@@ -393,6 +393,11 @@ Changes in Runtime Behavior
   any of the calls to the Analyzer throw an IOException. QueryParserBase.analyzeRangePart()
   will throw a RuntimeException if an IOException is thrown by the Analyzer.
 
+* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
+  the first token of an indexed field has 0 positionIncrement
+  (previously it silently corrected it to 1, possibly masking bugs).
+  (Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
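
For illustration (not part of this commit; the class is made up): a TokenStream whose first token reports a position increment of 0, which IndexWriter now rejects up front instead of silently correcting to 1:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Made-up example: the first (and only) token claims posIncr 0, so
// indexing a field with this stream now throws IllegalArgumentException.
final class ZeroFirstIncrementStream extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  private boolean done;

  @Override
  public boolean incrementToken() {
    if (done) {
      return false;
    }
    done = true;
    clearAttributes();
    termAtt.append("oops");
    posIncrAtt.setPositionIncrement(0); // 0 on the very first token: rejected
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    done = false;
  }
}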


@@ -18,9 +18,11 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Holds state for inverting all occurrences of a single
@@ -87,6 +89,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
 
       // reset the TokenStream to the first token
       stream.reset();
 
+      boolean success2 = false;
+
       try {
         boolean hasMoreTokens = stream.incrementToken();
@@ -109,8 +113,16 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
         if (!hasMoreTokens) break;
 
         final int posIncr = posIncrAttribute.getPositionIncrement();
+        if (posIncr < 0) {
+          throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
+        }
+        if (fieldState.position == 0 && posIncr == 0) {
+          throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
+        }
         int position = fieldState.position + posIncr;
         if (position > 0) {
+          // NOTE: confusing: this "mirrors" the
+          // position++ we do below
           position--;
         } else if (position < 0) {
           throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
@@ -147,8 +159,13 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
 
         stream.end();
         fieldState.offset += offsetAttribute.endOffset();
+        success2 = true;
       } finally {
-        stream.close();
+        if (!success2) {
+          IOUtils.closeWhileHandlingException(stream);
+        } else {
+          stream.close();
+        }
       }
 
       fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
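
The finally change above is about exception safety rather than the position check itself: if the loop already failed, a secondary exception from stream.close() would mask the original, more useful one. The same idiom in isolation (names are made up):

import java.io.Closeable;
import java.io.IOException;

import org.apache.lucene.util.IOUtils;

final class CloseIdiom {
  // On the failure path, swallow any secondary exception from close() so
  // the original exception propagates; on the success path a close()
  // failure is real news and must propagate itself.
  static void useAndClose(Closeable resource) throws IOException {
    boolean success = false;
    try {
      // ... consume the resource ...
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(resource);
      } else {
        resource.close();
      }
    }
  }
}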


@@ -883,39 +883,16 @@ public class TestIndexWriter extends LuceneTestCase {
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
     Document doc = new Document();
     doc.add(new TextField("field", tokens));
-    w.addDocument(doc);
-    w.commit();
-
-    IndexReader r = DirectoryReader.open(dir);
-    IndexSearcher s = new IndexSearcher(r);
-    PhraseQuery pq = new PhraseQuery();
-    pq.add(new Term("field", "a"));
-    pq.add(new Term("field", "b"));
-    pq.add(new Term("field", "c"));
-    ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    Query q = new SpanTermQuery(new Term("field", "a"));
-    hits = s.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(),
-                                                                MultiFields.getLiveDocs(s.getIndexReader()),
-                                                                "field",
-                                                                new BytesRef("a"),
-                                                                false);
-    assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    assertEquals(1, tps.freq());
-    assertEquals(0, tps.nextPosition());
+    try {
+      w.addDocument(doc);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iea) {
+      // expected
+    }
     w.close();
-    r.close();
     dir.close();
   }
 
   // LUCENE-1219
   public void testBinaryFieldOffsetLength() throws IOException {
     Directory dir = newDirectory();


@@ -61,7 +61,7 @@ public class TestPositionIncrement extends LuceneTestCase {
         return new TokenStreamComponents(new Tokenizer(reader) {
           // TODO: use CannedTokenStream
           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
-          private final int[] INCREMENTS = {0, 2, 1, 0, 1};
+          private final int[] INCREMENTS = {1, 2, 1, 0, 1};
           private int i = 0;
 
           PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -222,8 +222,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     // "a" occurs 4 times
     assertEquals(4, tp.freq());
-    int expected = 0;
-    assertEquals(expected, tp.nextPosition());
+    assertEquals(0, tp.nextPosition());
     assertEquals(1, tp.nextPosition());
     assertEquals(3, tp.nextPosition());
     assertEquals(6, tp.nextPosition());


@@ -546,7 +546,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     // we write here (e.g., to write parent+2), and need to do a workaround
     // in the reader (which knows that anyway only category 0 has a parent
     // -1).
-    parentStream.set(parent + 1);
+    parentStream.set(Math.max(parent+1, 1));
     Document d = new Document();
     d.add(parentStreamField);
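
Background for the Math.max (a sketch; the helper names are made up and this is not the actual taxonomy code): the parent ordinal rides on the position of the category's single indexed token, so the writer emits a position increment of parent+1. The root category (ordinal 0) has parent -1, which would mean a first increment of 0 -- exactly what this commit forbids -- so the writer clamps it to 1 and the reader compensates by special-casing ordinal 0, as the comment above describes:

final class ParentOrdinalCodec {
  static int encodeAsPositionIncrement(int parent) {
    return Math.max(parent + 1, 1); // first position increment must be > 0
  }

  static int decodeParent(int ordinal, int position) {
    // Only ordinal 0 (the root) has parent -1; for every other category
    // the token's position is exactly parent + 1.
    return ordinal == 0 ? -1 : position - 1;
  }
}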


@@ -69,7 +69,7 @@ final class MockPayloadFilter extends TokenFilter {
     if (input.incrementToken()) {
       payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
       int posIncr;
-      if (i % 2 == 1) {
+      if (pos == 0 || i % 2 == 1) {
         posIncr = 1;
       } else {
         posIncr = 0;