mirror of https://github.com/apache/lucene.git
LUCENE-4127: don't allow 0 posInc for first token of indexed field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1348606 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9a6fe90dd2
commit
cbbf7bfc6a
|
@ -393,6 +393,11 @@ Changes in Runtime Behavior
|
|||
any of the calls to the Analyzer throw an IOException. QueryParserBase.analyzeRangePart()
|
||||
will throw a RuntimeException if an IOException is thrown by the Analyzer.
|
||||
|
||||
* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
|
||||
the first token of an indexed field has 0 positionIncrement
|
||||
(previously it silently corrected it to 1, possibly masking bugs).
|
||||
(Robert Muir, Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
|
||||
|
|
|
@ -18,9 +18,11 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Holds state for inverting all occurrences of a single
|
||||
|
@ -87,6 +89,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
// reset the TokenStream to the first token
|
||||
stream.reset();
|
||||
|
||||
boolean success2 = false;
|
||||
|
||||
try {
|
||||
boolean hasMoreTokens = stream.incrementToken();
|
||||
|
||||
|
@ -109,8 +113,16 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
if (!hasMoreTokens) break;
|
||||
|
||||
final int posIncr = posIncrAttribute.getPositionIncrement();
|
||||
if (posIncr < 0) {
|
||||
throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
|
||||
}
|
||||
if (fieldState.position == 0 && posIncr == 0) {
|
||||
throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
|
||||
}
|
||||
int position = fieldState.position + posIncr;
|
||||
if (position > 0) {
|
||||
// NOTE: confusing: this "mirrors" the
|
||||
// position++ we do below
|
||||
position--;
|
||||
} else if (position < 0) {
|
||||
throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
|
||||
|
@ -147,8 +159,13 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
stream.end();
|
||||
|
||||
fieldState.offset += offsetAttribute.endOffset();
|
||||
success2 = true;
|
||||
} finally {
|
||||
stream.close();
|
||||
if (!success2) {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
} else {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
|
||||
|
|
|
@ -883,39 +883,16 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("field", tokens));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
IndexReader r = DirectoryReader.open(dir);
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.add(new Term("field", "a"));
|
||||
pq.add(new Term("field", "b"));
|
||||
pq.add(new Term("field", "c"));
|
||||
ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
Query q = new SpanTermQuery(new Term("field", "a"));
|
||||
hits = s.search(q, null, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(),
|
||||
MultiFields.getLiveDocs(s.getIndexReader()),
|
||||
"field",
|
||||
new BytesRef("a"),
|
||||
false);
|
||||
|
||||
assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertEquals(1, tps.freq());
|
||||
assertEquals(0, tps.nextPosition());
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
fail("did not hit expected exception");
|
||||
} catch (IllegalArgumentException iea) {
|
||||
// expected
|
||||
}
|
||||
w.close();
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// LUCENE-1219
|
||||
public void testBinaryFieldOffsetLength() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
|
|
|
@ -61,7 +61,7 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
return new TokenStreamComponents(new Tokenizer(reader) {
|
||||
// TODO: use CannedTokenStream
|
||||
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
|
||||
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
|
||||
private final int[] INCREMENTS = {1, 2, 1, 0, 1};
|
||||
private int i = 0;
|
||||
|
||||
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -222,8 +222,7 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
// "a" occurs 4 times
|
||||
assertEquals(4, tp.freq());
|
||||
int expected = 0;
|
||||
assertEquals(expected, tp.nextPosition());
|
||||
assertEquals(0, tp.nextPosition());
|
||||
assertEquals(1, tp.nextPosition());
|
||||
assertEquals(3, tp.nextPosition());
|
||||
assertEquals(6, tp.nextPosition());
|
||||
|
|
|
@ -546,7 +546,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// we write here (e.g., to write parent+2), and need to do a workaround
|
||||
// in the reader (which knows that anyway only category 0 has a parent
|
||||
// -1).
|
||||
parentStream.set(parent + 1);
|
||||
parentStream.set(Math.max(parent+1, 1));
|
||||
Document d = new Document();
|
||||
d.add(parentStreamField);
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ final class MockPayloadFilter extends TokenFilter {
|
|||
if (input.incrementToken()) {
|
||||
payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
|
||||
int posIncr;
|
||||
if (i % 2 == 1) {
|
||||
if (pos == 0 || i % 2 == 1) {
|
||||
posIncr = 1;
|
||||
} else {
|
||||
posIncr = 0;
|
||||
|
|
Loading…
Reference in New Issue