mirror of https://github.com/apache/lucene.git
LUCENE-4127: don't allow 0 posInc for first token of indexed field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1348606 13f79535-47bb-0310-9956-ffa450edef68
commit cbbf7bfc6a (parent 9a6fe90dd2)
@@ -393,6 +393,11 @@ Changes in Runtime Behavior
   any of the calls to the Analyzer throw an IOException. QueryParseBase.analyzeRangePart()
   will throw a RuntimeException if an IOException is thrown by the Analyzer.
 
+* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
+  the first token of an indexed field has 0 positionIncrement
+  (previously it silently corrected it to 1, possibly masking bugs).
+  (Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
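A minimal sketch of what this entry means in practice (hypothetical test
code; it assumes the 4.0-era CannedTokenStream test helper, which the test
changes below mention, and the TextField(String, TokenStream) constructor
used elsewhere in this commit; writer is a placeholder IndexWriter):

    Token first = new Token("a", 0, 1);
    first.setPositionIncrement(0);   // 0 on the FIRST token: now illegal
    TokenStream ts = new CannedTokenStream(first, new Token("b", 2, 3));

    Document doc = new Document();
    doc.add(new TextField("field", ts));
    try {
      writer.addDocument(doc);       // now throws IllegalArgumentException
    } catch (IllegalArgumentException expected) {
      // before this change the increment was silently bumped to 1
    }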
@@ -18,9 +18,11 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.IOUtils;
+
 /**
  * Holds state for inverting all occurrences of a single
@@ -87,6 +89,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
       // reset the TokenStream to the first token
       stream.reset();
 
+      boolean success2 = false;
+
       try {
         boolean hasMoreTokens = stream.incrementToken();
 
@@ -109,8 +113,16 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
           if (!hasMoreTokens) break;
 
           final int posIncr = posIncrAttribute.getPositionIncrement();
+          if (posIncr < 0) {
+            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
+          }
+          if (fieldState.position == 0 && posIncr == 0) {
+            throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
+          }
           int position = fieldState.position + posIncr;
           if (position > 0) {
+            // NOTE: confusing: this "mirrors" the
+            // position++ we do below
             position--;
           } else if (position < 0) {
             throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
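For context, the net effect of this loop's bookkeeping (the decrement here
and the position++ the comment refers to) is that each token lands at the
running sum of its increments minus one; a self-contained worked example,
with increment values mirroring the TestPositionIncrement change below:

    public class PositionMath {
      public static void main(String[] args) {
        int[] increments = {1, 2, 1, 0, 1};
        int position = 0;
        for (int incr : increments) {
          position += incr;
          System.out.println("token indexed at position " + (position - 1));
          // prints 0, 2, 3, 3, 4
        }
      }
    }

A first increment of 0 would put the first token at position -1; the old
code silently clamped that to 0 (as if the increment were 1), and the two
new checks reject it instead of masking the bug.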
@@ -147,8 +159,13 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
         stream.end();
 
         fieldState.offset += offsetAttribute.endOffset();
+        success2 = true;
       } finally {
-        stream.close();
+        if (!success2) {
+          IOUtils.closeWhileHandlingException(stream);
+        } else {
+          stream.close();
+        }
       }
 
       fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
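The success2 flag implements Lucene's usual close-on-failure idiom; a
standalone sketch of the same shape (doWork is a placeholder):

    boolean success = false;
    try {
      doWork(stream);
      success = true;
    } finally {
      if (!success) {
        // quiet close: don't let a secondary exception from close()
        // mask the exception already propagating from the try block
        IOUtils.closeWhileHandlingException(stream);
      } else {
        stream.close();  // on success, let close() failures propagate
      }
    }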
@@ -883,39 +883,16 @@ public class TestIndexWriter extends LuceneTestCase {
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
     Document doc = new Document();
     doc.add(new TextField("field", tokens));
-    w.addDocument(doc);
-    w.commit();
-
-    IndexReader r = DirectoryReader.open(dir);
-    IndexSearcher s = new IndexSearcher(r);
-    PhraseQuery pq = new PhraseQuery();
-    pq.add(new Term("field", "a"));
-    pq.add(new Term("field", "b"));
-    pq.add(new Term("field", "c"));
-    ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    Query q = new SpanTermQuery(new Term("field", "a"));
-    hits = s.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(),
-                                                                MultiFields.getLiveDocs(s.getIndexReader()),
-                                                                "field",
-                                                                new BytesRef("a"),
-                                                                false);
-
-    assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    assertEquals(1, tps.freq());
-    assertEquals(0, tps.nextPosition());
+    try {
+      w.addDocument(doc);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iea) {
+      // expected
+    }
     w.close();
-
-    r.close();
     dir.close();
   }
 
 
   // LUCENE-1219
   public void testBinaryFieldOffsetLength() throws IOException {
     Directory dir = newDirectory();
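(The removed search and positions assertions are moot under the new
behavior: addDocument now rejects the stream outright, so nothing is
indexed and there is nothing left to search.)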
@@ -61,7 +61,7 @@ public class TestPositionIncrement extends LuceneTestCase {
         return new TokenStreamComponents(new Tokenizer(reader) {
           // TODO: use CannedTokenStream
           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
-          private final int[] INCREMENTS = {0, 2, 1, 0, 1};
+          private final int[] INCREMENTS = {1, 2, 1, 0, 1};
           private int i = 0;
 
           PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
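With the first increment changed from 0 to 1, the five tokens land at
positions 0, 2, 3, 3, 4 (running sum minus one, per the sketch after the
DocInverterPerField hunk); token "4" still shares a position with "3", so
the test keeps exercising posIncr=0 everywhere except the now-illegal
first slot.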
@@ -222,8 +222,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     // "a" occurs 4 times
     assertEquals(4, tp.freq());
-    int expected = 0;
-    assertEquals(expected, tp.nextPosition());
+    assertEquals(0, tp.nextPosition());
     assertEquals(1, tp.nextPosition());
     assertEquals(3, tp.nextPosition());
     assertEquals(6, tp.nextPosition());
@@ -546,7 +546,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     // we write here (e.g., to write parent+2), and need to do a workaround
     // in the reader (which knows that anyway only category 0 has a parent
     // -1).
-    parentStream.set(parent + 1);
+    parentStream.set(Math.max(parent+1, 1));
     Document d = new Document();
     d.add(parentStreamField);
 
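The clamp follows from the comment above: the parent ordinal is encoded as
a position increment of parent+1, and the root category's parent is -1, so
the root would emit a first increment of 0, exactly what this commit
outlaws. Math.max(parent+1, 1) keeps the stream legal, and the reader
(which knows only category 0 can have parent -1) compensates for the
shifted value.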
@@ -69,7 +69,7 @@ final class MockPayloadFilter extends TokenFilter {
     if (input.incrementToken()) {
       payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
       int posIncr;
-      if (i % 2 == 1) {
+      if (pos == 0 || i % 2 == 1) {
         posIncr = 1;
       } else {
         posIncr = 0;
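The added pos == 0 clause guarantees the filter's first emitted token has a
positive increment (the bare i % 2 pattern could start a stream with
posIncr = 0), keeping tests that use this mock legal under the new
first-token check.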