mirror of https://github.com/apache/lucene.git
LUCENE-5980: don't let document length overflow
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1629835 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f6cbe636bb
commit
6f480ae9dd
|
@ -171,6 +171,8 @@ Bug Fixes
|
|||
not have the regular "spinlock" of DirectoryReader.open. It now implements
|
||||
Closeable and you must close it to release the lock. (Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-5980: Don't let document length overflow. (Robert Muir)
|
||||
|
||||
Documentation
|
||||
|
||||
* LUCENE-5392: Add/improve analysis package documentation to reflect
|
||||
|
|
|
@ -622,6 +622,10 @@ final class DefaultIndexingChain extends DocConsumer {
|
|||
invertState.lastStartOffset = startOffset;
|
||||
}
|
||||
|
||||
invertState.length++;
|
||||
if (invertState.length < 0) {
|
||||
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
|
||||
}
|
||||
//System.out.println(" term=" + invertState.termAttribute);
|
||||
|
||||
// If we hit an exception in here, we abort
|
||||
|
@ -633,8 +637,6 @@ final class DefaultIndexingChain extends DocConsumer {
|
|||
aborting = true;
|
||||
termsHashPerField.add();
|
||||
aborting = false;
|
||||
|
||||
invertState.length++;
|
||||
}
|
||||
|
||||
// trigger streams to perform end-of-stream operations
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -46,6 +48,7 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
|||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
|
@ -2118,6 +2121,48 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
// kind of slow, but omits positions, so just CPU
|
||||
@Nightly
|
||||
public void testTooManyTokens() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||
Document doc = new Document();
|
||||
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
doc.add(new Field("foo", new TokenStream() {
|
||||
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
long num = 0;
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (num == Integer.MAX_VALUE + 1) {
|
||||
return false;
|
||||
}
|
||||
clearAttributes();
|
||||
if (num == 0) {
|
||||
posIncAtt.setPositionIncrement(1);
|
||||
} else {
|
||||
posIncAtt.setPositionIncrement(0);
|
||||
}
|
||||
termAtt.append("a");
|
||||
num++;
|
||||
if (VERBOSE && num % 1000000 == 0) {
|
||||
System.out.println("indexed: " + num);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}, ft));
|
||||
try {
|
||||
iw.addDocument(doc);
|
||||
fail("didn't hit exception");
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("too many tokens"));
|
||||
}
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testExceptionDuringRollback() throws Exception {
|
||||
// currently: fail in two different places
|
||||
final String messageToFailOn = random().nextBoolean() ?
|
||||
|
|
Loading…
Reference in New Issue