LUCENE-985: throw clearer exception when term is too long (> 16383 chars)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@567338 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2007-08-18 23:15:14 +00:00
parent c67fd79a83
commit 27c4c6a3b1
4 changed files with 36 additions and 1 deletions

View File

@@ -76,6 +76,11 @@ Bug fixes
close any streams they had opened if an exception is hit in the
constructor. (Ning Li via Mike McCandless)
12. LUCENE-985: If an extremely long term is in a doc (> 16383 chars),
we now throw an IllegalArgumentException saying the term is too
long, instead of cryptic ArrayIndexOutOfBoundsException. (Karl
Wettin via Mike McCandless)
New features
1. LUCENE-906: Elision filter for French.

View File

@@ -1452,8 +1452,11 @@ final class DocumentsWriter {
p = postingsFreeList[--postingsFreeCount];
final int textLen1 = 1+tokenTextLen;
if (textLen1 + charPool.byteUpto > CHAR_BLOCK_SIZE) {
if (textLen1 > CHAR_BLOCK_SIZE)
throw new IllegalArgumentException("term length " + tokenTextLen + " exceeds max term length " + (CHAR_BLOCK_SIZE-1));
charPool.nextBuffer();
}
final char[] text = charPool.buffer;
final int textUpto = charPool.byteUpto;
p.textStart = textUpto + charPool.byteOffset;

View File

@@ -1091,6 +1091,10 @@ public class IndexWriter {
* temporary space usage) then the maximum free disk space
* required is the same as {@link #optimize}.</p>
*
* <p>Note that each term in the document can be no longer
* than 16383 characters, otherwise an
* IllegalArgumentException will be thrown.</p>
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/

View File

@@ -25,6 +25,7 @@ import java.util.Random;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -490,6 +491,28 @@ public class TestIndexWriter extends TestCase
}
}
/**
* Make sure we get a friendly exception for a wicked
* long term.
*/
public void testWickedLongTerm() throws IOException {
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
char[] chars = new char[16384];
Arrays.fill(chars, 'x');
Document doc = new Document();
String contents = "a b c " + new String(chars);
doc.add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
try {
writer.addDocument(doc);
fail("did not hit expected exception");
} catch (IllegalArgumentException e) {
}
writer.close();
dir.close();
}
/**
* Make sure optimize doesn't use any more than 1X
* starting index size as its temporary free space