LUCENE-5405: when there's an exception thrown by an analysis component, note the field name

on the info stream to aid in debugging.

closes #21



git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1562657 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Benson Margulies 2014-01-30 00:43:52 +00:00
parent 9feffa8472
commit 82f58d83d0
2 changed files with 130 additions and 0 deletions

View File

@ -92,6 +92,13 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
}
/*
* To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
* when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
* but rather a finally that takes note of the problem.
*/
boolean succeededInProcessingField = false;
try (TokenStream stream = field.tokenStream(docState.analyzer)) {
// reset the TokenStream to the first token
stream.reset();
@ -175,6 +182,12 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
// when we come back around to the field...
fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
/* if success was false above there is an exception coming through and we won't get here.*/
succeededInProcessingField = true;
} finally {
if (!succeededInProcessingField && docState.infoStream.isEnabled("DW")) {
docState.infoStream.message("DW", "An exception was thrown while processing field " + fieldInfo.name);
}
}
fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;

View File

@ -0,0 +1,117 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.junit.Test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
/**
* Test adding to the info stream when there's an exception thrown during field analysis.
*/
public class TestDocInverterPerFieldErrorInfo extends LuceneTestCase {
private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED);
private static class BadNews extends RuntimeException {
private BadNews(String message) {
super(message);
}
}
private static class ThrowingAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer();
if (fieldName.equals("distinctiveFieldName")) {
TokenFilter tosser = new TokenFilter(tokenizer) {
@Override
public boolean incrementToken() throws IOException {
throw new BadNews("Something is icky.");
}
};
return new TokenStreamComponents(tokenizer, tosser);
} else {
return new TokenStreamComponents(tokenizer);
}
}
}
@Test
public void testInfoStreamGetsFieldName() throws Exception {
Directory dir = newDirectory();
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer());
final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
PrintStream infoPrintStream = new PrintStream(infoBytes, true, "utf-8");
PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream);
c.setInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
Document doc = new Document();
doc.add(newField("distinctiveFieldName", "aaa ", storedTextType));
try {
writer.addDocument(doc);
fail("Failed to fail.");
} catch(BadNews badNews) {
infoPrintStream.flush();
String infoStream = new String(infoBytes.toByteArray(), "utf-8");
assertTrue(infoStream.contains("distinctiveFieldName"));
}
writer.close();
dir.close();
}
@Test
public void testNoExtraNoise() throws Exception {
Directory dir = newDirectory();
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer());
final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
PrintStream infoPrintStream = new PrintStream(infoBytes, true, "utf-8");
PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream);
c.setInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
Document doc = new Document();
doc.add(newField("boringFieldName", "aaa ", storedTextType));
try {
writer.addDocument(doc);
} catch(BadNews badNews) {
fail("Unwanted exception");
}
infoPrintStream.flush();
String infoStream = new String(infoBytes.toByteArray(), "utf-8");
assertFalse(infoStream.contains("boringFieldName"));
writer.close();
dir.close();
}
}