mirror of https://github.com/apache/lucene.git
add some javadocs to the test-framework analysis components
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1125165 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
897ad47877
commit
ba7845ae74
|
@ -30,7 +30,15 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for all Lucene unit tests that use TokenStreams.
|
* Base class for all Lucene unit tests that use TokenStreams.
|
||||||
|
* <p>
|
||||||
|
* When writing unit tests for analysis components, it's highly recommended
|
||||||
|
* to use the helper methods here (especially in conjunction with {@link MockAnalyzer} or
|
||||||
|
* {@link MockTokenizer}), as they contain many assertions and checks to
|
||||||
|
* catch bugs.
|
||||||
|
*
|
||||||
|
* @see MockAnalyzer
|
||||||
|
* @see MockTokenizer
|
||||||
*/
|
*/
|
||||||
public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
||||||
// some helpers to test Analyzers and TokenStreams:
|
// some helpers to test Analyzers and TokenStreams:
|
||||||
|
|
|
@ -27,6 +27,19 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyzer for testing
|
* Analyzer for testing
|
||||||
|
* <p>
|
||||||
|
* This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
|
||||||
|
* for unit tests. If you are testing a custom component such as a queryparser
|
||||||
|
* or analyzer-wrapper that consumes analysis streams, it's a great idea to test
|
||||||
|
* it with this analyzer instead. MockAnalyzer has the following behavior:
|
||||||
|
* <ul>
|
||||||
|
* <li>By default, the assertions in {@link MockTokenizer} are turned on for extra
|
||||||
|
* checks that the consumer is consuming properly. These checks can be disabled
|
||||||
|
* with {@link #setEnableChecks(boolean)}.
|
||||||
|
* <li>Payload data is randomly injected into the stream for more thorough testing
|
||||||
|
* of payloads.
|
||||||
|
* </ul>
|
||||||
|
* @see MockTokenizer
|
||||||
*/
|
*/
|
||||||
public final class MockAnalyzer extends Analyzer {
|
public final class MockAnalyzer extends Analyzer {
|
||||||
private final CharacterRunAutomaton runAutomaton;
|
private final CharacterRunAutomaton runAutomaton;
|
||||||
|
|
|
@ -26,7 +26,16 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.RegExp;
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Automaton-based tokenizer for testing. Optionally lowercases.
|
* Tokenizer for testing.
|
||||||
|
* <p>
|
||||||
|
* This tokenizer is a replacement for {@link #WHITESPACE}, {@link #SIMPLE}, and {@link #KEYWORD}
|
||||||
|
* tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
|
||||||
|
* it wrapping this tokenizer instead for extra checks. This tokenizer has the following behavior:
|
||||||
|
* <ul>
|
||||||
|
* <li>An internal state-machine is used for checking consumer consistency. These checks can
|
||||||
|
* be disabled with {@link #setEnableChecks(boolean)}.
|
||||||
|
* <li>For convenience, optionally lowercases terms that it outputs.
|
||||||
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public class MockTokenizer extends Tokenizer {
|
public class MockTokenizer extends Tokenizer {
|
||||||
/** Acts Similar to WhitespaceTokenizer */
|
/** Acts Similar to WhitespaceTokenizer */
|
||||||
|
|
Loading…
Reference in New Issue