mirror of https://github.com/apache/lucene.git
add some javadocs to the test-framework analysis components
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1125165 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
897ad47877
commit
ba7845ae74
|
@ -31,6 +31,14 @@ import org.apache.lucene.util._TestUtil;
|
|||
|
||||
/**
|
||||
* Base class for all Lucene unit tests that use TokenStreams.
|
||||
* <p>
|
||||
* When writing unit tests for analysis components, it's highly recommended
|
||||
* to use the helper methods here (especially in conjunction with {@link MockAnalyzer} or
|
||||
* {@link MockTokenizer}), as they contain many assertions and checks to
|
||||
* catch bugs.
|
||||
*
|
||||
* @see MockAnalyzer
|
||||
* @see MockTokenizer
|
||||
*/
|
||||
public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
||||
// some helpers to test Analyzers and TokenStreams:
|
||||
|
|
|
@ -27,6 +27,19 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
|||
|
||||
/**
|
||||
* Analyzer for testing
|
||||
* <p>
|
||||
* This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
|
||||
* for unit tests. If you are testing a custom component such as a queryparser
|
||||
* or analyzer-wrapper that consumes analysis streams, it's a great idea to test
|
||||
* it with this analyzer instead. MockAnalyzer has the following behavior:
|
||||
* <ul>
|
||||
* <li>By default, the assertions in {@link MockTokenizer} are turned on for extra
|
||||
* checks that the consumer is consuming properly. These checks can be disabled
|
||||
* with {@link #setEnableChecks(boolean)}.
|
||||
* <li>Payload data is randomly injected into the stream for more thorough testing
|
||||
* of payloads.
|
||||
* </ul>
|
||||
* @see MockTokenizer
|
||||
*/
|
||||
public final class MockAnalyzer extends Analyzer {
|
||||
private final CharacterRunAutomaton runAutomaton;
|
||||
|
|
|
@ -26,7 +26,16 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
|||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
/**
|
||||
* Automaton-based tokenizer for testing. Optionally lowercases.
|
||||
* Tokenizer for testing.
|
||||
* <p>
|
||||
* This tokenizer is a replacement for {@link #WHITESPACE}, {@link #SIMPLE}, and {@link #KEYWORD}
|
||||
* tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
|
||||
* it wrapping this tokenizer instead for extra checks. This tokenizer has the following behavior:
|
||||
* <ul>
|
||||
* <li>An internal state-machine is used for checking consumer consistency. These checks can
|
||||
* be disabled with {@link #setEnableChecks(boolean)}.
|
||||
* <li>For convenience, optionally lowercases terms that it outputs.
|
||||
* </ul>
|
||||
*/
|
||||
public class MockTokenizer extends Tokenizer {
|
||||
/** Acts similar to WhitespaceTokenizer */
|
||||
|
|
Loading…
Reference in New Issue