improvements to CharStream/Reader/Filter & related classes' javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@807574 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-08-25 12:02:17 +00:00
parent 53dab68854
commit 4b441c2eef
5 changed files with 40 additions and 25 deletions

View File

@ -21,15 +21,15 @@ import java.util.ArrayList;
import java.util.List;
/**
* Base utility class for implementing a {@link
* CharFilter}. You record mappings by calling {@link
* #addOffCorrectMap}, and then invoke the correct method.
* This class is not particularly efficient, eg a new class
* instance is created for every call to {@link
* #addOffCorrectMap}, which is appended to a private list.
* When retrieving a mapping, that list is linearly
* checked.
* @version $Id$
* Base utility class for implementing a {@link CharFilter}.
* You subclass this, and then record mappings by calling
* {@link #addOffCorrectMap}, and then invoke the correct
* method to correct an offset.
*
* <p><b>NOTE</b>: This class is not particularly efficient.
* For example, a new class instance is created for every
* call to {@link #addOffCorrectMap}, which is then appended
* to a private list.
*/
public abstract class BaseCharFilter extends CharFilter {
@ -41,8 +41,10 @@ public abstract class BaseCharFilter extends CharFilter {
}
/** Retrieve the corrected offset. Note that this method
* is slow if you correct positions far before the most
* recently added position. */
* is slow, if you correct positions far before the most
* recently added position, as it's a simple linear
* searhc backwards through all offset corrections added
* by {@link #addOffCorrectMap}. */
protected int correct(int currentOff) {
if (pcmList == null || pcmList.isEmpty()) {
return currentOff;

View File

@ -21,10 +21,10 @@ import java.io.IOException;
import java.io.Reader;
/**
* CharReader is a Reader wrapper. It reads chars from Reader and outputs CharStream.
*
* @version $Id$
*
* CharReader is a Reader wrapper. It reads chars from
* Reader and outputs {@link CharStream}, defining an
* identify fucntion {@link #correctOffset} method that
* simply returns the provided offset.
*/
public final class CharReader extends CharStream {

View File

@ -20,17 +20,23 @@ package org.apache.lucene.analysis;
import java.io.Reader;
/**
* CharStream adds <a href="#correctOffset(int)">correctOffset</a> functionality over Reader.
*
* @version $Id$
* CharStream adds <a
* href="#correctOffset(int)">correctOffset</a>
* functionality over Reader. All Tokenizers accept a
* CharStream as input, which enables arbitrary character
* based filtering before tokenization. The {@link
* #correctOffset} method fixed offsets to account for
* removal or insertion of characters, so that the offsets
* reported in the tokens match the character offsets of the
* original Reader.
*/
public abstract class CharStream extends Reader {
/**
* Called by CharFilter(s) and Tokenizer to correct token offset.
*
* @param currentOff current offset
* @return corrected token offset
* @param currentOff offset as seen in the output
* @return corrected offset based on the input
*/
public abstract int correctOffset(int currentOff);
}

View File

@ -21,10 +21,10 @@ import java.io.IOException;
import java.util.LinkedList;
/**
* {@link CharFilter} that applies the mappings contained in
* a {@link NormalizeCharMap} to the character stream.
*
* @version $Id$
* Simplistic {@link CharFilter} that applies the mappings
* contained in a {@link NormalizeCharMap} to the character
* stream, and correcting the resulting changes to the
* offsets.
*/
public class MappingCharFilter extends BaseCharFilter {

View File

@ -23,7 +23,6 @@ import java.util.Map;
/**
* Holds a map of String input to String output, to be used
* with {@link MappingCharFilter}.
* @version $Id$
*/
public class NormalizeCharMap {
@ -32,6 +31,14 @@ public class NormalizeCharMap {
String normStr;
int diff;
/** Records a replacement to be applied to the inputs
* stream. Whenever <code>singleMatch</code> occurs in
* the input, it will be replaced with
* <code>replacement</code>.
*
* @param singleMatch input String to be replaced
* @param replacement output String
*/
public void add(String singleMatch, String replacement) {
NormalizeCharMap currMap = this;
for(int i = 0; i < singleMatch.length(); i++) {