mirror of https://github.com/apache/lucene.git
improvements to CharStream/Reader/Filter & related classes' javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@807574 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
53dab68854
commit
4b441c2eef
|
@ -21,15 +21,15 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Base utility class for implementing a {@link
|
||||
* CharFilter}. You record mappings by calling {@link
|
||||
* #addOffCorrectMap}, and then invoke the correct method.
|
||||
* This class is not particularly efficient, eg a new class
|
||||
* instance is created for every call to {@link
|
||||
* #addOffCorrectMap}, which is appended to a private list.
|
||||
* When retrieving a mapping, that list is linearly
|
||||
* checked.
|
||||
* @version $Id$
|
||||
* Base utility class for implementing a {@link CharFilter}.
|
||||
* You subclass this, and then record mappings by calling
|
||||
* {@link #addOffCorrectMap}, and then invoke the correct
|
||||
* method to correct an offset.
|
||||
*
|
||||
* <p><b>NOTE</b>: This class is not particularly efficient.
|
||||
* For example, a new class instance is created for every
|
||||
* call to {@link #addOffCorrectMap}, which is then appended
|
||||
* to a private list.
|
||||
*/
|
||||
public abstract class BaseCharFilter extends CharFilter {
|
||||
|
||||
|
@ -41,8 +41,10 @@ public abstract class BaseCharFilter extends CharFilter {
|
|||
}
|
||||
|
||||
/** Retrieve the corrected offset. Note that this method
|
||||
* is slow if you correct positions far before the most
|
||||
* recently added position. */
|
||||
* is slow, if you correct positions far before the most
|
||||
* recently added position, as it's a simple linear
|
||||
* search backwards through all offset corrections added
|
||||
* by {@link #addOffCorrectMap}. */
|
||||
protected int correct(int currentOff) {
|
||||
if (pcmList == null || pcmList.isEmpty()) {
|
||||
return currentOff;
|
||||
|
|
|
@ -21,10 +21,10 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* CharReader is a Reader wrapper. It reads chars from Reader and outputs CharStream.
|
||||
*
|
||||
* @version $Id$
|
||||
*
|
||||
* CharReader is a Reader wrapper. It reads chars from
|
||||
* Reader and outputs {@link CharStream}, defining an
|
||||
* identity function {@link #correctOffset} method that
|
||||
* simply returns the provided offset.
|
||||
*/
|
||||
public final class CharReader extends CharStream {
|
||||
|
||||
|
|
|
@ -20,17 +20,23 @@ package org.apache.lucene.analysis;
|
|||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* CharStream adds <a href="#correctOffset(int)">correctOffset</a> functionality over Reader.
|
||||
*
|
||||
* @version $Id$
|
||||
* CharStream adds <a
|
||||
* href="#correctOffset(int)">correctOffset</a>
|
||||
* functionality over Reader. All Tokenizers accept a
|
||||
* CharStream as input, which enables arbitrary character
|
||||
* based filtering before tokenization. The {@link
|
||||
* #correctOffset} method fixes offsets to account for
|
||||
* removal or insertion of characters, so that the offsets
|
||||
* reported in the tokens match the character offsets of the
|
||||
* original Reader.
|
||||
*/
|
||||
public abstract class CharStream extends Reader {
|
||||
|
||||
/**
|
||||
* Called by CharFilter(s) and Tokenizer to correct token offset.
|
||||
*
|
||||
* @param currentOff current offset
|
||||
* @return corrected token offset
|
||||
* @param currentOff offset as seen in the output
|
||||
* @return corrected offset based on the input
|
||||
*/
|
||||
public abstract int correctOffset(int currentOff);
|
||||
}
|
||||
|
|
|
@ -21,10 +21,10 @@ import java.io.IOException;
|
|||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* {@link CharFilter} that applies the mappings contained in
|
||||
* a {@link NormalizeCharMap} to the character stream.
|
||||
*
|
||||
* @version $Id$
|
||||
* Simplistic {@link CharFilter} that applies the mappings
|
||||
* contained in a {@link NormalizeCharMap} to the character
|
||||
* stream, and corrects the resulting changes to the
|
||||
* offsets.
|
||||
*/
|
||||
public class MappingCharFilter extends BaseCharFilter {
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Map;
|
|||
/**
|
||||
* Holds a map of String input to String output, to be used
|
||||
* with {@link MappingCharFilter}.
|
||||
* @version $Id$
|
||||
*/
|
||||
public class NormalizeCharMap {
|
||||
|
||||
|
@ -32,6 +31,14 @@ public class NormalizeCharMap {
|
|||
String normStr;
|
||||
int diff;
|
||||
|
||||
/** Records a replacement to be applied to the input
|
||||
* stream. Whenever <code>singleMatch</code> occurs in
|
||||
* the input, it will be replaced with
|
||||
* <code>replacement</code>.
|
||||
*
|
||||
* @param singleMatch input String to be replaced
|
||||
* @param replacement output String
|
||||
*/
|
||||
public void add(String singleMatch, String replacement) {
|
||||
NormalizeCharMap currMap = this;
|
||||
for(int i = 0; i < singleMatch.length(); i++) {
|
||||
|
|
Loading…
Reference in New Issue