[LUCENE-3731] - refactored analyzeText method to initializeIterator and made it abstract inside BaseUIMATokenizer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293614 13f79535-47bb-0310-9956-ffa450edef68
Tommaso Teofili 2012-02-25 14:14:00 +00:00
parent 2e015271c5
commit 482c0610fd
3 changed files with 35 additions and 22 deletions
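
The change introduces a simple template-method contract: the base tokenizer declares an abstract initializeIterator(), each subclass runs the UIMA analysis and sets up its FSIterator there, and incrementToken() calls it lazily without any try/catch at the call site. A minimal sketch of that shape, with placeholder class and field names (not the actual Lucene/UIMA classes):

import java.io.IOException;

// Sketch only: mirrors the contract added by this commit, with the UIMA details omitted.
public abstract class LazyIteratorTokenizerSketch {

  protected Object iterator; // stands in for the FSIterator a subclass populates

  // subclasses run the analysis engine and build the iterator,
  // wrapping checked analysis exceptions into IOException
  protected abstract void initializeIterator() throws IOException;

  public boolean incrementToken() throws IOException {
    if (iterator == null) {
      initializeIterator(); // lazy: runs once, on the first incrementToken() call
    }
    // ... consume the iterator and populate token attributes ...
    return false;
  }
}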

BaseUIMATokenizer.java

@@ -51,7 +51,7 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
   /**
    * analyzes the tokenizer input using the given analysis engine
-   *
+   * <p/>
    * {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
    *
    * @throws AnalysisEngineProcessException
    */
@@ -63,6 +63,13 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
     ae.process(cas);
   }
 
+  /**
+   * initialize the FSIterator which is used to build tokens at each incrementToken() method call
+   *
+   * @throws IOException
+   */
+  protected abstract void initializeIterator() throws IOException;
+
   private String toString(Reader reader) throws IOException {
     StringBuilder stringBuilder = new StringBuilder();
     int ch;
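
For context, this is how such a tokenizer is typically consumed: with the lazy initialization above, the first incrementToken() call is what triggers initializeIterator() and the UIMA analysis. A generic sketch using the standard TokenStream workflow; the helper class and method names are made up and nothing here is part of this commit:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class ConsumeTokenStreamSketch {

  // prints every token produced by the given stream together with its character offsets
  static void dumpTokens(TokenStream stream) throws IOException {
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) { // first call triggers the lazy initialization
      System.out.println(term.toString() + " [" + offset.startOffset() + "-" + offset.endOffset() + "]");
    }
    stream.end();
    stream.close();
  }
}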

UIMAAnnotationsTokenizer.java

@@ -47,8 +47,12 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
     this.offsetAttr = addAttribute(OffsetAttribute.class);
   }
 
-  private void analyzeText() throws IOException, AnalysisEngineProcessException {
-    analyzeInput();
+  protected void initializeIterator() throws IOException {
+    try {
+      analyzeInput();
+    } catch (AnalysisEngineProcessException e) {
+      throw new IOException(e);
+    }
     finalOffset = correctOffset(cas.getDocumentText().length());
     Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
     iterator = cas.getAnnotationIndex(tokenType).iterator();
@@ -57,11 +61,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
   @Override
   public boolean incrementToken() throws IOException {
     if (iterator == null) {
-      try {
-        analyzeText();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+      initializeIterator();
     }
     if (iterator.hasNext()) {
       clearAttributes();
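
The hunk stops at clearAttributes(); for orientation, this is roughly how an FSIterator over UIMA annotations is turned into token attributes once initializeIterator() has populated it. An illustrative sketch with placeholder names, not the actual incrementToken() body:

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;

final class AnnotationToTokenSketch {

  // maps the next UIMA annotation, if any, onto Lucene term and offset attributes
  static boolean emitNext(FSIterator<AnnotationFS> iterator,
                          CharTermAttribute termAttr, OffsetAttribute offsetAttr) {
    if (!iterator.hasNext()) {
      return false; // no more annotations, no more tokens
    }
    AnnotationFS annotation = iterator.next();
    termAttr.setEmpty().append(annotation.getCoveredText()); // token text = text covered by the annotation
    offsetAttr.setOffset(annotation.getBegin(), annotation.getEnd()); // character offsets from the annotation
    return true;
  }
}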

UIMATypeAwareAnnotationsTokenizer.java

@@ -59,23 +59,29 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
     this.typeAttributeFeaturePath = typeAttributeFeaturePath;
   }
 
-  private void analyzeText() throws IOException, AnalysisEngineProcessException, CASException {
-    analyzeInput();
+  protected void initializeIterator() throws IOException {
+    try {
+      analyzeInput();
+    } catch (AnalysisEngineProcessException e) {
+      throw new IOException(e);
+    }
+    featurePath = cas.createFeaturePath();
+    try {
+      featurePath.initialize(typeAttributeFeaturePath);
+    } catch (CASException e) {
+      featurePath = null;
+      throw new IOException(e);
+    }
     finalOffset = correctOffset(cas.getDocumentText().length());
     Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
     iterator = cas.getAnnotationIndex(tokenType).iterator();
-    featurePath = cas.createFeaturePath();
-    featurePath.initialize(typeAttributeFeaturePath);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (iterator == null) {
-      try {
-        analyzeText();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+      initializeIterator();
     }
     if (iterator.hasNext()) {
       clearAttributes();
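
The type-aware variant additionally initializes a UIMA FeaturePath in initializeIterator(); the point of that is to resolve a per-annotation value (for example a POS tag) and expose it as the Lucene token type. A small illustrative sketch of that step, with placeholder names and not taken from this diff:

import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.uima.cas.FeaturePath;
import org.apache.uima.cas.text.AnnotationFS;

final class FeaturePathTypeSketch {

  // resolves the configured feature path against one annotation and stores it as the token type
  static void applyType(FeaturePath featurePath, AnnotationFS annotation, TypeAttribute typeAttr) {
    String value = featurePath.getValueAsString(annotation); // walks the configured path on this annotation
    if (value != null) {
      typeAttr.setType(value);
    }
  }
}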