mirror of https://github.com/apache/lucene.git
[LUCENE-3731] - refactored analyzeText method to initializeIterator and made it abstract inside BaseUIMATokenizer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293614 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2e015271c5
commit
482c0610fd
|
@ -51,18 +51,25 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
|
|||
|
||||
/**
|
||||
* analyzes the tokenizer input using the given analysis engine
|
||||
*
|
||||
* <p/>
|
||||
* {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
|
||||
*
|
||||
* @throws AnalysisEngineProcessException
|
||||
* @throws IOException
|
||||
*/
|
||||
protected void analyzeInput() throws AnalysisEngineProcessException,IOException {
|
||||
protected void analyzeInput() throws AnalysisEngineProcessException, IOException {
|
||||
cas.reset();
|
||||
cas.setDocumentText(toString(input));
|
||||
ae.process(cas);
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize the FSIterator which is used to build tokens at each incrementToken() method call
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
protected abstract void initializeIterator() throws IOException;
|
||||
|
||||
private String toString(Reader reader) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
int ch;
|
||||
|
@ -82,6 +89,6 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
|
|||
public void end() throws IOException {
|
||||
iterator = null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
private final OffsetAttribute offsetAttr;
|
||||
|
||||
private final String tokenTypeString;
|
||||
|
||||
|
||||
private int finalOffset = 0;
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
|
||||
|
@ -47,8 +47,12 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||
}
|
||||
|
||||
private void analyzeText() throws IOException, AnalysisEngineProcessException {
|
||||
analyzeInput();
|
||||
protected void initializeIterator() throws IOException {
|
||||
try {
|
||||
analyzeInput();
|
||||
} catch (AnalysisEngineProcessException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||
|
@ -57,11 +61,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
try {
|
||||
analyzeText();
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
initializeIterator();
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
clearAttributes();
|
||||
|
|
|
@ -47,7 +47,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
private final String typeAttributeFeaturePath;
|
||||
|
||||
private FeaturePath featurePath;
|
||||
|
||||
|
||||
private int finalOffset = 0;
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
|
||||
|
@ -59,23 +59,29 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
this.typeAttributeFeaturePath = typeAttributeFeaturePath;
|
||||
}
|
||||
|
||||
private void analyzeText() throws IOException, AnalysisEngineProcessException, CASException {
|
||||
analyzeInput();
|
||||
protected void initializeIterator() throws IOException {
|
||||
try {
|
||||
analyzeInput();
|
||||
} catch (AnalysisEngineProcessException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
featurePath = cas.createFeaturePath();
|
||||
try {
|
||||
featurePath.initialize(typeAttributeFeaturePath);
|
||||
} catch (CASException e) {
|
||||
featurePath = null;
|
||||
throw new IOException(e);
|
||||
}
|
||||
finalOffset = correctOffset(cas.getDocumentText().length());
|
||||
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
|
||||
iterator = cas.getAnnotationIndex(tokenType).iterator();
|
||||
featurePath = cas.createFeaturePath();
|
||||
featurePath.initialize(typeAttributeFeaturePath);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
try {
|
||||
analyzeText();
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
initializeIterator();
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
clearAttributes();
|
||||
|
|
Loading…
Reference in New Issue