[LUCENE-3731] - refactored analyzeText method to initializeIterator and made it abstract inside BaseUIMATokenizer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293614 13f79535-47bb-0310-9956-ffa450edef68
Tommaso Teofili 2012-02-25 14:14:00 +00:00
parent 2e015271c5
commit 482c0610fd
3 changed files with 35 additions and 22 deletions
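
The change introduces a simple template-method contract: the base tokenizer declares an abstract initializeIterator(), each subclass runs the UIMA analysis and sets up its FSIterator there, and incrementToken() calls it lazily without any try/catch at the call site. A minimal sketch of that shape, with placeholder class and field names (not the actual Lucene/UIMA classes):

import java.io.IOException;

// Sketch only: mirrors the contract added by this commit, with the UIMA details omitted.
public abstract class LazyIteratorTokenizerSketch {

  protected Object iterator; // stands in for the FSIterator a subclass populates

  // subclasses run the analysis engine and build the iterator,
  // wrapping checked analysis exceptions into IOException
  protected abstract void initializeIterator() throws IOException;

  public boolean incrementToken() throws IOException {
    if (iterator == null) {
      initializeIterator(); // lazy: runs once, on the first incrementToken() call
    }
    // ... consume the iterator and populate token attributes ...
    return false;
  }
}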

BaseUIMATokenizer.java

@@ -51,7 +51,7 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
   /**
    * analyzes the tokenizer input using the given analysis engine
-   *
+   * <p/>
    * {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
    *
    * @throws AnalysisEngineProcessException
    */
@@ -63,6 +63,13 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
     ae.process(cas);
   }
 
+  /**
+   * initialize the FSIterator which is used to build tokens at each incrementToken() method call
+   *
+   * @throws IOException
+   */
+  protected abstract void initializeIterator() throws IOException;
+
   private String toString(Reader reader) throws IOException {
     StringBuilder stringBuilder = new StringBuilder();
     int ch;
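
For context, this is how such a tokenizer is typically consumed: with the lazy initialization above, the first incrementToken() call is what triggers initializeIterator() and the UIMA analysis. A generic sketch using the standard TokenStream workflow; the helper class and method names are made up and nothing here is part of this commit:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class ConsumeTokenStreamSketch {

  // prints every token produced by the given stream together with its character offsets
  static void dumpTokens(TokenStream stream) throws IOException {
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) { // first call triggers the lazy initialization
      System.out.println(term.toString() + " [" + offset.startOffset() + "-" + offset.endOffset() + "]");
    }
    stream.end();
    stream.close();
  }
}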

UIMAAnnotationsTokenizer.java

@@ -47,8 +47,12 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
     this.offsetAttr = addAttribute(OffsetAttribute.class);
   }
 
-  private void analyzeText() throws IOException, AnalysisEngineProcessException {
-    analyzeInput();
+  protected void initializeIterator() throws IOException {
+    try {
+      analyzeInput();
+    } catch (AnalysisEngineProcessException e) {
+      throw new IOException(e);
+    }
     finalOffset = correctOffset(cas.getDocumentText().length());
     Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
     iterator = cas.getAnnotationIndex(tokenType).iterator();
@@ -57,11 +61,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
   @Override
   public boolean incrementToken() throws IOException {
     if (iterator == null) {
-      try {
-        analyzeText();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+      initializeIterator();
     }
     if (iterator.hasNext()) {
       clearAttributes();
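
The hunk stops at clearAttributes(); for orientation, this is roughly how an FSIterator over UIMA annotations is turned into token attributes once initializeIterator() has populated it. An illustrative sketch with placeholder names, not the actual incrementToken() body:

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;

final class AnnotationToTokenSketch {

  // maps the next UIMA annotation, if any, onto Lucene term and offset attributes
  static boolean emitNext(FSIterator<AnnotationFS> iterator,
                          CharTermAttribute termAttr, OffsetAttribute offsetAttr) {
    if (!iterator.hasNext()) {
      return false; // no more annotations, no more tokens
    }
    AnnotationFS annotation = iterator.next();
    termAttr.setEmpty().append(annotation.getCoveredText()); // token text = text covered by the annotation
    offsetAttr.setOffset(annotation.getBegin(), annotation.getEnd()); // character offsets from the annotation
    return true;
  }
}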

UIMATypeAwareAnnotationsTokenizer.java

@@ -59,23 +59,29 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
     this.typeAttributeFeaturePath = typeAttributeFeaturePath;
   }
 
-  private void analyzeText() throws IOException, AnalysisEngineProcessException, CASException {
-    analyzeInput();
+  protected void initializeIterator() throws IOException {
+    try {
+      analyzeInput();
+    } catch (AnalysisEngineProcessException e) {
+      throw new IOException(e);
+    }
+    featurePath = cas.createFeaturePath();
+    try {
+      featurePath.initialize(typeAttributeFeaturePath);
+    } catch (CASException e) {
+      featurePath = null;
+      throw new IOException(e);
+    }
     finalOffset = correctOffset(cas.getDocumentText().length());
     Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
     iterator = cas.getAnnotationIndex(tokenType).iterator();
-    featurePath = cas.createFeaturePath();
-    featurePath.initialize(typeAttributeFeaturePath);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (iterator == null) {
-      try {
-        analyzeText();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+      initializeIterator();
     }
     if (iterator.hasNext()) {
       clearAttributes();
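
The type-aware variant additionally initializes a UIMA FeaturePath in initializeIterator(); the point of that is to resolve a per-annotation value (for example a POS tag) and expose it as the Lucene token type. A small illustrative sketch of that step, with placeholder names and not taken from this diff:

import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.uima.cas.FeaturePath;
import org.apache.uima.cas.text.AnnotationFS;

final class FeaturePathTypeSketch {

  // resolves the configured feature path against one annotation and stores it as the token type
  static void applyType(FeaturePath featurePath, AnnotationFS annotation, TypeAttribute typeAttr) {
    String value = featurePath.getValueAsString(annotation); // walks the configured path on this annotation
    if (value != null) {
      typeAttr.setType(value);
    }
  }
}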