[LUCENE-3731] - refactored analyzeText method to initializeIterator and made it abstract inside BaseUIMATokenizer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293614 13f79535-47bb-0310-9956-ffa450edef68
Tommaso Teofili 2012-02-25 14:14:00 +00:00
parent 2e015271c5
commit 482c0610fd
3 changed files with 35 additions and 22 deletions
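In short, the commit turns the per-subclass analyzeText() methods into a template-method contract on the base class: analyzeInput() still runs the UIMA AnalysisEngine and fills the CAS, the new abstract initializeIterator() lets each tokenizer set up the FSIterator it will consume, and the subclasses call it lazily from the first incrementToken() invocation, wrapping AnalysisEngineProcessException into IOException there. The sketch below only shows that shape; the class names, field types, the untyped getAnnotationIndex() call and other details not visible in the diff are illustrative assumptions, not the module's actual code.

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;

// sketch of the base-class contract after this commit (not the real BaseUIMATokenizer,
// which also builds the AnalysisEngine and CAS from a descriptor path)
abstract class SketchBaseUIMATokenizer extends Tokenizer {

  protected FSIterator<AnnotationFS> iterator; // assumed field type
  protected AnalysisEngine ae;
  protected CAS cas;

  protected SketchBaseUIMATokenizer(Reader input) {
    super(input);
  }

  // runs the AnalysisEngine over the whole input and fills the CAS
  protected void analyzeInput() throws AnalysisEngineProcessException, IOException {
    cas.reset();
    cas.setDocumentText(toString(input));
    ae.process(cas);
  }

  // new abstract hook: each subclass decides which annotation index it iterates over
  protected abstract void initializeIterator() throws IOException;

  private static String toString(Reader reader) throws IOException {
    StringBuilder stringBuilder = new StringBuilder();
    int ch;
    while ((ch = reader.read()) != -1) {
      stringBuilder.append((char) ch);
    }
    return stringBuilder.toString();
  }
}

// shape of a subclass after the refactoring: lazy initialization on the first
// incrementToken() call, with AnalysisEngineProcessException wrapped into IOException
final class SketchAnnotationsTokenizer extends SketchBaseUIMATokenizer {

  SketchAnnotationsTokenizer(Reader input) {
    super(input);
  }

  @Override
  protected void initializeIterator() throws IOException {
    try {
      analyzeInput();
    } catch (AnalysisEngineProcessException e) {
      throw new IOException(e);
    }
    iterator = cas.getAnnotationIndex().iterator(); // real subclasses pick a specific annotation Type
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (iterator == null) {
      initializeIterator(); // the analysis cost is paid once, on the first call
    }
    if (iterator.hasNext()) {
      clearAttributes();
      iterator.next(); // real subclasses copy the annotation's text and offsets into attributes here
      return true;
    }
    return false;
  }
}

Note how the checked AnalysisEngineProcessException no longer leaks into incrementToken(): each subclass translates it once inside initializeIterator(), which is why the old try/catch around analyzeText() disappears from incrementToken() in both tokenizers below.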

BaseUIMATokenizer.java

@@ -51,18 +51,25 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
   /**
    * analyzes the tokenizer input using the given analysis engine
    *
    * <p/>
    * {@link #cas} will be filled with extracted metadata (UIMA annotations, feature structures)
    *
    * @throws AnalysisEngineProcessException
    * @throws IOException
    */
-  protected void analyzeInput() throws AnalysisEngineProcessException,IOException {
+  protected void analyzeInput() throws AnalysisEngineProcessException, IOException {
     cas.reset();
     cas.setDocumentText(toString(input));
     ae.process(cas);
   }
+
+  /**
+   * initialize the FSIterator which is used to build tokens at each incrementToken() method call
+   *
+   * @throws IOException
+   */
+  protected abstract void initializeIterator() throws IOException;
   private String toString(Reader reader) throws IOException {
     StringBuilder stringBuilder = new StringBuilder();
     int ch;
@@ -82,6 +89,6 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
   public void end() throws IOException {
     iterator = null;
   }
 }

UIMAAnnotationsTokenizer.java

@@ -37,7 +37,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
   private final OffsetAttribute offsetAttr;
   private final String tokenTypeString;
   private int finalOffset = 0;
   public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
@@ -47,8 +47,12 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
     this.offsetAttr = addAttribute(OffsetAttribute.class);
   }
-  private void analyzeText() throws IOException, AnalysisEngineProcessException {
-    analyzeInput();
+  protected void initializeIterator() throws IOException {
+    try {
+      analyzeInput();
+    } catch (AnalysisEngineProcessException e) {
+      throw new IOException(e);
+    }
     finalOffset = correctOffset(cas.getDocumentText().length());
     Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
     iterator = cas.getAnnotationIndex(tokenType).iterator();
@@ -57,11 +61,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
   @Override
   public boolean incrementToken() throws IOException {
     if (iterator == null) {
-      try {
-        analyzeText();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+      initializeIterator();
     }
     if (iterator.hasNext()) {
       clearAttributes();
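For reference, a UIMAAnnotationsTokenizer built by this code is consumed like any other TokenStream; the first incrementToken() call is where initializeIterator() runs the AnalysisEngine. A minimal usage sketch, assuming the class lives in org.apache.lucene.analysis.uima and using placeholder values for the descriptor path and annotation type name:

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;

public class UIMAAnnotationsTokenizerUsage {
  public static void main(String[] args) throws Exception {
    // descriptor path and annotation type are placeholders; point them at a real
    // UIMA AnalysisEngine descriptor on the classpath and the annotation type it produces
    Tokenizer tokenizer = new UIMAAnnotationsTokenizer(
        "/SomeAnalysisEngineDescriptor.xml",
        "org.apache.uima.SomeTokenAnnotation",
        new StringReader("the big brown fox jumped on the wood"));

    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);

    // standard TokenStream consumer workflow; analysis happens lazily on the first incrementToken()
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(termAtt.toString()
          + " [" + offsetAtt.startOffset() + "," + offsetAtt.endOffset() + "]");
    }
    tokenizer.end();
    tokenizer.close();
  }
}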

UIMATypeAwareAnnotationsTokenizer.java

@@ -47,7 +47,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
   private final String typeAttributeFeaturePath;
   private FeaturePath featurePath;
   private int finalOffset = 0;
   public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
@@ -59,23 +59,29 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
     this.typeAttributeFeaturePath = typeAttributeFeaturePath;
   }
-  private void analyzeText() throws IOException, AnalysisEngineProcessException, CASException {
-    analyzeInput();
+  protected void initializeIterator() throws IOException {
+    try {
+      analyzeInput();
+    } catch (AnalysisEngineProcessException e) {
+      throw new IOException(e);
+    }
+    featurePath = cas.createFeaturePath();
+    try {
+      featurePath.initialize(typeAttributeFeaturePath);
+    } catch (CASException e) {
+      featurePath = null;
+      throw new IOException(e);
+    }
     finalOffset = correctOffset(cas.getDocumentText().length());
     Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
     iterator = cas.getAnnotationIndex(tokenType).iterator();
-    featurePath = cas.createFeaturePath();
-    featurePath.initialize(typeAttributeFeaturePath);
   }
   @Override
   public boolean incrementToken() throws IOException {
     if (iterator == null) {
-      try {
-        analyzeText();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+      initializeIterator();
     }
     if (iterator.hasNext()) {
       clearAttributes();
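The type-aware variant is consumed the same way, with one extra constructor argument: the UIMA feature path whose value is read for each matched annotation (presumably surfaced through the token's TypeAttribute, given the class name). A hedged usage sketch with placeholder descriptor, annotation type and feature path:

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer;

public class UIMATypeAwareAnnotationsTokenizerUsage {
  public static void main(String[] args) throws Exception {
    // all three string arguments are placeholders: a UIMA descriptor on the classpath,
    // the annotation type to tokenize on, and the feature path whose value becomes the token type
    Tokenizer tokenizer = new UIMATypeAwareAnnotationsTokenizer(
        "/SomeAnalysisEngineDescriptor.xml",
        "org.apache.uima.SomeTokenAnnotation",
        "posTag",                                   // hypothetical feature path
        new StringReader("the big brown fox jumped on the wood"));

    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = tokenizer.addAttribute(TypeAttribute.class);

    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      // the feature path value is assumed to be exposed as the token type
      System.out.println(termAtt.toString() + " / " + typeAtt.type());
    }
    tokenizer.end();
    tokenizer.close();
  }
}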