mirror of https://github.com/apache/lucene.git
added clear() method to TokenCachingAnalyzer, changed anonymous class to public class
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@479699 13f79535-47bb-0310-9956-ffa450edef68
parent bd76b754b0
commit ad49369d3d
@@ -203,57 +203,74 @@ public class AnalyzerUtil {
 
   /**
-   * Returns an analyzer wrapper that caches all tokens generated by the underlying child analyzer's
-   * token stream, and delivers those cached tokens on subsequent matching calls to
-   * <code>tokenStream(String fieldName, Reader reader)</code>.
+   * Analyzer wrapper that caches all tokens generated by the underlying child analyzer's
+   * token streams, and delivers those cached tokens on subsequent calls to
+   * <code>tokenStream(String fieldName, Reader reader)</code>,
+   * if the fieldName has been seen before, altogether ignoring the Reader parameter.
    * <p>
    * If Analyzer / TokenFilter chains are expensive in terms of I/O or CPU, such caching can
    * help improve performance if the same document is added to multiple Lucene indexes,
    * because the text analysis phase need not be performed more than once.
    * <p>
    * Caveats:
-   * 1) Caching the tokens of large Lucene documents can lead to out of memory exceptions.
-   * 2) The Token instances delivered by the underlying child analyzer must be immutable.
-   *
-   * @param child
-   *            the underlying child analyzer
-   * @return a new analyzer
+   * <ul>
+   * <li>Caching the tokens of large Lucene documents can lead to out of memory exceptions.</li>
+   * <li>The Token instances delivered by the underlying child analyzer must be immutable.</li>
+   * <li>A caching analyzer instance must not be used for more than one document, unless
+   * <code>clear()</code> is called before each new document.</li>
+   * </ul>
    */
-  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {
-
-    if (child == null)
-      throw new IllegalArgumentException("child analyzer must not be null");
-
-    return new Analyzer() {
-
-      private final HashMap cache = new HashMap();
-
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        final ArrayList tokens = (ArrayList) cache.get(fieldName);
-        if (tokens == null) { // not yet cached
-          final ArrayList tokens2 = new ArrayList();
-          cache.put(fieldName, tokens2);
-          return new TokenFilter(child.tokenStream(fieldName, reader)) {
-            public Token next() throws IOException {
-              Token token = input.next(); // from filter super class
-              if (token != null) tokens2.add(token);
-              return token;
-            }
-          };
-        } else { // already cached
-          return new TokenStream() {
-            private Iterator iter = tokens.iterator();
-            public Token next() {
-              if (!iter.hasNext()) return null;
-              return (Token) iter.next();
-            }
-          };
-        }
-      }
-    };
-  }
+  public static class TokenCachingAnalyzer extends Analyzer {
+
+    private final Analyzer child;
+    private final HashMap cache = new HashMap();
+
+    /**
+     * Creates a new caching analyzer that wraps the given underlying child analyzer.
+     *
+     * @param child
+     *            the underlying child analyzer
+     */
+    public TokenCachingAnalyzer(Analyzer child) {
+      if (child == null)
+        throw new IllegalArgumentException("child analyzer must not be null");
+      this.child = child;
+    }
+
+    /**
+     * Removes all cached data.
+     */
+    public void clear() {
+      cache.clear();
+    }
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      final ArrayList tokens = (ArrayList) cache.get(fieldName);
+      if (tokens == null) { // not yet cached
+        final ArrayList tokens2 = new ArrayList();
+        cache.put(fieldName, tokens2);
+        return new TokenFilter(child.tokenStream(fieldName, reader)) {
+          public Token next() throws IOException {
+            Token token = input.next(); // from filter super class
+            if (token != null) tokens2.add(token);
+            return token;
+          }
+        };
+      } else { // already cached
+        return new TokenStream() {
+          private Iterator iter = tokens.iterator();
+          public Token next() {
+            if (!iter.hasNext()) return null;
+            return (Token) iter.next();
+          }
+        };
+      }
+    }
+  }
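
For context, a minimal usage sketch of the new public class. This assumes the Lucene 2.x contrib API of this era, where AnalyzerUtil lives in org.apache.lucene.index.memory; the index paths, field name, and text below are made up for illustration:

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.memory.AnalyzerUtil;

public class TokenCachingExample {

  public static void main(String[] args) throws IOException {
    // Wrap an expensive child analyzer; each field's tokens are recorded on
    // the first pass and replayed on later passes for the same field name.
    AnalyzerUtil.TokenCachingAnalyzer analyzer =
        new AnalyzerUtil.TokenCachingAnalyzer(new StandardAnalyzer());

    Document doc = new Document();
    doc.add(new Field("content", "some expensive-to-analyze text", // illustrative
        Field.Store.YES, Field.Index.TOKENIZED));

    // Add the same document to two indexes; text analysis runs only once.
    IndexWriter writerA = new IndexWriter("/tmp/indexA", analyzer, true);
    writerA.addDocument(doc);
    writerA.close();

    IndexWriter writerB = new IndexWriter("/tmp/indexB", analyzer, true);
    writerB.addDocument(doc); // cached tokens are replayed here
    writerB.close();

    // Per the javadoc caveat: clear the cache before reusing this
    // analyzer instance for a different document.
    analyzer.clear();
  }
}

Promoting the anonymous class to a public named class is what makes the clear() call above possible: the previous getTokenCachingAnalyzer(child) factory returned a plain Analyzer with no way to reset the cache between documents.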