mirror of https://github.com/apache/lucene.git
added clear() method to TokenCachingAnalyzer, changed anonymous class to public class
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@479699 13f79535-47bb-0310-9956-ffa450edef68
parent bd76b754b0
commit ad49369d3d
@@ -203,57 +203,74 @@ public class AnalyzerUtil {
 
   /**
-   * Returns an analyzer wrapper that caches all tokens generated by the underlying child analyzer's
-   * token stream, and delivers those cached tokens on subsequent matching calls to
-   * <code>tokenStream(String fieldName, Reader reader)</code>.
+   * Analyzer wrapper that caches all tokens generated by the underlying child analyzer's
+   * token streams, and delivers those cached tokens on subsequent calls to
+   * <code>tokenStream(String fieldName, Reader reader)</code>,
+   * if the fieldName has been seen before, altogether ignoring the Reader parameter.
    * <p>
    * If Analyzer / TokenFilter chains are expensive in terms of I/O or CPU, such caching can
    * help improve performance if the same document is added to multiple Lucene indexes,
    * because the text analysis phase need not be performed more than once.
    * <p>
    * Caveats:
-   * 1) Caching the tokens of large Lucene documents can lead to out of memory exceptions.
-   * 2) The Token instances delivered by the underlying child analyzer must be immutable.
-   *
-   * @param child
-   *            the underlying child analyzer
-   * @return a new analyzer
+   * <ul>
+   * <li>Caching the tokens of large Lucene documents can lead to out of memory exceptions.</li>
+   * <li>The Token instances delivered by the underlying child analyzer must be immutable.</li>
+   * <li>A caching analyzer instance must not be used for more than one document, unless
+   * <code>clear()</code> is called before each new document.</li>
+   * </ul>
    */
-  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {
-
-    if (child == null)
-      throw new IllegalArgumentException("child analyzer must not be null");
-
-    return new Analyzer() {
-
-      private final HashMap cache = new HashMap();
-
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        final ArrayList tokens = (ArrayList) cache.get(fieldName);
-        if (tokens == null) { // not yet cached
-          final ArrayList tokens2 = new ArrayList();
-          cache.put(fieldName, tokens2);
-          return new TokenFilter(child.tokenStream(fieldName, reader)) {
-            public Token next() throws IOException {
-              Token token = input.next(); // from filter super class
-              if (token != null) tokens2.add(token);
-              return token;
-            }
-          };
-        } else { // already cached
-          return new TokenStream() {
-            private Iterator iter = tokens.iterator();
-            public Token next() {
-              if (!iter.hasNext()) return null;
-              return (Token) iter.next();
-            }
-          };
-        }
-      }
-    };
-  }
+  public static class TokenCachingAnalyzer extends Analyzer {
+
+    private final Analyzer child;
+    private final HashMap cache = new HashMap();
+
+    /**
+     * Creates a new caching analyzer that wraps the given underlying child analyzer.
+     *
+     * @param child
+     *            the underlying child analyzer
+     */
+    public TokenCachingAnalyzer(Analyzer child) {
+      if (child == null)
+        throw new IllegalArgumentException("child analyzer must not be null");
+      this.child = child;
+    }
+
+    /**
+     * Removes all cached data.
+     */
+    public void clear() {
+      cache.clear();
+    }
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      final ArrayList tokens = (ArrayList) cache.get(fieldName);
+      if (tokens == null) { // not yet cached
+        final ArrayList tokens2 = new ArrayList();
+        cache.put(fieldName, tokens2);
+        return new TokenFilter(child.tokenStream(fieldName, reader)) {
+          public Token next() throws IOException {
+            Token token = input.next(); // from filter super class
+            if (token != null) tokens2.add(token);
+            return token;
+          }
+        };
+      } else { // already cached
+        return new TokenStream() {
+          private Iterator iter = tokens.iterator();
+          public Token next() {
+            if (!iter.hasNext()) return null;
+            return (Token) iter.next();
+          }
+        };
+      }
+    }
+  }
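
For context, a minimal usage sketch of the new public class. This assumes the Lucene 2.x contrib API of this era, where AnalyzerUtil lives in org.apache.lucene.index.memory; the index paths, field name, and text below are made up for illustration:

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.memory.AnalyzerUtil;

public class TokenCachingExample {

  public static void main(String[] args) throws IOException {
    // Wrap an expensive child analyzer; each field's tokens are recorded on
    // the first pass and replayed on later passes for the same field name.
    AnalyzerUtil.TokenCachingAnalyzer analyzer =
        new AnalyzerUtil.TokenCachingAnalyzer(new StandardAnalyzer());

    Document doc = new Document();
    doc.add(new Field("content", "some expensive-to-analyze text", // illustrative
        Field.Store.YES, Field.Index.TOKENIZED));

    // Add the same document to two indexes; text analysis runs only once.
    IndexWriter writerA = new IndexWriter("/tmp/indexA", analyzer, true);
    writerA.addDocument(doc);
    writerA.close();

    IndexWriter writerB = new IndexWriter("/tmp/indexB", analyzer, true);
    writerB.addDocument(doc); // cached tokens are replayed here
    writerB.close();

    // Per the javadoc caveat: clear the cache before reusing this
    // analyzer instance for a different document.
    analyzer.clear();
  }
}

Promoting the anonymous class to a public named class is what makes the clear() call above possible: the previous getTokenCachingAnalyzer(child) factory returned a plain Analyzer with no way to reset the cache between documents.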