LUCENE-1796: Speed up repeated TokenStream init by caching isMethodOverridden results. Also speed up AttributeSource.clearAttributes().

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@802930 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2009-08-10 21:34:46 +00:00
parent 3aed0fd54b
commit 3c5de6bb35
6 changed files with 128 additions and 62 deletions

View File

@ -53,9 +53,9 @@ public abstract class CharTokenizer extends Tokenizer {
}
public final boolean incrementToken() throws IOException {
clearAttributes();
int length = 0;
int start = bufferIndex;
termAtt.clear();
char[] buffer = termAtt.termBuffer();
while (true) {

View File

@ -44,13 +44,13 @@ public class KeywordTokenizer extends Tokenizer {
this.done = false;
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
termAtt.resizeTermBuffer(bufferSize);
}
public final boolean incrementToken() throws IOException {
if (!done) {
done = true;
int upto = 0;
termAtt.clear();
char[] buffer = termAtt.termBuffer();
while (true) {
final int length = input.read(buffer, upto, buffer.length-upto);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis;
*/
import java.io.IOException;
import java.util.IdentityHashMap;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -74,12 +75,6 @@ public abstract class TokenStream extends AttributeSource {
private static final AttributeFactory DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY
= new TokenWrapperAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
/** @deprecated Remove this when old API is removed! */
private static final Class[] METHOD_NO_PARAMS = new Class[0];
/** @deprecated Remove this when old API is removed! */
private static final Class[] METHOD_TOKEN_PARAM = new Class[]{Token.class};
/** @deprecated Remove this when old API is removed! */
private final TokenWrapper tokenWrapper;
@ -87,21 +82,46 @@ public abstract class TokenStream extends AttributeSource {
private static boolean onlyUseNewAPI = false;
/** @deprecated Remove this when old API is removed! */
private final boolean
hasIncrementToken = isMethodOverridden("incrementToken", METHOD_NO_PARAMS),
hasReusableNext = onlyUseNewAPI ? false : isMethodOverridden("next", METHOD_TOKEN_PARAM),
hasNext = onlyUseNewAPI ? false : isMethodOverridden("next", METHOD_NO_PARAMS);
private final MethodSupport supportedMethods = getSupportedMethods(this.getClass());
/** @deprecated Remove this when old API is removed! */
private static final class MethodSupport {
final boolean hasIncrementToken, hasReusableNext, hasNext;
MethodSupport(Class clazz) {
hasIncrementToken = isMethodOverridden(clazz, "incrementToken", METHOD_NO_PARAMS);
hasReusableNext = isMethodOverridden(clazz, "next", METHOD_TOKEN_PARAM);
hasNext = isMethodOverridden(clazz, "next", METHOD_NO_PARAMS);
}
private static boolean isMethodOverridden(Class clazz, String name, Class[] params) {
try {
return clazz.getMethod(name, params).getDeclaringClass() != TokenStream.class;
} catch (NoSuchMethodException e) {
// should not happen
throw new RuntimeException(e);
}
}
private static final Class[] METHOD_NO_PARAMS = new Class[0];
private static final Class[] METHOD_TOKEN_PARAM = new Class[]{Token.class};
}
/** @deprecated Remove this when old API is removed! */
private static final IdentityHashMap/*<Class<? extends TokenStream>,MethodSupport>*/ knownMethodSupport = new IdentityHashMap();
/** @deprecated Remove this when old API is removed! */
private boolean isMethodOverridden(String name, Class[] params) {
try {
return this.getClass().getMethod(name, params).getDeclaringClass() != TokenStream.class;
} catch (NoSuchMethodException e) {
// should not happen
throw new RuntimeException(e);
private static MethodSupport getSupportedMethods(Class clazz) {
MethodSupport supportedMethods;
synchronized(knownMethodSupport) {
supportedMethods = (MethodSupport) knownMethodSupport.get(clazz);
if (supportedMethods == null) {
knownMethodSupport.put(clazz, supportedMethods = new MethodSupport(clazz));
}
}
return supportedMethods;
}
/** @deprecated Remove this when old API is removed! */
private static final class TokenWrapperAttributeFactory extends AttributeFactory {
private final AttributeFactory delegate;
@ -193,12 +213,12 @@ public abstract class TokenStream extends AttributeSource {
/** @deprecated Remove this when old API is removed! */
private void check() {
if (onlyUseNewAPI && !hasIncrementToken) {
if (onlyUseNewAPI && !supportedMethods.hasIncrementToken) {
throw new UnsupportedOperationException(getClass().getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.");
}
// a TokenStream subclass must at least implement one of the methods!
if (!(hasIncrementToken || hasNext || hasReusableNext)) {
if (!(supportedMethods.hasIncrementToken || supportedMethods.hasNext || supportedMethods.hasReusableNext)) {
throw new UnsupportedOperationException(getClass().getName()+" does not implement any of incrementToken(), next(Token), next().");
}
}
@ -257,10 +277,10 @@ public abstract class TokenStream extends AttributeSource {
assert !onlyUseNewAPI && tokenWrapper != null;
final Token token;
if (hasReusableNext) {
if (supportedMethods.hasReusableNext) {
token = next(tokenWrapper.delegate);
} else {
assert hasNext;
assert supportedMethods.hasNext;
token = next();
}
if (token == null) return false;
@ -321,11 +341,11 @@ public abstract class TokenStream extends AttributeSource {
if (onlyUseNewAPI)
throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");
if (hasIncrementToken) {
if (supportedMethods.hasIncrementToken) {
tokenWrapper.delegate = reusableToken;
return incrementToken() ? tokenWrapper.delegate : null;
} else {
assert hasNext;
assert supportedMethods.hasNext;
final Token token = next();
if (token == null) return null;
tokenWrapper.delegate = token;
@ -344,10 +364,10 @@ public abstract class TokenStream extends AttributeSource {
if (onlyUseNewAPI)
throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");
if (hasIncrementToken) {
if (supportedMethods.hasIncrementToken) {
return incrementToken() ? ((Token) tokenWrapper.delegate.clone()) : null;
} else {
assert hasReusableNext;
assert supportedMethods.hasReusableNext;
final Token token = next(tokenWrapper.delegate);
if (token == null) return null;
tokenWrapper.delegate = token;

View File

@ -158,7 +158,6 @@ public class StandardTokenizer extends Tokenizer {
}
if (scanner.yylength() <= maxTokenLength) {
termAtt.clear();
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();

View File

@ -17,8 +17,9 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.util.Iterator;
import java.util.Collections;
import java.util.NoSuchElementException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.IdentityHashMap;
import java.util.LinkedList;
@ -137,7 +138,33 @@ public class AttributeSource {
* if one instance implements more than one Attribute interface.
*/
public Iterator/*<AttributeImpl>*/ getAttributeImplsIterator() {
return Collections.unmodifiableCollection(attributeImpls.values()).iterator();
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
}
final State initState = currentState;
return new Iterator() {
private State state = initState;
public void remove() {
throw new UnsupportedOperationException();
}
public Object next() {
if (state == null)
throw new NoSuchElementException();
final AttributeImpl att = state.attribute;
state = state.next;
return att;
}
public boolean hasNext() {
return state != null;
}
};
} else {
return Collections.EMPTY_SET.iterator();
}
}
/** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
@ -225,17 +252,6 @@ public class AttributeSource {
return att;
}
/**
* Resets all Attributes in this AttributeSource by calling
* {@link AttributeImpl#clear()} on each Attribute implementation.
*/
public void clearAttributes() {
Iterator it = getAttributeImplsIterator();
while (it.hasNext()) {
((AttributeImpl) it.next()).clear();
}
}
/**
* This class holds the state of an AttributeSource.
* @see #captureState
@ -262,7 +278,7 @@ public class AttributeSource {
private void computeCurrentState() {
currentState = new State();
State c = currentState;
Iterator it = getAttributeImplsIterator();
Iterator it = attributeImpls.values().iterator();
c.attribute = (AttributeImpl) it.next();
while (it.hasNext()) {
c.next = new State();
@ -271,6 +287,21 @@ public class AttributeSource {
}
}
/**
* Resets all Attributes in this AttributeSource by calling
* {@link AttributeImpl#clear()} on each Attribute implementation.
*/
public void clearAttributes() {
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
}
for (State state = currentState; state != null; state = state.next) {
state.attribute.clear();
}
}
}
/**
* Captures the state of all Attributes. The return value can be passed to
* {@link #restoreState} to restore the state of this or another AttributeSource.
@ -316,9 +347,11 @@ public class AttributeSource {
public int hashCode() {
int code = 0;
if (hasAttributes()) {
Iterator it = getAttributeImplsIterator();
while (it.hasNext()) {
code = code * 31 + it.next().hashCode();
if (currentState == null) {
computeCurrentState();
}
for (State state = currentState; state != null; state = state.next) {
code = code * 31 + state.attribute.hashCode();
}
}
@ -343,14 +376,20 @@ public class AttributeSource {
}
// it is only equal if all attribute impls are the same in the same order
Iterator thisIt = this.getAttributeImplsIterator();
Iterator otherIt = other.getAttributeImplsIterator();
while (thisIt.hasNext() && otherIt.hasNext()) {
AttributeImpl thisAtt = (AttributeImpl) thisIt.next();
AttributeImpl otherAtt = (AttributeImpl) otherIt.next();
if (otherAtt.getClass() != thisAtt.getClass() || !otherAtt.equals(thisAtt)) {
if (this.currentState == null) {
this.computeCurrentState();
}
State thisState = this.currentState;
if (other.currentState == null) {
other.computeCurrentState();
}
State otherState = other.currentState;
while (thisState != null && otherState != null) {
if (otherState.attribute.getClass() != thisState.attribute.getClass() || !otherState.attribute.equals(thisState.attribute)) {
return false;
}
thisState = thisState.next;
otherState = otherState.next;
}
return true;
} else {
@ -365,13 +404,12 @@ public class AttributeSource {
sb.append('(');
if (hasAttributes()) {
Iterator it = getAttributeImplsIterator();
if (it.hasNext()) {
sb.append(it.next().toString());
if (currentState == null) {
computeCurrentState();
}
while (it.hasNext()) {
sb.append(',');
sb.append(it.next().toString());
for (State state = currentState; state != null; state = state.next) {
if (state != currentState) sb.append(',');
sb.append(state.attribute.toString());
}
}
sb.append(')');
@ -387,10 +425,13 @@ public class AttributeSource {
AttributeSource clone = new AttributeSource(this.factory);
// first clone the impls
Iterator/*<AttributeImpl>*/ implIt = getAttributeImplsIterator();
while (implIt.hasNext()) {
AttributeImpl impl = (AttributeImpl) implIt.next();
clone.attributeImpls.put(impl.getClass(), impl.clone());
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
}
for (State state = currentState; state != null; state = state.next) {
clone.attributeImpls.put(state.attribute.getClass(), state.attribute.clone());
}
}
// now the interfaces

View File

@ -104,6 +104,12 @@ public class TestAttributeSource extends LuceneTestCase {
termAtt.setTermBuffer("TestTerm");
typeAtt.setType("TestType");
assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString());
Iterator it = src.getAttributeImplsIterator();
assertTrue("Iterator should have 2 attributes left", it.hasNext());
assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next());
assertTrue("Iterator should have 1 attributes left", it.hasNext());
assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next());
assertFalse("Iterator should have 0 attributes left", it.hasNext());
src = new AttributeSource();
src.addAttributeImpl(new Token());
@ -111,7 +117,7 @@ public class TestAttributeSource extends LuceneTestCase {
termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
assertTrue("TermAttribute should be implemented by Token", termAtt instanceof Token);
// get the Token attribute and check, that it is the only one
final Iterator it = src.getAttributeImplsIterator();
it = src.getAttributeImplsIterator();
Token tok = (Token) it.next();
assertFalse("There should be only one attribute implementation instance", it.hasNext());