mirror of https://github.com/apache/lucene.git
LUCENE-1796: Speed up repeated TokenStream init by caching isMethodOverridden results. Also speed up AttributeSource.clearAttributes().
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@802930 13f79535-47bb-0310-9956-ffa450edef68
commit 3c5de6bb35
parent 3aed0fd54b
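In outline, the TokenStream hunks below replace three per-instance reflection calls (isMethodOverridden checks for incrementToken(), next(Token) and next()) with a single per-class MethodSupport record cached in a static IdentityHashMap, so the reflective lookups run only the first time a given subclass is constructed. What follows is a minimal, self-contained sketch of that caching pattern; the enclosing class and the Token stub are placeholders rather than Lucene code, while the MethodSupport and getSupportedMethods names mirror the diff. Raw (pre-generics) collections are used on purpose to match the Java 1.4-compatible style of the code below.

import java.util.IdentityHashMap;

// Sketch only: per-class caching of reflection-based override checks.
abstract class ReflectionCacheSketch {

  /** Stand-in stub for org.apache.lucene.analysis.Token. */
  static class Token {}

  /** Records, for one concrete subclass, which of the three token methods it overrides. */
  private static final class MethodSupport {
    final boolean hasIncrementToken, hasReusableNext, hasNext;

    MethodSupport(Class clazz) {
      hasIncrementToken = isMethodOverridden(clazz, "incrementToken", new Class[0]);
      hasReusableNext = isMethodOverridden(clazz, "next", new Class[]{Token.class});
      hasNext = isMethodOverridden(clazz, "next", new Class[0]);
    }

    private static boolean isMethodOverridden(Class clazz, String name, Class[] params) {
      try {
        // overridden iff the most specific declaration sits below the base class
        return clazz.getMethod(name, params).getDeclaringClass() != ReflectionCacheSketch.class;
      } catch (NoSuchMethodException e) {
        throw new RuntimeException(e); // cannot happen: the base class declares all three
      }
    }
  }

  /** Maps concrete subclass -> MethodSupport; Class objects are canonical, so identity equality works. */
  private static final IdentityHashMap knownMethodSupport = new IdentityHashMap();

  private static MethodSupport getSupportedMethods(Class clazz) {
    synchronized (knownMethodSupport) {
      MethodSupport supported = (MethodSupport) knownMethodSupport.get(clazz);
      if (supported == null) {
        knownMethodSupport.put(clazz, supported = new MethodSupport(clazz));
      }
      return supported;
    }
  }

  // Reflection now runs once per subclass; every further instance pays only a map lookup.
  private final MethodSupport supportedMethods = getSupportedMethods(this.getClass());

  // The three methods whose overrides are being detected (bodies are irrelevant to the sketch).
  public boolean incrementToken() { return false; }
  public Token next(Token reusableToken) { return null; }
  public Token next() { return null; }
}

In the real class the cached flags then drive the old/new API bridging in check(), next(Token) and next(), as the later TokenStream hunks show.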
@@ -53,9 +53,9 @@ public abstract class CharTokenizer extends Tokenizer {
  }

  public final boolean incrementToken() throws IOException {
    clearAttributes();
    int length = 0;
    int start = bufferIndex;
    termAtt.clear();
    char[] buffer = termAtt.termBuffer();
    while (true) {
@@ -44,13 +44,13 @@ public class KeywordTokenizer extends Tokenizer {
    this.done = false;
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
    offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
    termAtt.resizeTermBuffer(bufferSize);
  }

  public final boolean incrementToken() throws IOException {
    if (!done) {
      done = true;
      int upto = 0;
      termAtt.clear();
      char[] buffer = termAtt.termBuffer();
      while (true) {
        final int length = input.read(buffer, upto, buffer.length-upto);
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis;
 */

import java.io.IOException;
import java.util.IdentityHashMap;

import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

@@ -74,12 +75,6 @@ public abstract class TokenStream extends AttributeSource {
  private static final AttributeFactory DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY
    = new TokenWrapperAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);

  /** @deprecated Remove this when old API is removed! */
  private static final Class[] METHOD_NO_PARAMS = new Class[0];

  /** @deprecated Remove this when old API is removed! */
  private static final Class[] METHOD_TOKEN_PARAM = new Class[]{Token.class};

  /** @deprecated Remove this when old API is removed! */
  private final TokenWrapper tokenWrapper;

@@ -87,21 +82,46 @@ public abstract class TokenStream extends AttributeSource {
  private static boolean onlyUseNewAPI = false;

  /** @deprecated Remove this when old API is removed! */
  private final boolean
    hasIncrementToken = isMethodOverridden("incrementToken", METHOD_NO_PARAMS),
    hasReusableNext = onlyUseNewAPI ? false : isMethodOverridden("next", METHOD_TOKEN_PARAM),
    hasNext = onlyUseNewAPI ? false : isMethodOverridden("next", METHOD_NO_PARAMS);
  private final MethodSupport supportedMethods = getSupportedMethods(this.getClass());

  /** @deprecated Remove this when old API is removed! */
  private static final class MethodSupport {
    final boolean hasIncrementToken, hasReusableNext, hasNext;

    MethodSupport(Class clazz) {
      hasIncrementToken = isMethodOverridden(clazz, "incrementToken", METHOD_NO_PARAMS);
      hasReusableNext = isMethodOverridden(clazz, "next", METHOD_TOKEN_PARAM);
      hasNext = isMethodOverridden(clazz, "next", METHOD_NO_PARAMS);
    }

    private static boolean isMethodOverridden(Class clazz, String name, Class[] params) {
      try {
        return clazz.getMethod(name, params).getDeclaringClass() != TokenStream.class;
      } catch (NoSuchMethodException e) {
        // should not happen
        throw new RuntimeException(e);
      }
    }

    private static final Class[] METHOD_NO_PARAMS = new Class[0];
    private static final Class[] METHOD_TOKEN_PARAM = new Class[]{Token.class};
  }

  /** @deprecated Remove this when old API is removed! */
  private static final IdentityHashMap/*<Class<? extends TokenStream>,MethodSupport>*/ knownMethodSupport = new IdentityHashMap();

  /** @deprecated Remove this when old API is removed! */
  private boolean isMethodOverridden(String name, Class[] params) {
    try {
      return this.getClass().getMethod(name, params).getDeclaringClass() != TokenStream.class;
    } catch (NoSuchMethodException e) {
      // should not happen
      throw new RuntimeException(e);
  private static MethodSupport getSupportedMethods(Class clazz) {
    MethodSupport supportedMethods;
    synchronized(knownMethodSupport) {
      supportedMethods = (MethodSupport) knownMethodSupport.get(clazz);
      if (supportedMethods == null) {
        knownMethodSupport.put(clazz, supportedMethods = new MethodSupport(clazz));
      }
    }
    return supportedMethods;
  }

  /** @deprecated Remove this when old API is removed! */
  private static final class TokenWrapperAttributeFactory extends AttributeFactory {
    private final AttributeFactory delegate;

@@ -193,12 +213,12 @@ public abstract class TokenStream extends AttributeSource {

  /** @deprecated Remove this when old API is removed! */
  private void check() {
    if (onlyUseNewAPI && !hasIncrementToken) {
    if (onlyUseNewAPI && !supportedMethods.hasIncrementToken) {
      throw new UnsupportedOperationException(getClass().getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.");
    }

    // a TokenStream subclass must at least implement one of the methods!
    if (!(hasIncrementToken || hasNext || hasReusableNext)) {
    if (!(supportedMethods.hasIncrementToken || supportedMethods.hasNext || supportedMethods.hasReusableNext)) {
      throw new UnsupportedOperationException(getClass().getName()+" does not implement any of incrementToken(), next(Token), next().");
    }
  }

@@ -257,10 +277,10 @@ public abstract class TokenStream extends AttributeSource {
    assert !onlyUseNewAPI && tokenWrapper != null;

    final Token token;
    if (hasReusableNext) {
    if (supportedMethods.hasReusableNext) {
      token = next(tokenWrapper.delegate);
    } else {
      assert hasNext;
      assert supportedMethods.hasNext;
      token = next();
    }
    if (token == null) return false;

@@ -321,11 +341,11 @@ public abstract class TokenStream extends AttributeSource {
    if (onlyUseNewAPI)
      throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");

    if (hasIncrementToken) {
    if (supportedMethods.hasIncrementToken) {
      tokenWrapper.delegate = reusableToken;
      return incrementToken() ? tokenWrapper.delegate : null;
    } else {
      assert hasNext;
      assert supportedMethods.hasNext;
      final Token token = next();
      if (token == null) return null;
      tokenWrapper.delegate = token;

@@ -344,10 +364,10 @@ public abstract class TokenStream extends AttributeSource {
    if (onlyUseNewAPI)
      throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");

    if (hasIncrementToken) {
    if (supportedMethods.hasIncrementToken) {
      return incrementToken() ? ((Token) tokenWrapper.delegate.clone()) : null;
    } else {
      assert hasReusableNext;
      assert supportedMethods.hasReusableNext;
      final Token token = next(tokenWrapper.delegate);
      if (token == null) return null;
      tokenWrapper.delegate = token;
@@ -158,7 +158,6 @@ public class StandardTokenizer extends Tokenizer {
      }

      if (scanner.yylength() <= maxTokenLength) {
        termAtt.clear();
        posIncrAtt.setPositionIncrement(posIncr);
        scanner.getText(termAtt);
        final int start = scanner.yychar();
@@ -17,8 +17,9 @@ package org.apache.lucene.util;
 * limitations under the License.
 */

import java.util.Iterator;
import java.util.Collections;
import java.util.NoSuchElementException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.IdentityHashMap;
import java.util.LinkedList;

@@ -137,7 +138,33 @@ public class AttributeSource {
   * if one instance implements more than one Attribute interface.
   */
  public Iterator/*<AttributeImpl>*/ getAttributeImplsIterator() {
    return Collections.unmodifiableCollection(attributeImpls.values()).iterator();
    if (hasAttributes()) {
      if (currentState == null) {
        computeCurrentState();
      }
      final State initState = currentState;
      return new Iterator() {
        private State state = initState;

        public void remove() {
          throw new UnsupportedOperationException();
        }

        public Object next() {
          if (state == null)
            throw new NoSuchElementException();
          final AttributeImpl att = state.attribute;
          state = state.next;
          return att;
        }

        public boolean hasNext() {
          return state != null;
        }
      };
    } else {
      return Collections.EMPTY_SET.iterator();
    }
  }

  /** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */

@@ -225,17 +252,6 @@ public class AttributeSource {
    return att;
  }

  /**
   * Resets all Attributes in this AttributeSource by calling
   * {@link AttributeImpl#clear()} on each Attribute implementation.
   */
  public void clearAttributes() {
    Iterator it = getAttributeImplsIterator();
    while (it.hasNext()) {
      ((AttributeImpl) it.next()).clear();
    }
  }

  /**
   * This class holds the state of an AttributeSource.
   * @see #captureState

@@ -262,7 +278,7 @@ public class AttributeSource {
  private void computeCurrentState() {
    currentState = new State();
    State c = currentState;
    Iterator it = getAttributeImplsIterator();
    Iterator it = attributeImpls.values().iterator();
    c.attribute = (AttributeImpl) it.next();
    while (it.hasNext()) {
      c.next = new State();

@@ -271,6 +287,21 @@ public class AttributeSource {
    }
  }

  /**
   * Resets all Attributes in this AttributeSource by calling
   * {@link AttributeImpl#clear()} on each Attribute implementation.
   */
  public void clearAttributes() {
    if (hasAttributes()) {
      if (currentState == null) {
        computeCurrentState();
      }
      for (State state = currentState; state != null; state = state.next) {
        state.attribute.clear();
      }
    }
  }

  /**
   * Captures the state of all Attributes. The return value can be passed to
   * {@link #restoreState} to restore the state of this or another AttributeSource.

@@ -316,9 +347,11 @@ public class AttributeSource {
  public int hashCode() {
    int code = 0;
    if (hasAttributes()) {
      Iterator it = getAttributeImplsIterator();
      while (it.hasNext()) {
        code = code * 31 + it.next().hashCode();
      if (currentState == null) {
        computeCurrentState();
      }
      for (State state = currentState; state != null; state = state.next) {
        code = code * 31 + state.attribute.hashCode();
      }
    }

@@ -343,14 +376,20 @@ public class AttributeSource {
        }

        // it is only equal if all attribute impls are the same in the same order
        Iterator thisIt = this.getAttributeImplsIterator();
        Iterator otherIt = other.getAttributeImplsIterator();
        while (thisIt.hasNext() && otherIt.hasNext()) {
          AttributeImpl thisAtt = (AttributeImpl) thisIt.next();
          AttributeImpl otherAtt = (AttributeImpl) otherIt.next();
          if (otherAtt.getClass() != thisAtt.getClass() || !otherAtt.equals(thisAtt)) {
        if (this.currentState == null) {
          this.computeCurrentState();
        }
        State thisState = this.currentState;
        if (other.currentState == null) {
          other.computeCurrentState();
        }
        State otherState = other.currentState;
        while (thisState != null && otherState != null) {
          if (otherState.attribute.getClass() != thisState.attribute.getClass() || !otherState.attribute.equals(thisState.attribute)) {
            return false;
          }
          thisState = thisState.next;
          otherState = otherState.next;
        }
        return true;
      } else {

@@ -365,13 +404,12 @@ public class AttributeSource {
    sb.append('(');

    if (hasAttributes()) {
      Iterator it = getAttributeImplsIterator();
      if (it.hasNext()) {
        sb.append(it.next().toString());
      if (currentState == null) {
        computeCurrentState();
      }
      while (it.hasNext()) {
        sb.append(',');
        sb.append(it.next().toString());
      for (State state = currentState; state != null; state = state.next) {
        if (state != currentState) sb.append(',');
        sb.append(state.attribute.toString());
      }
    }
    sb.append(')');

@@ -387,10 +425,13 @@ public class AttributeSource {
    AttributeSource clone = new AttributeSource(this.factory);

    // first clone the impls
    Iterator/*<AttributeImpl>*/ implIt = getAttributeImplsIterator();
    while (implIt.hasNext()) {
      AttributeImpl impl = (AttributeImpl) implIt.next();
      clone.attributeImpls.put(impl.getClass(), impl.clone());
    if (hasAttributes()) {
      if (currentState == null) {
        computeCurrentState();
      }
      for (State state = currentState; state != null; state = state.next) {
        clone.attributeImpls.put(state.attribute.getClass(), state.attribute.clone());
      }
    }

    // now the interfaces
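The AttributeSource hunks above rewrite clearAttributes(), hashCode(), equals(), toString() and clone() to walk the cached State linked list directly instead of allocating an iterator over attributeImpls.values(). clearAttributes() in particular becomes cheap enough to call once per token, which is why the CharTokenizer, KeywordTokenizer and StandardTokenizer hunks swap per-attribute clear() calls for a single clearAttributes(). The following is an illustrative sketch of a tokenizer written against that pattern, assuming the Lucene 2.9-era API visible in the diff; the class name and its emit-the-whole-input-as-one-token behavior are placeholders, not part of this commit.

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

// Sketch only: follows the pattern the diff applies to CharTokenizer/KeywordTokenizer --
// reset all registered attributes with one clearAttributes() call per token instead of
// clearing each attribute by hand.
public final class WholeInputTokenizer extends Tokenizer {
  private final TermAttribute termAtt;
  private boolean done = false;

  public WholeInputTokenizer(Reader input) {
    super(input);
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
  }

  public boolean incrementToken() throws IOException {
    if (done) return false;
    done = true;
    clearAttributes(); // walks the cached State list; no iterator is allocated
    char[] buffer = termAtt.termBuffer();
    int upto = 0;
    while (true) {
      final int length = input.read(buffer, upto, buffer.length - upto);
      if (length == -1) break;
      upto += length;
      if (upto == buffer.length) {
        buffer = termAtt.resizeTermBuffer(1 + buffer.length);
      }
    }
    termAtt.setTermLength(upto);
    return upto > 0;
  }
}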
@@ -104,6 +104,12 @@ public class TestAttributeSource extends LuceneTestCase {
    termAtt.setTermBuffer("TestTerm");
    typeAtt.setType("TestType");
    assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString());
    Iterator it = src.getAttributeImplsIterator();
    assertTrue("Iterator should have 2 attributes left", it.hasNext());
    assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next());
    assertTrue("Iterator should have 1 attributes left", it.hasNext());
    assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next());
    assertFalse("Iterator should have 0 attributes left", it.hasNext());

    src = new AttributeSource();
    src.addAttributeImpl(new Token());

@@ -111,7 +117,7 @@ public class TestAttributeSource extends LuceneTestCase {
    termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
    assertTrue("TermAttribute should be implemented by Token", termAtt instanceof Token);
    // get the Token attribute and check, that it is the only one
    final Iterator it = src.getAttributeImplsIterator();
    it = src.getAttributeImplsIterator();
    Token tok = (Token) it.next();
    assertFalse("There should be only one attribute implementation instance", it.hasNext());