LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the contents of AttributeImpl and AttributeSource using a well-defined API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1061039 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2011-01-19 22:41:16 +00:00
parent 6a9f686f58
commit 460fa90564
23 changed files with 657 additions and 677 deletions

View File

@ -362,9 +362,9 @@ Changes in backwards compatibility policy
* LUCENE-2302: The new interface for term attributes, CharTermAttribute, * LUCENE-2302: The new interface for term attributes, CharTermAttribute,
now implements CharSequence. This requires the toString() methods of now implements CharSequence. This requires the toString() methods of
CharTermAttribute, deprecated TermAttribute, and Token to return only CharTermAttribute, deprecated TermAttribute, and Token to return only
the term text and no other attribute contents. the term text and no other attribute contents. LUCENE-2374 implements
TODO: Point to new attribute inspection API coming with LUCENE-2374. an attribute reflection API to no longer rely on toString() for attribute
(Uwe Schindler, Robert Muir) inspection. (Uwe Schindler, Robert Muir)
* LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer, * LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer,
PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed
@ -592,6 +592,23 @@ API Changes
to ensure that the norm is encoded with your Similarity. to ensure that the norm is encoded with your Similarity.
(Robert Muir, Mike McCandless) (Robert Muir, Mike McCandless)
* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the
contents of AttributeImpl and AttributeSource using a well-defined API.
This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes
in a structured way.
There are also some backwards incompatible changes in toString() output,
as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute
leading to changed toString() return values. The new API makes it possible to get a
string representation in a well-defined way using a new method
reflectAsString(). For backwards compatibility reasons, when toString()
was implemented by implementation subclasses, the default implementation of
AttributeImpl.reflectWith() uses toString()'s output instead to report the
Attribute's properties. Otherwise, reflectWith() uses Java's reflection
(like toString() did before) to get the attribute properties.
In addition, the mandatory equals() and hashCode() are no longer required
for AttributeImpls, but can still be provided (if needed).
(Uwe Schindler)
Bug fixes Bug fixes
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on * LUCENE-2249: ParallelMultiSearcher should shut down thread pool on

View File

@ -328,3 +328,10 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
* LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong * LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong
are final. If you subclassed this code before to encode variable-length are final. If you subclassed this code before to encode variable-length
integers in some specialized way, use the Codec API instead. integers in some specialized way, use the Codec API instead.
* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct
reflection of AttributeImpl instances, where the reflection was done using deprecated
toString() parsing, you now have to override reflectWith() to customize output.
toString() is no longer implemented by AttributeImpl, so if you have overridden
toString(), port your customization over to reflectWith(). reflectAsString() would
then return what toString() did before.

View File

@ -0,0 +1,67 @@
package org.apache.lucene.queryParser.standard.config;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
public class TestAttributes extends LuceneTestCase {

  /**
   * Builds the key under which a reflected property is expected:
   * the attribute interface's class name, a '#', then the property name.
   */
  private static String key(Class<?> attClass, String property) {
    return attClass.getName() + "#" + property;
  }

  /**
   * Uses the attribute reflection API to check that each freshly constructed
   * config attribute implementation reports the expected default value(s).
   */
  public void testAttributes() {
    _TestUtil.assertAttributeReflection(new AllowLeadingWildcardAttributeImpl(),
      Collections.singletonMap(key(AllowLeadingWildcardAttribute.class, "allowLeadingWildcard"), false));

    _TestUtil.assertAttributeReflection(new AnalyzerAttributeImpl(),
      Collections.singletonMap(key(AnalyzerAttribute.class, "analyzer"), null));

    _TestUtil.assertAttributeReflection(new BoostAttributeImpl(),
      Collections.singletonMap(key(BoostAttribute.class, "boost"), 1.0f));

    _TestUtil.assertAttributeReflection(new DateResolutionAttributeImpl(),
      Collections.singletonMap(key(DateResolutionAttribute.class, "dateResolution"), null));

    _TestUtil.assertAttributeReflection(new DefaultOperatorAttributeImpl(),
      Collections.singletonMap(key(DefaultOperatorAttribute.class, "operator"), DefaultOperatorAttribute.Operator.OR));

    _TestUtil.assertAttributeReflection(new DefaultPhraseSlopAttributeImpl(),
      Collections.singletonMap(key(DefaultPhraseSlopAttribute.class, "defaultPhraseSlop"), 0));

    _TestUtil.assertAttributeReflection(new FieldBoostMapAttributeImpl(),
      Collections.singletonMap(key(FieldBoostMapAttribute.class, "boosts"), Collections.emptyMap()));

    _TestUtil.assertAttributeReflection(new FieldDateResolutionMapAttributeImpl(),
      Collections.singletonMap(key(FieldDateResolutionMapAttribute.class, "dateRes"), Collections.emptyMap()));

    // FuzzyAttribute reports two properties, so a regular map is needed here
    final HashMap<String,Object> fuzzyDefaults = new HashMap<String,Object>();
    fuzzyDefaults.put(key(FuzzyAttribute.class, "prefixLength"), FuzzyQuery.defaultPrefixLength);
    fuzzyDefaults.put(key(FuzzyAttribute.class, "minSimilarity"), FuzzyQuery.defaultMinSimilarity);
    _TestUtil.assertAttributeReflection(new FuzzyAttributeImpl(), fuzzyDefaults);

    _TestUtil.assertAttributeReflection(new LocaleAttributeImpl(),
      Collections.singletonMap(key(LocaleAttribute.class, "locale"), Locale.getDefault()));

    _TestUtil.assertAttributeReflection(new LowercaseExpandedTermsAttributeImpl(),
      Collections.singletonMap(key(LowercaseExpandedTermsAttribute.class, "lowercaseExpandedTerms"), true));

    _TestUtil.assertAttributeReflection(new MultiFieldAttributeImpl(),
      Collections.singletonMap(key(MultiFieldAttribute.class, "fields"), null));

    _TestUtil.assertAttributeReflection(new MultiTermRewriteMethodAttributeImpl(),
      Collections.singletonMap(key(MultiTermRewriteMethodAttribute.class, "multiTermRewriteMethod"), MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));

    _TestUtil.assertAttributeReflection(new PositionIncrementsAttributeImpl(),
      Collections.singletonMap(key(PositionIncrementsAttribute.class, "positionIncrementsEnabled"), false));

    _TestUtil.assertAttributeReflection(new RangeCollatorAttributeImpl(),
      Collections.singletonMap(key(RangeCollatorAttribute.class, "rangeCollator"), null));
  }
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.document.NumericField; // for javadocs
@ -170,13 +171,14 @@ public final class NumericTokenStream extends TokenStream {
} }
@Override @Override
public boolean equals(Object other) { public void reflectWith(AttributeReflector reflector) {
return other == this; final BytesRef bytes = new BytesRef();
} toBytesRef(bytes);
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
@Override reflector.reflect(NumericTermAttribute.class, "shift", shift);
public int hashCode() { reflector.reflect(NumericTermAttribute.class, "rawValue", rawValue);
return System.identityHashCode(this); reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
reflector.reflect(NumericTermAttribute.class, "precisionStep", precisionStep);
} }
@Override @Override

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** /**
A Token is an occurrence of a term from the text of a field. It consists of A Token is an occurrence of a term from the text of a field. It consists of
@ -588,6 +589,17 @@ public class Token extends CharTermAttributeImpl
} }
} }
/**
 * Reports the properties of this Token to the given reflector.
 *
 * <p>The properties reported by the superclass implementation come first,
 * followed by the start/end offset, position increment, payload, flags and
 * type, each filed under the Attribute interface it belongs to.
 *
 * @param reflector receiver of the (attribute class, key, value) triples
 */
@Override
public void reflectWith(AttributeReflector reflector) {
// let the superclass report its own properties first
super.reflectWith(reflector);
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PayloadAttribute.class, "payload", payload);
reflector.reflect(FlagsAttribute.class, "flags", flags);
reflector.reflect(TypeAttribute.class, "type", type);
}
/** Convenience factory that returns <code>Token</code> as implementation for the basic /** Convenience factory that returns <code>Token</code> as implementation for the basic
* attributes and return the default impl (with &quot;Impl&quot; appended) for all other * attributes and return the default impl (with &quot;Impl&quot; appended) for all other
* attributes. * attributes.

View File

@ -23,6 +23,7 @@ import java.nio.CharBuffer;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.UnicodeUtil;
@ -243,6 +244,14 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
return new String(termBuffer, 0, termLength); return new String(termBuffer, 0, termLength);
} }
/**
 * Reports the properties of this attribute to the given reflector: the term
 * text (as returned by {@link #toString()}) under {@code CharTermAttribute},
 * and the result of {@code toBytesRef} under {@code TermToBytesRefAttribute}.
 *
 * @param reflector receiver of the (attribute class, key, value) triples
 */
@Override
public void reflectWith(AttributeReflector reflector) {
reflector.reflect(CharTermAttribute.class, "term", toString());
// fill a fresh BytesRef so the reported value is not shared with other callers
final BytesRef bytes = new BytesRef();
toBytesRef(bytes);
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
}
@Override @Override
public void copyTo(AttributeImpl target) { public void copyTo(AttributeImpl target) {
CharTermAttribute t = (CharTermAttribute) target; CharTermAttribute t = (CharTermAttribute) target;

View File

@ -38,20 +38,6 @@ public final class BoostAttributeImpl extends AttributeImpl implements BoostAttr
boost = 1.0f; boost = 1.0f;
} }
@Override
public boolean equals(Object other) {
if (this == other)
return true;
if (other instanceof BoostAttributeImpl)
return ((BoostAttributeImpl) other).boost == boost;
return false;
}
@Override
public int hashCode() {
return Float.floatToIntBits(boost);
}
@Override @Override
public void copyTo(AttributeImpl target) { public void copyTo(AttributeImpl target) {
((BoostAttribute) target).setBoost(boost); ((BoostAttribute) target).setBoost(boost);

View File

@ -49,25 +49,6 @@ public final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl imp
competitiveTerm = null; competitiveTerm = null;
} }
@Override
public boolean equals(Object other) {
if (this == other)
return true;
if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost)
&& (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
}
return false;
}
@Override
public int hashCode() {
int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
return hash;
}
@Override @Override
public void copyTo(AttributeImpl target) { public void copyTo(AttributeImpl target) {
final MaxNonCompetitiveBoostAttributeImpl t = (MaxNonCompetitiveBoostAttributeImpl) target; final MaxNonCompetitiveBoostAttributeImpl t = (MaxNonCompetitiveBoostAttributeImpl) target;

View File

@ -20,6 +20,8 @@ package org.apache.lucene.util;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.lang.ref.WeakReference;
import java.util.LinkedList;
/** /**
* Base class for Attributes that can be added to a * Base class for Attributes that can be added to a
@ -37,71 +39,79 @@ public abstract class AttributeImpl implements Cloneable, Serializable, Attribut
public abstract void clear(); public abstract void clear();
/** /**
* The default implementation of this method accesses all declared * This method returns the current attribute values as a string in the following format
* fields of this object and prints the values in the following syntax: * by calling the {@link #reflectWith(AttributeReflector)} method:
* *
* <ul>
* <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
* <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
* </ul>
*
* @see #reflectWith(AttributeReflector)
*/
public final String reflectAsString(final boolean prependAttClass) {
  // accumulates "key=value" entries, comma-separated, as reflectWith() reports them
  final StringBuilder out = new StringBuilder();
  final AttributeReflector collector = new AttributeReflector() {
    public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
      // a separator is only needed once something has been written
      if (out.length() != 0) {
        out.append(',');
      }
      if (prependAttClass) {
        out.append(attClass.getName());
        out.append('#');
      }
      out.append(key);
      out.append('=');
      // StringBuilder.append(Object) renders a null reference as "null"
      out.append(value);
    }
  };
  reflectWith(collector);
  return out.toString();
}
/**
* This method is for introspection of attributes, it should simply
* add the key/values this attribute holds to the given {@link AttributeReflector}.
*
* <p>The default implementation calls {@link AttributeReflector#reflect} for all
* non-static fields from the implementing class, using the field name as key
* and the field value as value. The Attribute class is also determined by reflection.
* Please note that the default implementation can only handle single-Attribute
* implementations.
*
* <p>Custom implementations look like this (e.g. for a combined attribute implementation):
* <pre> * <pre>
* public String toString() { * public void reflectWith(AttributeReflector reflector) {
* return "start=" + startOffset + ",end=" + endOffset; * reflector.reflect(CharTermAttribute.class, "term", term());
* reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
* } * }
* </pre> * </pre>
* *
* This method may be overridden by subclasses. * <p>If you implement this method, make sure that for each invocation, the same set of {@link Attribute}
* interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly
* different values. So don't automatically exclude e.g. {@code null} properties!
*
* @see #reflectAsString(boolean)
*/ */
@Override public void reflectWith(AttributeReflector reflector) {
public String toString() { final Class<? extends AttributeImpl> clazz = this.getClass();
StringBuilder buffer = new StringBuilder(); final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
Class<?> clazz = this.getClass(); if (interfaces.size() != 1) {
Field[] fields = clazz.getDeclaredFields(); throw new UnsupportedOperationException(clazz.getName() +
" implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
}
final Class<? extends Attribute> interf = interfaces.getFirst().get();
final Field[] fields = clazz.getDeclaredFields();
try { try {
for (int i = 0; i < fields.length; i++) { for (int i = 0; i < fields.length; i++) {
Field f = fields[i]; final Field f = fields[i];
if (Modifier.isStatic(f.getModifiers())) continue; if (Modifier.isStatic(f.getModifiers())) continue;
f.setAccessible(true); f.setAccessible(true);
Object value = f.get(this); reflector.reflect(interf, f.getName(), f.get(this));
if (buffer.length()>0) {
buffer.append(',');
}
if (value == null) {
buffer.append(f.getName() + "=null");
} else {
buffer.append(f.getName() + "=" + value);
}
} }
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
// this should never happen, because we're just accessing fields // this should never happen, because we're just accessing fields
// from 'this' // from 'this'
throw new RuntimeException(e); throw new RuntimeException(e);
} }
return buffer.toString();
} }
/**
* Subclasses must implement this method and should compute
* a hashCode similar to this:
* <pre>
* public int hashCode() {
* int code = startOffset;
* code = code * 31 + endOffset;
* return code;
* }
* </pre>
*
* see also {@link #equals(Object)}
*/
@Override
public abstract int hashCode();
/**
* All values used for computation of {@link #hashCode()}
* should be checked here for equality.
*
* see also {@link Object#equals(Object)}
*/
@Override
public abstract boolean equals(Object other);
/** /**
* Copies the values from this Attribute into the passed-in * Copies the values from this Attribute into the passed-in
* target attribute. The target implementation must support all the * target attribute. The target implementation must support all the

View File

@ -0,0 +1,34 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Callback interface for inspecting the contents of an {@link AttributeSource}
 * or an {@link AttributeImpl}.
 */
public interface AttributeReflector {

  /**
   * Invoked once for each property of an {@link AttributeImpl}/{@link AttributeSource}.
   * For example, {@link org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl#reflectWith}
   * reports its term text by calling this method with
   * {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class}
   * as attribute class, {@code "term"} as key and the term as a String value.
   *
   * @param attClass the {@link Attribute} interface the property belongs to
   * @param key the name of the property
   * @param value the current value of the property
   */
  void reflect(Class<? extends Attribute> attClass, String key, Object value);

}

View File

@ -180,20 +180,9 @@ public class AttributeSource {
private static final WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses = private static final WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
new WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>>(); new WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>>();
/** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces. static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
* <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
* the <code>AttributeSource</code>, because the provided attributes may already exist.
* You should always retrieve the wanted attributes using {@link #getAttribute} after adding
* with this method and cast to your class.
* The recommended way to use custom implementations is using an {@link AttributeFactory}.
* </font></p>
*/
public void addAttributeImpl(final AttributeImpl att) {
final Class<? extends AttributeImpl> clazz = att.getClass();
if (attributeImpls.containsKey(clazz)) return;
LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces;
synchronized(knownImplClasses) { synchronized(knownImplClasses) {
foundInterfaces = knownImplClasses.get(clazz); LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
if (foundInterfaces == null) { if (foundInterfaces == null) {
// we have a strong reference to the class instance holding all interfaces in the list (parameter "att"), // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"),
// so all WeakReferences are never evicted by GC // so all WeakReferences are never evicted by GC
@ -210,7 +199,23 @@ public class AttributeSource {
actClazz = actClazz.getSuperclass(); actClazz = actClazz.getSuperclass();
} while (actClazz != null); } while (actClazz != null);
} }
return foundInterfaces;
} }
}
/** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
* <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
* the <code>AttributeSource</code>, because the provided attributes may already exist.
* You should always retrieve the wanted attributes using {@link #getAttribute} after adding
* with this method and cast to your class.
* The recommended way to use custom implementations is using an {@link AttributeFactory}.
* </font></p>
*/
public final void addAttributeImpl(final AttributeImpl att) {
final Class<? extends AttributeImpl> clazz = att.getClass();
if (attributeImpls.containsKey(clazz)) return;
final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
getAttributeInterfaces(clazz);
// add all interfaces of this AttributeImpl to the maps // add all interfaces of this AttributeImpl to the maps
for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) { for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
@ -233,7 +238,7 @@ public class AttributeSource {
* already in this AttributeSource and returns it. Otherwise a * already in this AttributeSource and returns it. Otherwise a
* new instance is created, added to this AttributeSource and returned. * new instance is created, added to this AttributeSource and returned.
*/ */
public <A extends Attribute> A addAttribute(Class<A> attClass) { public final <A extends Attribute> A addAttribute(Class<A> attClass) {
AttributeImpl attImpl = attributes.get(attClass); AttributeImpl attImpl = attributes.get(attClass);
if (attImpl == null) { if (attImpl == null) {
if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) { if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) {
@ -248,7 +253,7 @@ public class AttributeSource {
} }
/** Returns true, iff this AttributeSource has any attributes */ /** Returns true, iff this AttributeSource has any attributes */
public boolean hasAttributes() { public final boolean hasAttributes() {
return !this.attributes.isEmpty(); return !this.attributes.isEmpty();
} }
@ -256,7 +261,7 @@ public class AttributeSource {
* The caller must pass in a Class&lt;? extends Attribute&gt; value. * The caller must pass in a Class&lt;? extends Attribute&gt; value.
* Returns true, iff this AttributeSource contains the passed-in Attribute. * Returns true, iff this AttributeSource contains the passed-in Attribute.
*/ */
public boolean hasAttribute(Class<? extends Attribute> attClass) { public final boolean hasAttribute(Class<? extends Attribute> attClass) {
return this.attributes.containsKey(attClass); return this.attributes.containsKey(attClass);
} }
@ -271,7 +276,7 @@ public class AttributeSource {
* available. If you want to only use the attribute, if it is available (to optimize * available. If you want to only use the attribute, if it is available (to optimize
* consuming), use {@link #hasAttribute}. * consuming), use {@link #hasAttribute}.
*/ */
public <A extends Attribute> A getAttribute(Class<A> attClass) { public final <A extends Attribute> A getAttribute(Class<A> attClass) {
AttributeImpl attImpl = attributes.get(attClass); AttributeImpl attImpl = attributes.get(attClass);
if (attImpl == null) { if (attImpl == null) {
throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'."); throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'.");
@ -319,7 +324,7 @@ public class AttributeSource {
* Resets all Attributes in this AttributeSource by calling * Resets all Attributes in this AttributeSource by calling
* {@link AttributeImpl#clear()} on each Attribute implementation. * {@link AttributeImpl#clear()} on each Attribute implementation.
*/ */
public void clearAttributes() { public final void clearAttributes() {
if (hasAttributes()) { if (hasAttributes()) {
if (currentState == null) { if (currentState == null) {
computeCurrentState(); computeCurrentState();
@ -334,7 +339,7 @@ public class AttributeSource {
* Captures the state of all Attributes. The return value can be passed to * Captures the state of all Attributes. The return value can be passed to
* {@link #restoreState} to restore the state of this or another AttributeSource. * {@link #restoreState} to restore the state of this or another AttributeSource.
*/ */
public State captureState() { public final State captureState() {
if (!hasAttributes()) { if (!hasAttributes()) {
return null; return null;
} }
@ -360,7 +365,7 @@ public class AttributeSource {
* reset its value to the default, in which case the caller should first * reset its value to the default, in which case the caller should first
* call {@link TokenStream#clearAttributes()} on the targetStream. * call {@link TokenStream#clearAttributes()} on the targetStream.
*/ */
public void restoreState(State state) { public final void restoreState(State state) {
if (state == null) return; if (state == null) return;
do { do {
@ -431,19 +436,51 @@ public class AttributeSource {
return false; return false;
} }
@Override /**
public String toString() { * This method returns the current attribute values as a string in the following format
StringBuilder sb = new StringBuilder().append('('); * by calling the {@link #reflectWith(AttributeReflector)} method:
*
* <ul>
* <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
* <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
* </ul>
*
* @see #reflectWith(AttributeReflector)
*/
public final String reflectAsString(final boolean prependAttClass) {
  // gathers everything reflectWith() reports into one comma-separated string
  final StringBuilder result = new StringBuilder();
  final AttributeReflector appender = new AttributeReflector() {
    public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
      // no leading comma before the very first entry
      if (result.length() != 0) {
        result.append(',');
      }
      if (prependAttClass) {
        result.append(attClass.getName());
        result.append('#');
      }
      result.append(key);
      result.append('=');
      // StringBuilder.append(Object) renders a null reference as "null"
      result.append(value);
    }
  };
  reflectWith(appender);
  return result.toString();
}
/**
* This method is for introspection of attributes, it should simply
* add the key/values this AttributeSource holds to the given {@link AttributeReflector}.
*
* <p>This method iterates over all Attribute implementations and calls the
* corresponding {@link AttributeImpl#reflectWith} method.</p>
*
* @see AttributeImpl#reflectWith
*/
public final void reflectWith(AttributeReflector reflector) {
if (hasAttributes()) { if (hasAttributes()) {
if (currentState == null) { if (currentState == null) {
computeCurrentState(); computeCurrentState();
} }
for (State state = currentState; state != null; state = state.next) { for (State state = currentState; state != null; state = state.next) {
if (state != currentState) sb.append(','); state.attribute.reflectWith(reflector);
sb.append(state.attribute.toString());
} }
} }
return sb.append(')').toString();
} }
/** /**
@ -453,7 +490,7 @@ public class AttributeSource {
* You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look * You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look
* into / modify the captured state. * into / modify the captured state.
*/ */
public AttributeSource cloneAttributes() { public final AttributeSource cloneAttributes() {
final AttributeSource clone = new AttributeSource(this.factory); final AttributeSource clone = new AttributeSource(this.factory);
if (hasAttributes()) { if (hasAttributes()) {

View File

@ -22,8 +22,11 @@ import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
import java.io.StringReader; import java.io.StringReader;
import java.util.HashMap;
public class TestToken extends LuceneTestCase { public class TestToken extends LuceneTestCase {
@ -241,6 +244,22 @@ public class TestToken extends LuceneTestCase {
ts.addAttribute(TypeAttribute.class) instanceof Token); ts.addAttribute(TypeAttribute.class) instanceof Token);
} }
public void testAttributeReflection() throws Exception {
  final Token token = new Token("foobar", 6, 22, 8);

  // expected reflection output, keyed by "<attribute interface name>#<property>"
  final HashMap<String,Object> expected = new HashMap<String,Object>();
  expected.put(CharTermAttribute.class.getName() + "#term", "foobar");
  expected.put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
  expected.put(OffsetAttribute.class.getName() + "#startOffset", 6);
  expected.put(OffsetAttribute.class.getName() + "#endOffset", 22);
  expected.put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
  expected.put(PayloadAttribute.class.getName() + "#payload", null);
  expected.put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
  expected.put(FlagsAttribute.class.getName() + "#flags", 8);

  _TestUtil.assertAttributeReflection(token, expected);
}
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) { public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
T clone = (T) att.clone(); T clone = (T) att.clone();

View File

@ -19,7 +19,10 @@ package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.analysis.TestToken; import org.apache.lucene.analysis.TestToken;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.util.HashMap;
import java.util.Formatter; import java.util.Formatter;
import java.util.Locale; import java.util.Locale;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -126,6 +129,15 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
assertNotSame(buf, copy.buffer()); assertNotSame(buf, copy.buffer());
} }
/**
 * Checks that a {@link CharTermAttributeImpl} holding "foobar" reports the
 * term both as characters and as bytes through the attribute reflection API.
 */
public void testAttributeReflection() throws Exception {
  CharTermAttributeImpl t = new CharTermAttributeImpl();
  t.append("foobar");

  // A plain HashMap is filled explicitly: double-brace initialization would
  // create an anonymous HashMap subclass that holds on to this test instance.
  final HashMap<String,Object> expected = new HashMap<String,Object>();
  expected.put(CharTermAttribute.class.getName() + "#term", "foobar");
  expected.put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));

  _TestUtil.assertAttributeReflection(t, expected);
}
public void testCharSequenceInterface() { public void testCharSequenceInterface() {
final String s = "0123456789"; final String s = "0123456789";
final CharTermAttributeImpl t = new CharTermAttributeImpl(); final CharTermAttributeImpl t = new CharTermAttributeImpl();

View File

@ -0,0 +1,46 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase;
import java.util.Collections;
import java.util.HashMap;
/**
 * Uses the attribute reflection API to check the values that freshly
 * constructed simple attribute implementations report.
 */
public class TestSimpleAttributeImpl extends LuceneTestCase {

  // this checks using reflection API if the defaults are correct
  public void testAttributes() {
    _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(),
      Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1));
    _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(),
      Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0));
    _TestUtil.assertAttributeReflection(new TypeAttributeImpl(),
      Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE));
    _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(),
      Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null));
    _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(),
      Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false));

    // OffsetAttribute reflects two keys, so a real map is needed. It is filled
    // explicitly instead of via double-brace initialization, which would create
    // an anonymous HashMap subclass that holds on to this test instance.
    final HashMap<String,Object> expectedOffsets = new HashMap<String,Object>();
    expectedOffsets.put(OffsetAttribute.class.getName()+"#startOffset", 0);
    expectedOffsets.put(OffsetAttribute.class.getName()+"#endOffset", 0);
    _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), expectedOffsets);
  }

}

View File

@ -109,34 +109,6 @@ public class TestAttributeSource extends LuceneTestCase {
assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt); assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
} }
/**
 * Covers two scenarios: (1) a source with two distinct attribute
 * implementations lists them in insertion order, both in toString() and in
 * the impl iterator; (2) a source backed by a single Token exposes exactly
 * one implementation instance and prints it once.
 */
public void testToStringAndMultiAttributeImplementations() {
  // --- scenario 1: two separate attribute implementations ---
  final AttributeSource twoImplSource = new AttributeSource();
  final CharTermAttribute term = twoImplSource.addAttribute(CharTermAttribute.class);
  final TypeAttribute type = twoImplSource.addAttribute(TypeAttribute.class);
  term.append("TestTerm");
  type.setType("TestType");

  final String expectedPair = "("+term.toString()+","+type.toString()+")";
  assertEquals("Attributes should appear in original order", expectedPair, twoImplSource.toString());

  final Iterator<AttributeImpl> pairIt = twoImplSource.getAttributeImplsIterator();
  assertTrue("Iterator should have 2 attributes left", pairIt.hasNext());
  assertSame("First AttributeImpl from iterator should be termAtt", term, pairIt.next());
  assertTrue("Iterator should have 1 attributes left", pairIt.hasNext());
  assertSame("Second AttributeImpl from iterator should be typeAtt", type, pairIt.next());
  assertFalse("Iterator should have 0 attributes left", pairIt.hasNext());

  // --- scenario 2: one Token instance implementing several attributes ---
  final AttributeSource tokenSource = new AttributeSource();
  tokenSource.addAttributeImpl(new Token());
  // this should not add a new attribute as Token implements CharTermAttribute, too
  final CharTermAttribute tokenTerm = tokenSource.addAttribute(CharTermAttribute.class);
  assertTrue("CharTermAttribute should be implemented by Token", tokenTerm instanceof Token);

  // get the Token attribute and check, that it is the only one
  final Iterator<AttributeImpl> tokenIt = tokenSource.getAttributeImplsIterator();
  final Token tok = (Token) tokenIt.next();
  assertFalse("There should be only one attribute implementation instance", tokenIt.hasNext());

  tokenTerm.setEmpty().append("TestTerm");
  final String expectedSingle = "("+tok.toString()+")";
  assertEquals("Token should only printed once", expectedSingle, tokenSource.toString());
}
public void testDefaultAttributeFactory() throws Exception { public void testDefaultAttributeFactory() throws Exception {
AttributeSource src = new AttributeSource(); AttributeSource src = new AttributeSource();

View File

@ -22,6 +22,10 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Random; import java.util.Random;
import java.util.Map;
import java.util.HashMap;
import org.junit.Assert;
import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.ConcurrentMergeScheduler;
@ -238,4 +242,17 @@ public class _TestUtil {
((ConcurrentMergeScheduler) ms).setMaxMergeCount(3); ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3);
} }
} }
/**
 * Collects everything the given AttributeImpl reports through
 * {@code reflectWith} and asserts that the result equals the expected map.
 * @param att the attribute implementation to inspect
 * @param reflectedValues the expected values, keyed by "AttributeClass#key",
 *        i.e. the attribute interface's class name, a '#', and the reflected key
 */
public static <T> void assertAttributeReflection(final AttributeImpl att, Map<String,T> reflectedValues) {
  final Map<String,Object> actual = new HashMap<String,Object>();
  final AttributeReflector collector = new AttributeReflector() {
    public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
      final String qualifiedKey = attClass.getName() + '#' + key;
      actual.put(qualifiedKey, value);
    }
  };
  att.reflectWith(collector);
  Assert.assertEquals("Reflection does not produce same map", reflectedValues, actual);
}
} }

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.tokenattributes;
import java.io.Serializable; import java.io.Serializable;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import com.ibm.icu.lang.UScript; import com.ibm.icu.lang.UScript;
@ -77,7 +78,7 @@ public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribut
} }
@Override @Override
public String toString() { public void reflectWith(AttributeReflector reflector) {
return "script=" + getName(); reflector.reflect(ScriptAttribute.class, "script", getName());
} }
} }

View File

@ -428,13 +428,6 @@
--> -->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<!--
Analysis request handler. Since Solr 1.3. Use to return how a document is analyzed. Useful
for debugging and as a token server for other types of applications
-->
<requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
<!-- CSV update handler, loaded on demand --> <!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />

View File

@ -1,243 +0,0 @@
package org.apache.solr.handler;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
/**
*
* @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead.
**/
@Deprecated
public class AnalysisRequestHandler extends RequestHandlerBase {
public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class);
private XMLInputFactory inputFactory;
@Override
public void init(NamedList args) {
super.init(args);
inputFactory = XMLInputFactory.newInstance();
try {
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
// XMLInputFactory, as that implementation tries to cache and reuse the
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
// prevents this.
// All other known open-source stax parsers (and the bea ref impl)
// have thread-safe factories.
inputFactory.setProperty("reuse-instance", Boolean.FALSE);
}
catch (IllegalArgumentException ex) {
// Other implementations will likely throw this exception since "reuse-instance"
// isimplementation specific.
log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory);
}
}
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
SolrParams params = req.getParams();
Iterable<ContentStream> streams = req.getContentStreams();
if (streams != null) {
for (ContentStream stream : req.getContentStreams()) {
Reader reader = stream.getReader();
try {
XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
NamedList<Object> result = processContent(parser, req.getSchema());
rsp.add("response", result);
}
finally {
IOUtils.closeQuietly(reader);
}
}
}
}
NamedList<Object> processContent(XMLStreamReader parser,
IndexSchema schema) throws XMLStreamException, IOException {
NamedList<Object> result = new SimpleOrderedMap<Object>();
while (true) {
int event = parser.next();
switch (event) {
case XMLStreamConstants.END_DOCUMENT: {
parser.close();
return result;
}
case XMLStreamConstants.START_ELEMENT: {
String currTag = parser.getLocalName();
if ("doc".equals(currTag)) {
log.trace("Tokenizing doc...");
SolrInputDocument doc = readDoc(parser);
SchemaField uniq = schema.getUniqueKeyField();
NamedList<NamedList<NamedList<Object>>> theTokens = new SimpleOrderedMap<NamedList<NamedList<Object>>>();
result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens);
for (String name : doc.getFieldNames()) {
FieldType ft = schema.getFieldType(name);
Analyzer analyzer = ft.getAnalyzer();
Collection<Object> vals = doc.getFieldValues(name);
for (Object val : vals) {
Reader reader = new StringReader(val.toString());
TokenStream tstream = analyzer.tokenStream(name, reader);
NamedList<NamedList<Object>> tokens = getTokens(tstream);
theTokens.add(name, tokens);
}
}
}
break;
}
}
}
}
static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException {
// outer is namedList since order of tokens is important
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
// TODO: support custom attributes
CharTermAttribute termAtt = null;
TermToBytesRefAttribute bytesAtt = null;
if (tstream.hasAttribute(CharTermAttribute.class)) {
termAtt = tstream.getAttribute(CharTermAttribute.class);
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
}
final OffsetAttribute offsetAtt = tstream.addAttribute(OffsetAttribute.class);
final TypeAttribute typeAtt = tstream.addAttribute(TypeAttribute.class);
final PositionIncrementAttribute posIncAtt = tstream.addAttribute(PositionIncrementAttribute.class);
final BytesRef bytes = new BytesRef();
while (tstream.incrementToken()) {
NamedList<Object> token = new SimpleOrderedMap<Object>();
tokens.add("token", token);
if (termAtt != null) {
token.add("value", termAtt.toString());
}
if (bytesAtt != null) {
bytesAtt.toBytesRef(bytes);
// TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
token.add("value", bytes.utf8ToString());
}
token.add("start", offsetAtt.startOffset());
token.add("end", offsetAtt.endOffset());
token.add("posInc", posIncAtt.getPositionIncrement());
token.add("type", typeAtt.type());
//TODO: handle payloads
}
return tokens;
}
SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
SolrInputDocument doc = new SolrInputDocument();
StringBuilder text = new StringBuilder();
String name = null;
String attrName = "";
float boost = 1.0f;
boolean isNull = false;
while (true) {
int event = parser.next();
switch (event) {
// Add everything to the text
case XMLStreamConstants.SPACE:
case XMLStreamConstants.CDATA:
case XMLStreamConstants.CHARACTERS:
text.append(parser.getText());
break;
case XMLStreamConstants.END_ELEMENT:
if ("doc".equals(parser.getLocalName())) {
return doc;
} else if ("field".equals(parser.getLocalName())) {
if (!isNull) {
doc.addField(name, text.toString(), boost);
boost = 1.0f;
}
}
break;
case XMLStreamConstants.START_ELEMENT:
text.setLength(0);
String localName = parser.getLocalName();
if (!"field".equals(localName)) {
log.warn("unexpected XML tag doc/" + localName);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"unexpected XML tag doc/" + localName);
}
String attrVal = "";
for (int i = 0; i < parser.getAttributeCount(); i++) {
attrName = parser.getAttributeLocalName(i);
attrVal = parser.getAttributeValue(i);
if ("name".equals(attrName)) {
name = attrVal;
}
}
break;
}
}
}
//////////////////////// SolrInfoMBeans methods //////////////////////
@Override
public String getDescription() {
return "Provide Analysis of text";
}
@Override
public String getVersion() {
return "$Revision$";
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getSource() {
return "$URL$";
}
}

View File

@ -20,10 +20,14 @@ package org.apache.solr.handler;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharReader; import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.SorterTemplate;
import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory; import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.analysis.TokenizerChain;
@ -34,6 +38,9 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.util.ByteUtils;
import org.apache.noggit.CharArr;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -47,7 +54,7 @@ import java.util.*;
*/ */
public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
public static final Set<String> EMPTY_STRING_SET = Collections.emptySet(); public static final Set<BytesRef> EMPTY_BYTES_SET = Collections.emptySet();
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
rsp.add("analysis", doAnalysis(req)); rsp.add("analysis", doAnalysis(req));
@ -107,7 +114,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
} }
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value))); TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
List<Token> tokens = analyzeTokenStream(tokenStream); List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context)); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
@ -115,7 +122,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
for (TokenFilterFactory tokenFilterFactory : filtfacs) { for (TokenFilterFactory tokenFilterFactory : filtfacs) {
tokenStream = tokenFilterFactory.create(listBasedTokenStream); tokenStream = tokenFilterFactory.create(listBasedTokenStream);
List<Token> tokenList = analyzeTokenStream(tokenStream); List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context)); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
listBasedTokenStream = new ListBasedTokenStream(tokenList); listBasedTokenStream = new ListBasedTokenStream(tokenList);
} }
@ -126,14 +133,24 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
/** /**
* Analyzes the given text using the given analyzer and returns the produced tokens. * Analyzes the given text using the given analyzer and returns the produced tokens.
* *
* @param value The value to analyze. * @param query The query to analyze.
* @param analyzer The analyzer to use. * @param analyzer The analyzer to use.
*
* @return The produces token list.
*/ */
protected List<Token> analyzeValue(String value, Analyzer analyzer) { protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value)); final Set<BytesRef> tokens = new HashSet<BytesRef>();
return analyzeTokenStream(tokenStream); final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
try {
tokenStream.reset();
while (tokenStream.incrementToken()) {
final BytesRef bytes = new BytesRef();
bytesAtt.toBytesRef(bytes);
tokens.add(bytes);
}
} catch (IOException ioe) {
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
}
return tokens;
} }
/** /**
@ -143,41 +160,17 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
* *
* @return List of tokens produced from the TokenStream * @return List of tokens produced from the TokenStream
*/ */
private List<Token> analyzeTokenStream(TokenStream tokenStream) { private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
List<Token> tokens = new ArrayList<Token>(); List<AttributeSource> tokens = new ArrayList<AttributeSource>();
// for backwards compatibility, add all "common" attributes
// TODO change this API to support custom attributes tokenStream.addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = null; tokenStream.addAttribute(OffsetAttribute.class);
TermToBytesRefAttribute bytesAtt = null; tokenStream.addAttribute(TypeAttribute.class);
if (tokenStream.hasAttribute(CharTermAttribute.class)) {
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
} else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
}
final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class);
final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class);
final BytesRef bytes = new BytesRef(); final BytesRef bytes = new BytesRef();
try { try {
tokenStream.reset();
while (tokenStream.incrementToken()) { while (tokenStream.incrementToken()) {
Token token = new Token(); tokens.add(tokenStream.cloneAttributes());
if (termAtt != null) {
token.setEmpty().append(termAtt);
}
if (bytesAtt != null) {
bytesAtt.toBytesRef(bytes);
// TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
token.setEmpty().append(bytes.utf8ToString());
}
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
token.setPayload(payloadAtt.getPayload());
token.setPositionIncrement(posIncAtt.getPositionIncrement());
tokens.add((Token) token.clone());
} }
} catch (IOException ioe) { } catch (IOException ioe) {
throw new RuntimeException("Error occured while iterating over tokenstream", ioe); throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
@ -186,6 +179,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
return tokens; return tokens;
} }
// A static mapping of the reflected attribute keys ("AttributeClass#key") to the
// names used in Solr 1.4. The map is wrapped unmodifiable and the reference is
// final, so neither the content nor the field itself can be replaced.
static final Map<String,String> ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap<String,String>() {{
  put(OffsetAttribute.class.getName() + "#startOffset", "start");
  put(OffsetAttribute.class.getName() + "#endOffset", "end");
  put(TypeAttribute.class.getName() + "#type", "type");
}});
/** /**
* Converts the list of Tokens to a list of NamedLists representing the tokens. * Converts the list of Tokens to a list of NamedLists representing the tokens.
* *
@ -194,42 +194,98 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
* *
* @return List of NamedLists containing the relevant information taken from the tokens * @return List of NamedLists containing the relevant information taken from the tokens
*/ */
private List<NamedList> convertTokensToNamedLists(List<Token> tokens, AnalysisContext context) { private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokens, AnalysisContext context) {
List<NamedList> tokensNamedLists = new ArrayList<NamedList>(); final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
Collections.sort(tokens, new Comparator<Token>() {
public int compare(Token o1, Token o2) {
return o1.endOffset() - o2.endOffset();
}
});
final int[] positions = new int[tokens.size()];
int position = 0; int position = 0;
for (int i = 0, c = tokens.size(); i < c; i++) {
AttributeSource token = tokens.get(i);
position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
positions[i] = position;
}
// sort the tokens by absolute position
new SorterTemplate() {
@Override
protected void swap(int i, int j) {
Collections.swap(tokens, i, j);
}
@Override
protected int compare(int i, int j) {
return positions[i] - positions[j];
}
@Override
protected void setPivot(int i) {
pivot = positions[i];
}
@Override
protected int comparePivot(int j) {
return pivot - positions[j];
}
private int pivot;
}.mergeSort(0, tokens.size() - 1);
FieldType fieldType = context.getFieldType(); FieldType fieldType = context.getFieldType();
for (Token token : tokens) { final BytesRef rawBytes = new BytesRef();
NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>(); final CharArr textBuf = new CharArr();
for (int i = 0, c = tokens.size(); i < c; i++) {
AttributeSource token = tokens.get(i);
final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes);
textBuf.reset();
fieldType.indexedToReadable(rawBytes, textBuf);
final String text = textBuf.toString();
String text = fieldType.indexedToReadable(token.toString());
tokenNamedList.add("text", text); tokenNamedList.add("text", text);
if (!text.equals(token.toString())) {
tokenNamedList.add("raw_text", token.toString()); if (token.hasAttribute(CharTermAttribute.class)) {
final String rawText = token.getAttribute(CharTermAttribute.class).toString();
if (!rawText.equals(text)) {
tokenNamedList.add("raw_text", rawText);
}
} }
tokenNamedList.add("type", token.type());
tokenNamedList.add("start", token.startOffset());
tokenNamedList.add("end", token.endOffset());
position += token.getPositionIncrement(); tokenNamedList.add("raw_bytes", rawBytes.toString());
tokenNamedList.add("position", position);
if (context.getTermsToMatch().contains(token.toString())) { if (context.getTermsToMatch().contains(rawBytes)) {
tokenNamedList.add("match", true); tokenNamedList.add("match", true);
} }
if (token.getPayload() != null) { tokenNamedList.add("position", positions[i]);
tokenNamedList.add("payload", token.getPayload());
token.reflectWith(new AttributeReflector() {
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
// leave out position and bytes term
if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
return;
if (CharTermAttribute.class.isAssignableFrom(attClass))
return;
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
return;
String k = attClass.getName() + '#' + key;
// map keys for "standard attributes":
if (ATTRIBUTE_MAPPING.containsKey(k)) {
k = ATTRIBUTE_MAPPING.get(k);
} }
if (value instanceof Payload) {
final Payload p = (Payload) value;
value = new BytesRef(p.getData()).toString();
}
tokenNamedList.add(k, value);
}
});
tokensNamedLists.add(tokenNamedList); tokensNamedLists.add(tokenNamedList);
} }
@ -261,38 +317,27 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
*/ */
// TODO refactor to support custom attributes // TODO refactor to support custom attributes
protected final static class ListBasedTokenStream extends TokenStream { protected final static class ListBasedTokenStream extends TokenStream {
private final List<Token> tokens; private final List<AttributeSource> tokens;
private Iterator<Token> tokenIterator; private Iterator<AttributeSource> tokenIterator;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
/** /**
* Creates a new ListBasedTokenStream which uses the given tokens as its token source. * Creates a new ListBasedTokenStream which uses the given tokens as its token source.
* *
* @param tokens Source of tokens to be used * @param tokens Source of tokens to be used
*/ */
ListBasedTokenStream(List<Token> tokens) { ListBasedTokenStream(List<AttributeSource> tokens) {
this.tokens = tokens; this.tokens = tokens;
tokenIterator = tokens.iterator(); tokenIterator = tokens.iterator();
} }
/**
* {@inheritDoc}
*/
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (tokenIterator.hasNext()) { if (tokenIterator.hasNext()) {
Token next = tokenIterator.next(); AttributeSource next = tokenIterator.next();
termAtt.copyBuffer(next.buffer(), 0, next.length()); Iterator<Class<? extends Attribute>> atts = next.getAttributeClassesIterator();
typeAtt.setType(next.type()); while (atts.hasNext()) // make sure all att impls in the token exist here
offsetAtt.setOffset(next.startOffset(), next.endOffset()); addAttribute(atts.next());
flagsAtt.setFlags(next.getFlags()); next.copyTo(this);
payloadAtt.setPayload(next.getPayload());
posIncAtt.setPositionIncrement(next.getPositionIncrement());
return true; return true;
} else { } else {
return false; return false;
@ -314,7 +359,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
private final String fieldName; private final String fieldName;
private final FieldType fieldType; private final FieldType fieldType;
private final Analyzer analyzer; private final Analyzer analyzer;
private final Set<String> termsToMatch; private final Set<BytesRef> termsToMatch;
/** /**
* Constructs a new AnalysisContext with a given field type, analyzer and * Constructs a new AnalysisContext with a given field type, analyzer and
@ -328,7 +373,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
* @param termsToMatch Holds all the terms that should match during the * @param termsToMatch Holds all the terms that should match during the
* analysis process. * analysis process.
*/ */
public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set<String> termsToMatch) { public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set<BytesRef> termsToMatch) {
this(null, fieldType, analyzer, termsToMatch); this(null, fieldType, analyzer, termsToMatch);
} }
@ -343,7 +388,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
* *
*/ */
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) { public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) {
this(fieldName, fieldType, analyzer, EMPTY_STRING_SET); this(fieldName, fieldType, analyzer, EMPTY_BYTES_SET);
} }
/** /**
@ -359,7 +404,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
* @param termsToMatch Holds all the terms that should match during the * @param termsToMatch Holds all the terms that should match during the
* analysis process. * analysis process.
*/ */
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set<String> termsToMatch) { public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set<BytesRef> termsToMatch) {
this.fieldName = fieldName; this.fieldName = fieldName;
this.fieldType = fieldType; this.fieldType = fieldType;
this.analyzer = analyzer; this.analyzer = analyzer;
@ -378,7 +423,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
return analyzer; return analyzer;
} }
public Set<String> getTermsToMatch() { public Set<BytesRef> getTermsToMatch() {
return termsToMatch; return termsToMatch;
} }
} }

View File

@ -19,7 +19,7 @@ package org.apache.solr.handler;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token; import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest; import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
@ -216,21 +216,20 @@ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase {
FieldType fieldType = schema.getFieldType(name); FieldType fieldType = schema.getFieldType(name);
Set<String> termsToMatch = new HashSet<String>(); final String queryValue = request.getQuery();
if (request.getQuery() != null && request.isShowMatch()) { Set<BytesRef> termsToMatch;
try { try {
List<Token> tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer()); termsToMatch = (queryValue != null && request.isShowMatch())
for (Token token : tokens) { ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
termsToMatch.add(token.toString()); : EMPTY_BYTES_SET;
}
} catch (Exception e) { } catch (Exception e) {
// ignore analysis exceptions since we are applying arbitrary text to all fields // ignore analysis exceptions since we are applying arbitrary text to all fields
} termsToMatch = EMPTY_BYTES_SET;
} }
if (request.getQuery() != null) { if (request.getQuery() != null) {
try { try {
AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_STRING_SET); AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET);
fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext)); fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext));
} catch (Exception e) { } catch (Exception e) {
// ignore analysis exceptions since we are applying arbitrary text to all fields // ignore analysis exceptions since we are applying arbitrary text to all fields

View File

@ -17,7 +17,7 @@
package org.apache.solr.handler; package org.apache.solr.handler;
import org.apache.lucene.analysis.Token; import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest; import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.common.params.AnalysisParams; import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
@ -30,10 +30,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import java.util.Arrays; import java.util.*;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.io.Reader; import java.io.Reader;
import java.io.IOException; import java.io.IOException;
@ -222,14 +219,10 @@ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase {
*/ */
private NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) { private NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {
Set<String> termsToMatch = new HashSet<String>(); final String queryValue = analysisRequest.getQuery();
String queryValue = analysisRequest.getQuery(); final Set<BytesRef> termsToMatch = (queryValue != null && analysisRequest.isShowMatch())
if (queryValue != null && analysisRequest.isShowMatch()) { ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
List<Token> tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer()); : EMPTY_BYTES_SET;
for (Token token : tokens) {
termsToMatch.add(token.toString());
}
}
NamedList<NamedList> analyzeResults = new SimpleOrderedMap<NamedList>(); NamedList<NamedList> analyzeResults = new SimpleOrderedMap<NamedList>();
if (analysisRequest.getFieldValue() != null) { if (analysisRequest.getFieldValue() != null) {

View File

@ -24,6 +24,7 @@
org.apache.lucene.analysis.CharReader, org.apache.lucene.analysis.CharReader,
org.apache.lucene.analysis.CharStream, org.apache.lucene.analysis.CharStream,
org.apache.lucene.analysis.tokenattributes.*, org.apache.lucene.analysis.tokenattributes.*,
org.apache.lucene.util.AttributeReflector,
org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain, org.apache.solr.analysis.TokenizerChain,
@ -31,7 +32,8 @@
org.apache.solr.schema.FieldType, org.apache.solr.schema.FieldType,
org.apache.solr.schema.SchemaField, org.apache.solr.schema.SchemaField,
org.apache.solr.common.util.XML, org.apache.solr.common.util.XML,
javax.servlet.jsp.JspWriter,java.io.IOException javax.servlet.jsp.JspWriter,java.io.IOException,
org.apache.noggit.CharArr
"%> "%>
<%@ page import="java.io.Reader"%> <%@ page import="java.io.Reader"%>
<%@ page import="java.io.StringReader"%> <%@ page import="java.io.StringReader"%>
@ -39,8 +41,6 @@
<%@ page import="java.math.BigInteger" %> <%@ page import="java.math.BigInteger" %>
<%-- $Id$ --%> <%-- $Id$ --%>
<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%>
<%-- $Name: $ --%>
<%@include file="header.jsp" %> <%@include file="header.jsp" %>
@ -148,24 +148,28 @@
} }
if (field!=null) { if (field!=null) {
HashSet<Tok> matches = null; HashSet<BytesRef> matches = null;
if (qval!="" && highlight) { if (qval!="" && highlight) {
Reader reader = new StringReader(qval); Reader reader = new StringReader(qval);
Analyzer analyzer = field.getType().getQueryAnalyzer(); Analyzer analyzer = field.getType().getQueryAnalyzer();
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader); TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
tstream.reset(); tstream.reset();
List<AttributeSource> tokens = getTokens(tstream); matches = new HashSet<BytesRef>();
matches = new HashSet<Tok>(); while (tstream.incrementToken()) {
for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); } final BytesRef bytes = new BytesRef();
bytesAtt.toBytesRef(bytes);
matches.add(bytes);
}
} }
if (val!="") { if (val!="") {
out.println("<h3>Index Analyzer</h3>"); out.println("<h3>Index Analyzer</h3>");
doAnalyzer(out, field, val, false, verbose,matches); doAnalyzer(out, field, val, false, verbose, matches);
} }
if (qval!="") { if (qval!="") {
out.println("<h3>Query Analyzer</h3>"); out.println("<h3>Query Analyzer</h3>");
doAnalyzer(out, field, qval, true, qverbose,null); doAnalyzer(out, field, qval, true, qverbose, null);
} }
} }
@ -177,7 +181,7 @@
<%! <%!
private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception { private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<BytesRef> match) throws Exception {
FieldType ft = field.getType(); FieldType ft = field.getType();
Analyzer analyzer = queryAnalyser ? Analyzer analyzer = queryAnalyser ?
@ -240,7 +244,7 @@
tstream.reset(); tstream.reset();
List<AttributeSource> tokens = getTokens(tstream); List<AttributeSource> tokens = getTokens(tstream);
if (verbose) { if (verbose) {
writeHeader(out, analyzer.getClass(), new HashMap<String,String>()); writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP);
} }
writeTokens(out, tokens, ft, verbose, match); writeTokens(out, tokens, ft, verbose, match);
} }
@ -249,52 +253,59 @@
static List<AttributeSource> getTokens(TokenStream tstream) throws IOException { static List<AttributeSource> getTokens(TokenStream tstream) throws IOException {
List<AttributeSource> tokens = new ArrayList<AttributeSource>(); List<AttributeSource> tokens = new ArrayList<AttributeSource>();
tstream.reset();
while (true) { while (tstream.incrementToken()) {
if (!tstream.incrementToken())
break;
else {
tokens.add(tstream.cloneAttributes()); tokens.add(tstream.cloneAttributes());
} }
}
return tokens; return tokens;
} }
private static class ReflectItem {
final Class<? extends Attribute> attClass;
final String key;
final Object value;
private static class Tok { ReflectItem(Class<? extends Attribute> attClass, String key, Object value) {
AttributeSource token; this.attClass = attClass;
int pos; this.key = key;
Tok(AttributeSource token, int pos) { this.value = value;
this.token=token;
this.pos=pos;
}
public boolean equals(Object o) {
return ((Tok)o).token.toString().equals(token.toString());
}
public int hashCode() {
return token.toString().hashCode();
}
public String toString() {
return token.toString();
}
public String toPrintableString() {
TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class);
if (att instanceof CharTermAttribute)
return att.toString();
else {
BytesRef bytes = new BytesRef();
att.toBytesRef(bytes);
return bytes.toString();
}
} }
} }
private static interface ToStr { private static class Tok {
public String toStr(Object o); final BytesRef bytes = new BytesRef();
final String rawText, text;
final int pos;
final List<ReflectItem> reflected = new ArrayList<ReflectItem>();
Tok(AttributeSource token, int pos, FieldType ft) {
this.pos = pos;
token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
rawText = (token.hasAttribute(CharTermAttribute.class)) ?
token.getAttribute(CharTermAttribute.class).toString() : null;
final CharArr textBuf = new CharArr(bytes.length);
ft.indexedToReadable(bytes, textBuf);
text = textBuf.toString();
token.reflectWith(new AttributeReflector() {
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
// leave out position and raw term
if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
return;
if (CharTermAttribute.class.isAssignableFrom(attClass))
return;
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
return;
reflected.add(new ReflectItem(attClass, key, value));
}
});
}
} }
private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException { private static interface TokToStr {
public String toStr(Tok o);
}
private static void printRow(JspWriter out, String header, String headerTitle, List<Tok>[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set<BytesRef> match) throws IOException {
// find the maximum number of terms for any position // find the maximum number of terms for any position
int maxSz=1; int maxSz=1;
if (multival) { if (multival) {
@ -308,7 +319,13 @@
out.println("<tr>"); out.println("<tr>");
if (idx==0 && verbose) { if (idx==0 && verbose) {
if (header != null) { if (header != null) {
out.print("<th NOWRAP rowspan=\""+maxSz+"\">"); out.print("<th NOWRAP rowspan=\""+maxSz+"\"");
if (headerTitle != null) {
out.print(" title=\"");
XML.escapeCharData(headerTitle,out);
out.print("\"");
}
out.print(">");
XML.escapeCharData(header,out); XML.escapeCharData(header,out);
out.println("</th>"); out.println("</th>");
} }
@ -317,7 +334,7 @@
for (int posIndex=0; posIndex<arrLst.length; posIndex++) { for (int posIndex=0; posIndex<arrLst.length; posIndex++) {
List<Tok> lst = arrLst[posIndex]; List<Tok> lst = arrLst[posIndex];
if (lst.size() <= idx) continue; if (lst.size() <= idx) continue;
if (match!=null && match.contains(lst.get(idx))) { if (match!=null && match.contains(lst.get(idx).bytes)) {
out.print("<td class=\"highlight\""); out.print("<td class=\"highlight\"");
} else { } else {
out.print("<td class=\"debugdata\""); out.print("<td class=\"debugdata\"");
@ -340,15 +357,6 @@
} }
static String isPayloadString( Payload p ) {
String sp = new String( p.getData() );
for( int i=0; i < sp.length(); i++ ) {
if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
return "";
}
return "(" + sp + ")";
}
static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException { static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
out.print("<h4>"); out.print("<h4>");
out.print(clazz.getName()); out.print(clazz.getName());
@ -359,137 +367,93 @@
// readable, raw, pos, type, start/end // readable, raw, pos, type, start/end
static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException { static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<BytesRef> match) throws IOException {
// Use a map to tell what tokens are in what positions // Use a map to tell what tokens are in what positions
// because some tokenizers/filters may do funky stuff with // because some tokenizers/filters may do funky stuff with
// very large increments, or negative increments. // very large increments, or negative increments.
HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>(); HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
boolean needRaw=false; boolean needRaw=false;
int pos=0; int pos=0, reflectionCount = -1;
for (AttributeSource t : tokens) { for (AttributeSource t : tokens) {
if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
needRaw=true;
}
pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
List lst = map.get(pos); List lst = map.get(pos);
if (lst==null) { if (lst==null) {
lst = new ArrayList(1); lst = new ArrayList(1);
map.put(pos,lst); map.put(pos,lst);
} }
Tok tok = new Tok(t,pos); Tok tok = new Tok(t,pos,ft);
// sanity check
if (reflectionCount < 0) {
reflectionCount = tok.reflected.size();
} else {
if (reflectionCount != tok.reflected.size())
throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos);
}
if (tok.rawText != null && !tok.text.equals(tok.rawText)) {
needRaw=true;
}
lst.add(tok); lst.add(tok);
} }
List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]); List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]);
/* Jetty 6.1.3 miscompiles this generics version... // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics:
Arrays.sort(arr, new Comparator<List<Tok>>() {
public int compare(List<Tok> toks, List<Tok> toks1) {
return toks.get(0).pos - toks1.get(0).pos;
}
}
*/
Arrays.sort(arr, new Comparator() { Arrays.sort(arr, new Comparator() {
public int compare(Object toks, Object toks1) { public int compare(Object toks, Object toks1) {
return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos; return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos;
} }
} });
);
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">"); out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
if (verbose) { if (verbose) {
printRow(out,"term position", arr, new ToStr() { printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() {
public String toStr(Object o) { public String toStr(Tok t) {
return Integer.toString(((Tok)o).pos); return Integer.toString(t.pos);
} }
} },false,verbose,null);
,false
,verbose
,null);
} }
printRow(out, "term text", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
printRow(out,"term text", arr, new ToStr() { public String toStr(Tok t) {
public String toStr(Object o) { return t.text;
return ft.indexedToReadable( ((Tok)o).toPrintableString() );
} }
} },true,verbose,match);
,true
,verbose
,match
);
if (verbose) {
if (needRaw) { if (needRaw) {
printRow(out,"raw text", arr, new ToStr() { printRow(out, "raw text", CharTermAttribute.class.getName(), arr, new TokToStr() {
public String toStr(Object o) { public String toStr(Tok t) {
// page is UTF-8, so anything goes. // page is UTF-8, so anything goes.
return ((Tok)o).toPrintableString(); return (t.rawText == null) ? "" : t.rawText;
} }
} },true,verbose,match);
,true
,verbose
,match
);
} }
if (verbose) { printRow(out, "raw bytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
printRow(out,"term type", arr, new ToStr() { public String toStr(Tok t) {
public String toStr(Object o) { return t.bytes.toString();
String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type(); }
if (tt == null) { },true,verbose,match);
return "null";
for (int att=0; att < reflectionCount; att++) {
final ReflectItem item0 = arr[0].get(0).reflected.get(att);
final int i = att;
printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() {
public String toStr(Tok t) {
final ReflectItem item = t.reflected.get(i);
if (item0.attClass != item.attClass || !item0.key.equals(item.key))
throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos);
if (item.value instanceof Payload) {
final Payload p = (Payload) item.value;
return new BytesRef(p.getData()).toString();
} else { } else {
return tt; return (item.value != null) ? item.value.toString() : "";
} }
} }
},true,verbose, null);
} }
,true
,verbose,
null
);
}
if (verbose) {
printRow(out,"source start,end", arr, new ToStr() {
public String toStr(Object o) {
AttributeSource t = ((Tok)o).token;
return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
}
}
,true
,verbose
,null
);
}
if (verbose) {
printRow(out,"payload", arr, new ToStr() {
public String toStr(Object o) {
AttributeSource t = ((Tok)o).token;
Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
if( null != p ) {
BigInteger bi = new BigInteger( p.getData() );
String ret = bi.toString( 16 );
if (ret.length() % 2 != 0) {
// Pad with 0
ret = "0"+ret;
}
ret += isPayloadString( p );
return ret;
}
return "";
}
}
,true
,verbose
,null
);
} }
out.println("</table>"); out.println("</table>");