diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e0fd2c2b208..42ba0ab6d49 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -362,9 +362,9 @@ Changes in backwards compatibility policy * LUCENE-2302: The new interface for term attributes, CharTermAttribute, now implements CharSequence. This requires the toString() methods of CharTermAttribute, deprecated TermAttribute, and Token to return only - the term text and no other attribute contents. - TODO: Point to new attribute inspection API coming with LUCENE-2374. - (Uwe Schindler, Robert Muir) + the term text and no other attribute contents. LUCENE-2374 implements + an attribute reflection API to no longer rely on toString() for attribute + inspection. (Uwe Schindler, Robert Muir) * LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed @@ -592,6 +592,23 @@ API Changes to ensure that the norm is encoded with your Similarity. (Robert Muir, Mike McCandless) +* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the + contents of AttributeImpl and AttributeSource using a well-defined API. + This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes + in a structured way. + There are also some backwards incompatible changes in toString() output, + as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute + leading to changed toString() return values. The new API allows to get a + string representation in a well-defined way using a new method + reflectAsString(). For backwards compatibility reasons, when toString() + was implemented by implementation subclasses, the default implementation of + AttributeImpl.reflectWith() uses toString()s output instead to report the + Attribute's properties. Otherwise, reflectWith() uses Java's reflection + (like toString() did before) to get the attribute properties. 
+ In addition, the mandatory equals() and hashCode() are no longer required + for AttributeImpls, but can still be provided (if needed). + (Uwe Schindler) + Bug fixes * LUCENE-2249: ParallelMultiSearcher should shut down thread pool on diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index c55dd8111d1..3a17c44a3c6 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -328,3 +328,10 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing * LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong are final. If you subclassed this code before to encode variable-length integers in some specialized way, use the Codec API instead. + +* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct + reflection of AttributeImpl instances, where the reflection was done using deprecated + toString() parsing, you have to now override reflectWith() to customize output. + toString() is no longer implemented by AttributeImpl, so if you have overridden + toString(), port your customization over to reflectWith(). reflectAsString() would + then return what toString() did before. diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java new file mode 100644 index 00000000000..50275eec8c8 --- /dev/null +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java @@ -0,0 +1,67 @@ +package org.apache.lucene.queryParser.standard.config; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; + +public class TestAttributes extends LuceneTestCase { + + // this checks using reflection API if the defaults are correct + public void testAttributes() { + _TestUtil.assertAttributeReflection(new AllowLeadingWildcardAttributeImpl(), + Collections.singletonMap(AllowLeadingWildcardAttribute.class.getName()+"#allowLeadingWildcard", false)); + _TestUtil.assertAttributeReflection(new AnalyzerAttributeImpl(), + Collections.singletonMap(AnalyzerAttribute.class.getName()+"#analyzer", null)); + _TestUtil.assertAttributeReflection(new BoostAttributeImpl(), + Collections.singletonMap(BoostAttribute.class.getName()+"#boost", 1.0f)); + _TestUtil.assertAttributeReflection(new DateResolutionAttributeImpl(), + Collections.singletonMap(DateResolutionAttribute.class.getName()+"#dateResolution", null)); + _TestUtil.assertAttributeReflection(new DefaultOperatorAttributeImpl(), + Collections.singletonMap(DefaultOperatorAttribute.class.getName()+"#operator", DefaultOperatorAttribute.Operator.OR)); + _TestUtil.assertAttributeReflection(new DefaultPhraseSlopAttributeImpl(), + Collections.singletonMap(DefaultPhraseSlopAttribute.class.getName()+"#defaultPhraseSlop", 0)); + _TestUtil.assertAttributeReflection(new FieldBoostMapAttributeImpl(), + 
Collections.singletonMap(FieldBoostMapAttribute.class.getName()+"#boosts", Collections.emptyMap())); + _TestUtil.assertAttributeReflection(new FieldDateResolutionMapAttributeImpl(), + Collections.singletonMap(FieldDateResolutionMapAttribute.class.getName()+"#dateRes", Collections.emptyMap())); + _TestUtil.assertAttributeReflection(new FuzzyAttributeImpl(), new HashMap() {{ + put(FuzzyAttribute.class.getName()+"#prefixLength", FuzzyQuery.defaultPrefixLength); + put(FuzzyAttribute.class.getName()+"#minSimilarity", FuzzyQuery.defaultMinSimilarity); + }}); + _TestUtil.assertAttributeReflection(new LocaleAttributeImpl(), + Collections.singletonMap(LocaleAttribute.class.getName()+"#locale", Locale.getDefault())); + _TestUtil.assertAttributeReflection(new LowercaseExpandedTermsAttributeImpl(), + Collections.singletonMap(LowercaseExpandedTermsAttribute.class.getName()+"#lowercaseExpandedTerms", true)); + _TestUtil.assertAttributeReflection(new MultiFieldAttributeImpl(), + Collections.singletonMap(MultiFieldAttribute.class.getName()+"#fields", null)); + _TestUtil.assertAttributeReflection(new MultiTermRewriteMethodAttributeImpl(), + Collections.singletonMap(MultiTermRewriteMethodAttribute.class.getName()+"#multiTermRewriteMethod", MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT)); + _TestUtil.assertAttributeReflection(new PositionIncrementsAttributeImpl(), + Collections.singletonMap(PositionIncrementsAttribute.class.getName()+"#positionIncrementsEnabled", false)); + _TestUtil.assertAttributeReflection(new RangeCollatorAttributeImpl(), + Collections.singletonMap(RangeCollatorAttribute.class.getName()+"#rangeCollator", null)); + } + +} diff --git a/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java b/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java index baf17ce9816..8d5e5d7c2c4 100644 --- a/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java +++ b/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java @@ -19,6 
+19,7 @@ package org.apache.lucene.analysis; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.document.NumericField; // for javadocs @@ -168,17 +169,18 @@ public final class NumericTokenStream extends TokenStream { // this attribute has no contents to clear! // we keep it untouched as it's fully controlled by outer class. } - - @Override - public boolean equals(Object other) { - return other == this; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } + @Override + public void reflectWith(AttributeReflector reflector) { + final BytesRef bytes = new BytesRef(); + toBytesRef(bytes); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + reflector.reflect(NumericTermAttribute.class, "shift", shift); + reflector.reflect(NumericTermAttribute.class, "rawValue", rawValue); + reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize); + reflector.reflect(NumericTermAttribute.class, "precisionStep", precisionStep); + } + @Override public void copyTo(AttributeImpl target) { final NumericTermAttribute a = (NumericTermAttribute) target; diff --git a/lucene/src/java/org/apache/lucene/analysis/Token.java b/lucene/src/java/org/apache/lucene/analysis/Token.java index a50b934377c..80c31ec4189 100644 --- a/lucene/src/java/org/apache/lucene/analysis/Token.java +++ b/lucene/src/java/org/apache/lucene/analysis/Token.java @@ -28,6 +28,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; /** A Token is an occurrence of a term from the text of a field. 
It consists of @@ -588,6 +589,17 @@ public class Token extends CharTermAttributeImpl } } + @Override + public void reflectWith(AttributeReflector reflector) { + super.reflectWith(reflector); + reflector.reflect(OffsetAttribute.class, "startOffset", startOffset); + reflector.reflect(OffsetAttribute.class, "endOffset", endOffset); + reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement); + reflector.reflect(PayloadAttribute.class, "payload", payload); + reflector.reflect(FlagsAttribute.class, "flags", flags); + reflector.reflect(TypeAttribute.class, "type", type); + } + /** Convenience factory that returns Token as implementation for the basic * attributes and return the default impl (with "Impl" appended) for all other * attributes. diff --git a/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java b/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java index 4268abc0db6..d45d280f73c 100644 --- a/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java @@ -23,6 +23,7 @@ import java.nio.CharBuffer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; @@ -243,6 +244,14 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr return new String(termBuffer, 0, termLength); } + @Override + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(CharTermAttribute.class, "term", toString()); + final BytesRef bytes = new BytesRef(); + toBytesRef(bytes); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + } + @Override public void copyTo(AttributeImpl target) { CharTermAttribute t = 
(CharTermAttribute) target; diff --git a/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java b/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java index 28ce30ee023..f07909021e5 100644 --- a/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java @@ -37,20 +37,6 @@ public final class BoostAttributeImpl extends AttributeImpl implements BoostAttr public void clear() { boost = 1.0f; } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (other instanceof BoostAttributeImpl) - return ((BoostAttributeImpl) other).boost == boost; - return false; - } - - @Override - public int hashCode() { - return Float.floatToIntBits(boost); - } @Override public void copyTo(AttributeImpl target) { diff --git a/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java b/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java index e4ffe304084..629f600c677 100644 --- a/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java @@ -48,25 +48,6 @@ public final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl imp maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY; competitiveTerm = null; } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (other instanceof MaxNonCompetitiveBoostAttributeImpl) { - final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other; - return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost) - && (o.competitiveTerm == null ? 
competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm)); - } - return false; - } - - @Override - public int hashCode() { - int hash = Float.floatToIntBits(maxNonCompetitiveBoost); - if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode(); - return hash; - } @Override public void copyTo(AttributeImpl target) { diff --git a/lucene/src/java/org/apache/lucene/util/AttributeImpl.java b/lucene/src/java/org/apache/lucene/util/AttributeImpl.java index c8bf649b6bf..d22491bf2c6 100644 --- a/lucene/src/java/org/apache/lucene/util/AttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/util/AttributeImpl.java @@ -20,6 +20,8 @@ package org.apache.lucene.util; import java.io.Serializable; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.lang.ref.WeakReference; +import java.util.LinkedList; /** * Base class for Attributes that can be added to a @@ -37,71 +39,79 @@ public abstract class AttributeImpl implements Cloneable, Serializable, Attribut public abstract void clear(); /** - * The default implementation of this method accesses all declared - * fields of this object and prints the values in the following syntax: + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: * - *
<pre>
-   *   public String toString() {
-   *     return "start=" + startOffset + ",end=" + endOffset;
-   *   }
-   * </pre>
- * - * This method may be overridden by subclasses. + * + * + * @see #reflectWith(AttributeReflector) */ - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - Class clazz = this.getClass(); - Field[] fields = clazz.getDeclaredFields(); - try { - for (int i = 0; i < fields.length; i++) { - Field f = fields[i]; - if (Modifier.isStatic(f.getModifiers())) continue; - f.setAccessible(true); - Object value = f.get(this); - if (buffer.length()>0) { + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { buffer.append(','); } - if (value == null) { - buffer.append(f.getName() + "=null"); - } else { - buffer.append(f.getName() + "=" + value); + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this attribute holds to the given {@link AttributeReflector}. + * + *

<p>The default implementation calls {@link AttributeReflector#reflect} for all + * non-static fields from the implementing class, using the field name as key + * and the field value as value. The Attribute class is also determined by reflection. + * Please note that the default implementation can only handle single-Attribute + * implementations. + * + *

<p>Custom implementations look like this (e.g. for a combined attribute implementation): + *
<pre>
+   *   public void reflectWith(AttributeReflector reflector) {
+   *     reflector.reflect(CharTermAttribute.class, "term", term());
+   *     reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
+   *   }
+   * </pre>
+ * + *

If you implement this method, make sure that for each invocation, the same set of {@link Attribute} + * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly + * different values. So don't automatically exclude e.g. {@code null} properties! + * + * @see #reflectAsString(boolean) + */ + public void reflectWith(AttributeReflector reflector) { + final Class clazz = this.getClass(); + final LinkedList>> interfaces = AttributeSource.getAttributeInterfaces(clazz); + if (interfaces.size() != 1) { + throw new UnsupportedOperationException(clazz.getName() + + " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this."); + } + final Class interf = interfaces.getFirst().get(); + final Field[] fields = clazz.getDeclaredFields(); + try { + for (int i = 0; i < fields.length; i++) { + final Field f = fields[i]; + if (Modifier.isStatic(f.getModifiers())) continue; + f.setAccessible(true); + reflector.reflect(interf, f.getName(), f.get(this)); } } catch (IllegalAccessException e) { // this should never happen, because we're just accessing fields // from 'this' throw new RuntimeException(e); } - - return buffer.toString(); } - /** - * Subclasses must implement this method and should compute - * a hashCode similar to this: - *

<pre>
-   *   public int hashCode() {
-   *     int code = startOffset;
-   *     code = code * 31 + endOffset;
-   *     return code;
-   *   }
-   * </pre>
- * - * see also {@link #equals(Object)} - */ - @Override - public abstract int hashCode(); - - /** - * All values used for computation of {@link #hashCode()} - * should be checked here for equality. - * - * see also {@link Object#equals(Object)} - */ - @Override - public abstract boolean equals(Object other); - /** * Copies the values from this Attribute into the passed-in * target attribute. The target implementation must support all the diff --git a/lucene/src/java/org/apache/lucene/util/AttributeReflector.java b/lucene/src/java/org/apache/lucene/util/AttributeReflector.java new file mode 100644 index 00000000000..c64d04cbc9f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/AttributeReflector.java @@ -0,0 +1,34 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This interface is used to reflect contents of {@link AttributeSource} or {@link AttributeImpl}. + */ +public interface AttributeReflector { + + /** + * This method gets called for every property in an {@link AttributeImpl}/{@link AttributeSource} + * passing the class name of the {@link Attribute}, a key and the actual value. 
+ * E.g., an invocation of {@link org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl#reflectWith} + * would call this method once using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class} + * as attribute class, {@code "term"} as key and the actual value as a String. + */ + public void reflect(Class attClass, String key, Object value); + +} diff --git a/lucene/src/java/org/apache/lucene/util/AttributeSource.java b/lucene/src/java/org/apache/lucene/util/AttributeSource.java index 1af3763eb6c..c76638f482a 100644 --- a/lucene/src/java/org/apache/lucene/util/AttributeSource.java +++ b/lucene/src/java/org/apache/lucene/util/AttributeSource.java @@ -180,20 +180,9 @@ public class AttributeSource { private static final WeakHashMap,LinkedList>>> knownImplClasses = new WeakHashMap,LinkedList>>>(); - /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. - *

Please note: It is not guaranteed, that att is added to - * the AttributeSource, because the provided attributes may already exist. - * You should always retrieve the wanted attributes using {@link #getAttribute} after adding - * with this method and cast to your class. - * The recommended way to use custom implementations is using an {@link AttributeFactory}. - *

- */ - public void addAttributeImpl(final AttributeImpl att) { - final Class clazz = att.getClass(); - if (attributeImpls.containsKey(clazz)) return; - LinkedList>> foundInterfaces; + static LinkedList>> getAttributeInterfaces(final Class clazz) { synchronized(knownImplClasses) { - foundInterfaces = knownImplClasses.get(clazz); + LinkedList>> foundInterfaces = knownImplClasses.get(clazz); if (foundInterfaces == null) { // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"), // so all WeakReferences are never evicted by GC @@ -210,7 +199,23 @@ public class AttributeSource { actClazz = actClazz.getSuperclass(); } while (actClazz != null); } + return foundInterfaces; } + } + + /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. + *

Please note: It is not guaranteed, that att is added to + * the AttributeSource, because the provided attributes may already exist. + * You should always retrieve the wanted attributes using {@link #getAttribute} after adding + * with this method and cast to your class. + * The recommended way to use custom implementations is using an {@link AttributeFactory}. + *

+ */ + public final void addAttributeImpl(final AttributeImpl att) { + final Class clazz = att.getClass(); + if (attributeImpls.containsKey(clazz)) return; + final LinkedList>> foundInterfaces = + getAttributeInterfaces(clazz); // add all interfaces of this AttributeImpl to the maps for (WeakReference> curInterfaceRef : foundInterfaces) { @@ -233,7 +238,7 @@ public class AttributeSource { * already in this AttributeSource and returns it. Otherwise a * new instance is created, added to this AttributeSource and returned. */ - public A addAttribute(Class attClass) { + public final A addAttribute(Class attClass) { AttributeImpl attImpl = attributes.get(attClass); if (attImpl == null) { if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) { @@ -248,7 +253,7 @@ public class AttributeSource { } /** Returns true, iff this AttributeSource has any attributes */ - public boolean hasAttributes() { + public final boolean hasAttributes() { return !this.attributes.isEmpty(); } @@ -256,7 +261,7 @@ public class AttributeSource { * The caller must pass in a Class<? extends Attribute> value. * Returns true, iff this AttributeSource contains the passed-in Attribute. */ - public boolean hasAttribute(Class attClass) { + public final boolean hasAttribute(Class attClass) { return this.attributes.containsKey(attClass); } @@ -271,7 +276,7 @@ public class AttributeSource { * available. If you want to only use the attribute, if it is available (to optimize * consuming), use {@link #hasAttribute}. */ - public A getAttribute(Class attClass) { + public final A getAttribute(Class attClass) { AttributeImpl attImpl = attributes.get(attClass); if (attImpl == null) { throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'."); @@ -319,7 +324,7 @@ public class AttributeSource { * Resets all Attributes in this AttributeSource by calling * {@link AttributeImpl#clear()} on each Attribute implementation. 
*/ - public void clearAttributes() { + public final void clearAttributes() { if (hasAttributes()) { if (currentState == null) { computeCurrentState(); @@ -334,7 +339,7 @@ public class AttributeSource { * Captures the state of all Attributes. The return value can be passed to * {@link #restoreState} to restore the state of this or another AttributeSource. */ - public State captureState() { + public final State captureState() { if (!hasAttributes()) { return null; } @@ -360,7 +365,7 @@ public class AttributeSource { * reset its value to the default, in which case the caller should first * call {@link TokenStream#clearAttributes()} on the targetStream. */ - public void restoreState(State state) { + public final void restoreState(State state) { if (state == null) return; do { @@ -431,21 +436,53 @@ public class AttributeSource { return false; } - @Override - public String toString() { - StringBuilder sb = new StringBuilder().append('('); + /** + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: + * + *
<ul> + *
<li>iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"}</li> + *
<li>iff {@code prependAttClass=false}: {@code "key=value,key=value"}</li> + * </ul>
+ * + * @see #reflectWith(AttributeReflector) + */ + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { + buffer.append(','); + } + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); + } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this AttributeSource holds to the given {@link AttributeReflector}. + * + *

<p>This method iterates over all Attribute implementations and calls the + * corresponding {@link AttributeImpl#reflectWith} method.</p>

+ * + * @see AttributeImpl#reflectWith + */ + public final void reflectWith(AttributeReflector reflector) { if (hasAttributes()) { if (currentState == null) { computeCurrentState(); } for (State state = currentState; state != null; state = state.next) { - if (state != currentState) sb.append(','); - sb.append(state.attribute.toString()); + state.attribute.reflectWith(reflector); } } - return sb.append(')').toString(); } - + /** * Performs a clone of all {@link AttributeImpl} instances returned in a new * {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream @@ -453,7 +490,7 @@ public class AttributeSource { * You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look * into / modify the captured state. */ - public AttributeSource cloneAttributes() { + public final AttributeSource cloneAttributes() { final AttributeSource clone = new AttributeSource(this.factory); if (hasAttributes()) { diff --git a/lucene/src/test/org/apache/lucene/analysis/TestToken.java b/lucene/src/test/org/apache/lucene/analysis/TestToken.java index 94ab03dfb5f..29edc25a40a 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestToken.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestToken.java @@ -22,8 +22,11 @@ import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; import java.io.StringReader; +import java.util.HashMap; public class TestToken extends LuceneTestCase { @@ -241,6 +244,22 @@ public class TestToken extends LuceneTestCase { ts.addAttribute(TypeAttribute.class) instanceof Token); } + public void testAttributeReflection() throws Exception { + Token t = new Token("foobar", 6, 22, 8); + _TestUtil.assertAttributeReflection(t, + new HashMap() {{ + put(CharTermAttribute.class.getName() + 
"#term", "foobar"); + put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar")); + put(OffsetAttribute.class.getName() + "#startOffset", 6); + put(OffsetAttribute.class.getName() + "#endOffset", 22); + put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1); + put(PayloadAttribute.class.getName() + "#payload", null); + put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE); + put(FlagsAttribute.class.getName() + "#flags", 8); + }}); + } + + public static T assertCloneIsEqual(T att) { @SuppressWarnings("unchecked") T clone = (T) att.clone(); diff --git a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java index 622f83d6e68..bac72b5951d 100644 --- a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java +++ b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java @@ -19,7 +19,10 @@ package org.apache.lucene.analysis.tokenattributes; import org.apache.lucene.analysis.TestToken; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; import java.nio.CharBuffer; +import java.util.HashMap; import java.util.Formatter; import java.util.Locale; import java.util.regex.Pattern; @@ -126,6 +129,15 @@ public class TestCharTermAttributeImpl extends LuceneTestCase { assertNotSame(buf, copy.buffer()); } + public void testAttributeReflection() throws Exception { + CharTermAttributeImpl t = new CharTermAttributeImpl(); + t.append("foobar"); + _TestUtil.assertAttributeReflection(t, new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar")); + }}); + } + public void testCharSequenceInterface() { final String s = "0123456789"; final CharTermAttributeImpl t = new 
CharTermAttributeImpl(); diff --git a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java new file mode 100644 index 00000000000..b8e9a0df7e2 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java @@ -0,0 +1,46 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.Collections; +import java.util.HashMap; + +public class TestSimpleAttributeImpl extends LuceneTestCase { + + // this checks using reflection API if the defaults are correct + public void testAttributes() { + _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(), + Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1)); + _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(), + Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0)); + _TestUtil.assertAttributeReflection(new TypeAttributeImpl(), + Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE)); + _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(), + Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null)); + _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(), + Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false)); + _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap() {{ + put(OffsetAttribute.class.getName()+"#startOffset", 0); + put(OffsetAttribute.class.getName()+"#endOffset", 0); + }}); + } + +} diff --git a/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java b/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java index f72f02c1b6a..bf1c0d1934d 100644 --- a/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java +++ b/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java @@ -109,34 +109,6 @@ public class TestAttributeSource extends LuceneTestCase { assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt); } - public void testToStringAndMultiAttributeImplementations() { - AttributeSource src = new AttributeSource(); - CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class); - TypeAttribute typeAtt = 
src.addAttribute(TypeAttribute.class); - termAtt.append("TestTerm"); - typeAtt.setType("TestType"); - assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString()); - Iterator it = src.getAttributeImplsIterator(); - assertTrue("Iterator should have 2 attributes left", it.hasNext()); - assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next()); - assertTrue("Iterator should have 1 attributes left", it.hasNext()); - assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next()); - assertFalse("Iterator should have 0 attributes left", it.hasNext()); - - src = new AttributeSource(); - src.addAttributeImpl(new Token()); - // this should not add a new attribute as Token implements CharTermAttribute, too - termAtt = src.addAttribute(CharTermAttribute.class); - assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token); - // get the Token attribute and check, that it is the only one - it = src.getAttributeImplsIterator(); - Token tok = (Token) it.next(); - assertFalse("There should be only one attribute implementation instance", it.hasNext()); - - termAtt.setEmpty().append("TestTerm"); - assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString()); - } - public void testDefaultAttributeFactory() throws Exception { AttributeSource src = new AttributeSource(); diff --git a/lucene/src/test/org/apache/lucene/util/_TestUtil.java b/lucene/src/test/org/apache/lucene/util/_TestUtil.java index 70af1dec347..aaeb98cd367 100644 --- a/lucene/src/test/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test/org/apache/lucene/util/_TestUtil.java @@ -22,6 +22,10 @@ import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.Random; +import java.util.Map; +import java.util.HashMap; + +import org.junit.Assert; import org.apache.lucene.index.CheckIndex; import 
org.apache.lucene.index.ConcurrentMergeScheduler; @@ -238,4 +242,17 @@ public class _TestUtil { ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3); } } + + /** Checks some basic behaviour of an AttributeImpl + * @param reflectedValues contains a map with "AttributeClass#key" as keys, mapped to the expected reflected values + */ + public static void assertAttributeReflection(final AttributeImpl att, Map reflectedValues) { + final Map map = new HashMap(); + att.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + map.put(attClass.getName() + '#' + key, value); + } + }); + Assert.assertEquals("Reflection does not produce same map", reflectedValues, map); + } } diff --git a/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java b/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java index 7e33ee7875f..3a54af94b58 100644 --- a/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java +++ b/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.tokenattributes; import java.io.Serializable; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import com.ibm.icu.lang.UScript; @@ -77,7 +78,7 @@ public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribut } @Override - public String toString() { - return "script=" + getName(); + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(ScriptAttribute.class, "script", getName()); } } diff --git a/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml b/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml index c59cc9b63fd..958848535ed 100644 --- a/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml +++ 
b/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml @@ -428,13 +428,6 @@ --> - - - - diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java deleted file mode 100644 index d391f4cccee..00000000000 --- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java +++ /dev/null @@ -1,243 +0,0 @@ -package org.apache.solr.handler; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.commons.io.IOUtils; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.*; -import org.apache.lucene.util.BytesRef; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.response.SolrQueryResponse; -import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.SchemaField; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamConstants; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.Collection; - -/** - * - * @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead. - **/ -@Deprecated -public class AnalysisRequestHandler extends RequestHandlerBase { - - public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class); - - private XMLInputFactory inputFactory; - - @Override - public void init(NamedList args) { - super.init(args); - - inputFactory = XMLInputFactory.newInstance(); - try { - // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe - // XMLInputFactory, as that implementation tries to cache and reuse the - // XMLStreamReader. Setting the parser-specific "reuse-instance" property to false - // prevents this. - // All other known open-source stax parsers (and the bea ref impl) - // have thread-safe factories. 
- inputFactory.setProperty("reuse-instance", Boolean.FALSE); - } - catch (IllegalArgumentException ex) { - // Other implementations will likely throw this exception since "reuse-instance" - // isimplementation specific. - log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory); - } - } - - public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { - SolrParams params = req.getParams(); - Iterable streams = req.getContentStreams(); - if (streams != null) { - for (ContentStream stream : req.getContentStreams()) { - Reader reader = stream.getReader(); - try { - XMLStreamReader parser = inputFactory.createXMLStreamReader(reader); - NamedList result = processContent(parser, req.getSchema()); - rsp.add("response", result); - } - finally { - IOUtils.closeQuietly(reader); - } - } - } - } - - NamedList processContent(XMLStreamReader parser, - IndexSchema schema) throws XMLStreamException, IOException { - NamedList result = new SimpleOrderedMap(); - while (true) { - int event = parser.next(); - switch (event) { - case XMLStreamConstants.END_DOCUMENT: { - parser.close(); - return result; - } - case XMLStreamConstants.START_ELEMENT: { - String currTag = parser.getLocalName(); - if ("doc".equals(currTag)) { - log.trace("Tokenizing doc..."); - - SolrInputDocument doc = readDoc(parser); - SchemaField uniq = schema.getUniqueKeyField(); - NamedList>> theTokens = new SimpleOrderedMap>>(); - result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens); - for (String name : doc.getFieldNames()) { - FieldType ft = schema.getFieldType(name); - Analyzer analyzer = ft.getAnalyzer(); - Collection vals = doc.getFieldValues(name); - for (Object val : vals) { - Reader reader = new StringReader(val.toString()); - TokenStream tstream = analyzer.tokenStream(name, reader); - NamedList> tokens = getTokens(tstream); - theTokens.add(name, tokens); - } - } - } - break; - } - } - } - } - - static NamedList> 
getTokens(TokenStream tstream) throws IOException { - // outer is namedList since order of tokens is important - NamedList> tokens = new NamedList>(); - // TODO: support custom attributes - CharTermAttribute termAtt = null; - TermToBytesRefAttribute bytesAtt = null; - if (tstream.hasAttribute(CharTermAttribute.class)) { - termAtt = tstream.getAttribute(CharTermAttribute.class); - } else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) { - bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class); - } - final OffsetAttribute offsetAtt = tstream.addAttribute(OffsetAttribute.class); - final TypeAttribute typeAtt = tstream.addAttribute(TypeAttribute.class); - final PositionIncrementAttribute posIncAtt = tstream.addAttribute(PositionIncrementAttribute.class); - - final BytesRef bytes = new BytesRef(); - while (tstream.incrementToken()) { - NamedList token = new SimpleOrderedMap(); - tokens.add("token", token); - if (termAtt != null) { - token.add("value", termAtt.toString()); - } - if (bytesAtt != null) { - bytesAtt.toBytesRef(bytes); - // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly! 
- token.add("value", bytes.utf8ToString()); - } - token.add("start", offsetAtt.startOffset()); - token.add("end", offsetAtt.endOffset()); - token.add("posInc", posIncAtt.getPositionIncrement()); - token.add("type", typeAtt.type()); - //TODO: handle payloads - } - return tokens; - } - - SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException { - SolrInputDocument doc = new SolrInputDocument(); - - StringBuilder text = new StringBuilder(); - String name = null; - String attrName = ""; - float boost = 1.0f; - boolean isNull = false; - while (true) { - int event = parser.next(); - switch (event) { - // Add everything to the text - case XMLStreamConstants.SPACE: - case XMLStreamConstants.CDATA: - case XMLStreamConstants.CHARACTERS: - text.append(parser.getText()); - break; - - case XMLStreamConstants.END_ELEMENT: - if ("doc".equals(parser.getLocalName())) { - return doc; - } else if ("field".equals(parser.getLocalName())) { - if (!isNull) { - doc.addField(name, text.toString(), boost); - boost = 1.0f; - } - } - break; - - case XMLStreamConstants.START_ELEMENT: - text.setLength(0); - String localName = parser.getLocalName(); - if (!"field".equals(localName)) { - log.warn("unexpected XML tag doc/" + localName); - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "unexpected XML tag doc/" + localName); - } - - String attrVal = ""; - for (int i = 0; i < parser.getAttributeCount(); i++) { - attrName = parser.getAttributeLocalName(i); - attrVal = parser.getAttributeValue(i); - if ("name".equals(attrName)) { - name = attrVal; - } - } - break; - } - } - } - - - //////////////////////// SolrInfoMBeans methods ////////////////////// - @Override - public String getDescription() { - return "Provide Analysis of text"; - } - - @Override - public String getVersion() { - return "$Revision$"; - } - - @Override - public String getSourceId() { - return "$Id$"; - } - - @Override - public String getSource() { - return "$URL$"; - } - -} diff --git 
a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java index b87dcc3d56f..4ec455bbd1e 100644 --- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java +++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java @@ -20,10 +20,14 @@ package org.apache.solr.handler; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharReader; import org.apache.lucene.analysis.CharStream; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.SorterTemplate; import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.TokenFilterFactory; import org.apache.solr.analysis.TokenizerChain; @@ -34,6 +38,9 @@ import org.apache.solr.common.SolrException; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.FieldType; +import org.apache.solr.util.ByteUtils; + +import org.apache.noggit.CharArr; import java.io.IOException; import java.io.StringReader; @@ -47,7 +54,7 @@ import java.util.*; */ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { - public static final Set EMPTY_STRING_SET = Collections.emptySet(); + public static final Set EMPTY_BYTES_SET = Collections.emptySet(); public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { rsp.add("analysis", doAnalysis(req)); @@ -107,7 +114,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { } TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new 
StringReader(value))); - List tokens = analyzeTokenStream(tokenStream); + List tokens = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context)); @@ -115,7 +122,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { for (TokenFilterFactory tokenFilterFactory : filtfacs) { tokenStream = tokenFilterFactory.create(listBasedTokenStream); - List tokenList = analyzeTokenStream(tokenStream); + List tokenList = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context)); listBasedTokenStream = new ListBasedTokenStream(tokenList); } @@ -126,14 +133,24 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { /** * Analyzes the given text using the given analyzer and returns the produced tokens. * - * @param value The value to analyze. + * @param query The query to analyze. * @param analyzer The analyzer to use. - * - * @return The produces token list. 
*/ - protected List analyzeValue(String value, Analyzer analyzer) { - TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value)); - return analyzeTokenStream(tokenStream); + protected Set getQueryTokenSet(String query, Analyzer analyzer) { + final Set tokens = new HashSet(); + final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query)); + final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); + try { + tokenStream.reset(); + while (tokenStream.incrementToken()) { + final BytesRef bytes = new BytesRef(); + bytesAtt.toBytesRef(bytes); + tokens.add(bytes); + } + } catch (IOException ioe) { + throw new RuntimeException("Error occured while iterating over tokenstream", ioe); + } + return tokens; } /** @@ -143,41 +160,17 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * * @return List of tokens produced from the TokenStream */ - private List analyzeTokenStream(TokenStream tokenStream) { - List tokens = new ArrayList(); - - // TODO change this API to support custom attributes - CharTermAttribute termAtt = null; - TermToBytesRefAttribute bytesAtt = null; - if (tokenStream.hasAttribute(CharTermAttribute.class)) { - termAtt = tokenStream.getAttribute(CharTermAttribute.class); - } else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) { - bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); - } - final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); - final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class); - final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); - final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class); - final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class); - + private List analyzeTokenStream(TokenStream tokenStream) { + List tokens = new ArrayList(); + // for 
backwards compatibility, add all "common" attributes + tokenStream.addAttribute(PositionIncrementAttribute.class); + tokenStream.addAttribute(OffsetAttribute.class); + tokenStream.addAttribute(TypeAttribute.class); final BytesRef bytes = new BytesRef(); try { + tokenStream.reset(); while (tokenStream.incrementToken()) { - Token token = new Token(); - if (termAtt != null) { - token.setEmpty().append(termAtt); - } - if (bytesAtt != null) { - bytesAtt.toBytesRef(bytes); - // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly! - token.setEmpty().append(bytes.utf8ToString()); - } - token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); - token.setType(typeAtt.type()); - token.setFlags(flagsAtt.getFlags()); - token.setPayload(payloadAtt.getPayload()); - token.setPositionIncrement(posIncAtt.getPositionIncrement()); - tokens.add((Token) token.clone()); + tokens.add(tokenStream.cloneAttributes()); } } catch (IOException ioe) { throw new RuntimeException("Error occured while iterating over tokenstream", ioe); @@ -186,6 +179,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { return tokens; } + // a static mapping of the reflected attribute keys to the names used in Solr 1.4 + static Map ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap() {{ + put(OffsetAttribute.class.getName() + "#startOffset", "start"); + put(OffsetAttribute.class.getName() + "#endOffset", "end"); + put(TypeAttribute.class.getName() + "#type", "type"); + }}); + /** * Converts the list of Tokens to a list of NamedLists representing the tokens. 
* @@ -194,41 +194,97 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * * @return List of NamedLists containing the relevant information taken from the tokens */ - private List convertTokensToNamedLists(List tokens, AnalysisContext context) { - List tokensNamedLists = new ArrayList(); + private List convertTokensToNamedLists(final List tokens, AnalysisContext context) { + final List tokensNamedLists = new ArrayList(); - Collections.sort(tokens, new Comparator() { - public int compare(Token o1, Token o2) { - return o1.endOffset() - o2.endOffset(); + final int[] positions = new int[tokens.size()]; + int position = 0; + for (int i = 0, c = tokens.size(); i < c; i++) { + AttributeSource token = tokens.get(i); + position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); + positions[i] = position; + } + + // sort the tokens by absolute position + new SorterTemplate() { + @Override + protected void swap(int i, int j) { + Collections.swap(tokens, i, j); + } + + @Override + protected int compare(int i, int j) { + return positions[i] - positions[j]; } - }); - int position = 0; + @Override + protected void setPivot(int i) { + pivot = positions[i]; + } + + @Override + protected int comparePivot(int j) { + return pivot - positions[j]; + } + + private int pivot; + }.mergeSort(0, tokens.size() - 1); FieldType fieldType = context.getFieldType(); - for (Token token : tokens) { - NamedList tokenNamedList = new SimpleOrderedMap(); + final BytesRef rawBytes = new BytesRef(); + final CharArr textBuf = new CharArr(); + for (int i = 0, c = tokens.size(); i < c; i++) { + AttributeSource token = tokens.get(i); + final NamedList tokenNamedList = new SimpleOrderedMap(); + token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes); + + textBuf.reset(); + fieldType.indexedToReadable(rawBytes, textBuf); + final String text = textBuf.toString(); - String text = fieldType.indexedToReadable(token.toString()); 
tokenNamedList.add("text", text); - if (!text.equals(token.toString())) { - tokenNamedList.add("raw_text", token.toString()); + + if (token.hasAttribute(CharTermAttribute.class)) { + final String rawText = token.getAttribute(CharTermAttribute.class).toString(); + if (!rawText.equals(text)) { + tokenNamedList.add("raw_text", rawText); + } } - tokenNamedList.add("type", token.type()); - tokenNamedList.add("start", token.startOffset()); - tokenNamedList.add("end", token.endOffset()); - position += token.getPositionIncrement(); - tokenNamedList.add("position", position); + tokenNamedList.add("raw_bytes", rawBytes.toString()); - if (context.getTermsToMatch().contains(token.toString())) { + if (context.getTermsToMatch().contains(rawBytes)) { tokenNamedList.add("match", true); } - if (token.getPayload() != null) { - tokenNamedList.add("payload", token.getPayload()); - } + tokenNamedList.add("position", positions[i]); + + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and bytes term + if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) + return; + if (CharTermAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + + String k = attClass.getName() + '#' + key; + + // map keys for "standard attributes": + if (ATTRIBUTE_MAPPING.containsKey(k)) { + k = ATTRIBUTE_MAPPING.get(k); + } + + if (value instanceof Payload) { + final Payload p = (Payload) value; + value = new BytesRef(p.getData()).toString(); + } + + tokenNamedList.add(k, value); + } + }); tokensNamedLists.add(tokenNamedList); } @@ -261,38 +317,27 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { */ // TODO refactor to support custom attributes protected final static class ListBasedTokenStream extends TokenStream { - private final List tokens; - private Iterator tokenIterator; + private final List tokens; + private 
Iterator tokenIterator; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); - private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); - private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); - private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); /** * Creates a new ListBasedTokenStream which uses the given tokens as its token source. * * @param tokens Source of tokens to be used */ - ListBasedTokenStream(List tokens) { + ListBasedTokenStream(List tokens) { this.tokens = tokens; tokenIterator = tokens.iterator(); } - /** - * {@inheritDoc} - */ @Override public boolean incrementToken() throws IOException { if (tokenIterator.hasNext()) { - Token next = tokenIterator.next(); - termAtt.copyBuffer(next.buffer(), 0, next.length()); - typeAtt.setType(next.type()); - offsetAtt.setOffset(next.startOffset(), next.endOffset()); - flagsAtt.setFlags(next.getFlags()); - payloadAtt.setPayload(next.getPayload()); - posIncAtt.setPositionIncrement(next.getPositionIncrement()); + AttributeSource next = tokenIterator.next(); + Iterator> atts = next.getAttributeClassesIterator(); + while (atts.hasNext()) // make sure all att impls in the token exist here + addAttribute(atts.next()); + next.copyTo(this); return true; } else { return false; @@ -314,7 +359,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { private final String fieldName; private final FieldType fieldType; private final Analyzer analyzer; - private final Set termsToMatch; + private final Set termsToMatch; /** * Constructs a new AnalysisContext with a given field tpe, analyzer and @@ -328,7 +373,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * @param termsToMatch Holds all the 
terms that should match during the * analysis process. */ - public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set termsToMatch) { + public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set termsToMatch) { this(null, fieldType, analyzer, termsToMatch); } @@ -343,7 +388,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * */ public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) { - this(fieldName, fieldType, analyzer, EMPTY_STRING_SET); + this(fieldName, fieldType, analyzer, EMPTY_BYTES_SET); } /** @@ -359,7 +404,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * @param termsToMatch Holds all the terms that should match during the * analysis process. */ - public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set termsToMatch) { + public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set termsToMatch) { this.fieldName = fieldName; this.fieldType = fieldType; this.analyzer = analyzer; @@ -378,7 +423,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { return analyzer; } - public Set getTermsToMatch() { + public Set getTermsToMatch() { return termsToMatch; } } diff --git a/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java index 519674c782c..195c9a3a7d1 100644 --- a/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java @@ -19,7 +19,7 @@ package org.apache.solr.handler; import org.apache.commons.io.IOUtils; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; +import org.apache.lucene.util.BytesRef; import org.apache.solr.client.solrj.request.DocumentAnalysisRequest; import org.apache.solr.common.SolrException; import 
org.apache.solr.common.SolrInputDocument; @@ -216,21 +216,20 @@ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase { FieldType fieldType = schema.getFieldType(name); - Set termsToMatch = new HashSet(); - if (request.getQuery() != null && request.isShowMatch()) { - try { - List tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer()); - for (Token token : tokens) { - termsToMatch.add(token.toString()); - } - } catch (Exception e) { - // ignore analysis exceptions since we are applying arbitrary text to all fields - } + final String queryValue = request.getQuery(); + Set termsToMatch; + try { + termsToMatch = (queryValue != null && request.isShowMatch()) + ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) + : EMPTY_BYTES_SET; + } catch (Exception e) { + // ignore analysis exceptions since we are applying arbitrary text to all fields + termsToMatch = EMPTY_BYTES_SET; } if (request.getQuery() != null) { try { - AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_STRING_SET); + AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET); fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext)); } catch (Exception e) { // ignore analysis exceptions since we are applying arbitrary text to all fields diff --git a/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java index a670af6bcef..580735b81c3 100644 --- a/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java @@ -17,7 +17,7 @@ package org.apache.solr.handler; -import org.apache.lucene.analysis.Token; +import org.apache.lucene.util.BytesRef; import org.apache.solr.client.solrj.request.FieldAnalysisRequest; import org.apache.solr.common.params.AnalysisParams; import 
org.apache.solr.common.params.CommonParams; @@ -30,10 +30,7 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.commons.io.IOUtils; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import java.io.Reader; import java.io.IOException; @@ -222,14 +219,10 @@ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase { */ private NamedList analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) { - Set termsToMatch = new HashSet(); - String queryValue = analysisRequest.getQuery(); - if (queryValue != null && analysisRequest.isShowMatch()) { - List tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer()); - for (Token token : tokens) { - termsToMatch.add(token.toString()); - } - } + final String queryValue = analysisRequest.getQuery(); + final Set termsToMatch = (queryValue != null && analysisRequest.isShowMatch()) + ? 
getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) + : EMPTY_BYTES_SET; NamedList analyzeResults = new SimpleOrderedMap(); if (analysisRequest.getFieldValue() != null) { diff --git a/solr/src/webapp/web/admin/analysis.jsp b/solr/src/webapp/web/admin/analysis.jsp index 347691e4d0b..89dbd71d677 100644 --- a/solr/src/webapp/web/admin/analysis.jsp +++ b/solr/src/webapp/web/admin/analysis.jsp @@ -24,6 +24,7 @@ org.apache.lucene.analysis.CharReader, org.apache.lucene.analysis.CharStream, org.apache.lucene.analysis.tokenattributes.*, + org.apache.lucene.util.AttributeReflector, org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenizerChain, @@ -31,7 +32,8 @@ org.apache.solr.schema.FieldType, org.apache.solr.schema.SchemaField, org.apache.solr.common.util.XML, - javax.servlet.jsp.JspWriter,java.io.IOException + javax.servlet.jsp.JspWriter,java.io.IOException, + org.apache.noggit.CharArr "%> <%@ page import="java.io.Reader"%> <%@ page import="java.io.StringReader"%> @@ -39,8 +41,6 @@ <%@ page import="java.math.BigInteger" %> <%-- $Id$ --%> -<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%> -<%-- $Name: $ --%> <%@include file="header.jsp" %> @@ -71,19 +71,19 @@ @@ -115,7 +115,7 @@ @@ -148,24 +148,28 @@ } if (field!=null) { - HashSet matches = null; + HashSet matches = null; if (qval!="" && highlight) { Reader reader = new StringReader(qval); Analyzer analyzer = field.getType().getQueryAnalyzer(); TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader); + TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class); tstream.reset(); - List tokens = getTokens(tstream); - matches = new HashSet(); - for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); } + matches = new HashSet(); + while (tstream.incrementToken()) { + final BytesRef bytes = new BytesRef(); + bytesAtt.toBytesRef(bytes); + 
matches.add(bytes); + } } if (val!="") { out.println("

Index Analyzer

"); - doAnalyzer(out, field, val, false, verbose,matches); + doAnalyzer(out, field, val, false, verbose, matches); } if (qval!="") { out.println("

Query Analyzer

"); - doAnalyzer(out, field, qval, true, qverbose,null); + doAnalyzer(out, field, qval, true, qverbose, null); } } @@ -177,7 +181,7 @@ <%! - private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { + private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { FieldType ft = field.getType(); Analyzer analyzer = queryAnalyser ? @@ -240,7 +244,7 @@ tstream.reset(); List tokens = getTokens(tstream); if (verbose) { - writeHeader(out, analyzer.getClass(), new HashMap()); + writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP); } writeTokens(out, tokens, ft, verbose, match); } @@ -249,52 +253,59 @@ static List getTokens(TokenStream tstream) throws IOException { List tokens = new ArrayList(); - - while (true) { - if (!tstream.incrementToken()) - break; - else { - tokens.add(tstream.cloneAttributes()); - } + tstream.reset(); + while (tstream.incrementToken()) { + tokens.add(tstream.cloneAttributes()); } return tokens; } - + private static class ReflectItem { + final Class attClass; + final String key; + final Object value; + + ReflectItem(Class attClass, String key, Object value) { + this.attClass = attClass; + this.key = key; + this.value = value; + } + } + private static class Tok { - AttributeSource token; - int pos; - Tok(AttributeSource token, int pos) { - this.token=token; - this.pos=pos; - } - - public boolean equals(Object o) { - return ((Tok)o).token.toString().equals(token.toString()); - } - public int hashCode() { - return token.toString().hashCode(); - } - public String toString() { - return token.toString(); - } - public String toPrintableString() { - TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class); - if (att instanceof CharTermAttribute) - return att.toString(); - else { - BytesRef bytes = new BytesRef(); - att.toBytesRef(bytes); - return 
bytes.toString(); - } + final BytesRef bytes = new BytesRef(); + final String rawText, text; + final int pos; + final List reflected = new ArrayList(); + + Tok(AttributeSource token, int pos, FieldType ft) { + this.pos = pos; + token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes); + rawText = (token.hasAttribute(CharTermAttribute.class)) ? + token.getAttribute(CharTermAttribute.class).toString() : null; + final CharArr textBuf = new CharArr(bytes.length); + ft.indexedToReadable(bytes, textBuf); + text = textBuf.toString(); + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and raw term + if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) + return; + if (CharTermAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + reflected.add(new ReflectItem(attClass, key, value)); + } + }); } } - private static interface ToStr { - public String toStr(Object o); + private static interface TokToStr { + public String toStr(Tok o); } - private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set match) throws IOException { + private static void printRow(JspWriter out, String header, String headerTitle, List[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set match) throws IOException { // find the maximum number of terms for any position int maxSz=1; if (multival) { @@ -308,7 +319,13 @@ out.println("
"); if (idx==0 && verbose) { if (header != null) { - out.print(""); } @@ -317,7 +334,7 @@ for (int posIndex=0; posIndex lst = arrLst[posIndex]; if (lst.size() <= idx) continue; - if (match!=null && match.contains(lst.get(idx))) { + if (match!=null && match.contains(lst.get(idx).bytes)) { out.print("
- Field + Field - +
- Field value (Index) + Field value (Index)
verbose output >
- +
- Field value (Query) + Field value (Query)
verbose output >
- +
- +
"); + out.print(""); XML.escapeCharData(header,out); out.println(" args) throws IOException { out.print("

"); out.print(clazz.getName()); @@ -359,137 +367,93 @@ // readable, raw, pos, type, start/end - static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { + static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { // Use a map to tell what tokens are in what positions // because some tokenizers/filters may do funky stuff with // very large increments, or negative increments. HashMap> map = new HashMap>(); boolean needRaw=false; - int pos=0; + int pos=0, reflectionCount = -1; for (AttributeSource t : tokens) { - if (!t.toString().equals(ft.indexedToReadable(t.toString()))) { - needRaw=true; - } - pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); List lst = map.get(pos); if (lst==null) { lst = new ArrayList(1); map.put(pos,lst); } - Tok tok = new Tok(t,pos); + Tok tok = new Tok(t,pos,ft); + // sanity check + if (reflectionCount < 0) { + reflectionCount = tok.reflected.size(); + } else { + if (reflectionCount != tok.reflected.size()) + throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos); + } + if (tok.rawText != null && !tok.text.equals(tok.rawText)) { + needRaw=true; + } lst.add(tok); } List[] arr = (List[])map.values().toArray(new ArrayList[map.size()]); - /* Jetty 6.1.3 miscompiles this generics version... 
- Arrays.sort(arr, new Comparator>() { - public int compare(List toks, List toks1) { - return toks.get(0).pos - toks1.get(0).pos; - } - } - */ - + // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics: Arrays.sort(arr, new Comparator() { public int compare(Object toks, Object toks1) { return ((List)toks).get(0).pos - ((List)toks1).get(0).pos; } - } - - - ); + }); out.println(""); if (verbose) { - printRow(out,"term position", arr, new ToStr() { - public String toStr(Object o) { - return Integer.toString(((Tok)o).pos); + printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return Integer.toString(t.pos); } - } - ,false - ,verbose - ,null); + },false,verbose,null); } - - printRow(out,"term text", arr, new ToStr() { - public String toStr(Object o) { - return ft.indexedToReadable( ((Tok)o).toPrintableString() ); + printRow(out, "term text", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return t.text; } - } - ,true - ,verbose - ,match - ); - - if (needRaw) { - printRow(out,"raw text", arr, new ToStr() { - public String toStr(Object o) { - // page is UTF-8, so anything goes. - return ((Tok)o).toPrintableString(); - } - } - ,true - ,verbose - ,match - ); - } + },true,verbose,match); if (verbose) { - printRow(out,"term type", arr, new ToStr() { - public String toStr(Object o) { - String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type(); - if (tt == null) { - return "null"; - } else { - return tt; + if (needRaw) { + printRow(out, "raw text", CharTermAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + // page is UTF-8, so anything goes. + return (t.rawText == null) ? 
"" : t.rawText; } - } + },true,verbose,match); } - ,true - ,verbose, - null - ); - } - - if (verbose) { - printRow(out,"source start,end", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ; + + printRow(out, "raw bytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return t.bytes.toString(); } - } - ,true - ,verbose - ,null - ); - } + },true,verbose,match); - if (verbose) { - printRow(out,"payload", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - Payload p = t.addAttribute(PayloadAttribute.class).getPayload(); - if( null != p ) { - BigInteger bi = new BigInteger( p.getData() ); - String ret = bi.toString( 16 ); - if (ret.length() % 2 != 0) { - // Pad with 0 - ret = "0"+ret; + for (int att=0; att < reflectionCount; att++) { + final ReflectItem item0 = arr[0].get(0).reflected.get(att); + final int i = att; + printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + final ReflectItem item = t.reflected.get(i); + if (item0.attClass != item.attClass || !item0.key.equals(item.key)) + throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos); + if (item.value instanceof Payload) { + final Payload p = (Payload) item.value; + return new BytesRef(p.getData()).toString(); + } else { + return (item.value != null) ? item.value.toString() : ""; } - ret += isPayloadString( p ); - return ret; } - return ""; - } + },true,verbose, null); } - ,true - ,verbose - ,null - ); } out.println("
");