mirror of https://github.com/apache/lucene.git
LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the contents of AttributeImpl and AttributeSource using a well-defined API
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1061039 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6a9f686f58
commit
460fa90564
|
@ -362,9 +362,9 @@ Changes in backwards compatibility policy
|
|||
* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
|
||||
now implements CharSequence. This requires the toString() methods of
|
||||
CharTermAttribute, deprecated TermAttribute, and Token to return only
|
||||
the term text and no other attribute contents.
|
||||
TODO: Point to new attribute inspection API coming with LUCENE-2374.
|
||||
(Uwe Schindler, Robert Muir)
|
||||
the term text and no other attribute contents. LUCENE-2374 implements
|
||||
an attribute reflection API to no longer rely on toString() for attribute
|
||||
inspection. (Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer,
|
||||
PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed
|
||||
|
@ -592,6 +592,23 @@ API Changes
|
|||
to ensure that the norm is encoded with your Similarity.
|
||||
(Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the
|
||||
contents of AttributeImpl and AttributeSource using a well-defined API.
|
||||
This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes
|
||||
in a structured way.
|
||||
There are also some backwards incompatible changes in toString() output,
|
||||
as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute
|
||||
leading to changed toString() return values. The new API allows getting a
|
||||
string representation in a well-defined way using a new method
|
||||
reflectAsString(). For backwards compatibility reasons, when toString()
|
||||
was implemented by implementation subclasses, the default implementation of
|
||||
AttributeImpl.reflectWith() uses toString()'s output instead to report the
|
||||
Attribute's properties. Otherwise, reflectWith() uses Java's reflection
|
||||
(like toString() did before) to get the attribute properties.
|
||||
In addition, the previously mandatory equals() and hashCode() are no longer required
|
||||
for AttributeImpls, but can still be provided (if needed).
|
||||
(Uwe Schindler)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
|
||||
|
|
|
@ -328,3 +328,10 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
|
|||
* LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong
|
||||
are final. If you subclassed this code before to encode variable-length
|
||||
integers in some specialized way, use the Codec API instead.
|
||||
|
||||
* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct
|
||||
reflection of AttributeImpl instances, where the reflection was done using deprecated
|
||||
toString() parsing, you now have to override reflectWith() to customize output.
|
||||
toString() is no longer implemented by AttributeImpl, so if you have overridden
|
||||
toString(), port your customization over to reflectWith(). reflectAsString() would
|
||||
then return what toString() did before.
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.queryParser.standard.config;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
|
||||
public class TestAttributes extends LuceneTestCase {
|
||||
|
||||
// this checks using reflection API if the defaults are correct
|
||||
public void testAttributes() {
|
||||
_TestUtil.assertAttributeReflection(new AllowLeadingWildcardAttributeImpl(),
|
||||
Collections.singletonMap(AllowLeadingWildcardAttribute.class.getName()+"#allowLeadingWildcard", false));
|
||||
_TestUtil.assertAttributeReflection(new AnalyzerAttributeImpl(),
|
||||
Collections.singletonMap(AnalyzerAttribute.class.getName()+"#analyzer", null));
|
||||
_TestUtil.assertAttributeReflection(new BoostAttributeImpl(),
|
||||
Collections.singletonMap(BoostAttribute.class.getName()+"#boost", 1.0f));
|
||||
_TestUtil.assertAttributeReflection(new DateResolutionAttributeImpl(),
|
||||
Collections.singletonMap(DateResolutionAttribute.class.getName()+"#dateResolution", null));
|
||||
_TestUtil.assertAttributeReflection(new DefaultOperatorAttributeImpl(),
|
||||
Collections.singletonMap(DefaultOperatorAttribute.class.getName()+"#operator", DefaultOperatorAttribute.Operator.OR));
|
||||
_TestUtil.assertAttributeReflection(new DefaultPhraseSlopAttributeImpl(),
|
||||
Collections.singletonMap(DefaultPhraseSlopAttribute.class.getName()+"#defaultPhraseSlop", 0));
|
||||
_TestUtil.assertAttributeReflection(new FieldBoostMapAttributeImpl(),
|
||||
Collections.singletonMap(FieldBoostMapAttribute.class.getName()+"#boosts", Collections.emptyMap()));
|
||||
_TestUtil.assertAttributeReflection(new FieldDateResolutionMapAttributeImpl(),
|
||||
Collections.singletonMap(FieldDateResolutionMapAttribute.class.getName()+"#dateRes", Collections.emptyMap()));
|
||||
_TestUtil.assertAttributeReflection(new FuzzyAttributeImpl(), new HashMap<String,Object>() {{
|
||||
put(FuzzyAttribute.class.getName()+"#prefixLength", FuzzyQuery.defaultPrefixLength);
|
||||
put(FuzzyAttribute.class.getName()+"#minSimilarity", FuzzyQuery.defaultMinSimilarity);
|
||||
}});
|
||||
_TestUtil.assertAttributeReflection(new LocaleAttributeImpl(),
|
||||
Collections.singletonMap(LocaleAttribute.class.getName()+"#locale", Locale.getDefault()));
|
||||
_TestUtil.assertAttributeReflection(new LowercaseExpandedTermsAttributeImpl(),
|
||||
Collections.singletonMap(LowercaseExpandedTermsAttribute.class.getName()+"#lowercaseExpandedTerms", true));
|
||||
_TestUtil.assertAttributeReflection(new MultiFieldAttributeImpl(),
|
||||
Collections.singletonMap(MultiFieldAttribute.class.getName()+"#fields", null));
|
||||
_TestUtil.assertAttributeReflection(new MultiTermRewriteMethodAttributeImpl(),
|
||||
Collections.singletonMap(MultiTermRewriteMethodAttribute.class.getName()+"#multiTermRewriteMethod", MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
|
||||
_TestUtil.assertAttributeReflection(new PositionIncrementsAttributeImpl(),
|
||||
Collections.singletonMap(PositionIncrementsAttribute.class.getName()+"#positionIncrementsEnabled", false));
|
||||
_TestUtil.assertAttributeReflection(new RangeCollatorAttributeImpl(),
|
||||
Collections.singletonMap(RangeCollatorAttribute.class.getName()+"#rangeCollator", null));
|
||||
}
|
||||
|
||||
}
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
|
|||
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.document.NumericField; // for javadocs
|
||||
|
@ -168,17 +169,18 @@ public final class NumericTokenStream extends TokenStream {
|
|||
// this attribute has no contents to clear!
|
||||
// we keep it untouched as it's fully controlled by outer class.
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return other == this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return System.identityHashCode(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
final BytesRef bytes = new BytesRef();
|
||||
toBytesRef(bytes);
|
||||
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
|
||||
reflector.reflect(NumericTermAttribute.class, "shift", shift);
|
||||
reflector.reflect(NumericTermAttribute.class, "rawValue", rawValue);
|
||||
reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
|
||||
reflector.reflect(NumericTermAttribute.class, "precisionStep", precisionStep);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
final NumericTermAttribute a = (NumericTermAttribute) target;
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
|
|||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
/**
|
||||
A Token is an occurrence of a term from the text of a field. It consists of
|
||||
|
@ -588,6 +589,17 @@ public class Token extends CharTermAttributeImpl
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
super.reflectWith(reflector);
|
||||
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
|
||||
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
|
||||
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
|
||||
reflector.reflect(PayloadAttribute.class, "payload", payload);
|
||||
reflector.reflect(FlagsAttribute.class, "flags", flags);
|
||||
reflector.reflect(TypeAttribute.class, "type", type);
|
||||
}
|
||||
|
||||
/** Convenience factory that returns <code>Token</code> as implementation for the basic
|
||||
* attributes and return the default impl (with "Impl" appended) for all other
|
||||
* attributes.
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.nio.CharBuffer;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
|
@ -243,6 +244,14 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
|
|||
return new String(termBuffer, 0, termLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
reflector.reflect(CharTermAttribute.class, "term", toString());
|
||||
final BytesRef bytes = new BytesRef();
|
||||
toBytesRef(bytes);
|
||||
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
CharTermAttribute t = (CharTermAttribute) target;
|
||||
|
|
|
@ -37,20 +37,6 @@ public final class BoostAttributeImpl extends AttributeImpl implements BoostAttr
|
|||
public void clear() {
|
||||
boost = 1.0f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other)
|
||||
return true;
|
||||
if (other instanceof BoostAttributeImpl)
|
||||
return ((BoostAttributeImpl) other).boost == boost;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Float.floatToIntBits(boost);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
|
|
|
@ -48,25 +48,6 @@ public final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl imp
|
|||
maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
|
||||
competitiveTerm = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other)
|
||||
return true;
|
||||
if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
|
||||
final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
|
||||
return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost)
|
||||
&& (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
|
||||
if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
|
||||
return hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.lucene.util;
|
|||
import java.io.Serializable;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.lang.ref.WeakReference;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Base class for Attributes that can be added to a
|
||||
|
@ -37,71 +39,79 @@ public abstract class AttributeImpl implements Cloneable, Serializable, Attribut
|
|||
public abstract void clear();
|
||||
|
||||
/**
|
||||
* The default implementation of this method accesses all declared
|
||||
* fields of this object and prints the values in the following syntax:
|
||||
* This method returns the current attribute values as a string in the following format
|
||||
* by calling the {@link #reflectWith(AttributeReflector)} method:
|
||||
*
|
||||
* <pre>
|
||||
* public String toString() {
|
||||
* return "start=" + startOffset + ",end=" + endOffset;
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* This method may be overridden by subclasses.
|
||||
* <ul>
|
||||
* <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
|
||||
* <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
|
||||
* </ul>
|
||||
*
|
||||
* @see #reflectWith(AttributeReflector)
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
Class<?> clazz = this.getClass();
|
||||
Field[] fields = clazz.getDeclaredFields();
|
||||
try {
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
Field f = fields[i];
|
||||
if (Modifier.isStatic(f.getModifiers())) continue;
|
||||
f.setAccessible(true);
|
||||
Object value = f.get(this);
|
||||
if (buffer.length()>0) {
|
||||
public final String reflectAsString(final boolean prependAttClass) {
|
||||
final StringBuilder buffer = new StringBuilder();
|
||||
reflectWith(new AttributeReflector() {
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
if (buffer.length() > 0) {
|
||||
buffer.append(',');
|
||||
}
|
||||
if (value == null) {
|
||||
buffer.append(f.getName() + "=null");
|
||||
} else {
|
||||
buffer.append(f.getName() + "=" + value);
|
||||
if (prependAttClass) {
|
||||
buffer.append(attClass.getName()).append('#');
|
||||
}
|
||||
buffer.append(key).append('=').append((value == null) ? "null" : value);
|
||||
}
|
||||
});
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is for introspection of attributes; it should simply
|
||||
* add the key/values this attribute holds to the given {@link AttributeReflector}.
|
||||
*
|
||||
* <p>The default implementation calls {@link AttributeReflector#reflect} for all
|
||||
* non-static fields from the implementing class, using the field name as key
|
||||
* and the field value as value. The Attribute class is also determined by reflection.
|
||||
* Please note that the default implementation can only handle single-Attribute
|
||||
* implementations.
|
||||
*
|
||||
* <p>Custom implementations look like this (e.g. for a combined attribute implementation):
|
||||
* <pre>
|
||||
* public void reflectWith(AttributeReflector reflector) {
|
||||
* reflector.reflect(CharTermAttribute.class, "term", term());
|
||||
* reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p>If you implement this method, make sure that for each invocation, the same set of {@link Attribute}
|
||||
* interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly
|
||||
* different values. So don't automatically exclude e.g. {@code null} properties!
|
||||
*
|
||||
* @see #reflectAsString(boolean)
|
||||
*/
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
final Class<? extends AttributeImpl> clazz = this.getClass();
|
||||
final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
|
||||
if (interfaces.size() != 1) {
|
||||
throw new UnsupportedOperationException(clazz.getName() +
|
||||
" implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
|
||||
}
|
||||
final Class<? extends Attribute> interf = interfaces.getFirst().get();
|
||||
final Field[] fields = clazz.getDeclaredFields();
|
||||
try {
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
final Field f = fields[i];
|
||||
if (Modifier.isStatic(f.getModifiers())) continue;
|
||||
f.setAccessible(true);
|
||||
reflector.reflect(interf, f.getName(), f.get(this));
|
||||
}
|
||||
} catch (IllegalAccessException e) {
|
||||
// this should never happen, because we're just accessing fields
|
||||
// from 'this'
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses must implement this method and should compute
|
||||
* a hashCode similar to this:
|
||||
* <pre>
|
||||
* public int hashCode() {
|
||||
* int code = startOffset;
|
||||
* code = code * 31 + endOffset;
|
||||
* return code;
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* see also {@link #equals(Object)}
|
||||
*/
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
/**
|
||||
* All values used for computation of {@link #hashCode()}
|
||||
* should be checked here for equality.
|
||||
*
|
||||
* see also {@link Object#equals(Object)}
|
||||
*/
|
||||
@Override
|
||||
public abstract boolean equals(Object other);
|
||||
|
||||
/**
|
||||
* Copies the values from this Attribute into the passed-in
|
||||
* target attribute. The target implementation must support all the
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This interface is used to reflect contents of {@link AttributeSource} or {@link AttributeImpl}.
|
||||
*/
|
||||
public interface AttributeReflector {
|
||||
|
||||
/**
|
||||
* This method gets called for every property in an {@link AttributeImpl}/{@link AttributeSource}
|
||||
* passing the class name of the {@link Attribute}, a key and the actual value.
|
||||
* E.g., an invocation of {@link org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl#reflectWith}
|
||||
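* would call this method using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class}
|
||||
* as attribute class, {@code "term"} as key and the actual value as a String (a further call reports the term bytes under the {@code "bytes"} key of {@code TermToBytesRefAttribute}).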
|
||||
*/
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value);
|
||||
|
||||
}
|
|
@ -180,20 +180,9 @@ public class AttributeSource {
|
|||
private static final WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
|
||||
new WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>>();
|
||||
|
||||
/** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
|
||||
* <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
|
||||
* the <code>AttributeSource</code>, because the provided attributes may already exist.
|
||||
* You should always retrieve the wanted attributes using {@link #getAttribute} after adding
|
||||
* with this method and cast to your class.
|
||||
* The recommended way to use custom implementations is using an {@link AttributeFactory}.
|
||||
* </font></p>
|
||||
*/
|
||||
public void addAttributeImpl(final AttributeImpl att) {
|
||||
final Class<? extends AttributeImpl> clazz = att.getClass();
|
||||
if (attributeImpls.containsKey(clazz)) return;
|
||||
LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces;
|
||||
static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
|
||||
synchronized(knownImplClasses) {
|
||||
foundInterfaces = knownImplClasses.get(clazz);
|
||||
LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
|
||||
if (foundInterfaces == null) {
|
||||
// we have a strong reference to the class instance holding all interfaces in the list (parameter "clazz"),
|
||||
// so all WeakReferences are never evicted by GC
|
||||
|
@ -210,7 +199,23 @@ public class AttributeSource {
|
|||
actClazz = actClazz.getSuperclass();
|
||||
} while (actClazz != null);
|
||||
}
|
||||
return foundInterfaces;
|
||||
}
|
||||
}
|
||||
|
||||
/** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
|
||||
* <p><font color="red"><b>Please note:</b> It is not guaranteed that <code>att</code> is added to
|
||||
* the <code>AttributeSource</code>, because the provided attributes may already exist.
|
||||
* You should always retrieve the wanted attributes using {@link #getAttribute} after adding
|
||||
* with this method and cast to your class.
|
||||
* The recommended way to use custom implementations is using an {@link AttributeFactory}.
|
||||
* </font></p>
|
||||
*/
|
||||
public final void addAttributeImpl(final AttributeImpl att) {
|
||||
final Class<? extends AttributeImpl> clazz = att.getClass();
|
||||
if (attributeImpls.containsKey(clazz)) return;
|
||||
final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
|
||||
getAttributeInterfaces(clazz);
|
||||
|
||||
// add all interfaces of this AttributeImpl to the maps
|
||||
for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
|
||||
|
@ -233,7 +238,7 @@ public class AttributeSource {
|
|||
* already in this AttributeSource and returns it. Otherwise a
|
||||
* new instance is created, added to this AttributeSource and returned.
|
||||
*/
|
||||
public <A extends Attribute> A addAttribute(Class<A> attClass) {
|
||||
public final <A extends Attribute> A addAttribute(Class<A> attClass) {
|
||||
AttributeImpl attImpl = attributes.get(attClass);
|
||||
if (attImpl == null) {
|
||||
if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) {
|
||||
|
@ -248,7 +253,7 @@ public class AttributeSource {
|
|||
}
|
||||
|
||||
/** Returns true, iff this AttributeSource has any attributes */
|
||||
public boolean hasAttributes() {
|
||||
public final boolean hasAttributes() {
|
||||
return !this.attributes.isEmpty();
|
||||
}
|
||||
|
||||
|
@ -256,7 +261,7 @@ public class AttributeSource {
|
|||
* The caller must pass in a Class<? extends Attribute> value.
|
||||
* Returns true, iff this AttributeSource contains the passed-in Attribute.
|
||||
*/
|
||||
public boolean hasAttribute(Class<? extends Attribute> attClass) {
|
||||
public final boolean hasAttribute(Class<? extends Attribute> attClass) {
|
||||
return this.attributes.containsKey(attClass);
|
||||
}
|
||||
|
||||
|
@ -271,7 +276,7 @@ public class AttributeSource {
|
|||
* available. If you want to only use the attribute, if it is available (to optimize
|
||||
* consuming), use {@link #hasAttribute}.
|
||||
*/
|
||||
public <A extends Attribute> A getAttribute(Class<A> attClass) {
|
||||
public final <A extends Attribute> A getAttribute(Class<A> attClass) {
|
||||
AttributeImpl attImpl = attributes.get(attClass);
|
||||
if (attImpl == null) {
|
||||
throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'.");
|
||||
|
@ -319,7 +324,7 @@ public class AttributeSource {
|
|||
* Resets all Attributes in this AttributeSource by calling
|
||||
* {@link AttributeImpl#clear()} on each Attribute implementation.
|
||||
*/
|
||||
public void clearAttributes() {
|
||||
public final void clearAttributes() {
|
||||
if (hasAttributes()) {
|
||||
if (currentState == null) {
|
||||
computeCurrentState();
|
||||
|
@ -334,7 +339,7 @@ public class AttributeSource {
|
|||
* Captures the state of all Attributes. The return value can be passed to
|
||||
* {@link #restoreState} to restore the state of this or another AttributeSource.
|
||||
*/
|
||||
public State captureState() {
|
||||
public final State captureState() {
|
||||
if (!hasAttributes()) {
|
||||
return null;
|
||||
}
|
||||
|
@ -360,7 +365,7 @@ public class AttributeSource {
|
|||
* reset its value to the default, in which case the caller should first
|
||||
* call {@link TokenStream#clearAttributes()} on the targetStream.
|
||||
*/
|
||||
public void restoreState(State state) {
|
||||
public final void restoreState(State state) {
|
||||
if (state == null) return;
|
||||
|
||||
do {
|
||||
|
@ -431,21 +436,53 @@ public class AttributeSource {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder().append('(');
|
||||
/**
|
||||
* This method returns the current attribute values as a string in the following format
|
||||
* by calling the {@link #reflectWith(AttributeReflector)} method:
|
||||
*
|
||||
* <ul>
|
||||
* <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
|
||||
* <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
|
||||
* </ul>
|
||||
*
|
||||
* @see #reflectWith(AttributeReflector)
|
||||
*/
|
||||
public final String reflectAsString(final boolean prependAttClass) {
|
||||
final StringBuilder buffer = new StringBuilder();
|
||||
reflectWith(new AttributeReflector() {
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
if (buffer.length() > 0) {
|
||||
buffer.append(',');
|
||||
}
|
||||
if (prependAttClass) {
|
||||
buffer.append(attClass.getName()).append('#');
|
||||
}
|
||||
buffer.append(key).append('=').append((value == null) ? "null" : value);
|
||||
}
|
||||
});
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is for introspection of attributes; it should simply
|
||||
* add the key/values this AttributeSource holds to the given {@link AttributeReflector}.
|
||||
*
|
||||
* <p>This method iterates over all Attribute implementations and calls the
|
||||
* corresponding {@link AttributeImpl#reflectWith} method.</p>
|
||||
*
|
||||
* @see AttributeImpl#reflectWith
|
||||
*/
|
||||
public final void reflectWith(AttributeReflector reflector) {
|
||||
if (hasAttributes()) {
|
||||
if (currentState == null) {
|
||||
computeCurrentState();
|
||||
}
|
||||
for (State state = currentState; state != null; state = state.next) {
|
||||
if (state != currentState) sb.append(',');
|
||||
sb.append(state.attribute.toString());
|
||||
state.attribute.reflectWith(reflector);
|
||||
}
|
||||
}
|
||||
return sb.append(')').toString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Performs a clone of all {@link AttributeImpl} instances returned in a new
|
||||
* {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream
|
||||
|
@ -453,7 +490,7 @@ public class AttributeSource {
|
|||
* You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look
|
||||
* into / modify the captured state.
|
||||
*/
|
||||
public AttributeSource cloneAttributes() {
|
||||
public final AttributeSource cloneAttributes() {
|
||||
final AttributeSource clone = new AttributeSource(this.factory);
|
||||
|
||||
if (hasAttributes()) {
|
||||
|
|
|
@ -22,8 +22,11 @@ import org.apache.lucene.analysis.tokenattributes.*;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class TestToken extends LuceneTestCase {
|
||||
|
||||
|
@ -241,6 +244,22 @@ public class TestToken extends LuceneTestCase {
|
|||
ts.addAttribute(TypeAttribute.class) instanceof Token);
|
||||
}
|
||||
|
||||
public void testAttributeReflection() throws Exception {
|
||||
Token t = new Token("foobar", 6, 22, 8);
|
||||
_TestUtil.assertAttributeReflection(t,
|
||||
new HashMap<String,Object>() {{
|
||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
|
||||
put(OffsetAttribute.class.getName() + "#startOffset", 6);
|
||||
put(OffsetAttribute.class.getName() + "#endOffset", 22);
|
||||
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
|
||||
put(PayloadAttribute.class.getName() + "#payload", null);
|
||||
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
|
||||
put(FlagsAttribute.class.getName() + "#flags", 8);
|
||||
}});
|
||||
}
|
||||
|
||||
|
||||
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
|
||||
@SuppressWarnings("unchecked")
|
||||
T clone = (T) att.clone();
|
||||
|
|
|
@ -19,7 +19,10 @@ package org.apache.lucene.analysis.tokenattributes;
|
|||
|
||||
import org.apache.lucene.analysis.TestToken;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import java.nio.CharBuffer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Formatter;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -126,6 +129,15 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
|
|||
assertNotSame(buf, copy.buffer());
|
||||
}
|
||||
|
||||
public void testAttributeReflection() throws Exception {
|
||||
CharTermAttributeImpl t = new CharTermAttributeImpl();
|
||||
t.append("foobar");
|
||||
_TestUtil.assertAttributeReflection(t, new HashMap<String,Object>() {{
|
||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
|
||||
}});
|
||||
}
|
||||
|
||||
public void testCharSequenceInterface() {
|
||||
final String s = "0123456789";
|
||||
final CharTermAttributeImpl t = new CharTermAttributeImpl();
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
package org.apache.lucene.analysis.tokenattributes;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class TestSimpleAttributeImpl extends LuceneTestCase {
|
||||
|
||||
// this checks using reflection API if the defaults are correct
|
||||
public void testAttributes() {
|
||||
_TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(),
|
||||
Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1));
|
||||
_TestUtil.assertAttributeReflection(new FlagsAttributeImpl(),
|
||||
Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0));
|
||||
_TestUtil.assertAttributeReflection(new TypeAttributeImpl(),
|
||||
Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE));
|
||||
_TestUtil.assertAttributeReflection(new PayloadAttributeImpl(),
|
||||
Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null));
|
||||
_TestUtil.assertAttributeReflection(new KeywordAttributeImpl(),
|
||||
Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false));
|
||||
_TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap<String,Object>() {{
|
||||
put(OffsetAttribute.class.getName()+"#startOffset", 0);
|
||||
put(OffsetAttribute.class.getName()+"#endOffset", 0);
|
||||
}});
|
||||
}
|
||||
|
||||
}
|
|
@ -109,34 +109,6 @@ public class TestAttributeSource extends LuceneTestCase {
|
|||
assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
|
||||
}
|
||||
|
||||
public void testToStringAndMultiAttributeImplementations() {
|
||||
AttributeSource src = new AttributeSource();
|
||||
CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class);
|
||||
TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class);
|
||||
termAtt.append("TestTerm");
|
||||
typeAtt.setType("TestType");
|
||||
assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString());
|
||||
Iterator<AttributeImpl> it = src.getAttributeImplsIterator();
|
||||
assertTrue("Iterator should have 2 attributes left", it.hasNext());
|
||||
assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next());
|
||||
assertTrue("Iterator should have 1 attributes left", it.hasNext());
|
||||
assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next());
|
||||
assertFalse("Iterator should have 0 attributes left", it.hasNext());
|
||||
|
||||
src = new AttributeSource();
|
||||
src.addAttributeImpl(new Token());
|
||||
// this should not add a new attribute as Token implements CharTermAttribute, too
|
||||
termAtt = src.addAttribute(CharTermAttribute.class);
|
||||
assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token);
|
||||
// get the Token attribute and check, that it is the only one
|
||||
it = src.getAttributeImplsIterator();
|
||||
Token tok = (Token) it.next();
|
||||
assertFalse("There should be only one attribute implementation instance", it.hasNext());
|
||||
|
||||
termAtt.setEmpty().append("TestTerm");
|
||||
assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString());
|
||||
}
|
||||
|
||||
public void testDefaultAttributeFactory() throws Exception {
|
||||
AttributeSource src = new AttributeSource();
|
||||
|
||||
|
|
|
@ -22,6 +22,10 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Random;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.junit.Assert;
|
||||
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
|
@ -238,4 +242,17 @@ public class _TestUtil {
|
|||
((ConcurrentMergeScheduler) ms).setMaxMergeCount(3);
|
||||
}
|
||||
}
|
||||
|
||||
/** Checks some basic behaviour of an AttributeImpl
|
||||
* @param reflectedValues contains a map with "AttributeClass#key" as keys
|
||||
*/
|
||||
public static <T> void assertAttributeReflection(final AttributeImpl att, Map<String,T> reflectedValues) {
|
||||
final Map<String,Object> map = new HashMap<String,Object>();
|
||||
att.reflectWith(new AttributeReflector() {
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
map.put(attClass.getName() + '#' + key, value);
|
||||
}
|
||||
});
|
||||
Assert.assertEquals("Reflection does not produce same map", reflectedValues, map);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.tokenattributes;
|
|||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
import com.ibm.icu.lang.UScript;
|
||||
|
||||
|
@ -77,7 +78,7 @@ public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribut
|
|||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "script=" + getName();
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
reflector.reflect(ScriptAttribute.class, "script", getName());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -428,13 +428,6 @@
|
|||
-->
|
||||
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
|
||||
|
||||
<!--
|
||||
Analysis request handler. Since Solr 1.3. Use to return how a document is analyzed. Useful
|
||||
for debugging and as a token server for other types of applications
|
||||
-->
|
||||
<requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
|
||||
|
||||
|
||||
<!-- CSV update handler, loaded on demand -->
|
||||
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
|
||||
|
||||
|
|
|
@ -1,243 +0,0 @@
|
|||
package org.apache.solr.handler;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
*
|
||||
* @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead.
|
||||
**/
|
||||
@Deprecated
|
||||
public class AnalysisRequestHandler extends RequestHandlerBase {
|
||||
|
||||
public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class);
|
||||
|
||||
private XMLInputFactory inputFactory;
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
super.init(args);
|
||||
|
||||
inputFactory = XMLInputFactory.newInstance();
|
||||
try {
|
||||
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
|
||||
// XMLInputFactory, as that implementation tries to cache and reuse the
|
||||
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
|
||||
// prevents this.
|
||||
// All other known open-source stax parsers (and the bea ref impl)
|
||||
// have thread-safe factories.
|
||||
inputFactory.setProperty("reuse-instance", Boolean.FALSE);
|
||||
}
|
||||
catch (IllegalArgumentException ex) {
|
||||
// Other implementations will likely throw this exception since "reuse-instance"
|
||||
// is implementation specific.
|
||||
log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory);
|
||||
}
|
||||
}
|
||||
|
||||
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
|
||||
SolrParams params = req.getParams();
|
||||
Iterable<ContentStream> streams = req.getContentStreams();
|
||||
if (streams != null) {
|
||||
for (ContentStream stream : req.getContentStreams()) {
|
||||
Reader reader = stream.getReader();
|
||||
try {
|
||||
XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
|
||||
NamedList<Object> result = processContent(parser, req.getSchema());
|
||||
rsp.add("response", result);
|
||||
}
|
||||
finally {
|
||||
IOUtils.closeQuietly(reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NamedList<Object> processContent(XMLStreamReader parser,
|
||||
IndexSchema schema) throws XMLStreamException, IOException {
|
||||
NamedList<Object> result = new SimpleOrderedMap<Object>();
|
||||
while (true) {
|
||||
int event = parser.next();
|
||||
switch (event) {
|
||||
case XMLStreamConstants.END_DOCUMENT: {
|
||||
parser.close();
|
||||
return result;
|
||||
}
|
||||
case XMLStreamConstants.START_ELEMENT: {
|
||||
String currTag = parser.getLocalName();
|
||||
if ("doc".equals(currTag)) {
|
||||
log.trace("Tokenizing doc...");
|
||||
|
||||
SolrInputDocument doc = readDoc(parser);
|
||||
SchemaField uniq = schema.getUniqueKeyField();
|
||||
NamedList<NamedList<NamedList<Object>>> theTokens = new SimpleOrderedMap<NamedList<NamedList<Object>>>();
|
||||
result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens);
|
||||
for (String name : doc.getFieldNames()) {
|
||||
FieldType ft = schema.getFieldType(name);
|
||||
Analyzer analyzer = ft.getAnalyzer();
|
||||
Collection<Object> vals = doc.getFieldValues(name);
|
||||
for (Object val : vals) {
|
||||
Reader reader = new StringReader(val.toString());
|
||||
TokenStream tstream = analyzer.tokenStream(name, reader);
|
||||
NamedList<NamedList<Object>> tokens = getTokens(tstream);
|
||||
theTokens.add(name, tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException {
|
||||
// outer is namedList since order of tokens is important
|
||||
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
|
||||
// TODO: support custom attributes
|
||||
CharTermAttribute termAtt = null;
|
||||
TermToBytesRefAttribute bytesAtt = null;
|
||||
if (tstream.hasAttribute(CharTermAttribute.class)) {
|
||||
termAtt = tstream.getAttribute(CharTermAttribute.class);
|
||||
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
|
||||
}
|
||||
final OffsetAttribute offsetAtt = tstream.addAttribute(OffsetAttribute.class);
|
||||
final TypeAttribute typeAtt = tstream.addAttribute(TypeAttribute.class);
|
||||
final PositionIncrementAttribute posIncAtt = tstream.addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
final BytesRef bytes = new BytesRef();
|
||||
while (tstream.incrementToken()) {
|
||||
NamedList<Object> token = new SimpleOrderedMap<Object>();
|
||||
tokens.add("token", token);
|
||||
if (termAtt != null) {
|
||||
token.add("value", termAtt.toString());
|
||||
}
|
||||
if (bytesAtt != null) {
|
||||
bytesAtt.toBytesRef(bytes);
|
||||
// TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
|
||||
token.add("value", bytes.utf8ToString());
|
||||
}
|
||||
token.add("start", offsetAtt.startOffset());
|
||||
token.add("end", offsetAtt.endOffset());
|
||||
token.add("posInc", posIncAtt.getPositionIncrement());
|
||||
token.add("type", typeAtt.type());
|
||||
//TODO: handle payloads
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
|
||||
StringBuilder text = new StringBuilder();
|
||||
String name = null;
|
||||
String attrName = "";
|
||||
float boost = 1.0f;
|
||||
boolean isNull = false;
|
||||
while (true) {
|
||||
int event = parser.next();
|
||||
switch (event) {
|
||||
// Add everything to the text
|
||||
case XMLStreamConstants.SPACE:
|
||||
case XMLStreamConstants.CDATA:
|
||||
case XMLStreamConstants.CHARACTERS:
|
||||
text.append(parser.getText());
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.END_ELEMENT:
|
||||
if ("doc".equals(parser.getLocalName())) {
|
||||
return doc;
|
||||
} else if ("field".equals(parser.getLocalName())) {
|
||||
if (!isNull) {
|
||||
doc.addField(name, text.toString(), boost);
|
||||
boost = 1.0f;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case XMLStreamConstants.START_ELEMENT:
|
||||
text.setLength(0);
|
||||
String localName = parser.getLocalName();
|
||||
if (!"field".equals(localName)) {
|
||||
log.warn("unexpected XML tag doc/" + localName);
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"unexpected XML tag doc/" + localName);
|
||||
}
|
||||
|
||||
String attrVal = "";
|
||||
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
||||
attrName = parser.getAttributeLocalName(i);
|
||||
attrVal = parser.getAttributeValue(i);
|
||||
if ("name".equals(attrName)) {
|
||||
name = attrVal;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//////////////////////// SolrInfoMBeans methods //////////////////////
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Provide Analysis of text";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getVersion() {
|
||||
return "$Revision$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSourceId() {
|
||||
return "$Id$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return "$URL$";
|
||||
}
|
||||
|
||||
}
|
|
@ -20,10 +20,14 @@ package org.apache.solr.handler;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
import org.apache.solr.analysis.CharFilterFactory;
|
||||
import org.apache.solr.analysis.TokenFilterFactory;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
|
@ -34,6 +38,9 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
|
||||
import org.apache.noggit.CharArr;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
@ -47,7 +54,7 @@ import java.util.*;
|
|||
*/
|
||||
public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
||||
|
||||
public static final Set<String> EMPTY_STRING_SET = Collections.emptySet();
|
||||
public static final Set<BytesRef> EMPTY_BYTES_SET = Collections.emptySet();
|
||||
|
||||
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
|
||||
rsp.add("analysis", doAnalysis(req));
|
||||
|
@ -107,7 +114,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
}
|
||||
|
||||
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
|
||||
List<Token> tokens = analyzeTokenStream(tokenStream);
|
||||
List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
|
||||
|
||||
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
|
||||
|
||||
|
@ -115,7 +122,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
|
||||
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
|
||||
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
|
||||
List<Token> tokenList = analyzeTokenStream(tokenStream);
|
||||
List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
|
||||
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
|
||||
listBasedTokenStream = new ListBasedTokenStream(tokenList);
|
||||
}
|
||||
|
@ -126,14 +133,24 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
/**
|
||||
* Analyzes the given text using the given analyzer and returns the produced tokens.
|
||||
*
|
||||
* @param value The value to analyze.
|
||||
* @param query The query to analyze.
|
||||
* @param analyzer The analyzer to use.
|
||||
*
|
||||
* @return The produced token list.
|
||||
*/
|
||||
protected List<Token> analyzeValue(String value, Analyzer analyzer) {
|
||||
TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value));
|
||||
return analyzeTokenStream(tokenStream);
|
||||
protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
|
||||
final Set<BytesRef> tokens = new HashSet<BytesRef>();
|
||||
final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
|
||||
final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||
try {
|
||||
tokenStream.reset();
|
||||
while (tokenStream.incrementToken()) {
|
||||
final BytesRef bytes = new BytesRef();
|
||||
bytesAtt.toBytesRef(bytes);
|
||||
tokens.add(bytes);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -143,41 +160,17 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
*
|
||||
* @return List of tokens produced from the TokenStream
|
||||
*/
|
||||
private List<Token> analyzeTokenStream(TokenStream tokenStream) {
|
||||
List<Token> tokens = new ArrayList<Token>();
|
||||
|
||||
// TODO change this API to support custom attributes
|
||||
CharTermAttribute termAtt = null;
|
||||
TermToBytesRefAttribute bytesAtt = null;
|
||||
if (tokenStream.hasAttribute(CharTermAttribute.class)) {
|
||||
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
} else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||
bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||
}
|
||||
final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
|
||||
final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class);
|
||||
final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
|
||||
final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
|
||||
final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class);
|
||||
|
||||
private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
|
||||
List<AttributeSource> tokens = new ArrayList<AttributeSource>();
|
||||
// for backwards compatibility, add all "common" attributes
|
||||
tokenStream.addAttribute(PositionIncrementAttribute.class);
|
||||
tokenStream.addAttribute(OffsetAttribute.class);
|
||||
tokenStream.addAttribute(TypeAttribute.class);
|
||||
final BytesRef bytes = new BytesRef();
|
||||
try {
|
||||
tokenStream.reset();
|
||||
while (tokenStream.incrementToken()) {
|
||||
Token token = new Token();
|
||||
if (termAtt != null) {
|
||||
token.setEmpty().append(termAtt);
|
||||
}
|
||||
if (bytesAtt != null) {
|
||||
bytesAtt.toBytesRef(bytes);
|
||||
// TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
|
||||
token.setEmpty().append(bytes.utf8ToString());
|
||||
}
|
||||
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
token.setType(typeAtt.type());
|
||||
token.setFlags(flagsAtt.getFlags());
|
||||
token.setPayload(payloadAtt.getPayload());
|
||||
token.setPositionIncrement(posIncAtt.getPositionIncrement());
|
||||
tokens.add((Token) token.clone());
|
||||
tokens.add(tokenStream.cloneAttributes());
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
|
||||
|
@ -186,6 +179,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
return tokens;
|
||||
}
|
||||
|
||||
// a static mapping of the reflected attribute keys to the names used in Solr 1.4
|
||||
static Map<String,String> ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap<String,String>() {{
|
||||
put(OffsetAttribute.class.getName() + "#startOffset", "start");
|
||||
put(OffsetAttribute.class.getName() + "#endOffset", "end");
|
||||
put(TypeAttribute.class.getName() + "#type", "type");
|
||||
}});
|
||||
|
||||
/**
|
||||
* Converts the list of Tokens to a list of NamedLists representing the tokens.
|
||||
*
|
||||
|
@ -194,41 +194,97 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
*
|
||||
* @return List of NamedLists containing the relevant information taken from the tokens
|
||||
*/
|
||||
private List<NamedList> convertTokensToNamedLists(List<Token> tokens, AnalysisContext context) {
|
||||
List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
|
||||
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokens, AnalysisContext context) {
|
||||
final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
|
||||
|
||||
Collections.sort(tokens, new Comparator<Token>() {
|
||||
public int compare(Token o1, Token o2) {
|
||||
return o1.endOffset() - o2.endOffset();
|
||||
final int[] positions = new int[tokens.size()];
|
||||
int position = 0;
|
||||
for (int i = 0, c = tokens.size(); i < c; i++) {
|
||||
AttributeSource token = tokens.get(i);
|
||||
position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
|
||||
positions[i] = position;
|
||||
}
|
||||
|
||||
// sort the tokens by absolute position
|
||||
new SorterTemplate() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
Collections.swap(tokens, i, j);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return positions[i] - positions[j];
|
||||
}
|
||||
});
|
||||
|
||||
int position = 0;
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot = positions[i];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
return pivot - positions[j];
|
||||
}
|
||||
|
||||
private int pivot;
|
||||
}.mergeSort(0, tokens.size() - 1);
|
||||
|
||||
FieldType fieldType = context.getFieldType();
|
||||
|
||||
for (Token token : tokens) {
|
||||
NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
|
||||
final BytesRef rawBytes = new BytesRef();
|
||||
final CharArr textBuf = new CharArr();
|
||||
for (int i = 0, c = tokens.size(); i < c; i++) {
|
||||
AttributeSource token = tokens.get(i);
|
||||
final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
|
||||
token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes);
|
||||
|
||||
textBuf.reset();
|
||||
fieldType.indexedToReadable(rawBytes, textBuf);
|
||||
final String text = textBuf.toString();
|
||||
|
||||
String text = fieldType.indexedToReadable(token.toString());
|
||||
tokenNamedList.add("text", text);
|
||||
if (!text.equals(token.toString())) {
|
||||
tokenNamedList.add("raw_text", token.toString());
|
||||
|
||||
if (token.hasAttribute(CharTermAttribute.class)) {
|
||||
final String rawText = token.getAttribute(CharTermAttribute.class).toString();
|
||||
if (!rawText.equals(text)) {
|
||||
tokenNamedList.add("raw_text", rawText);
|
||||
}
|
||||
}
|
||||
tokenNamedList.add("type", token.type());
|
||||
tokenNamedList.add("start", token.startOffset());
|
||||
tokenNamedList.add("end", token.endOffset());
|
||||
|
||||
position += token.getPositionIncrement();
|
||||
tokenNamedList.add("position", position);
|
||||
tokenNamedList.add("raw_bytes", rawBytes.toString());
|
||||
|
||||
if (context.getTermsToMatch().contains(token.toString())) {
|
||||
if (context.getTermsToMatch().contains(rawBytes)) {
|
||||
tokenNamedList.add("match", true);
|
||||
}
|
||||
|
||||
if (token.getPayload() != null) {
|
||||
tokenNamedList.add("payload", token.getPayload());
|
||||
}
|
||||
tokenNamedList.add("position", positions[i]);
|
||||
|
||||
token.reflectWith(new AttributeReflector() {
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
// leave out position and bytes term
|
||||
if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (CharTermAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
|
||||
String k = attClass.getName() + '#' + key;
|
||||
|
||||
// map keys for "standard attributes":
|
||||
if (ATTRIBUTE_MAPPING.containsKey(k)) {
|
||||
k = ATTRIBUTE_MAPPING.get(k);
|
||||
}
|
||||
|
||||
if (value instanceof Payload) {
|
||||
final Payload p = (Payload) value;
|
||||
value = new BytesRef(p.getData()).toString();
|
||||
}
|
||||
|
||||
tokenNamedList.add(k, value);
|
||||
}
|
||||
});
|
||||
|
||||
tokensNamedLists.add(tokenNamedList);
|
||||
}
|
||||
|
@ -261,38 +317,27 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
*/
|
||||
// TODO refactor to support custom attributes
|
||||
protected final static class ListBasedTokenStream extends TokenStream {
|
||||
private final List<Token> tokens;
|
||||
private Iterator<Token> tokenIterator;
|
||||
private final List<AttributeSource> tokens;
|
||||
private Iterator<AttributeSource> tokenIterator;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
||||
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
|
||||
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
/**
|
||||
* Creates a new ListBasedTokenStream which uses the given tokens as its token source.
|
||||
*
|
||||
* @param tokens Source of tokens to be used
|
||||
*/
|
||||
ListBasedTokenStream(List<Token> tokens) {
|
||||
ListBasedTokenStream(List<AttributeSource> tokens) {
|
||||
this.tokens = tokens;
|
||||
tokenIterator = tokens.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (tokenIterator.hasNext()) {
|
||||
Token next = tokenIterator.next();
|
||||
termAtt.copyBuffer(next.buffer(), 0, next.length());
|
||||
typeAtt.setType(next.type());
|
||||
offsetAtt.setOffset(next.startOffset(), next.endOffset());
|
||||
flagsAtt.setFlags(next.getFlags());
|
||||
payloadAtt.setPayload(next.getPayload());
|
||||
posIncAtt.setPositionIncrement(next.getPositionIncrement());
|
||||
AttributeSource next = tokenIterator.next();
|
||||
Iterator<Class<? extends Attribute>> atts = next.getAttributeClassesIterator();
|
||||
while (atts.hasNext()) // make sure all att impls in the token exist here
|
||||
addAttribute(atts.next());
|
||||
next.copyTo(this);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -314,7 +359,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
private final String fieldName;
|
||||
private final FieldType fieldType;
|
||||
private final Analyzer analyzer;
|
||||
private final Set<String> termsToMatch;
|
||||
private final Set<BytesRef> termsToMatch;
|
||||
|
||||
/**
|
||||
* Constructs a new AnalysisContext with a given field type, analyzer and
|
||||
|
@ -328,7 +373,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
* @param termsToMatch Holds all the terms that should match during the
|
||||
* analysis process.
|
||||
*/
|
||||
public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set<String> termsToMatch) {
|
||||
public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set<BytesRef> termsToMatch) {
|
||||
this(null, fieldType, analyzer, termsToMatch);
|
||||
}
|
||||
|
||||
|
@ -343,7 +388,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
*
|
||||
*/
|
||||
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) {
|
||||
this(fieldName, fieldType, analyzer, EMPTY_STRING_SET);
|
||||
this(fieldName, fieldType, analyzer, EMPTY_BYTES_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -359,7 +404,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
* @param termsToMatch Holds all the terms that should match during the
|
||||
* analysis process.
|
||||
*/
|
||||
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set<String> termsToMatch) {
|
||||
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set<BytesRef> termsToMatch) {
|
||||
this.fieldName = fieldName;
|
||||
this.fieldType = fieldType;
|
||||
this.analyzer = analyzer;
|
||||
|
@ -378,7 +423,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
return analyzer;
|
||||
}
|
||||
|
||||
public Set<String> getTermsToMatch() {
|
||||
public Set<BytesRef> getTermsToMatch() {
|
||||
return termsToMatch;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.solr.handler;
|
|||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
@ -216,21 +216,20 @@ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase {
|
|||
|
||||
FieldType fieldType = schema.getFieldType(name);
|
||||
|
||||
Set<String> termsToMatch = new HashSet<String>();
|
||||
if (request.getQuery() != null && request.isShowMatch()) {
|
||||
try {
|
||||
List<Token> tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer());
|
||||
for (Token token : tokens) {
|
||||
termsToMatch.add(token.toString());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// ignore analysis exceptions since we are applying arbitrary text to all fields
|
||||
}
|
||||
final String queryValue = request.getQuery();
|
||||
Set<BytesRef> termsToMatch;
|
||||
try {
|
||||
termsToMatch = (queryValue != null && request.isShowMatch())
|
||||
? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
|
||||
: EMPTY_BYTES_SET;
|
||||
} catch (Exception e) {
|
||||
// ignore analysis exceptions since we are applying arbitrary text to all fields
|
||||
termsToMatch = EMPTY_BYTES_SET;
|
||||
}
|
||||
|
||||
if (request.getQuery() != null) {
|
||||
try {
|
||||
AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_STRING_SET);
|
||||
AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET);
|
||||
fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext));
|
||||
} catch (Exception e) {
|
||||
// ignore analysis exceptions since we are applying arbitrary text to all fields
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
|
||||
import org.apache.solr.common.params.AnalysisParams;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -30,10 +30,7 @@ import org.apache.solr.schema.FieldType;
|
|||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
import java.io.Reader;
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -222,14 +219,10 @@ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase {
|
|||
*/
|
||||
private NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {
|
||||
|
||||
Set<String> termsToMatch = new HashSet<String>();
|
||||
String queryValue = analysisRequest.getQuery();
|
||||
if (queryValue != null && analysisRequest.isShowMatch()) {
|
||||
List<Token> tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer());
|
||||
for (Token token : tokens) {
|
||||
termsToMatch.add(token.toString());
|
||||
}
|
||||
}
|
||||
final String queryValue = analysisRequest.getQuery();
|
||||
final Set<BytesRef> termsToMatch = (queryValue != null && analysisRequest.isShowMatch())
|
||||
? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
|
||||
: EMPTY_BYTES_SET;
|
||||
|
||||
NamedList<NamedList> analyzeResults = new SimpleOrderedMap<NamedList>();
|
||||
if (analysisRequest.getFieldValue() != null) {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
org.apache.lucene.analysis.CharReader,
|
||||
org.apache.lucene.analysis.CharStream,
|
||||
org.apache.lucene.analysis.tokenattributes.*,
|
||||
org.apache.lucene.util.AttributeReflector,
|
||||
org.apache.solr.analysis.CharFilterFactory,
|
||||
org.apache.solr.analysis.TokenFilterFactory,
|
||||
org.apache.solr.analysis.TokenizerChain,
|
||||
|
@ -31,7 +32,8 @@
|
|||
org.apache.solr.schema.FieldType,
|
||||
org.apache.solr.schema.SchemaField,
|
||||
org.apache.solr.common.util.XML,
|
||||
javax.servlet.jsp.JspWriter,java.io.IOException
|
||||
javax.servlet.jsp.JspWriter,java.io.IOException,
|
||||
org.apache.noggit.CharArr
|
||||
"%>
|
||||
<%@ page import="java.io.Reader"%>
|
||||
<%@ page import="java.io.StringReader"%>
|
||||
|
@ -39,8 +41,6 @@
|
|||
<%@ page import="java.math.BigInteger" %>
|
||||
|
||||
<%-- $Id$ --%>
|
||||
<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%>
|
||||
<%-- $Name: $ --%>
|
||||
|
||||
<%@include file="header.jsp" %>
|
||||
|
||||
|
@ -71,19 +71,19 @@
|
|||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
<strong>Field
|
||||
<strong>Field
|
||||
<select name="nt">
|
||||
<option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
|
||||
<option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
|
||||
<option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
|
||||
<option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
|
||||
</select></strong>
|
||||
</td>
|
||||
<td>
|
||||
<input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
|
||||
<input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<strong>Field value (Index)</strong>
|
||||
<strong>Field value (Index)</strong>
|
||||
<br/>
|
||||
verbose output
|
||||
<input name="verbose" type="checkbox"
|
||||
|
@ -94,19 +94,19 @@
|
|||
<%= highlight ? "checked=\"true\"" : "" %> >
|
||||
</td>
|
||||
<td>
|
||||
<textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
|
||||
<textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<strong>Field value (Query)</strong>
|
||||
<strong>Field value (Query)</strong>
|
||||
<br/>
|
||||
verbose output
|
||||
<input name="qverbose" type="checkbox"
|
||||
<%= qverbose ? "checked=\"true\"" : "" %> >
|
||||
</td>
|
||||
<td>
|
||||
<textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
|
||||
<textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
@ -115,7 +115,7 @@
|
|||
</td>
|
||||
|
||||
<td>
|
||||
<input class="stdbutton" type="submit" value="analyze">
|
||||
<input class="stdbutton" type="submit" value="analyze">
|
||||
</td>
|
||||
|
||||
</tr>
|
||||
|
@ -148,24 +148,28 @@
|
|||
}
|
||||
|
||||
if (field!=null) {
|
||||
HashSet<Tok> matches = null;
|
||||
HashSet<BytesRef> matches = null;
|
||||
if (qval!="" && highlight) {
|
||||
Reader reader = new StringReader(qval);
|
||||
Analyzer analyzer = field.getType().getQueryAnalyzer();
|
||||
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
|
||||
TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
|
||||
tstream.reset();
|
||||
List<AttributeSource> tokens = getTokens(tstream);
|
||||
matches = new HashSet<Tok>();
|
||||
for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); }
|
||||
matches = new HashSet<BytesRef>();
|
||||
while (tstream.incrementToken()) {
|
||||
final BytesRef bytes = new BytesRef();
|
||||
bytesAtt.toBytesRef(bytes);
|
||||
matches.add(bytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (val!="") {
|
||||
out.println("<h3>Index Analyzer</h3>");
|
||||
doAnalyzer(out, field, val, false, verbose,matches);
|
||||
doAnalyzer(out, field, val, false, verbose, matches);
|
||||
}
|
||||
if (qval!="") {
|
||||
out.println("<h3>Query Analyzer</h3>");
|
||||
doAnalyzer(out, field, qval, true, qverbose,null);
|
||||
doAnalyzer(out, field, qval, true, qverbose, null);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -177,7 +181,7 @@
|
|||
|
||||
|
||||
<%!
|
||||
private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
|
||||
private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<BytesRef> match) throws Exception {
|
||||
|
||||
FieldType ft = field.getType();
|
||||
Analyzer analyzer = queryAnalyser ?
|
||||
|
@ -240,7 +244,7 @@
|
|||
tstream.reset();
|
||||
List<AttributeSource> tokens = getTokens(tstream);
|
||||
if (verbose) {
|
||||
writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
|
||||
writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP);
|
||||
}
|
||||
writeTokens(out, tokens, ft, verbose, match);
|
||||
}
|
||||
|
@ -249,52 +253,59 @@
|
|||
|
||||
static List<AttributeSource> getTokens(TokenStream tstream) throws IOException {
|
||||
List<AttributeSource> tokens = new ArrayList<AttributeSource>();
|
||||
|
||||
while (true) {
|
||||
if (!tstream.incrementToken())
|
||||
break;
|
||||
else {
|
||||
tokens.add(tstream.cloneAttributes());
|
||||
}
|
||||
tstream.reset();
|
||||
while (tstream.incrementToken()) {
|
||||
tokens.add(tstream.cloneAttributes());
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
|
||||
private static class ReflectItem {
|
||||
final Class<? extends Attribute> attClass;
|
||||
final String key;
|
||||
final Object value;
|
||||
|
||||
ReflectItem(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
this.attClass = attClass;
|
||||
this.key = key;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
private static class Tok {
|
||||
AttributeSource token;
|
||||
int pos;
|
||||
Tok(AttributeSource token, int pos) {
|
||||
this.token=token;
|
||||
this.pos=pos;
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
return ((Tok)o).token.toString().equals(token.toString());
|
||||
}
|
||||
public int hashCode() {
|
||||
return token.toString().hashCode();
|
||||
}
|
||||
public String toString() {
|
||||
return token.toString();
|
||||
}
|
||||
public String toPrintableString() {
|
||||
TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class);
|
||||
if (att instanceof CharTermAttribute)
|
||||
return att.toString();
|
||||
else {
|
||||
BytesRef bytes = new BytesRef();
|
||||
att.toBytesRef(bytes);
|
||||
return bytes.toString();
|
||||
}
|
||||
final BytesRef bytes = new BytesRef();
|
||||
final String rawText, text;
|
||||
final int pos;
|
||||
final List<ReflectItem> reflected = new ArrayList<ReflectItem>();
|
||||
|
||||
Tok(AttributeSource token, int pos, FieldType ft) {
|
||||
this.pos = pos;
|
||||
token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
|
||||
rawText = (token.hasAttribute(CharTermAttribute.class)) ?
|
||||
token.getAttribute(CharTermAttribute.class).toString() : null;
|
||||
final CharArr textBuf = new CharArr(bytes.length);
|
||||
ft.indexedToReadable(bytes, textBuf);
|
||||
text = textBuf.toString();
|
||||
token.reflectWith(new AttributeReflector() {
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
// leave out position and raw term
|
||||
if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (CharTermAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
reflected.add(new ReflectItem(attClass, key, value));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private static interface ToStr {
|
||||
public String toStr(Object o);
|
||||
private static interface TokToStr {
|
||||
public String toStr(Tok o);
|
||||
}
|
||||
|
||||
private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException {
|
||||
private static void printRow(JspWriter out, String header, String headerTitle, List<Tok>[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set<BytesRef> match) throws IOException {
|
||||
// find the maximum number of terms for any position
|
||||
int maxSz=1;
|
||||
if (multival) {
|
||||
|
@ -308,7 +319,13 @@
|
|||
out.println("<tr>");
|
||||
if (idx==0 && verbose) {
|
||||
if (header != null) {
|
||||
out.print("<th NOWRAP rowspan=\""+maxSz+"\">");
|
||||
out.print("<th NOWRAP rowspan=\""+maxSz+"\"");
|
||||
if (headerTitle != null) {
|
||||
out.print(" title=\"");
|
||||
XML.escapeCharData(headerTitle,out);
|
||||
out.print("\"");
|
||||
}
|
||||
out.print(">");
|
||||
XML.escapeCharData(header,out);
|
||||
out.println("</th>");
|
||||
}
|
||||
|
@ -317,7 +334,7 @@
|
|||
for (int posIndex=0; posIndex<arrLst.length; posIndex++) {
|
||||
List<Tok> lst = arrLst[posIndex];
|
||||
if (lst.size() <= idx) continue;
|
||||
if (match!=null && match.contains(lst.get(idx))) {
|
||||
if (match!=null && match.contains(lst.get(idx).bytes)) {
|
||||
out.print("<td class=\"highlight\"");
|
||||
} else {
|
||||
out.print("<td class=\"debugdata\"");
|
||||
|
@ -340,15 +357,6 @@
|
|||
|
||||
}
|
||||
|
||||
static String isPayloadString( Payload p ) {
|
||||
String sp = new String( p.getData() );
|
||||
for( int i=0; i < sp.length(); i++ ) {
|
||||
if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
|
||||
return "";
|
||||
}
|
||||
return "(" + sp + ")";
|
||||
}
|
||||
|
||||
static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
|
||||
out.print("<h4>");
|
||||
out.print(clazz.getName());
|
||||
|
@ -359,137 +367,93 @@
|
|||
|
||||
|
||||
// readable, raw, pos, type, start/end
|
||||
static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
|
||||
static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<BytesRef> match) throws IOException {
|
||||
|
||||
// Use a map to tell what tokens are in what positions
|
||||
// because some tokenizers/filters may do funky stuff with
|
||||
// very large increments, or negative increments.
|
||||
HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
|
||||
boolean needRaw=false;
|
||||
int pos=0;
|
||||
int pos=0, reflectionCount = -1;
|
||||
for (AttributeSource t : tokens) {
|
||||
if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
|
||||
needRaw=true;
|
||||
}
|
||||
|
||||
pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
|
||||
List lst = map.get(pos);
|
||||
if (lst==null) {
|
||||
lst = new ArrayList(1);
|
||||
map.put(pos,lst);
|
||||
}
|
||||
Tok tok = new Tok(t,pos);
|
||||
Tok tok = new Tok(t,pos,ft);
|
||||
// sanity check
|
||||
if (reflectionCount < 0) {
|
||||
reflectionCount = tok.reflected.size();
|
||||
} else {
|
||||
if (reflectionCount != tok.reflected.size())
|
||||
throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos);
|
||||
}
|
||||
if (tok.rawText != null && !tok.text.equals(tok.rawText)) {
|
||||
needRaw=true;
|
||||
}
|
||||
lst.add(tok);
|
||||
}
|
||||
|
||||
List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]);
|
||||
|
||||
/* Jetty 6.1.3 miscompiles this generics version...
|
||||
Arrays.sort(arr, new Comparator<List<Tok>>() {
|
||||
public int compare(List<Tok> toks, List<Tok> toks1) {
|
||||
return toks.get(0).pos - toks1.get(0).pos;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Jetty 6.1.3 miscompiles a generics-enabled version..., without generics:
|
||||
Arrays.sort(arr, new Comparator() {
|
||||
public int compare(Object toks, Object toks1) {
|
||||
return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
);
|
||||
});
|
||||
|
||||
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
|
||||
|
||||
if (verbose) {
|
||||
printRow(out,"term position", arr, new ToStr() {
|
||||
public String toStr(Object o) {
|
||||
return Integer.toString(((Tok)o).pos);
|
||||
printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() {
|
||||
public String toStr(Tok t) {
|
||||
return Integer.toString(t.pos);
|
||||
}
|
||||
}
|
||||
,false
|
||||
,verbose
|
||||
,null);
|
||||
},false,verbose,null);
|
||||
}
|
||||
|
||||
|
||||
printRow(out,"term text", arr, new ToStr() {
|
||||
public String toStr(Object o) {
|
||||
return ft.indexedToReadable( ((Tok)o).toPrintableString() );
|
||||
printRow(out, "term text", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
|
||||
public String toStr(Tok t) {
|
||||
return t.text;
|
||||
}
|
||||
}
|
||||
,true
|
||||
,verbose
|
||||
,match
|
||||
);
|
||||
|
||||
if (needRaw) {
|
||||
printRow(out,"raw text", arr, new ToStr() {
|
||||
public String toStr(Object o) {
|
||||
// page is UTF-8, so anything goes.
|
||||
return ((Tok)o).toPrintableString();
|
||||
}
|
||||
}
|
||||
,true
|
||||
,verbose
|
||||
,match
|
||||
);
|
||||
}
|
||||
},true,verbose,match);
|
||||
|
||||
if (verbose) {
|
||||
printRow(out,"term type", arr, new ToStr() {
|
||||
public String toStr(Object o) {
|
||||
String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type();
|
||||
if (tt == null) {
|
||||
return "null";
|
||||
} else {
|
||||
return tt;
|
||||
if (needRaw) {
|
||||
printRow(out, "raw text", CharTermAttribute.class.getName(), arr, new TokToStr() {
|
||||
public String toStr(Tok t) {
|
||||
// page is UTF-8, so anything goes.
|
||||
return (t.rawText == null) ? "" : t.rawText;
|
||||
}
|
||||
}
|
||||
},true,verbose,match);
|
||||
}
|
||||
,true
|
||||
,verbose,
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
printRow(out,"source start,end", arr, new ToStr() {
|
||||
public String toStr(Object o) {
|
||||
AttributeSource t = ((Tok)o).token;
|
||||
return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
|
||||
|
||||
printRow(out, "raw bytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
|
||||
public String toStr(Tok t) {
|
||||
return t.bytes.toString();
|
||||
}
|
||||
}
|
||||
,true
|
||||
,verbose
|
||||
,null
|
||||
);
|
||||
}
|
||||
},true,verbose,match);
|
||||
|
||||
if (verbose) {
|
||||
printRow(out,"payload", arr, new ToStr() {
|
||||
public String toStr(Object o) {
|
||||
AttributeSource t = ((Tok)o).token;
|
||||
Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
|
||||
if( null != p ) {
|
||||
BigInteger bi = new BigInteger( p.getData() );
|
||||
String ret = bi.toString( 16 );
|
||||
if (ret.length() % 2 != 0) {
|
||||
// Pad with 0
|
||||
ret = "0"+ret;
|
||||
for (int att=0; att < reflectionCount; att++) {
|
||||
final ReflectItem item0 = arr[0].get(0).reflected.get(att);
|
||||
final int i = att;
|
||||
printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() {
|
||||
public String toStr(Tok t) {
|
||||
final ReflectItem item = t.reflected.get(i);
|
||||
if (item0.attClass != item.attClass || !item0.key.equals(item.key))
|
||||
throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos);
|
||||
if (item.value instanceof Payload) {
|
||||
final Payload p = (Payload) item.value;
|
||||
return new BytesRef(p.getData()).toString();
|
||||
} else {
|
||||
return (item.value != null) ? item.value.toString() : "";
|
||||
}
|
||||
ret += isPayloadString( p );
|
||||
return ret;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
},true,verbose, null);
|
||||
}
|
||||
,true
|
||||
,verbose
|
||||
,null
|
||||
);
|
||||
}
|
||||
|
||||
out.println("</table>");
|
||||
|
|
Loading…
Reference in New Issue