diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e93f3cf0945..448f11b0707 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -333,6 +333,10 @@ New features * LUCENE-2862: Added TermsEnum.totalTermFreq() and Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir) + +* LUCENE-3001: Added TrieFieldHelper to write solr compatible numeric + fields without the solr dependency. (ryan) + Optimizations diff --git a/lucene/src/java/org/apache/lucene/util/TrieFieldHelper.java b/lucene/src/java/org/apache/lucene/util/TrieFieldHelper.java new file mode 100644 index 00000000000..e16090af198 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/TrieFieldHelper.java @@ -0,0 +1,163 @@ +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +import java.util.Date; + +import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; + +/** + * Helper class to make TrieFields compatible with ones written in solr + */ +public class TrieFieldHelper { + + private TrieFieldHelper() {} + + public static class FieldInfo { + public int precisionStep = 8; // same as solr default + public boolean store = true; + public boolean index = true; + public boolean omitNorms = true; + public boolean omitTF = true; + } + + //---------------------------------------------- + // Create Field + //---------------------------------------------- + + private static Fieldable createField(String name, byte[] arr, TokenStream ts, FieldInfo info, float boost) { + + Field f; + if (info.store) { + f = new Field(name, arr); + if (info.index) f.setTokenStream(ts); + } else { + f = new Field(name, ts); + } + + // term vectors aren't supported + f.setOmitNorms(info.omitNorms); + f.setOmitTermFreqAndPositions(info.omitTF); + f.setBoost(boost); + return f; + } + + public static Fieldable createIntField(String name, int value, FieldInfo info, float boost) { + + byte[] arr=null; + TokenStream ts=null; + + if (info.store) arr = TrieFieldHelper.toArr(value); + if (info.index) ts = new NumericTokenStream(info.precisionStep).setIntValue(value); + + return createField(name, arr, ts, info, boost); + } + + public static Fieldable createFloatField(String name, float value, FieldInfo info, float boost) { + + byte[] arr=null; + TokenStream ts=null; + + if (info.store) arr = TrieFieldHelper.toArr(value); + if (info.index) ts = new NumericTokenStream(info.precisionStep).setFloatValue(value); + + return createField(name, arr, ts, info, boost); + } + + public static Fieldable createLongField(String name, long value, FieldInfo info, float boost) { + + byte[] arr=null; + TokenStream ts=null; + + if (info.store) arr = TrieFieldHelper.toArr(value); + if (info.index) ts = new NumericTokenStream(info.precisionStep).setLongValue(value); + + return createField(name, arr, ts, info, boost); + } + + public static Fieldable createDoubleField(String name, double value, FieldInfo info, float boost) { + + byte[] arr=null; + TokenStream ts=null; + + if (info.store) arr = TrieFieldHelper.toArr(value); + if (info.index) ts = new NumericTokenStream(info.precisionStep).setDoubleValue(value); + + return createField(name, arr, ts, info, boost); + } + + public static Fieldable createDateField(String name, Date value, FieldInfo info, float boost) { + // TODO, make sure the date is within long range! + return createLongField(name, value.getTime(), info, boost); + } + + + //---------------------------------------------- + // number <=> byte[] + //---------------------------------------------- + + public static int toInt(byte[] arr) { + return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); + } + + public static long toLong(byte[] arr) { + int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); + int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff); + return (((long)high)<<32) | (low&0x0ffffffffL); + } + + public static float toFloat(byte[] arr) { + return Float.intBitsToFloat(toInt(arr)); + } + + public static double toDouble(byte[] arr) { + return Double.longBitsToDouble(toLong(arr)); + } + + public static byte[] toArr(int val) { + byte[] arr = new byte[4]; + arr[0] = (byte)(val>>>24); + arr[1] = (byte)(val>>>16); + arr[2] = (byte)(val>>>8); + arr[3] = (byte)(val); + return arr; + } + + public static byte[] toArr(long val) { + byte[] arr = new byte[8]; + arr[0] = (byte)(val>>>56); + arr[1] = (byte)(val>>>48); + arr[2] = (byte)(val>>>40); + arr[3] = (byte)(val>>>32); + arr[4] = (byte)(val>>>24); + arr[5] = (byte)(val>>>16); + arr[6] = (byte)(val>>>8); + arr[7] = (byte)(val); + return arr; + } + + public static byte[] toArr(float val) { + return toArr(Float.floatToRawIntBits(val)); + } + + public static byte[] toArr(double val) { + return toArr(Double.doubleToRawLongBits(val)); + } +} diff --git a/solr/src/java/org/apache/solr/schema/TrieDateField.java b/solr/src/java/org/apache/solr/schema/TrieDateField.java index bba4d0ae69e..f02d26311a1 100755 --- a/solr/src/java/org/apache/solr/schema/TrieDateField.java +++ b/solr/src/java/org/apache/solr/schema/TrieDateField.java @@ -27,7 +27,6 @@ import org.apache.solr.search.function.*; import org.apache.solr.search.QParser; import org.apache.solr.response.TextResponseWriter; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.Field; import org.apache.lucene.search.SortField; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Query; @@ -36,8 +35,7 @@ import org.apache.lucene.search.cache.CachedArrayCreator; import org.apache.lucene.search.cache.LongValuesCreator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.util.TrieFieldHelper; import java.util.Map; import java.util.Date; @@ -68,7 +66,7 @@ public class TrieDateField extends DateField { public Date toObject(Fieldable f) { byte[] arr = f.getBinaryValue(); if (arr==null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,TrieField.badFieldString(f)); - return new Date(TrieField.toLong(arr)); + return new Date(TrieFieldHelper.toLong(arr)); } @Override @@ -107,7 +105,7 @@ public class TrieDateField extends DateField { return; } - writer.writeDate(name,new Date(TrieField.toLong(arr))); + writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr))); } @Override @@ -146,7 +144,7 @@ public class TrieDateField extends DateField { public String toExternal(Fieldable f) { byte[] arr = f.getBinaryValue(); if (arr==null) return TrieField.badFieldString(f); - return super.toExternal(new Date(TrieField.toLong(arr))); + return super.toExternal(new Date(TrieFieldHelper.toLong(arr))); } @Override @@ -167,44 +165,6 @@ public class TrieDateField extends DateField { return readableToIndexed(storedToReadable(f)); } - @Override - public Fieldable createField(SchemaField field, Object value, float boost) { - boolean indexed = field.indexed(); - boolean stored = field.stored(); - - if (!indexed && !stored) { - if (log.isTraceEnabled()) - log.trace("Ignoring unindexed/unstored field: " + field); - return null; - } - - int ps = precisionStep; - - byte[] arr=null; - TokenStream ts=null; - - long time = (value instanceof Date) - ? ((Date)value).getTime() - : super.parseMath(null, value.toString()).getTime(); - - if (stored) arr = TrieField.toArr(time); - if (indexed) ts = new NumericTokenStream(ps).setLongValue(time); - - Field f; - if (stored) { - f = new Field(field.getName(), arr); - if (indexed) f.setTokenStream(ts); - } else { - f = new Field(field.getName(), ts); - } - - // term vectors aren't supported - - f.setOmitNorms(field.omitNorms()); - f.setOmitTermFreqAndPositions(field.omitTf()); - f.setBoost(boost); - return f; - } @Override public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) { diff --git a/solr/src/java/org/apache/solr/schema/TrieField.java b/solr/src/java/org/apache/solr/schema/TrieField.java index cd7d953c4ad..8e7dff3bcbc 100644 --- a/solr/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/src/java/org/apache/solr/schema/TrieField.java @@ -17,7 +17,6 @@ package org.apache.solr.schema; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.Field; import org.apache.lucene.search.*; import org.apache.lucene.search.cache.CachedArrayCreator; import org.apache.lucene.search.cache.DoubleValuesCreator; @@ -26,8 +25,7 @@ import org.apache.lucene.search.cache.IntValuesCreator; import org.apache.lucene.search.cache.LongValuesCreator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.util.TrieFieldHelper; import org.apache.noggit.CharArr; import org.apache.solr.analysis.*; import org.apache.solr.common.SolrException; @@ -68,6 +66,7 @@ public class TrieField extends FieldType { protected TrieTypes type; protected Object missingValue; + /** * Used for handling date types following the same semantics as DateField */ @@ -107,15 +106,15 @@ public class TrieField extends FieldType { if (arr==null) return badFieldString(f); switch (type) { case INTEGER: - return toInt(arr); + return TrieFieldHelper.toInt(arr); case FLOAT: - return toFloat(arr); + return TrieFieldHelper.toFloat(arr); case LONG: - return toLong(arr); + return TrieFieldHelper.toLong(arr); case DOUBLE: - return toDouble(arr); + return TrieFieldHelper.toDouble(arr); case DATE: - return new Date(toLong(arr)); + return new Date(TrieFieldHelper.toLong(arr)); default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); } @@ -207,19 +206,19 @@ public class TrieField extends FieldType { } switch (type) { case INTEGER: - writer.writeInt(name,toInt(arr)); + writer.writeInt(name,TrieFieldHelper.toInt(arr)); break; case FLOAT: - writer.writeFloat(name,toFloat(arr)); + writer.writeFloat(name,TrieFieldHelper.toFloat(arr)); break; case LONG: - writer.writeLong(name,toLong(arr)); + writer.writeLong(name,TrieFieldHelper.toLong(arr)); break; case DOUBLE: - writer.writeDouble(name,toDouble(arr)); + writer.writeDouble(name,TrieFieldHelper.toDouble(arr)); break; case DATE: - writer.writeDate(name,new Date(toLong(arr))); + writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr))); break; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); @@ -293,55 +292,6 @@ public class TrieField extends FieldType { } - static int toInt(byte[] arr) { - return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); - } - - static long toLong(byte[] arr) { - int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); - int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff); - return (((long)high)<<32) | (low&0x0ffffffffL); - } - - static float toFloat(byte[] arr) { - return Float.intBitsToFloat(toInt(arr)); - } - - static double toDouble(byte[] arr) { - return Double.longBitsToDouble(toLong(arr)); - } - - static byte[] toArr(int val) { - byte[] arr = new byte[4]; - arr[0] = (byte)(val>>>24); - arr[1] = (byte)(val>>>16); - arr[2] = (byte)(val>>>8); - arr[3] = (byte)(val); - return arr; - } - - static byte[] toArr(long val) { - byte[] arr = new byte[8]; - arr[0] = (byte)(val>>>56); - arr[1] = (byte)(val>>>48); - arr[2] = (byte)(val>>>40); - arr[3] = (byte)(val>>>32); - arr[4] = (byte)(val>>>24); - arr[5] = (byte)(val>>>16); - arr[6] = (byte)(val>>>8); - arr[7] = (byte)(val); - return arr; - } - - static byte[] toArr(float val) { - return toArr(Float.floatToRawIntBits(val)); - } - - static byte[] toArr(double val) { - return toArr(Double.doubleToRawLongBits(val)); - } - - @Override public String storedToReadable(Fieldable f) { return toExternal(f); @@ -396,15 +346,15 @@ public class TrieField extends FieldType { if (arr==null) return badFieldString(f); switch (type) { case INTEGER: - return Integer.toString(toInt(arr)); + return Integer.toString(TrieFieldHelper.toInt(arr)); case FLOAT: - return Float.toString(toFloat(arr)); + return Float.toString(TrieFieldHelper.toFloat(arr)); case LONG: - return Long.toString(toLong(arr)); + return Long.toString(TrieFieldHelper.toLong(arr)); case DOUBLE: - return Double.toString(toDouble(arr)); + return Double.toString(TrieFieldHelper.toDouble(arr)); case DATE: - return dateField.formatDate(new Date(toLong(arr))); + return dateField.formatDate(new Date(TrieFieldHelper.toLong(arr))); default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); } @@ -483,75 +433,53 @@ public class TrieField extends FieldType { @Override public Fieldable createField(SchemaField field, Object value, float boost) { - boolean indexed = field.indexed(); - boolean stored = field.stored(); - - if (!indexed && !stored) { + TrieFieldHelper.FieldInfo info = new TrieFieldHelper.FieldInfo(); + info.index = field.indexed(); + info.store = field.stored(); + info.precisionStep = precisionStep; + info.omitNorms = field.omitNorms(); + info.omitTF = field.omitTf(); + + if (!info.index && !info.store) { if (log.isTraceEnabled()) log.trace("Ignoring unindexed/unstored field: " + field); return null; } - int ps = precisionStep; - - byte[] arr=null; - TokenStream ts=null; - // String indexedVal = indexed && precisionStep==0 ? readableToIndexed(externalVal) : null; - switch (type) { case INTEGER: int i = (value instanceof Number) ? ((Number)value).intValue() : Integer.parseInt(value.toString()); - if (stored) arr = toArr(i); - if (indexed) ts = new NumericTokenStream(ps).setIntValue(i); - break; + return TrieFieldHelper.createIntField(field.getName(), i, info, boost); + case FLOAT: float f = (value instanceof Number) ? ((Number)value).floatValue() : Float.parseFloat(value.toString()); - if (stored) arr = toArr(f); - if (indexed) ts = new NumericTokenStream(ps).setFloatValue(f); - break; + return TrieFieldHelper.createFloatField(field.getName(), f, info, boost); + case LONG: long l = (value instanceof Number) ? ((Number)value).longValue() : Long.parseLong(value.toString()); - if (stored) arr = toArr(l); - if (indexed) ts = new NumericTokenStream(ps).setLongValue(l); - break; + return TrieFieldHelper.createLongField(field.getName(), l, info, boost); + case DOUBLE: double d = (value instanceof Number) ? ((Number)value).doubleValue() : Double.parseDouble(value.toString()); - if (stored) arr = toArr(d); - if (indexed) ts = new NumericTokenStream(ps).setDoubleValue(d); - break; + return TrieFieldHelper.createDoubleField(field.getName(), d, info, boost); + case DATE: - long time = (value instanceof Date) - ? ((Date)value).getTime() - : dateField.parseMath(null, value.toString()).getTime(); - if (stored) arr = toArr(time); - if (indexed) ts = new NumericTokenStream(ps).setLongValue(time); - break; + Date date = (value instanceof Date) + ? ((Date)value) + : dateField.parseMath(null, value.toString()); + return TrieFieldHelper.createDateField(field.getName(), date, info, boost); + default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type); } - - Field f; - if (stored) { - f = new Field(field.getName(), arr); - if (indexed) f.setTokenStream(ts); - } else { - f = new Field(field.getName(), ts); - } - - // term vectors aren't supported - - f.setOmitNorms(field.omitNorms()); - f.setOmitTermFreqAndPositions(field.omitTf()); - f.setBoost(boost); - return f; } public enum TrieTypes {