LUCENE-3001 -- Adding TrieFieldHelper to write solr compatible numeric fields without the solr dependency.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1086651 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2011-03-29 17:41:58 +00:00
parent 4bfb56d42b
commit b95ca2b453
4 changed files with 209 additions and 154 deletions

View File

@ -334,6 +334,10 @@ New features
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
* LUCENE-3001: Added TrieFieldHelper to write Solr-compatible numeric
fields without the Solr dependency. (ryan)
Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms

View File

@ -0,0 +1,163 @@
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Date;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
/**
 * Helper class to make TrieFields compatible with ones written in solr.
 * <p>
 * It offers factory methods that build a numeric {@link Fieldable} (a binary
 * stored value and/or a {@link NumericTokenStream}) and the matching
 * conversions between numbers and their big-endian <code>byte[]</code> form.
 */
public class TrieFieldHelper {

  /** Static utility class; never instantiated. */
  private TrieFieldHelper() {}

  /** Mutable settings that describe how a numeric field is created. */
  public static class FieldInfo {
    /** Precision step handed to the {@link NumericTokenStream} constructor. */
    public int precisionStep = 8; // same as solr default
    /** When true, the value is kept as a binary stored value. */
    public boolean store = true;
    /** When true, a {@link NumericTokenStream} is attached to the field. */
    public boolean index = true;
    /** Forwarded to <code>Field.setOmitNorms</code>. */
    public boolean omitNorms = true;
    /** Forwarded to <code>Field.setOmitTermFreqAndPositions</code>. */
    public boolean omitTF = true;
  }

  //----------------------------------------------
  // Create Field
  //----------------------------------------------

  /**
   * Assembles the field from the already encoded value and token stream.
   *
   * @param name  field name
   * @param arr   encoded value; only used when <code>info.store</code> is set
   * @param ts    token stream; only used when <code>info.index</code> is set
   * @param info  creation settings
   * @param boost boost applied to the resulting field
   * @return the configured field
   * @throws IllegalArgumentException if <code>info</code> requests a field that
   *         is neither stored nor indexed
   */
  private static Fieldable createField(String name, byte[] arr, TokenStream ts, FieldInfo info, float boost) {
    // With both flags off the callers pass neither a value nor a token stream,
    // so there is nothing to build a Field from; fail with a clear message.
    if (!info.store && !info.index) {
      throw new IllegalArgumentException(
          "Field '" + name + "' must be stored and/or indexed");
    }

    Field f;
    if (info.store) {
      f = new Field(name, arr);
      if (info.index) f.setTokenStream(ts);
    } else {
      f = new Field(name, ts);
    }

    // term vectors aren't supported
    f.setOmitNorms(info.omitNorms);
    f.setOmitTermFreqAndPositions(info.omitTF);
    f.setBoost(boost);
    return f;
  }

  /**
   * Creates a field holding an <code>int</code> value.
   *
   * @param name  field name
   * @param value value to store and/or index
   * @param info  creation settings
   * @param boost boost applied to the resulting field
   * @return the new field
   * @throws IllegalArgumentException if the field is neither stored nor indexed
   */
  public static Fieldable createIntField(String name, int value, FieldInfo info, float boost) {
    byte[] arr = null;
    TokenStream ts = null;
    if (info.store) arr = TrieFieldHelper.toArr(value);
    if (info.index) ts = new NumericTokenStream(info.precisionStep).setIntValue(value);
    return createField(name, arr, ts, info, boost);
  }

  /**
   * Creates a field holding a <code>float</code> value.
   *
   * @param name  field name
   * @param value value to store and/or index
   * @param info  creation settings
   * @param boost boost applied to the resulting field
   * @return the new field
   * @throws IllegalArgumentException if the field is neither stored nor indexed
   */
  public static Fieldable createFloatField(String name, float value, FieldInfo info, float boost) {
    byte[] arr = null;
    TokenStream ts = null;
    if (info.store) arr = TrieFieldHelper.toArr(value);
    if (info.index) ts = new NumericTokenStream(info.precisionStep).setFloatValue(value);
    return createField(name, arr, ts, info, boost);
  }

  /**
   * Creates a field holding a <code>long</code> value.
   *
   * @param name  field name
   * @param value value to store and/or index
   * @param info  creation settings
   * @param boost boost applied to the resulting field
   * @return the new field
   * @throws IllegalArgumentException if the field is neither stored nor indexed
   */
  public static Fieldable createLongField(String name, long value, FieldInfo info, float boost) {
    byte[] arr = null;
    TokenStream ts = null;
    if (info.store) arr = TrieFieldHelper.toArr(value);
    if (info.index) ts = new NumericTokenStream(info.precisionStep).setLongValue(value);
    return createField(name, arr, ts, info, boost);
  }

  /**
   * Creates a field holding a <code>double</code> value.
   *
   * @param name  field name
   * @param value value to store and/or index
   * @param info  creation settings
   * @param boost boost applied to the resulting field
   * @return the new field
   * @throws IllegalArgumentException if the field is neither stored nor indexed
   */
  public static Fieldable createDoubleField(String name, double value, FieldInfo info, float boost) {
    byte[] arr = null;
    TokenStream ts = null;
    if (info.store) arr = TrieFieldHelper.toArr(value);
    if (info.index) ts = new NumericTokenStream(info.precisionStep).setDoubleValue(value);
    return createField(name, arr, ts, info, boost);
  }

  /**
   * Creates a field holding a date, represented by {@link Date#getTime()}.
   *
   * @param name  field name
   * @param value date to store and/or index
   * @param info  creation settings
   * @param boost boost applied to the resulting field
   * @return the new field
   * @throws NullPointerException if <code>value</code> is null
   * @throws IllegalArgumentException if the field is neither stored nor indexed
   */
  public static Fieldable createDateField(String name, Date value, FieldInfo info, float boost) {
    // Date.getTime() already returns a long, so the value needs no range check
    // before it is handed to the long variant.
    return createLongField(name, value.getTime(), info, boost);
  }

  //----------------------------------------------
  // number <=> byte[]
  //----------------------------------------------

  /**
   * Decodes the first four bytes of <code>arr</code> as a big-endian int.
   *
   * @param arr encoded value, at least four bytes long
   * @return the decoded int
   * @throws NullPointerException if <code>arr</code> is null
   * @throws IllegalArgumentException if <code>arr</code> has fewer than four bytes
   */
  public static int toInt(byte[] arr) {
    checkLength(arr, 4);
    return readInt(arr, 0);
  }

  /**
   * Decodes the first eight bytes of <code>arr</code> as a big-endian long.
   *
   * @param arr encoded value, at least eight bytes long
   * @return the decoded long
   * @throws NullPointerException if <code>arr</code> is null
   * @throws IllegalArgumentException if <code>arr</code> has fewer than eight bytes
   */
  public static long toLong(byte[] arr) {
    checkLength(arr, 8);
    int high = readInt(arr, 0);
    int low = readInt(arr, 4);
    // mask the low word so its sign bit does not smear into the high word
    return (((long) high) << 32) | (low & 0x0ffffffffL);
  }

  /**
   * Decodes a float from the int bits produced by {@link #toArr(float)}.
   *
   * @param arr encoded value, at least four bytes long
   * @return the decoded float
   * @throws NullPointerException if <code>arr</code> is null
   * @throws IllegalArgumentException if <code>arr</code> has fewer than four bytes
   */
  public static float toFloat(byte[] arr) {
    return Float.intBitsToFloat(toInt(arr));
  }

  /**
   * Decodes a double from the long bits produced by {@link #toArr(double)}.
   *
   * @param arr encoded value, at least eight bytes long
   * @return the decoded double
   * @throws NullPointerException if <code>arr</code> is null
   * @throws IllegalArgumentException if <code>arr</code> has fewer than eight bytes
   */
  public static double toDouble(byte[] arr) {
    return Double.longBitsToDouble(toLong(arr));
  }

  /**
   * Encodes an int as four bytes, most significant byte first.
   *
   * @param val value to encode
   * @return a new four byte array
   */
  public static byte[] toArr(int val) {
    byte[] arr = new byte[4];
    arr[0] = (byte) (val >>> 24);
    arr[1] = (byte) (val >>> 16);
    arr[2] = (byte) (val >>> 8);
    arr[3] = (byte) (val);
    return arr;
  }

  /**
   * Encodes a long as eight bytes, most significant byte first.
   *
   * @param val value to encode
   * @return a new eight byte array
   */
  public static byte[] toArr(long val) {
    byte[] arr = new byte[8];
    arr[0] = (byte) (val >>> 56);
    arr[1] = (byte) (val >>> 48);
    arr[2] = (byte) (val >>> 40);
    arr[3] = (byte) (val >>> 32);
    arr[4] = (byte) (val >>> 24);
    arr[5] = (byte) (val >>> 16);
    arr[6] = (byte) (val >>> 8);
    arr[7] = (byte) (val);
    return arr;
  }

  /**
   * Encodes the raw int bits of a float, see {@link #toArr(int)}.
   *
   * @param val value to encode
   * @return a new four byte array
   */
  public static byte[] toArr(float val) {
    return toArr(Float.floatToRawIntBits(val));
  }

  /**
   * Encodes the raw long bits of a double, see {@link #toArr(long)}.
   *
   * @param val value to encode
   * @return a new eight byte array
   */
  public static byte[] toArr(double val) {
    return toArr(Double.doubleToRawLongBits(val));
  }

  /** Reads four bytes starting at <code>offset</code> as a big-endian int. */
  private static int readInt(byte[] arr, int offset) {
    return (arr[offset] << 24)
        | ((arr[offset + 1] & 0xff) << 16)
        | ((arr[offset + 2] & 0xff) << 8)
        | (arr[offset + 3] & 0xff);
  }

  /** Rejects a missing or too short array before any byte of it is read. */
  private static void checkLength(byte[] arr, int required) {
    if (arr == null) {
      throw new NullPointerException("arr must not be null");
    }
    if (arr.length < required) {
      throw new IllegalArgumentException(
          "Expected at least " + required + " bytes but got " + arr.length);
    }
  }
}

View File

@ -27,7 +27,6 @@ import org.apache.solr.search.function.*;
import org.apache.solr.search.QParser;
import org.apache.solr.response.TextResponseWriter;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Query;
@ -36,8 +35,7 @@ import org.apache.lucene.search.cache.CachedArrayCreator;
import org.apache.lucene.search.cache.LongValuesCreator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.util.TrieFieldHelper;
import java.util.Map;
import java.util.Date;
@ -68,7 +66,7 @@ public class TrieDateField extends DateField {
public Date toObject(Fieldable f) {
byte[] arr = f.getBinaryValue();
if (arr==null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,TrieField.badFieldString(f));
return new Date(TrieField.toLong(arr));
return new Date(TrieFieldHelper.toLong(arr));
}
@Override
@ -107,7 +105,7 @@ public class TrieDateField extends DateField {
return;
}
writer.writeDate(name,new Date(TrieField.toLong(arr)));
writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
}
@Override
@ -146,7 +144,7 @@ public class TrieDateField extends DateField {
public String toExternal(Fieldable f) {
byte[] arr = f.getBinaryValue();
if (arr==null) return TrieField.badFieldString(f);
return super.toExternal(new Date(TrieField.toLong(arr)));
return super.toExternal(new Date(TrieFieldHelper.toLong(arr)));
}
@Override
@ -167,44 +165,6 @@ public class TrieDateField extends DateField {
return readableToIndexed(storedToReadable(f));
}
@Override
public Fieldable createField(SchemaField field, Object value, float boost) {
boolean indexed = field.indexed();
boolean stored = field.stored();
if (!indexed && !stored) {
if (log.isTraceEnabled())
log.trace("Ignoring unindexed/unstored field: " + field);
return null;
}
int ps = precisionStep;
byte[] arr=null;
TokenStream ts=null;
long time = (value instanceof Date)
? ((Date)value).getTime()
: super.parseMath(null, value.toString()).getTime();
if (stored) arr = TrieField.toArr(time);
if (indexed) ts = new NumericTokenStream(ps).setLongValue(time);
Field f;
if (stored) {
f = new Field(field.getName(), arr);
if (indexed) f.setTokenStream(ts);
} else {
f = new Field(field.getName(), ts);
}
// term vectors aren't supported
f.setOmitNorms(field.omitNorms());
f.setOmitTermFreqAndPositions(field.omitTf());
f.setBoost(boost);
return f;
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {

View File

@ -17,7 +17,6 @@
package org.apache.solr.schema;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.*;
import org.apache.lucene.search.cache.CachedArrayCreator;
import org.apache.lucene.search.cache.DoubleValuesCreator;
@ -26,8 +25,7 @@ import org.apache.lucene.search.cache.IntValuesCreator;
import org.apache.lucene.search.cache.LongValuesCreator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.util.TrieFieldHelper;
import org.apache.noggit.CharArr;
import org.apache.solr.analysis.*;
import org.apache.solr.common.SolrException;
@ -68,6 +66,7 @@ public class TrieField extends FieldType {
protected TrieTypes type;
protected Object missingValue;
/**
* Used for handling date types following the same semantics as DateField
*/
@ -107,15 +106,15 @@ public class TrieField extends FieldType {
if (arr==null) return badFieldString(f);
switch (type) {
case INTEGER:
return toInt(arr);
return TrieFieldHelper.toInt(arr);
case FLOAT:
return toFloat(arr);
return TrieFieldHelper.toFloat(arr);
case LONG:
return toLong(arr);
return TrieFieldHelper.toLong(arr);
case DOUBLE:
return toDouble(arr);
return TrieFieldHelper.toDouble(arr);
case DATE:
return new Date(toLong(arr));
return new Date(TrieFieldHelper.toLong(arr));
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
}
@ -207,19 +206,19 @@ public class TrieField extends FieldType {
}
switch (type) {
case INTEGER:
writer.writeInt(name,toInt(arr));
writer.writeInt(name,TrieFieldHelper.toInt(arr));
break;
case FLOAT:
writer.writeFloat(name,toFloat(arr));
writer.writeFloat(name,TrieFieldHelper.toFloat(arr));
break;
case LONG:
writer.writeLong(name,toLong(arr));
writer.writeLong(name,TrieFieldHelper.toLong(arr));
break;
case DOUBLE:
writer.writeDouble(name,toDouble(arr));
writer.writeDouble(name,TrieFieldHelper.toDouble(arr));
break;
case DATE:
writer.writeDate(name,new Date(toLong(arr)));
writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
@ -293,55 +292,6 @@ public class TrieField extends FieldType {
}
static int toInt(byte[] arr) {
return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
}
static long toLong(byte[] arr) {
int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff);
return (((long)high)<<32) | (low&0x0ffffffffL);
}
static float toFloat(byte[] arr) {
return Float.intBitsToFloat(toInt(arr));
}
static double toDouble(byte[] arr) {
return Double.longBitsToDouble(toLong(arr));
}
static byte[] toArr(int val) {
byte[] arr = new byte[4];
arr[0] = (byte)(val>>>24);
arr[1] = (byte)(val>>>16);
arr[2] = (byte)(val>>>8);
arr[3] = (byte)(val);
return arr;
}
static byte[] toArr(long val) {
byte[] arr = new byte[8];
arr[0] = (byte)(val>>>56);
arr[1] = (byte)(val>>>48);
arr[2] = (byte)(val>>>40);
arr[3] = (byte)(val>>>32);
arr[4] = (byte)(val>>>24);
arr[5] = (byte)(val>>>16);
arr[6] = (byte)(val>>>8);
arr[7] = (byte)(val);
return arr;
}
static byte[] toArr(float val) {
return toArr(Float.floatToRawIntBits(val));
}
static byte[] toArr(double val) {
return toArr(Double.doubleToRawLongBits(val));
}
@Override
public String storedToReadable(Fieldable f) {
return toExternal(f);
@ -396,15 +346,15 @@ public class TrieField extends FieldType {
if (arr==null) return badFieldString(f);
switch (type) {
case INTEGER:
return Integer.toString(toInt(arr));
return Integer.toString(TrieFieldHelper.toInt(arr));
case FLOAT:
return Float.toString(toFloat(arr));
return Float.toString(TrieFieldHelper.toFloat(arr));
case LONG:
return Long.toString(toLong(arr));
return Long.toString(TrieFieldHelper.toLong(arr));
case DOUBLE:
return Double.toString(toDouble(arr));
return Double.toString(TrieFieldHelper.toDouble(arr));
case DATE:
return dateField.formatDate(new Date(toLong(arr)));
return dateField.formatDate(new Date(TrieFieldHelper.toLong(arr)));
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
}
@ -483,75 +433,53 @@ public class TrieField extends FieldType {
@Override
public Fieldable createField(SchemaField field, Object value, float boost) {
boolean indexed = field.indexed();
boolean stored = field.stored();
TrieFieldHelper.FieldInfo info = new TrieFieldHelper.FieldInfo();
info.index = field.indexed();
info.store = field.stored();
info.precisionStep = precisionStep;
info.omitNorms = field.omitNorms();
info.omitTF = field.omitTf();
if (!indexed && !stored) {
if (!info.index && !info.store) {
if (log.isTraceEnabled())
log.trace("Ignoring unindexed/unstored field: " + field);
return null;
}
int ps = precisionStep;
byte[] arr=null;
TokenStream ts=null;
// String indexedVal = indexed && precisionStep==0 ? readableToIndexed(externalVal) : null;
switch (type) {
case INTEGER:
int i = (value instanceof Number)
? ((Number)value).intValue()
: Integer.parseInt(value.toString());
if (stored) arr = toArr(i);
if (indexed) ts = new NumericTokenStream(ps).setIntValue(i);
break;
return TrieFieldHelper.createIntField(field.getName(), i, info, boost);
case FLOAT:
float f = (value instanceof Number)
? ((Number)value).floatValue()
: Float.parseFloat(value.toString());
if (stored) arr = toArr(f);
if (indexed) ts = new NumericTokenStream(ps).setFloatValue(f);
break;
return TrieFieldHelper.createFloatField(field.getName(), f, info, boost);
case LONG:
long l = (value instanceof Number)
? ((Number)value).longValue()
: Long.parseLong(value.toString());
if (stored) arr = toArr(l);
if (indexed) ts = new NumericTokenStream(ps).setLongValue(l);
break;
return TrieFieldHelper.createLongField(field.getName(), l, info, boost);
case DOUBLE:
double d = (value instanceof Number)
? ((Number)value).doubleValue()
: Double.parseDouble(value.toString());
if (stored) arr = toArr(d);
if (indexed) ts = new NumericTokenStream(ps).setDoubleValue(d);
break;
return TrieFieldHelper.createDoubleField(field.getName(), d, info, boost);
case DATE:
long time = (value instanceof Date)
? ((Date)value).getTime()
: dateField.parseMath(null, value.toString()).getTime();
if (stored) arr = toArr(time);
if (indexed) ts = new NumericTokenStream(ps).setLongValue(time);
break;
Date date = (value instanceof Date)
? ((Date)value)
: dateField.parseMath(null, value.toString());
return TrieFieldHelper.createDateField(field.getName(), date, info, boost);
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
}
Field f;
if (stored) {
f = new Field(field.getName(), arr);
if (indexed) f.setTokenStream(ts);
} else {
f = new Field(field.getName(), ts);
}
// term vectors aren't supported
f.setOmitNorms(field.omitNorms());
f.setOmitTermFreqAndPositions(field.omitTf());
f.setBoost(boost);
return f;
}
public enum TrieTypes {