mirror of https://github.com/apache/lucene.git
LUCENE-3001 -- Adding TrieFieldHelper to write solr compatible numeric fields without the solr dependency.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1086651 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4bfb56d42b
commit
b95ca2b453
|
@ -334,6 +334,10 @@ New features
|
|||
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
|
||||
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-3001: Added TrieFieldHelper to write solr compatible numeric
|
||||
fields without the solr dependency. (ryan)
|
||||
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
/**
|
||||
* Copyright 2005 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.lucene.analysis.NumericTokenStream;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
|
||||
/**
|
||||
* Helper class to make TrieFields compatible with ones written in solr
|
||||
*/
|
||||
public class TrieFieldHelper {
|
||||
|
||||
private TrieFieldHelper() {}
|
||||
|
||||
public static class FieldInfo {
|
||||
public int precisionStep = 8; // same as solr default
|
||||
public boolean store = true;
|
||||
public boolean index = true;
|
||||
public boolean omitNorms = true;
|
||||
public boolean omitTF = true;
|
||||
}
|
||||
|
||||
//----------------------------------------------
|
||||
// Create Field
|
||||
//----------------------------------------------
|
||||
|
||||
private static Fieldable createField(String name, byte[] arr, TokenStream ts, FieldInfo info, float boost) {
|
||||
|
||||
Field f;
|
||||
if (info.store) {
|
||||
f = new Field(name, arr);
|
||||
if (info.index) f.setTokenStream(ts);
|
||||
} else {
|
||||
f = new Field(name, ts);
|
||||
}
|
||||
|
||||
// term vectors aren't supported
|
||||
f.setOmitNorms(info.omitNorms);
|
||||
f.setOmitTermFreqAndPositions(info.omitTF);
|
||||
f.setBoost(boost);
|
||||
return f;
|
||||
}
|
||||
|
||||
public static Fieldable createIntField(String name, int value, FieldInfo info, float boost) {
|
||||
|
||||
byte[] arr=null;
|
||||
TokenStream ts=null;
|
||||
|
||||
if (info.store) arr = TrieFieldHelper.toArr(value);
|
||||
if (info.index) ts = new NumericTokenStream(info.precisionStep).setIntValue(value);
|
||||
|
||||
return createField(name, arr, ts, info, boost);
|
||||
}
|
||||
|
||||
public static Fieldable createFloatField(String name, float value, FieldInfo info, float boost) {
|
||||
|
||||
byte[] arr=null;
|
||||
TokenStream ts=null;
|
||||
|
||||
if (info.store) arr = TrieFieldHelper.toArr(value);
|
||||
if (info.index) ts = new NumericTokenStream(info.precisionStep).setFloatValue(value);
|
||||
|
||||
return createField(name, arr, ts, info, boost);
|
||||
}
|
||||
|
||||
public static Fieldable createLongField(String name, long value, FieldInfo info, float boost) {
|
||||
|
||||
byte[] arr=null;
|
||||
TokenStream ts=null;
|
||||
|
||||
if (info.store) arr = TrieFieldHelper.toArr(value);
|
||||
if (info.index) ts = new NumericTokenStream(info.precisionStep).setLongValue(value);
|
||||
|
||||
return createField(name, arr, ts, info, boost);
|
||||
}
|
||||
|
||||
public static Fieldable createDoubleField(String name, double value, FieldInfo info, float boost) {
|
||||
|
||||
byte[] arr=null;
|
||||
TokenStream ts=null;
|
||||
|
||||
if (info.store) arr = TrieFieldHelper.toArr(value);
|
||||
if (info.index) ts = new NumericTokenStream(info.precisionStep).setDoubleValue(value);
|
||||
|
||||
return createField(name, arr, ts, info, boost);
|
||||
}
|
||||
|
||||
public static Fieldable createDateField(String name, Date value, FieldInfo info, float boost) {
|
||||
// TODO, make sure the date is within long range!
|
||||
return createLongField(name, value.getTime(), info, boost);
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------
|
||||
// number <=> byte[]
|
||||
//----------------------------------------------
|
||||
|
||||
public static int toInt(byte[] arr) {
|
||||
return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
|
||||
}
|
||||
|
||||
public static long toLong(byte[] arr) {
|
||||
int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
|
||||
int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff);
|
||||
return (((long)high)<<32) | (low&0x0ffffffffL);
|
||||
}
|
||||
|
||||
public static float toFloat(byte[] arr) {
|
||||
return Float.intBitsToFloat(toInt(arr));
|
||||
}
|
||||
|
||||
public static double toDouble(byte[] arr) {
|
||||
return Double.longBitsToDouble(toLong(arr));
|
||||
}
|
||||
|
||||
public static byte[] toArr(int val) {
|
||||
byte[] arr = new byte[4];
|
||||
arr[0] = (byte)(val>>>24);
|
||||
arr[1] = (byte)(val>>>16);
|
||||
arr[2] = (byte)(val>>>8);
|
||||
arr[3] = (byte)(val);
|
||||
return arr;
|
||||
}
|
||||
|
||||
public static byte[] toArr(long val) {
|
||||
byte[] arr = new byte[8];
|
||||
arr[0] = (byte)(val>>>56);
|
||||
arr[1] = (byte)(val>>>48);
|
||||
arr[2] = (byte)(val>>>40);
|
||||
arr[3] = (byte)(val>>>32);
|
||||
arr[4] = (byte)(val>>>24);
|
||||
arr[5] = (byte)(val>>>16);
|
||||
arr[6] = (byte)(val>>>8);
|
||||
arr[7] = (byte)(val);
|
||||
return arr;
|
||||
}
|
||||
|
||||
public static byte[] toArr(float val) {
|
||||
return toArr(Float.floatToRawIntBits(val));
|
||||
}
|
||||
|
||||
public static byte[] toArr(double val) {
|
||||
return toArr(Double.doubleToRawLongBits(val));
|
||||
}
|
||||
}
|
|
@ -27,7 +27,6 @@ import org.apache.solr.search.function.*;
|
|||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -36,8 +35,7 @@ import org.apache.lucene.search.cache.CachedArrayCreator;
|
|||
import org.apache.lucene.search.cache.LongValuesCreator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.NumericTokenStream;
|
||||
import org.apache.lucene.util.TrieFieldHelper;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Date;
|
||||
|
@ -68,7 +66,7 @@ public class TrieDateField extends DateField {
|
|||
public Date toObject(Fieldable f) {
|
||||
byte[] arr = f.getBinaryValue();
|
||||
if (arr==null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,TrieField.badFieldString(f));
|
||||
return new Date(TrieField.toLong(arr));
|
||||
return new Date(TrieFieldHelper.toLong(arr));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -107,7 +105,7 @@ public class TrieDateField extends DateField {
|
|||
return;
|
||||
}
|
||||
|
||||
writer.writeDate(name,new Date(TrieField.toLong(arr)));
|
||||
writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -146,7 +144,7 @@ public class TrieDateField extends DateField {
|
|||
public String toExternal(Fieldable f) {
|
||||
byte[] arr = f.getBinaryValue();
|
||||
if (arr==null) return TrieField.badFieldString(f);
|
||||
return super.toExternal(new Date(TrieField.toLong(arr)));
|
||||
return super.toExternal(new Date(TrieFieldHelper.toLong(arr)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -167,44 +165,6 @@ public class TrieDateField extends DateField {
|
|||
return readableToIndexed(storedToReadable(f));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fieldable createField(SchemaField field, Object value, float boost) {
|
||||
boolean indexed = field.indexed();
|
||||
boolean stored = field.stored();
|
||||
|
||||
if (!indexed && !stored) {
|
||||
if (log.isTraceEnabled())
|
||||
log.trace("Ignoring unindexed/unstored field: " + field);
|
||||
return null;
|
||||
}
|
||||
|
||||
int ps = precisionStep;
|
||||
|
||||
byte[] arr=null;
|
||||
TokenStream ts=null;
|
||||
|
||||
long time = (value instanceof Date)
|
||||
? ((Date)value).getTime()
|
||||
: super.parseMath(null, value.toString()).getTime();
|
||||
|
||||
if (stored) arr = TrieField.toArr(time);
|
||||
if (indexed) ts = new NumericTokenStream(ps).setLongValue(time);
|
||||
|
||||
Field f;
|
||||
if (stored) {
|
||||
f = new Field(field.getName(), arr);
|
||||
if (indexed) f.setTokenStream(ts);
|
||||
} else {
|
||||
f = new Field(field.getName(), ts);
|
||||
}
|
||||
|
||||
// term vectors aren't supported
|
||||
|
||||
f.setOmitNorms(field.omitNorms());
|
||||
f.setOmitTermFreqAndPositions(field.omitTf());
|
||||
f.setBoost(boost);
|
||||
return f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.solr.schema;
|
||||
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.cache.CachedArrayCreator;
|
||||
import org.apache.lucene.search.cache.DoubleValuesCreator;
|
||||
|
@ -26,8 +25,7 @@ import org.apache.lucene.search.cache.IntValuesCreator;
|
|||
import org.apache.lucene.search.cache.LongValuesCreator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.NumericTokenStream;
|
||||
import org.apache.lucene.util.TrieFieldHelper;
|
||||
import org.apache.noggit.CharArr;
|
||||
import org.apache.solr.analysis.*;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -68,6 +66,7 @@ public class TrieField extends FieldType {
|
|||
protected TrieTypes type;
|
||||
protected Object missingValue;
|
||||
|
||||
|
||||
/**
|
||||
* Used for handling date types following the same semantics as DateField
|
||||
*/
|
||||
|
@ -107,15 +106,15 @@ public class TrieField extends FieldType {
|
|||
if (arr==null) return badFieldString(f);
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
return toInt(arr);
|
||||
return TrieFieldHelper.toInt(arr);
|
||||
case FLOAT:
|
||||
return toFloat(arr);
|
||||
return TrieFieldHelper.toFloat(arr);
|
||||
case LONG:
|
||||
return toLong(arr);
|
||||
return TrieFieldHelper.toLong(arr);
|
||||
case DOUBLE:
|
||||
return toDouble(arr);
|
||||
return TrieFieldHelper.toDouble(arr);
|
||||
case DATE:
|
||||
return new Date(toLong(arr));
|
||||
return new Date(TrieFieldHelper.toLong(arr));
|
||||
default:
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
|
||||
}
|
||||
|
@ -207,19 +206,19 @@ public class TrieField extends FieldType {
|
|||
}
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
writer.writeInt(name,toInt(arr));
|
||||
writer.writeInt(name,TrieFieldHelper.toInt(arr));
|
||||
break;
|
||||
case FLOAT:
|
||||
writer.writeFloat(name,toFloat(arr));
|
||||
writer.writeFloat(name,TrieFieldHelper.toFloat(arr));
|
||||
break;
|
||||
case LONG:
|
||||
writer.writeLong(name,toLong(arr));
|
||||
writer.writeLong(name,TrieFieldHelper.toLong(arr));
|
||||
break;
|
||||
case DOUBLE:
|
||||
writer.writeDouble(name,toDouble(arr));
|
||||
writer.writeDouble(name,TrieFieldHelper.toDouble(arr));
|
||||
break;
|
||||
case DATE:
|
||||
writer.writeDate(name,new Date(toLong(arr)));
|
||||
writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
|
||||
break;
|
||||
default:
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
|
||||
|
@ -293,55 +292,6 @@ public class TrieField extends FieldType {
|
|||
}
|
||||
|
||||
|
||||
static int toInt(byte[] arr) {
|
||||
return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
|
||||
}
|
||||
|
||||
static long toLong(byte[] arr) {
|
||||
int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
|
||||
int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff);
|
||||
return (((long)high)<<32) | (low&0x0ffffffffL);
|
||||
}
|
||||
|
||||
static float toFloat(byte[] arr) {
|
||||
return Float.intBitsToFloat(toInt(arr));
|
||||
}
|
||||
|
||||
static double toDouble(byte[] arr) {
|
||||
return Double.longBitsToDouble(toLong(arr));
|
||||
}
|
||||
|
||||
static byte[] toArr(int val) {
|
||||
byte[] arr = new byte[4];
|
||||
arr[0] = (byte)(val>>>24);
|
||||
arr[1] = (byte)(val>>>16);
|
||||
arr[2] = (byte)(val>>>8);
|
||||
arr[3] = (byte)(val);
|
||||
return arr;
|
||||
}
|
||||
|
||||
static byte[] toArr(long val) {
|
||||
byte[] arr = new byte[8];
|
||||
arr[0] = (byte)(val>>>56);
|
||||
arr[1] = (byte)(val>>>48);
|
||||
arr[2] = (byte)(val>>>40);
|
||||
arr[3] = (byte)(val>>>32);
|
||||
arr[4] = (byte)(val>>>24);
|
||||
arr[5] = (byte)(val>>>16);
|
||||
arr[6] = (byte)(val>>>8);
|
||||
arr[7] = (byte)(val);
|
||||
return arr;
|
||||
}
|
||||
|
||||
static byte[] toArr(float val) {
|
||||
return toArr(Float.floatToRawIntBits(val));
|
||||
}
|
||||
|
||||
static byte[] toArr(double val) {
|
||||
return toArr(Double.doubleToRawLongBits(val));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String storedToReadable(Fieldable f) {
|
||||
return toExternal(f);
|
||||
|
@ -396,15 +346,15 @@ public class TrieField extends FieldType {
|
|||
if (arr==null) return badFieldString(f);
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
return Integer.toString(toInt(arr));
|
||||
return Integer.toString(TrieFieldHelper.toInt(arr));
|
||||
case FLOAT:
|
||||
return Float.toString(toFloat(arr));
|
||||
return Float.toString(TrieFieldHelper.toFloat(arr));
|
||||
case LONG:
|
||||
return Long.toString(toLong(arr));
|
||||
return Long.toString(TrieFieldHelper.toLong(arr));
|
||||
case DOUBLE:
|
||||
return Double.toString(toDouble(arr));
|
||||
return Double.toString(TrieFieldHelper.toDouble(arr));
|
||||
case DATE:
|
||||
return dateField.formatDate(new Date(toLong(arr)));
|
||||
return dateField.formatDate(new Date(TrieFieldHelper.toLong(arr)));
|
||||
default:
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
|
||||
}
|
||||
|
@ -483,75 +433,53 @@ public class TrieField extends FieldType {
|
|||
|
||||
@Override
|
||||
public Fieldable createField(SchemaField field, Object value, float boost) {
|
||||
boolean indexed = field.indexed();
|
||||
boolean stored = field.stored();
|
||||
TrieFieldHelper.FieldInfo info = new TrieFieldHelper.FieldInfo();
|
||||
info.index = field.indexed();
|
||||
info.store = field.stored();
|
||||
info.precisionStep = precisionStep;
|
||||
info.omitNorms = field.omitNorms();
|
||||
info.omitTF = field.omitTf();
|
||||
|
||||
if (!indexed && !stored) {
|
||||
if (!info.index && !info.store) {
|
||||
if (log.isTraceEnabled())
|
||||
log.trace("Ignoring unindexed/unstored field: " + field);
|
||||
return null;
|
||||
}
|
||||
|
||||
int ps = precisionStep;
|
||||
|
||||
byte[] arr=null;
|
||||
TokenStream ts=null;
|
||||
// String indexedVal = indexed && precisionStep==0 ? readableToIndexed(externalVal) : null;
|
||||
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
int i = (value instanceof Number)
|
||||
? ((Number)value).intValue()
|
||||
: Integer.parseInt(value.toString());
|
||||
if (stored) arr = toArr(i);
|
||||
if (indexed) ts = new NumericTokenStream(ps).setIntValue(i);
|
||||
break;
|
||||
return TrieFieldHelper.createIntField(field.getName(), i, info, boost);
|
||||
|
||||
case FLOAT:
|
||||
float f = (value instanceof Number)
|
||||
? ((Number)value).floatValue()
|
||||
: Float.parseFloat(value.toString());
|
||||
if (stored) arr = toArr(f);
|
||||
if (indexed) ts = new NumericTokenStream(ps).setFloatValue(f);
|
||||
break;
|
||||
return TrieFieldHelper.createFloatField(field.getName(), f, info, boost);
|
||||
|
||||
case LONG:
|
||||
long l = (value instanceof Number)
|
||||
? ((Number)value).longValue()
|
||||
: Long.parseLong(value.toString());
|
||||
if (stored) arr = toArr(l);
|
||||
if (indexed) ts = new NumericTokenStream(ps).setLongValue(l);
|
||||
break;
|
||||
return TrieFieldHelper.createLongField(field.getName(), l, info, boost);
|
||||
|
||||
case DOUBLE:
|
||||
double d = (value instanceof Number)
|
||||
? ((Number)value).doubleValue()
|
||||
: Double.parseDouble(value.toString());
|
||||
if (stored) arr = toArr(d);
|
||||
if (indexed) ts = new NumericTokenStream(ps).setDoubleValue(d);
|
||||
break;
|
||||
return TrieFieldHelper.createDoubleField(field.getName(), d, info, boost);
|
||||
|
||||
case DATE:
|
||||
long time = (value instanceof Date)
|
||||
? ((Date)value).getTime()
|
||||
: dateField.parseMath(null, value.toString()).getTime();
|
||||
if (stored) arr = toArr(time);
|
||||
if (indexed) ts = new NumericTokenStream(ps).setLongValue(time);
|
||||
break;
|
||||
Date date = (value instanceof Date)
|
||||
? ((Date)value)
|
||||
: dateField.parseMath(null, value.toString());
|
||||
return TrieFieldHelper.createDateField(field.getName(), date, info, boost);
|
||||
|
||||
default:
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
|
||||
}
|
||||
|
||||
Field f;
|
||||
if (stored) {
|
||||
f = new Field(field.getName(), arr);
|
||||
if (indexed) f.setTokenStream(ts);
|
||||
} else {
|
||||
f = new Field(field.getName(), ts);
|
||||
}
|
||||
|
||||
// term vectors aren't supported
|
||||
|
||||
f.setOmitNorms(field.omitNorms());
|
||||
f.setOmitTermFreqAndPositions(field.omitTf());
|
||||
f.setBoost(boost);
|
||||
return f;
|
||||
}
|
||||
|
||||
public enum TrieTypes {
|
||||
|
|
Loading…
Reference in New Issue