LUCENE-1505: switch local lucene to use trie's NumericUtils for mapping doubles to strings

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@794721 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-07-16 15:38:06 +00:00
parent d7579b7e1e
commit c79f54975e
8 changed files with 61 additions and 108 deletions

View File

@ -4,7 +4,10 @@ Lucene contrib change Log
Changes in runtime behavior Changes in runtime behavior
(None) 1. LUCENE-1505: Local lucene now uses org.apache.lucene.util.NumericUtils for all
number conversion. You'll need to fully re-index any previously created indexes.
This isn't a break in back-compatibility because local Lucene has not yet
been released. (Mike McCandless)
API Changes API Changes

View File

@ -1,76 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.spatial;
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.util.NumericUtils; // for javadocs
/**
 * Trimmed-down copy of Solr's number utilities that keeps only the conversions
 * the spatial package relies on: encoding <code>long</code> and
 * <code>double</code> values as five-character strings and decoding them again.
 *
 * TODO -- when solr moves NumberUtils to lucene, this should be redundant
 *
 * @deprecated TODO: This helper class will be removed soon.
 * For new indexes use {@link NumericUtils} instead, which provides a sortable
 * binary representation (prefix encoded) of numeric values.
 * To index and efficiently query numeric values use {@link NumericTokenStream}
 * and {@link NumericRangeQuery}.
 */
@Deprecated
public class NumberUtils {

  /**
   * Encodes a <code>long</code> as a new five-character string.
   *
   * @param val the value to encode
   * @return the five characters produced by
   *         {@link #long2sortableStr(long, char[], int)}
   */
  public static String long2sortableStr(long val) {
    final char[] encoded = new char[5];
    long2sortableStr(val, encoded, 0);
    return new String(encoded);
  }

  /**
   * Encodes a <code>double</code> as a five-character string.
   *
   * @param val the value to encode
   * @return the string encoding of the adjusted raw bits of <code>val</code>
   */
  public static String double2sortableStr(double val) {
    final long raw = Double.doubleToRawLongBits(val);
    // Negative bit patterns get every bit except the sign bit inverted.
    final long adjusted = (raw < 0) ? (raw ^ 0x7fffffffffffffffL) : raw;
    return long2sortableStr(adjusted);
  }

  /**
   * Decodes a string produced by {@link #double2sortableStr(double)}.
   *
   * @param val the encoded string; its first five characters are read
   * @return the decoded <code>double</code>
   */
  public static double SortableStr2double(String val) {
    // The length argument is not read by SortableStr2long; 6 is what this call has always passed.
    final long decoded = SortableStr2long(val, 0, 6);
    // Undo the inversion applied to negative bit patterns during encoding.
    final long raw = (decoded < 0) ? (decoded ^ 0x7fffffffffffffffL) : decoded;
    return Double.longBitsToDouble(raw);
  }

  /**
   * Writes the five-character encoding of a <code>long</code> into an array.
   * The value is offset by <code>Long.MIN_VALUE</code> and then split into one
   * 4-bit chunk followed by four 15-bit chunks, most significant first.
   *
   * @param val the value to encode
   * @param out the destination array
   * @param offset index in <code>out</code> of the first character written
   * @return the number of characters written, always 5
   */
  public static int long2sortableStr(long val, char[] out, int offset) {
    final long biased = val + Long.MIN_VALUE;
    // Only the top four bits survive an unsigned shift by 60, so no mask is needed.
    out[offset] = (char) (biased >>> 60);
    int shift = 45;
    for (int i = 1; i < 5; i++) {
      out[offset + i] = (char) ((biased >>> shift) & 0x7fff);
      shift -= 15;
    }
    return 5;
  }

  /**
   * Decodes five characters written by
   * {@link #long2sortableStr(long, char[], int)} back into a <code>long</code>.
   *
   * @param sval the string holding the encoded characters
   * @param offset index in <code>sval</code> of the first encoded character
   * @param len not read by this method
   * @return the decoded value
   */
  public static long SortableStr2long(String sval, int offset, int len) {
    long bits = ((long) sval.charAt(offset)) << 60;
    int shift = 45;
    for (int i = 1; i < 5; i++) {
      bits |= ((long) sval.charAt(offset + i)) << shift;
      shift -= 15;
    }
    // Remove the Long.MIN_VALUE offset added while encoding.
    return bits - Long.MIN_VALUE;
  }
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.NumberUtils; import org.apache.lucene.util.NumericUtils;
@ -158,11 +158,11 @@ public class BoundaryBoxFilter extends Filter {
buffer.append(":"); buffer.append(":");
buffer.append(includeLower ? "[" : "{"); buffer.append(includeLower ? "[" : "{");
if (null != lowerTerm) { if (null != lowerTerm) {
buffer.append(NumberUtils.SortableStr2double(lowerTerm)); buffer.append(NumericUtils.prefixCodedToDouble(lowerTerm));
} }
buffer.append("-"); buffer.append("-");
if (null != upperTerm) { if (null != upperTerm) {
buffer.append(NumberUtils.SortableStr2double(upperTerm)); buffer.append(NumericUtils.prefixCodedToDouble(upperTerm));
} }
buffer.append(includeUpper ? "]" : "}"); buffer.append(includeUpper ? "]" : "}");
return buffer.toString(); return buffer.toString();

View File

@ -25,7 +25,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.NumberUtils; import org.apache.lucene.util.NumericUtils;
public class CartesianShapeFilter extends Filter { public class CartesianShapeFilter extends Filter {
@ -56,9 +56,8 @@ public class CartesianShapeFilter extends Filter {
// iterate through each boxid // iterate through each boxid
for (int i =0; i< sz; i++) { for (int i =0; i< sz; i++) {
double boxId = area.get(i).doubleValue(); double boxId = area.get(i).doubleValue();
termDocs.seek(new Term(fieldName, termDocs.seek(new Term(fieldName,
NumberUtils.double2sortableStr(boxId))); NumericUtils.doubleToPrefixCoded(boxId)));
// iterate through all documents // iterate through all documents
// which have this boxId // which have this boxId

View File

@ -27,7 +27,7 @@ import java.util.logging.Logger;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.spatial.NumberUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.spatial.tier.DistanceHandler.Precision; import org.apache.lucene.spatial.tier.DistanceHandler.Precision;
@ -91,8 +91,8 @@ public class LatLongDistanceFilter extends DistanceFilter {
// TODO: Why is this a WeakHashMap? // TODO: Why is this a WeakHashMap?
WeakHashMap<String,Double> cdistance = new WeakHashMap<String,Double>(maxdocs); WeakHashMap<String,Double> cdistance = new WeakHashMap<String,Double>(maxdocs);
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
String[] latIndex = FieldCache.DEFAULT.getStrings(reader, latField); double[] latIndex = FieldCache.DEFAULT.getDoubles(reader, latField);
String[] lngIndex = FieldCache.DEFAULT.getStrings(reader, lngField); double[] lngIndex = FieldCache.DEFAULT.getDoubles(reader, lngField);
/* store calculated distances for reuse by other components */ /* store calculated distances for reuse by other components */
distances = new HashMap<Integer,Double>(maxdocs); distances = new HashMap<Integer,Double>(maxdocs);
@ -105,11 +105,8 @@ public class LatLongDistanceFilter extends DistanceFilter {
while(td.next()) { while(td.next()) {
int doc = td.doc(); int doc = td.doc();
String sx = latIndex[doc]; double x = latIndex[doc];
String sy = lngIndex[doc]; double y = lngIndex[doc];
double x = NumberUtils.SortableStr2double(sx);
double y = NumberUtils.SortableStr2double(sy);
// round off lat / longs if necessary // round off lat / longs if necessary
// x = DistanceHandler.getPrecision(x, precise); // x = DistanceHandler.getPrecision(x, precise);
@ -168,8 +165,8 @@ public class LatLongDistanceFilter extends DistanceFilter {
} }
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
String[] latIndex = FieldCache.DEFAULT.getStrings(reader, latField); double[] latIndex = FieldCache.DEFAULT.getDoubles(reader, latField);
String[] lngIndex = FieldCache.DEFAULT.getStrings(reader, lngField); double[] lngIndex = FieldCache.DEFAULT.getDoubles(reader, lngField);
/* loop over all set bits (hits from the boundary box filters) */ /* loop over all set bits (hits from the boundary box filters) */
int i = bits.nextSetBit(0); int i = bits.nextSetBit(0);
@ -186,10 +183,8 @@ public class LatLongDistanceFilter extends DistanceFilter {
// filter chain, lat / lngs can be retrived from // filter chain, lat / lngs can be retrived from
// memory rather than document base. // memory rather than document base.
String sx = latIndex[i]; x = latIndex[i];
String sy = lngIndex[i]; y = lngIndex[i];
x = NumberUtils.SortableStr2double(sx);
y = NumberUtils.SortableStr2double(sy);
// round off lat / longs if necessary // round off lat / longs if necessary
// x = DistanceHandler.getPrecision(x, precise); // x = DistanceHandler.getPrecision(x, precise);

View File

@ -37,7 +37,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.function.CustomScoreQuery; import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.search.function.FieldScoreQuery; import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.FieldScoreQuery.Type; import org.apache.lucene.search.function.FieldScoreQuery.Type;
import org.apache.lucene.spatial.NumberUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.spatial.geohash.GeoHashUtils; import org.apache.lucene.spatial.geohash.GeoHashUtils;
import org.apache.lucene.spatial.tier.projections.CartesianTierPlotter; import org.apache.lucene.spatial.tier.projections.CartesianTierPlotter;
import org.apache.lucene.spatial.tier.projections.IProjector; import org.apache.lucene.spatial.tier.projections.IProjector;
@ -96,8 +96,8 @@ public class TestCartesian extends TestCase{
doc.add(new Field("name", name,Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("name", name,Field.Store.YES, Field.Index.TOKENIZED));
// convert the lat / long to lucene fields // convert the lat / long to lucene fields
doc.add(new Field(latField, NumberUtils.double2sortableStr(lat),Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(lngField, NumberUtils.double2sortableStr(lng),Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.UN_TOKENIZED));
// add a default meta field to make searching all documents easy // add a default meta field to make searching all documents easy
doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.TOKENIZED));
@ -106,7 +106,7 @@ public class TestCartesian extends TestCase{
for (int i =0; i < ctpsize; i++){ for (int i =0; i < ctpsize; i++){
CartesianTierPlotter ctp = ctps.get(i); CartesianTierPlotter ctp = ctps.get(i);
doc.add(new Field(ctp.getTierFieldName(), doc.add(new Field(ctp.getTierFieldName(),
NumberUtils.double2sortableStr(ctp.getTierBoxId(lat,lng)), NumericUtils.doubleToPrefixCoded(ctp.getTierBoxId(lat,lng)),
Field.Store.YES, Field.Store.YES,
Field.Index.NO_NORMS)); Field.Index.NO_NORMS));
@ -212,8 +212,8 @@ public class TestCartesian extends TestCase{
Document d = hits.doc(i); Document d = hits.doc(i);
String name = d.get("name"); String name = d.get("name");
double rsLat = NumberUtils.SortableStr2double(d.get(latField)); double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
double rsLng = NumberUtils.SortableStr2double(d.get(lngField)); double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
Double geo_distance = distances.get(hits.id(i)); Double geo_distance = distances.get(hits.id(i));
double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
@ -296,8 +296,8 @@ public class TestCartesian extends TestCase{
Document d = hits.doc(i); Document d = hits.doc(i);
String name = d.get("name"); String name = d.get("name");
double rsLat = NumberUtils.SortableStr2double(d.get(latField)); double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
double rsLng = NumberUtils.SortableStr2double(d.get(lngField)); double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
Double geo_distance = distances.get(hits.id(i)); Double geo_distance = distances.get(hits.id(i));
double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);

View File

@ -29,7 +29,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.spatial.NumberUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.spatial.tier.LatLongDistanceFilter; import org.apache.lucene.spatial.tier.LatLongDistanceFilter;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
@ -69,8 +69,8 @@ public class TestDistance extends TestCase{
doc.add(new Field("name", name,Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("name", name,Field.Store.YES, Field.Index.TOKENIZED));
// convert the lat / long to lucene fields // convert the lat / long to lucene fields
doc.add(new Field(latField, NumberUtils.double2sortableStr(lat),Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(lngField, NumberUtils.double2sortableStr(lng),Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.UN_TOKENIZED));
// add a default meta field to make searching all documents easy // add a default meta field to make searching all documents easy
doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.TOKENIZED));

View File

@ -259,6 +259,14 @@ public final class NumericUtils {
return f; return f;
} }
/**
 * Shortcut that encodes a <code>double</code> in two steps: the value is
 * first passed through {@link #doubleToSortableLong} and the result is then
 * handed to <code>longToPrefixCoded</code>.
 */
public static String doubleToPrefixCoded(double val) {
  final long sortableBits = doubleToSortableLong(val);
  return longToPrefixCoded(sortableBits);
}
/** /**
* Converts a sortable <code>long</code> back to a <code>double</code>. * Converts a sortable <code>long</code> back to a <code>double</code>.
* @see #doubleToSortableLong * @see #doubleToSortableLong
@ -268,6 +276,14 @@ public final class NumericUtils {
return Double.longBitsToDouble(val); return Double.longBitsToDouble(val);
} }
/**
 * Shortcut that decodes a prefix-coded string in two steps: the string is
 * first passed through <code>prefixCodedToLong</code> and the result is then
 * handed to {@link #sortableLongToDouble}.
 */
public static double prefixCodedToDouble(String val) {
  final long sortableBits = prefixCodedToLong(val);
  return sortableLongToDouble(sortableBits);
}
/** /**
* Converts a <code>float</code> value to a sortable signed <code>int</code>. * Converts a <code>float</code> value to a sortable signed <code>int</code>.
* The value is converted by getting their IEEE 754 floating-point &quot;float format&quot; * The value is converted by getting their IEEE 754 floating-point &quot;float format&quot;
@ -281,6 +297,14 @@ public final class NumericUtils {
return f; return f;
} }
/**
 * Shortcut that encodes a <code>float</code> in two steps: the value is
 * first passed through {@link #floatToSortableInt} and the result is then
 * handed to <code>intToPrefixCoded</code>.
 */
public static String floatToPrefixCoded(float val) {
  final int sortableBits = floatToSortableInt(val);
  return intToPrefixCoded(sortableBits);
}
/** /**
* Converts a sortable <code>int</code> back to a <code>float</code>. * Converts a sortable <code>int</code> back to a <code>float</code>.
* @see #floatToSortableInt * @see #floatToSortableInt
@ -290,6 +314,14 @@ public final class NumericUtils {
return Float.intBitsToFloat(val); return Float.intBitsToFloat(val);
} }
/**
 * Shortcut that decodes a prefix-coded string in two steps: the string is
 * first passed through <code>prefixCodedToInt</code> and the result is then
 * handed to {@link #sortableIntToFloat}.
 */
public static float prefixCodedToFloat(String val) {
  final int sortableBits = prefixCodedToInt(val);
  return sortableIntToFloat(sortableBits);
}
/** /**
* Expert: Splits a long range recursively. * Expert: Splits a long range recursively.
* You may implement a builder that adds clauses to a * You may implement a builder that adds clauses to a