From 30eda3c4749e9e2dc892899b34216c3652e5861a Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 24 Nov 2012 13:13:51 +0000 Subject: [PATCH] merge DocTermsIndex into SortedDocValues git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1413182 13f79535-47bb-0310-9956-ffa450edef68 --- .../benchmark/byTask/TestPerfTasksLogic.java | 7 +- lucene/common-build.xml | 2 +- .../apache/lucene/index/SortedDocValues.java | 140 ++++++++++++++++++ .../org/apache/lucene/search/FieldCache.java | 79 ++-------- .../apache/lucene/search/FieldCacheImpl.java | 81 ++++------ .../lucene/search/FieldCacheRangeFilter.java | 17 ++- .../lucene/search/FieldCacheTermsFilter.java | 9 +- .../apache/lucene/search/FieldComparator.java | 49 ++---- .../apache/lucene/index/TestIndexWriter.java | 8 +- .../search/FieldCacheRewriteMethod.java | 5 +- .../search/TestElevationComparator.java | 6 +- .../apache/lucene/search/TestFieldCache.java | 23 ++- .../term/TermAllGroupHeadsCollector.java | 77 ++++++---- .../grouping/term/TermAllGroupsCollector.java | 26 ++-- .../term/TermDistinctValuesCollector.java | 23 +-- .../term/TermFirstPassGroupingCollector.java | 14 +- .../term/TermGroupFacetCollector.java | 78 ++++++---- .../term/TermSecondPassGroupingCollector.java | 13 +- .../docvalues/DocTermsIndexDocValues.java | 23 +-- .../function/valuesource/OrdFieldSource.java | 11 +- .../valuesource/ReverseOrdFieldSource.java | 11 +- .../handler/component/FieldFacetStats.java | 37 +++-- .../handler/component/StatsComponent.java | 18 ++- .../PerSegmentSingleValuedFaceting.java | 19 +-- .../org/apache/solr/request/SimpleFacets.java | 14 +- .../apache/solr/request/UnInvertedField.java | 35 +++-- .../org/apache/solr/schema/BoolField.java | 31 ++-- .../org/apache/solr/schema/DateField.java | 7 +- .../solr/schema/SortableDoubleField.java | 17 ++- .../solr/schema/SortableFloatField.java | 17 ++- .../apache/solr/schema/SortableIntField.java | 17 ++- .../apache/solr/schema/SortableLongField.java | 17 ++- .../apache/solr/schema/StrFieldSource.java | 2 +- .../MissingStringLastComparatorSource.java | 22 +-- 34 files changed, 567 insertions(+), 388 deletions(-) diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index 0cc49e32033..d56252b9649 100755 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -53,10 +53,10 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -341,12 +341,11 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { Benchmark benchmark = execBenchmark(algLines); DirectoryReader r = DirectoryReader.open(benchmark.getRunData().getDirectory()); - DocTermsIndex idx = FieldCache.DEFAULT.getTermsIndex(new SlowCompositeReaderWrapper(r), "country"); + SortedDocValues idx = FieldCache.DEFAULT.getTermsIndex(new SlowCompositeReaderWrapper(r), "country"); final int maxDoc = r.maxDoc(); assertEquals(1000, maxDoc); - BytesRef br = new BytesRef(); for(int i=0;i<1000;i++) { - assertNotNull("doc " + i + " has null country", idx.getTerm(i, br)); + assertTrue("doc " + i + " has null country", idx.getOrd(i) != -1); } r.close(); } diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 186ddd65a87..5e2a78932d3 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -438,7 +438,7 @@ description="Compiles core classes"> + DESTDIR="${build.dir}/classes/java"> diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java index 831ed3794f0..05d27fe0cbe 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java @@ -17,6 +17,10 @@ package org.apache.lucene.index; * limitations under the License. */ +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; // nocommit need marker interface? @@ -28,14 +32,123 @@ public abstract class SortedDocValues extends BinaryDocValues { public abstract void lookupOrd(int ord, BytesRef result); // nocommit throws IOE or not? + // nocommit .getUniqueValueCount? public abstract int getValueCount(); @Override public void get(int docID, BytesRef result) { int ord = getOrd(docID); + if (ord == -1) { + // nocommit what to do ... maybe we need to return + // BytesRef? + throw new IllegalArgumentException("doc has no value"); + } lookupOrd(ord, result); } + public TermsEnum getTermsEnum() { + // nocommit who tests this base impl ... + // Default impl just uses the existing API; subclasses + // can specialize: + return new TermsEnum() { + private int currentOrd = -1; + + private final BytesRef term = new BytesRef(); + + @Override + public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException { + int low = 0; + int high = getValueCount()-1; + + while (low <= high) { + int mid = (low + high) >>> 1; + seekExact(mid); + int cmp = term.compareTo(text); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else { + return SeekStatus.FOUND; // key found + } + } + + if (low == getValueCount()) { + return SeekStatus.END; + } else { + seekExact(low); + return SeekStatus.NOT_FOUND; + } + } + + @Override + public void seekExact(long ord) throws IOException { + assert ord >= 0 && ord < getValueCount(); + currentOrd = (int) ord; + lookupOrd(currentOrd, term); + } + + @Override + public BytesRef next() throws IOException { + currentOrd++; + if (currentOrd >= getValueCount()) { + return null; + } + lookupOrd(currentOrd, term); + return term; + } + + @Override + public BytesRef term() throws IOException { + return term; + } + + @Override + public long ord() throws IOException { + return currentOrd; + } + + @Override + public int docFreq() { + throw new UnsupportedOperationException(); + } + + @Override + public long totalTermFreq() { + return -1; + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Comparator getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + + @Override + public void seekExact(BytesRef term, TermState state) throws IOException { + assert state != null && state instanceof OrdTermState; + this.seekExact(((OrdTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + OrdTermState state = new OrdTermState(); + state.ord = currentOrd; + return state; + } + }; + } + @Override public SortedDocValues newRAMInstance() { // nocommit optimize this @@ -136,4 +249,31 @@ public abstract class SortedDocValues extends BinaryDocValues { return 0; } } + + // nocommit javadocs + public int lookupTerm(BytesRef key, BytesRef spare) { + // this special case is the reason that Arrays.binarySearch() isn't useful. + if (key == null) { + throw new IllegalArgumentException("key must not be null"); + } + + int low = 0; + int high = getValueCount()-1; + + while (low <= high) { + int mid = (low + high) >>> 1; + lookupOrd(mid, spare); + int cmp = spare.compareTo(key); + + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + return mid; // key found + } + } + + return -(low + 1); // key not found. + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index 5ab47ea849f..1eb9718e26c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -21,18 +21,17 @@ import java.io.IOException; import java.io.PrintStream; import org.apache.lucene.analysis.NumericTokenStream; // for javadocs -import org.apache.lucene.document.IntField; // for javadocs -import org.apache.lucene.document.FloatField; // for javadocs -import org.apache.lucene.document.LongField; // for javadocs import org.apache.lucene.document.DoubleField; // for javadocs -import org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.document.FloatField; // for javadocs +import org.apache.lucene.document.IntField; // for javadocs +import org.apache.lucene.document.LongField; // for javadocs import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.packed.PackedInts; /** * Expert: Maintains caches of term values. @@ -41,6 +40,8 @@ import org.apache.lucene.util.packed.PackedInts; * * @since lucene 1.4 * @see org.apache.lucene.util.FieldCacheSanityChecker + * + * @lucene.internal */ // nocommit abstract class...? public interface FieldCache { @@ -480,7 +481,7 @@ public interface FieldCache { public Doubles getDoubles(AtomicReader reader, String field, DoubleParser parser, boolean setDocsWithField) throws IOException; /** Returned by {@link #getTerms} */ - // nocommit: can we merge this api with the BinaryDocValues api? + // nocommit: merge this api with the BinaryDocValues api? public abstract static class DocTerms { /** The BytesRef argument must not be null; the method * returns the same BytesRef, or an empty (length=0) @@ -515,64 +516,6 @@ public interface FieldCache { * subsequent calls will share the same cache entry. */ public DocTerms getTerms (AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException; - /** Returned by {@link #getTermsIndex} */ - // nocommit: can we merge this api with the SortedDocValues api? - public abstract static class DocTermsIndex { - - public int binarySearchLookup(BytesRef key, BytesRef spare) { - // this special case is the reason that Arrays.binarySearch() isn't useful. - if (key == null) { - throw new IllegalArgumentException("key must not be null"); - } - - int low = 0; - int high = numOrd()-1; - - while (low <= high) { - int mid = (low + high) >>> 1; - int cmp = lookup(mid, spare).compareTo(key); - - if (cmp < 0) - low = mid + 1; - else if (cmp > 0) - high = mid - 1; - else - return mid; // key found - } - return -(low + 1); // key not found. - } - - /** The BytesRef argument must not be null; the method - * returns the same BytesRef, or an empty (length=0) - * BytesRef if this ord is the null ord (-1). */ - public abstract BytesRef lookup(int ord, BytesRef reuse); - - /** Convenience method, to lookup the Term for a doc. - * If this doc is deleted or did not have this field, - * this will return an empty (length=0) BytesRef. */ - public BytesRef getTerm(int docID, BytesRef reuse) { - int ord = getOrd(docID); - if (ord == -1) { - return null; - } - return lookup(ord, reuse); - } - - /** Returns sort ord for this document. Ord -1 is - * is returend for docs that are deleted or did not have - * this field. */ - public abstract int getOrd(int docID); - - /** Returns total unique ord count. */ - public abstract int numOrd(); - - /** Number of documents */ - public abstract int size(); - - /** Returns a TermsEnum that can iterate over the values in this index entry */ - public abstract TermsEnum getTermsEnum(); - } - /** Checks the internal cache for an appropriate entry, and if none * is found, reads the term values in field * and returns a {@link DocTerms} instance, providing a @@ -582,7 +525,7 @@ public interface FieldCache { * @return The values in the given field for each document. * @throws IOException If any error occurs. */ - public DocTermsIndex getTermsIndex (AtomicReader reader, String field) throws IOException; + public SortedDocValues getTermsIndex (AtomicReader reader, String field) throws IOException; /** Expert: just like {@link * #getTermsIndex(AtomicReader,String)}, but you can specify @@ -590,7 +533,7 @@ public interface FieldCache { * faster lookups (default is "true"). Note that the * first call for a given reader and field "wins", * subsequent calls will share the same cache entry. */ - public DocTermsIndex getTermsIndex (AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException; + public SortedDocValues getTermsIndex (AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException; /** * Checks the internal cache for an appropriate entry, and if none is found, reads the term values @@ -662,7 +605,7 @@ public interface FieldCache { * The most recently estimated size of the value, null unless * estimateSize has been called. */ - public final String getEstimatedSize() { + public String getEstimatedSize() { return size; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index bc9574d0328..3bcd97ea99a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -47,7 +47,6 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; -import org.apache.lucene.util.packed.PackedInts.Reader; // nocommit rename to UninvertFieldCacheImpl or something ... @@ -73,7 +72,7 @@ class FieldCacheImpl implements FieldCache { caches.put(Long.TYPE, new LongCache(this)); caches.put(Double.TYPE, new DoubleCache(this)); caches.put(DocTerms.class, new DocTermsCache(this)); - caches.put(DocTermsIndex.class, new DocTermsIndexCache(this)); + caches.put(SortedDocValues.class, new SortedDocValuesCache(this)); caches.put(DocTermOrds.class, new DocTermOrdsCache(this)); caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this)); } @@ -574,7 +573,6 @@ class FieldCacheImpl implements FieldCache { // nocommit should we throw exc if parser isn't // null? if setDocsWithField is true? } else { - int maxDoc = reader.maxDoc(); final int[] values; final IntParser parser = (IntParser) key.custom; if (parser == null) { @@ -728,7 +726,6 @@ class FieldCacheImpl implements FieldCache { // nocommit should we throw exc if parser isn't // null? if setDocsWithField is true? } else { - int maxDoc = reader.maxDoc(); final float[] values; final FloatParser parser = (FloatParser) key.custom; if (parser == null) { @@ -819,7 +816,6 @@ class FieldCacheImpl implements FieldCache { // nocommit should we throw exc if parser isn't // null? if setDocsWithField is true? } else { - int maxDoc = reader.maxDoc(); final long[] values; final LongParser parser = (LongParser) key.custom; if (parser == null) { @@ -910,7 +906,6 @@ class FieldCacheImpl implements FieldCache { // nocommit should we throw exc if parser isn't // null? if setDocsWithField is true? } else { - int maxDoc = reader.maxDoc(); final double[] values; final DoubleParser parser = (DoubleParser) key.custom; if (parser == null) { @@ -954,13 +949,13 @@ class FieldCacheImpl implements FieldCache { } } - public static class DocTermsIndexImpl extends DocTermsIndex { + public static class SortedDocValuesImpl extends SortedDocValues { private final PagedBytes.Reader bytes; private final PackedInts.Reader termOrdToBytesOffset; private final PackedInts.Reader docToTermOrd; private final int numOrd; - public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { + public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { this.bytes = bytes; this.docToTermOrd = docToTermOrd; this.termOrdToBytesOffset = termOrdToBytesOffset; @@ -968,7 +963,7 @@ class FieldCacheImpl implements FieldCache { } @Override - public int numOrd() { + public int getValueCount() { return numOrd; } @@ -986,19 +981,31 @@ class FieldCacheImpl implements FieldCache { } @Override - public BytesRef lookup(int ord, BytesRef ret) { + public void lookupOrd(int ord, BytesRef ret) { if (ord < 0) { throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")"); } - return bytes.fill(ret, termOrdToBytesOffset.get(ord)); + bytes.fill(ret, termOrdToBytesOffset.get(ord)); + } + + @Override + public int maxLength() { + // nocommit hmm + throw new UnsupportedOperationException(); + } + + @Override + public boolean isFixedLength() { + // nocommit hmm + throw new UnsupportedOperationException(); } @Override public TermsEnum getTermsEnum() { - return this.new DocTermsIndexEnum(); + return this.new SortedDocValuesEnum(); } - class DocTermsIndexEnum extends TermsEnum { + class SortedDocValuesEnum extends TermsEnum { int currentOrd; int currentBlockNumber; int end; // end position in the current block @@ -1007,7 +1014,7 @@ class FieldCacheImpl implements FieldCache { final BytesRef term = new BytesRef(); - public DocTermsIndexEnum() { + public SortedDocValuesEnum() { currentOrd = -1; currentBlockNumber = 0; blocks = bytes.getBlocks(); @@ -1043,8 +1050,9 @@ class FieldCacheImpl implements FieldCache { } } + @Override public void seekExact(long ord) throws IOException { - assert ord >= 0 && ord <= numOrd; + assert ord >= 0 && ord < numOrd; // TODO: if gap is small, could iterate from current position? Or let user decide that? currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord)); end = blockEnds[currentBlockNumber]; @@ -1140,16 +1148,16 @@ class FieldCacheImpl implements FieldCache { // nocommit woudl be nice if .getTErms would return a // DocTermsIndex if one already existed - public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException { + public SortedDocValues getTermsIndex(AtomicReader reader, String field) throws IOException { return getTermsIndex(reader, field, PackedInts.FAST); } - public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { - return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); + public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { + return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); } - static class DocTermsIndexCache extends Cache { - DocTermsIndexCache(FieldCacheImpl wrapper) { + static class SortedDocValuesCache extends Cache { + SortedDocValuesCache(FieldCacheImpl wrapper) { super(wrapper); } @@ -1160,36 +1168,7 @@ class FieldCacheImpl implements FieldCache { final int maxDoc = reader.maxDoc(); SortedDocValues valuesIn = reader.getSortedDocValues(key.field); if (valuesIn != null) { - final SortedDocValues ramInstance = valuesIn.newRAMInstance(); - return new DocTermsIndex() { - - @Override - public BytesRef lookup(int ord, BytesRef reuse) { - ramInstance.lookupOrd(ord, reuse); - return reuse; - } - - @Override - public int getOrd(int docID) { - return ramInstance.getOrd(docID); - } - - @Override - public int numOrd() { - return ramInstance.getValueCount(); - } - - @Override - public int size() { - return ramInstance.size(); - } - - @Override - public TermsEnum getTermsEnum() { - // nocommit: to the codec api? or can that termsenum just use this thing? - return null; - } - }; + return valuesIn.newRAMInstance(); } else { Terms terms = reader.terms(key.field); @@ -1283,7 +1262,7 @@ class FieldCacheImpl implements FieldCache { } // maybe an int-only impl? - return new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd); + return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd); } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java index 624e25f8b83..853977c4bca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java @@ -18,15 +18,16 @@ package org.apache.lucene.search; import java.io.IOException; +import org.apache.lucene.document.DoubleField; // for javadocs +import org.apache.lucene.document.FloatField; // for javadocs +import org.apache.lucene.document.IntField; // for javadocs +import org.apache.lucene.document.LongField; // for javadocs import org.apache.lucene.index.AtomicReader; // for javadocs import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.document.IntField; // for javadocs -import org.apache.lucene.document.FloatField; // for javadocs -import org.apache.lucene.document.LongField; // for javadocs -import org.apache.lucene.document.DoubleField; // for javadocs -import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; /** * A range filter built on top of a cached single term field (in {@link FieldCache}). @@ -89,10 +90,10 @@ public abstract class FieldCacheRangeFilter extends Filter { return new FieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper) { @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { - final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); + final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); final BytesRef spare = new BytesRef(); - final int lowerPoint = lowerVal == null ? -1 : fcsi.binarySearchLookup(new BytesRef(lowerVal), spare); - final int upperPoint = upperVal == null ? -1 : fcsi.binarySearchLookup(new BytesRef(upperVal), spare); + final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(new BytesRef(lowerVal), spare); + final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(new BytesRef(upperVal), spare); final int inclusiveLowerPoint, inclusiveUpperPoint; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java index 11caa9c84d5..1b5692f8833 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java @@ -22,9 +22,10 @@ import java.io.IOException; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; // javadoc @link import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; /** * A {@link Filter} that only accepts documents whose single @@ -118,11 +119,11 @@ public class FieldCacheTermsFilter extends Filter { @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { - final FieldCache.DocTermsIndex fcsi = getFieldCache().getTermsIndex(context.reader(), field); - final FixedBitSet bits = new FixedBitSet(fcsi.numOrd()); + final SortedDocValues fcsi = getFieldCache().getTermsIndex(context.reader(), field); + final FixedBitSet bits = new FixedBitSet(fcsi.getValueCount()); final BytesRef spare = new BytesRef(); for (int i=0;i= 0) { bits.set(ord); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index 3c60526bdcc..f4946ddd7fc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -23,9 +23,9 @@ import java.util.Comparator; import org.apache.lucene.index.AtomicReader; // javadocs import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.FieldCache.ByteParser; import org.apache.lucene.search.FieldCache.DocTerms; -import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.FieldCache.DoubleParser; import org.apache.lucene.search.FieldCache.FloatParser; import org.apache.lucene.search.FieldCache.IntParser; @@ -1096,7 +1096,7 @@ public abstract class FieldComparator { /* Current reader's doc ord/values. @lucene.internal */ - DocTermsIndex termsIndex; + SortedDocValues termsIndex; private final String field; @@ -1159,8 +1159,8 @@ public abstract class FieldComparator { @Override public int compareDocToValue(int doc, BytesRef value) { - BytesRef docValue = termsIndex.getTerm(doc, tempBR); - if (docValue == null) { + int ord = termsIndex.getOrd(doc); + if (ord == -1) { if (value == null) { return 0; } @@ -1168,12 +1168,10 @@ public abstract class FieldComparator { } else if (value == null) { return 1; } - return docValue.compareTo(value); + termsIndex.lookupOrd(ord, tempBR); + return tempBR.compareTo(value); } - // nocommit remove null from FC DocTerms/Index as an - // allowed value - /** Base class for specialized (per bit width of the * ords) per-segment comparator. NOTE: this is messy; * we do this only because hotspot can't reliably inline @@ -1223,10 +1221,10 @@ public abstract class FieldComparator { // Used per-segment when docToOrd is null: private final class AnyOrdComparator extends PerSegmentComparator { - private final DocTermsIndex termsIndex; + private final SortedDocValues termsIndex; private final int docBase; - public AnyOrdComparator(DocTermsIndex termsIndex, int docBase) { + public AnyOrdComparator(SortedDocValues termsIndex, int docBase) { this.termsIndex = termsIndex; this.docBase = docBase; } @@ -1259,7 +1257,7 @@ public abstract class FieldComparator { if (values[slot] == null) { values[slot] = new BytesRef(); } - termsIndex.lookup(ord, values[slot]); + termsIndex.lookupOrd(ord, values[slot]); } readerGen[slot] = currentReaderGen; } @@ -1294,7 +1292,7 @@ public abstract class FieldComparator { bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = binarySearch(tempBR, termsIndex, bottomValue); + final int index = termsIndex.lookupTerm(bottomValue, tempBR); if (index < 0) { bottomOrd = -index - 2; bottomSameReader = false; @@ -1912,31 +1910,4 @@ public abstract class FieldComparator { return docTerms.getBytes(doc, tempBR).compareTo(value); } } - - // nocommit why do we have this AND DTI.binarySearch? - final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) { - return binarySearch(br, a, key, 0, a.numOrd()-1); - } - - final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key, int low, int high) { - - while (low <= high) { - int mid = (low + high) >>> 1; - BytesRef midVal = a.lookup(mid, br); - int cmp; - if (midVal != null) { - cmp = midVal.compareTo(key); - } else { - cmp = -1; - } - - if (cmp < 0) - low = mid + 1; - else if (cmp > 0) - high = mid - 1; - else - return mid; - } - return -(low + 1); - } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 433e0656cb2..81be3744b0f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1627,10 +1627,12 @@ public class TestIndexWriter extends LuceneTestCase { w.close(); assertEquals(1, reader.docFreq(new Term("content", bigTerm))); - FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST); - assertEquals(4, dti.numOrd()); + SortedDocValues dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST); + assertEquals(4, dti.getValueCount()); assertEquals(4, dti.size()); - assertEquals(bigTermBytesRef, dti.lookup(2, new BytesRef())); + BytesRef br = new BytesRef(); + dti.lookupOrd(2, br); + assertEquals(bigTermBytesRef, br); reader.close(); dir.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java b/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java index da06242f427..bd4319958f5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java +++ b/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java @@ -22,6 +22,7 @@ import java.util.Comparator; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; @@ -88,9 +89,9 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod */ @Override public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { - final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field); + final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field); // Cannot use FixedBitSet because we require long index (ord): - final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd()); + final OpenBitSet termSet = new OpenBitSet(fcsi.getValueCount()); TermsEnum termsEnum = query.getTermsEnum(new Terms() { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java b/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java index e78f0d64a65..37916fcfc9f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -142,7 +142,7 @@ class ElevationComparatorSource extends FieldComparatorSource { public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { return new FieldComparator() { - FieldCache.DocTermsIndex idIndex; + SortedDocValues idIndex; private final int[] values = new int[numHits]; private final BytesRef tempBR = new BytesRef(); int bottomVal; @@ -162,8 +162,8 @@ class ElevationComparatorSource extends FieldComparatorSource { if (ord == -1) { return 0; } else { - BytesRef id = idIndex.lookup(ord, tempBR); - Integer prio = priority.get(id); + idIndex.lookupOrd(ord, tempBR); + Integer prio = priority.get(tempBR); return prio == null ? 0 : prio.intValue(); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index cdaded446cb..d7ec288ed98 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -188,35 +188,42 @@ public class TestFieldCache extends LuceneTestCase { } // getTermsIndex - FieldCache.DocTermsIndex termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString"); + SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString"); assertSame("Second request to cache return same array", termsIndex, cache.getTermsIndex(reader, "theRandomUnicodeString")); assertTrue("doubles Size: " + termsIndex.size() + " is not: " + NUM_DOCS, termsIndex.size() == NUM_DOCS); final BytesRef br = new BytesRef(); for (int i = 0; i < NUM_DOCS; i++) { - final BytesRef term = termsIndex.getTerm(i, br); + final BytesRef term; + final int ord = termsIndex.getOrd(i); + if (ord == -1) { + term = null; + } else { + termsIndex.lookupOrd(ord, br); + term = br; + } final String s = term == null ? null : term.utf8ToString(); assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s)); } - int nTerms = termsIndex.numOrd(); + int nTerms = termsIndex.getValueCount(); // System.out.println("nTerms="+nTerms); TermsEnum tenum = termsIndex.getTermsEnum(); BytesRef val = new BytesRef(); for (int i=0; i { private final SentinelIntSet ordSet; - private FieldCache.DocTermsIndex index; + private SortedDocValues index; private final BytesRef spareBytesRef = new BytesRef(); private final String groupField; @@ -61,7 +62,7 @@ public class TermSecondPassGroupingCollector extends AbstractSecondPassGroupingC ordSet.clear(); for (SearchGroupDocs group : groupMap.values()) { // System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); - int ord = group.groupValue == null ? -1 : index.binarySearchLookup(group.groupValue, spareBytesRef); + int ord = group.groupValue == null ? -1 : index.lookupTerm(group.groupValue, spareBytesRef); if (group.groupValue == null || ord >= 0) { groupDocs[ordSet.put(ord)] = group; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java index 311d3c568e5..12055771588 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java @@ -17,26 +17,27 @@ package org.apache.lucene.queries.function.docvalues; +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSourceScorer; import org.apache.lucene.search.FieldCache; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueStr; -import java.io.IOException; - /** * Internal class, subject to change. * Serves as base class for FunctionValues based on DocTermsIndex. */ public abstract class DocTermsIndexDocValues extends FunctionValues { - protected final FieldCache.DocTermsIndex termsIndex; + protected final SortedDocValues termsIndex; protected final ValueSource vs; protected final MutableValueStr val = new MutableValueStr(); protected final BytesRef spare = new BytesRef(); @@ -51,7 +52,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { this.vs = vs; } - public FieldCache.DocTermsIndex getDocTermsIndex() { + public SortedDocValues getSortedDocValues() { return termsIndex; } @@ -70,7 +71,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { target.length = 0; return false; } - termsIndex.lookup(ord, target); + termsIndex.lookupOrd(ord, target); return true; } @@ -78,7 +79,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { public String strVal(int doc) { int ord=termsIndex.getOrd(doc); if (ord==-1) return null; - termsIndex.lookup(ord, spare); + termsIndex.lookupOrd(ord, spare); UnicodeUtil.UTF8toUTF16(spare, spareChars); return spareChars.toString(); } @@ -101,7 +102,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { int lower = Integer.MIN_VALUE; if (lowerVal != null) { - lower = termsIndex.binarySearchLookup(new BytesRef(lowerVal), spare); + lower = termsIndex.lookupTerm(new BytesRef(lowerVal), spare); if (lower < 0) { lower = -lower-1; } else if (!includeLower) { @@ -111,7 +112,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { int upper = Integer.MAX_VALUE; if (upperVal != null) { - upper = termsIndex.binarySearchLookup(new BytesRef(upperVal), spare); + upper = termsIndex.lookupTerm(new BytesRef(upperVal), spare); if (upper < 0) { upper = -upper-2; } else if (!includeUpper) { @@ -153,7 +154,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { if (!mval.exists) { mval.value.length = 0; } else { - mval.value = termsIndex.lookup(ord, mval.value); + termsIndex.lookupOrd(ord, mval.value); } } }; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java index 04d5d0a8e05..4eff3867dcd 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java @@ -17,12 +17,16 @@ package org.apache.lucene.queries.function.valuesource; +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.CompositeReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.IntDocValues; @@ -30,9 +34,6 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueInt; -import java.io.IOException; -import java.util.Map; - /** * Obtains the ordinal of the field value from the default Lucene {@link org.apache.lucene.search.FieldCache} using getStringIndex(). *
@@ -72,7 +73,7 @@ public class OrdFieldSource extends ValueSource { final AtomicReader r = topReader instanceof CompositeReader ? new SlowCompositeReaderWrapper((CompositeReader)topReader) : (AtomicReader) topReader; - final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(r, field); + final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(r, field); return new IntDocValues(this) { protected String toTerm(String readableValue) { return readableValue; @@ -87,7 +88,7 @@ public class OrdFieldSource extends ValueSource { } @Override public int numOrd() { - return sindex.numOrd(); + return sindex.getValueCount(); } @Override diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReverseOrdFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReverseOrdFieldSource.java index 32f8942c1a6..4f90815adac 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReverseOrdFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReverseOrdFieldSource.java @@ -17,20 +17,21 @@ package org.apache.lucene.queries.function.valuesource; +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.CompositeReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.search.FieldCache; -import java.io.IOException; -import java.util.Map; - /** * Obtains the ordinal of the field value from the default Lucene {@link org.apache.lucene.search.FieldCache} using getTermsIndex() * and reverses the order. @@ -73,8 +74,8 @@ public class ReverseOrdFieldSource extends ValueSource { : (AtomicReader) topReader; final int off = readerContext.docBase; - final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(r, field); - final int end = sindex.numOrd(); + final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(r, field); + final int end = sindex.getValueCount(); return new IntDocValues(this) { @Override diff --git a/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java b/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java index 3206c405a0b..adce22e90d2 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java +++ b/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java @@ -16,16 +16,17 @@ package org.apache.solr.handler.component; * limitations under the License. */ -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.util.BytesRef; -import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.SchemaField; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; + /** * FieldFacetStats is a utility to accumulate statistics on a set of values in one field, @@ -39,7 +40,7 @@ import java.util.Map; public class FieldFacetStats { public final String name; - final FieldCache.DocTermsIndex si; + final SortedDocValues si; final SchemaField facet_sf; final SchemaField field_sf; @@ -55,7 +56,7 @@ public class FieldFacetStats { private final BytesRef tempBR = new BytesRef(); - public FieldFacetStats(String name, FieldCache.DocTermsIndex si, SchemaField field_sf, SchemaField facet_sf, int numStatsTerms) { + public FieldFacetStats(String name, SortedDocValues si, SchemaField field_sf, SchemaField facet_sf, int numStatsTerms) { this.name = name; this.si = si; this.field_sf = field_sf; @@ -63,7 +64,7 @@ public class FieldFacetStats { this.numStatsTerms = numStatsTerms; startTermIndex = 0; - endTermIndex = si.numOrd(); + endTermIndex = si.getValueCount(); nTerms = endTermIndex - startTermIndex; facetStatsValues = new HashMap(); @@ -82,7 +83,8 @@ public class FieldFacetStats { if (ord == -1) { return null; } else { - return si.lookup(ord, ret); + si.lookupOrd(ord, ret); + return ret; } } @@ -90,7 +92,14 @@ public class FieldFacetStats { int term = si.getOrd(docID); int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < nTerms) { - final BytesRef br = si.lookup(term, tempBR); + + final BytesRef br; + if (term == -1) { + br = null; + } else { + br = tempBR; + si.lookupOrd(term, tempBR); + } String key = (br == null)?null:facet_sf.getType().indexedToReadable(br.utf8ToString()); StatsValues stats = facetStatsValues.get(key); if (stats == null) { @@ -117,7 +126,13 @@ public class FieldFacetStats { int term = si.getOrd(docID); int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < nTerms) { - final BytesRef br = si.lookup(term, tempBR); + final BytesRef br; + if (term == -1) { + br = null; + } else { + br = tempBR; + si.lookupOrd(term, tempBR); + } String key = br == null ? null : br.utf8ToString(); HashMap statsTermCounts = facetStatsTerms.get(statsTermNum); Integer statsTermCount = statsTermCounts.get(key); diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java index d510b11846c..521dc832b5e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -23,22 +23,23 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.FieldCache; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StatsParams; -import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.UnInvertedField; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.TrieField; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.request.UnInvertedField; /** * Stats component calculates simple statistics on numeric field values @@ -240,7 +241,7 @@ class SimpleStats { public NamedList getFieldCacheStats(String fieldName, String[] facet ) { SchemaField sf = searcher.getSchema().getField(fieldName); - FieldCache.DocTermsIndex si; + SortedDocValues si; try { si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName); } @@ -248,12 +249,12 @@ class SimpleStats { throw new RuntimeException( "failed to open field cache for: "+fieldName, e ); } StatsValues allstats = StatsValuesFactory.createStatsValues(sf); - final int nTerms = si.numOrd(); + final int nTerms = si.getValueCount(); if ( nTerms <= 0 || docs.size() <= 0 ) return allstats.getStatsValues(); // don't worry about faceting if no documents match... List facetStats = new ArrayList(); - FieldCache.DocTermsIndex facetTermsIndex; + SortedDocValues facetTermsIndex; for( String facetField : facet ) { SchemaField fsf = searcher.getSchema().getField(facetField); @@ -283,9 +284,10 @@ class SimpleStats { tempBR.length = 0; raw = tempBR; } else { - raw = si.lookup(docOrd, tempBR); - if( raw.length > 0 ) { - allstats.accumulate(raw); + raw = tempBR; + si.lookupOrd(docOrd, tempBR); + if( tempBR.length > 0 ) { + allstats.accumulate(tempBR); } else { allstats.missing(); } diff --git a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java index 6b724875ea8..09e0eb6b8aa 100755 --- a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java +++ b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java @@ -17,15 +17,20 @@ package org.apache.solr.request; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.*; + import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.PriorityQueue; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.packed.PackedInts; import org.apache.solr.common.SolrException; @@ -36,10 +41,6 @@ import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.BoundedTreeSet; -import java.io.IOException; -import java.util.*; -import java.util.concurrent.*; - class PerSegmentSingleValuedFaceting { @@ -223,7 +224,7 @@ class PerSegmentSingleValuedFaceting { this.context = context; } - FieldCache.DocTermsIndex si; + SortedDocValues si; int startTermIndex; int endTermIndex; int[] counts; @@ -239,16 +240,16 @@ class PerSegmentSingleValuedFaceting { if (prefix!=null) { BytesRef prefixRef = new BytesRef(prefix); - startTermIndex = si.binarySearchLookup(prefixRef, tempBR); + startTermIndex = si.lookupTerm(prefixRef, tempBR); if (startTermIndex<0) startTermIndex=-startTermIndex-1; prefixRef.append(UnicodeUtil.BIG_TERM); // TODO: we could constrain the lower endpoint if we had a binarySearch method that allowed passing start/end - endTermIndex = si.binarySearchLookup(prefixRef, tempBR); + endTermIndex = si.lookupTerm(prefixRef, tempBR); assert endTermIndex < 0; endTermIndex = -endTermIndex-1; } else { startTermIndex=-1; - endTermIndex=si.numOrd(); + endTermIndex=si.getValueCount(); } final int nTerms=endTermIndex-startTermIndex; diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java index 2b5e5538a8f..f57a2dcf1c9 100644 --- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java +++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java @@ -477,7 +477,7 @@ public class SimpleFacets { FieldType ft = searcher.getSchema().getFieldType(fieldName); NamedList res = new NamedList(); - FieldCache.DocTermsIndex si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName); + SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName); final BytesRef prefixRef; if (prefix == null) { @@ -493,15 +493,15 @@ public class SimpleFacets { int startTermIndex, endTermIndex; if (prefix!=null) { - startTermIndex = si.binarySearchLookup(prefixRef, br); + startTermIndex = si.lookupTerm(prefixRef, br); if (startTermIndex<0) startTermIndex=-startTermIndex-1; prefixRef.append(UnicodeUtil.BIG_TERM); - endTermIndex = si.binarySearchLookup(prefixRef, br); + endTermIndex = si.lookupTerm(prefixRef, br); assert endTermIndex < 0; endTermIndex = -endTermIndex-1; } else { startTermIndex=-1; - endTermIndex=si.numOrd(); + endTermIndex=si.getValueCount(); } final int nTerms=endTermIndex-startTermIndex; @@ -564,7 +564,8 @@ public class SimpleFacets { long pair = sorted[i]; int c = (int)(pair >>> 32); int tnum = Integer.MAX_VALUE - (int)pair; - ft.indexedToReadable(si.lookup(startTermIndex+tnum, br), charsRef); + si.lookupOrd(startTermIndex+tnum, br); + ft.indexedToReadable(br, charsRef); res.add(charsRef.toString(), c); } @@ -582,7 +583,8 @@ public class SimpleFacets { int c = counts[i]; if (c=0) continue; if (--lim<0) break; - ft.indexedToReadable(si.lookup(startTermIndex+i, br), charsRef); + si.lookupOrd(startTermIndex+i, br); + ft.indexedToReadable(br, charsRef); res.add(charsRef.toString(), c); } } diff --git a/solr/core/src/java/org/apache/solr/request/UnInvertedField.java b/solr/core/src/java/org/apache/solr/request/UnInvertedField.java index c73c2c91dde..7407e790e43 100755 --- a/solr/core/src/java/org/apache/solr/request/UnInvertedField.java +++ b/solr/core/src/java/org/apache/solr/request/UnInvertedField.java @@ -17,37 +17,36 @@ package org.apache.solr.request; -import org.apache.lucene.search.FieldCache; +import java.io.IOException; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + import org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrCore; - +import org.apache.solr.handler.component.FieldFacetStats; +import org.apache.solr.handler.component.StatsValues; +import org.apache.solr.handler.component.StatsValuesFactory; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.TrieField; import org.apache.solr.search.*; import org.apache.solr.util.LongPriorityQueue; import org.apache.solr.util.PrimUtils; -import org.apache.solr.handler.component.StatsValues; -import org.apache.solr.handler.component.StatsValuesFactory; -import org.apache.solr.handler.component.FieldFacetStats; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.UnicodeUtil; - -import java.io.IOException; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.Map; - -import java.util.concurrent.atomic.AtomicLong; /** * @@ -481,7 +480,7 @@ public class UnInvertedField extends DocTermOrds { int i = 0; final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; //Initialize facetstats, if facets have been passed in - FieldCache.DocTermsIndex si; + SortedDocValues si; for (String f : facet) { SchemaField facet_sf = searcher.getSchema().getField(f); try { diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index f203200ca35..3704120a152 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -17,29 +17,30 @@ package org.apache.solr.schema; +import java.io.IOException; +import java.io.Reader; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.GeneralField; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.StorableField; -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.SortField; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.BoolDocValues; import org.apache.lucene.queries.function.valuesource.OrdFieldSource; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueBool; -import org.apache.solr.search.QParser; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.solr.response.TextResponseWriter; import org.apache.solr.analysis.SolrAnalyzer; - -import java.util.Map; -import java.io.Reader; -import java.io.IOException; +import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; /** * */ @@ -167,14 +168,14 @@ class BoolFieldSource extends ValueSource { @Override public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { - final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), field); + final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), field); // figure out what ord maps to true - int nord = sindex.numOrd(); + int nord = sindex.getValueCount(); BytesRef br = new BytesRef(); int tord = -1; for (int i=0; i { private final BytesRef[] values; private final int[] readerGen; - private FieldCache.DocTermsIndex termsIndex; + private SortedDocValues termsIndex; private final String field; private final BytesRef NULL_VAL; @@ -137,7 +138,7 @@ class TermOrdValComparator_SML extends FieldComparator { protected final int[] readerGen; protected int currentReaderGen = -1; - protected FieldCache.DocTermsIndex termsIndex; + protected SortedDocValues termsIndex; protected int bottomSlot = -1; protected int bottomOrd; @@ -202,7 +203,7 @@ class TermOrdValComparator_SML extends FieldComparator { bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = binarySearch(tempBR, termsIndex, bottomValue); + final int index = termsIndex.lookupTerm(bottomValue, tempBR); if (index < 0) { bottomOrd = -index - 2; bottomSameReader = false; @@ -224,8 +225,8 @@ class TermOrdValComparator_SML extends FieldComparator { @Override public int compareDocToValue(int doc, BytesRef value) { - final BytesRef docValue = termsIndex.getTerm(doc, tempBR); - if (docValue == null) { + int docOrd = termsIndex.getOrd(doc); + if (docOrd == -1) { if (value == null) { return 0; } @@ -233,7 +234,8 @@ class TermOrdValComparator_SML extends FieldComparator { } else if (value == null) { return -1; } - return docValue.compareTo(value); + termsIndex.lookupOrd(docOrd, tempBR); + return tempBR.compareTo(value); } } @@ -270,7 +272,7 @@ class TermOrdValComparator_SML extends FieldComparator { if (order == NULL_ORD) { return bottomValue.compareTo(parent.NULL_VAL); } else { - termsIndex.lookup(order, tempBR); + termsIndex.lookupOrd(order, tempBR); return bottomValue.compareTo(tempBR); } } @@ -288,7 +290,7 @@ class TermOrdValComparator_SML extends FieldComparator { if (values[slot] == null) { values[slot] = new BytesRef(); } - termsIndex.lookup(ord, values[slot]); + termsIndex.lookupOrd(ord, values[slot]); } readerGen[slot] = currentReaderGen; }