mirror of https://github.com/apache/lucene.git
LUCENE-4819: move Sorted(set)DocValuesTermsEnum to codec
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1455391 13f79535-47bb-0310-9956-ffa450edef68
parent 325ab7dfe1
commit b25cc528d9
@@ -55,6 +55,16 @@ New Features
   query "i " will no longer suggest "Isla de Muerta" for example.
   (Mike McCandless)
 
+Optimizations
+
+* LUCENE-4819: Added Sorted[Set]DocValues.termsEnum(), and optimized the
+  default codec for improved enumeration performance.  (Robert Muir)
+
+Bug Fixes
+
+* LUCENE-4819: seekExact(BytesRef, boolean) did not work correctly with
+  Sorted[Set]DocValuesTermsEnum.  (Robert Muir)
+
 ======================= Lucene 4.2.0 =======================
 
 Changes in backwards compatibility policy
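The new-feature entry above describes the API this commit adds. A minimal usage sketch, not part of the patch; the reader variable, the field name "category", and the doc values setup are assumptions for illustration only:

    // Per-segment sorted doc values now expose a TermsEnum directly; the codec
    // may back it with an optimized implementation (the default codec uses an FST).
    SortedDocValues dv = atomicReader.getSortedDocValues("category");
    TermsEnum te = dv.termsEnum();
    for (BytesRef term = te.next(); term != null; term = te.next()) {
      System.out.println(te.ord() + " -> " + term.utf8ToString());  // ord() is supported
    }
    te.seekExact(0);  // seekExact(long ord) is supported as well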
@@ -32,9 +32,7 @@ import org.apache.lucene.index.MultiDocValues.OrdinalMap;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
@@ -269,7 +267,7 @@ public abstract class DocValuesConsumer implements Closeable {
           SortedDocValues dv = dvs[sub];
           Bits liveDocs = reader.getLiveDocs();
           if (liveDocs == null) {
-            liveTerms[sub] = new SortedDocValuesTermsEnum(dv);
+            liveTerms[sub] = dv.termsEnum();
           } else {
             OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
             for (int i = 0; i < reader.maxDoc(); i++) {
@@ -277,7 +275,7 @@ public abstract class DocValuesConsumer implements Closeable {
                 bitset.set(dv.getOrd(i));
               }
             }
-            liveTerms[sub] = new BitsFilteredTermsEnum(new SortedDocValuesTermsEnum(dv), bitset);
+            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
           }
         }
 
@@ -401,7 +399,7 @@ public abstract class DocValuesConsumer implements Closeable {
           SortedSetDocValues dv = dvs[sub];
           Bits liveDocs = reader.getLiveDocs();
           if (liveDocs == null) {
-            liveTerms[sub] = new SortedSetDocValuesTermsEnum(dv);
+            liveTerms[sub] = dv.termsEnum();
           } else {
             OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
             for (int i = 0; i < reader.maxDoc(); i++) {
@@ -413,7 +411,7 @@ public abstract class DocValuesConsumer implements Closeable {
                }
              }
            }
-            liveTerms[sub] = new BitsFilteredTermsEnum(new SortedSetDocValuesTermsEnum(dv), bitset);
+            liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
           }
         }
 
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene42;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -25,6 +26,8 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
@@ -32,8 +35,10 @@ import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
@@ -285,6 +290,11 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
       public int getValueCount() {
         return (int)entry.numOrds;
       }
+
+      @Override
+      public TermsEnum termsEnum() {
+        return new FSTTermsEnum(fst);
+      }
     };
   }
 
@@ -369,6 +379,11 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
       public long getValueCount() {
         return entry.numOrds;
       }
+
+      @Override
+      public TermsEnum termsEnum() {
+        return new FSTTermsEnum(fst);
+      }
     };
   }
 
@@ -396,4 +411,106 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
     long offset;
     long numOrds;
   }
+
+  // exposes FSTEnum directly as a TermsEnum: avoids binary-search next()
+  static class FSTTermsEnum extends TermsEnum {
+    final BytesRefFSTEnum<Long> in;
+
+    // this is all for the complicated seek(ord)...
+    // maybe we should add a FSTEnum that supports this operation?
+    final FST<Long> fst;
+    final FST.BytesReader bytesReader;
+    final Arc<Long> firstArc = new Arc<Long>();
+    final Arc<Long> scratchArc = new Arc<Long>();
+    final IntsRef scratchInts = new IntsRef();
+    final BytesRef scratchBytes = new BytesRef();
+
+    FSTTermsEnum(FST<Long> fst) {
+      this.fst = fst;
+      in = new BytesRefFSTEnum<Long>(fst);
+      bytesReader = fst.getBytesReader();
+    }
+
+    @Override
+    public BytesRef next() throws IOException {
+      InputOutput<Long> io = in.next();
+      if (io == null) {
+        return null;
+      } else {
+        return io.input;
+      }
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+      if (in.seekCeil(text) == null) {
+        return SeekStatus.END;
+      } else if (term().equals(text)) {
+        // TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729
+        // to remove this comparison?
+        return SeekStatus.FOUND;
+      } else {
+        return SeekStatus.NOT_FOUND;
+      }
+    }
+
+    @Override
+    public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+      if (in.seekExact(text) == null) {
+        return false;
+      } else {
+        return true;
+      }
+    }
+
+    @Override
+    public void seekExact(long ord) throws IOException {
+      // TODO: would be better to make this simpler and faster.
+      // but we don't want to introduce a bug that corrupts our enum state!
+      bytesReader.setPosition(0);
+      fst.getFirstArc(firstArc);
+      IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
+      scratchBytes.bytes = new byte[output.length];
+      scratchBytes.offset = 0;
+      scratchBytes.length = 0;
+      Util.toBytesRef(output, scratchBytes);
+      // TODO: we could do this lazily, better to try to push into FSTEnum though?
+      in.seekExact(scratchBytes);
+    }
+
+    @Override
+    public BytesRef term() throws IOException {
+      return in.current().input;
+    }
+
+    @Override
+    public long ord() throws IOException {
+      return in.current().output;
+    }
+
+    @Override
+    public int docFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
 }
@@ -217,7 +217,7 @@ public class MultiDocValues {
     } else {
       TermsEnum enums[] = new TermsEnum[values.length];
       for (int i = 0; i < values.length; i++) {
-        enums[i] = new SortedDocValuesTermsEnum(values[i]);
+        enums[i] = values[i].termsEnum();
       }
       OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), enums);
       return new MultiSortedDocValues(values, starts, mapping);
@@ -261,7 +261,7 @@ public class MultiDocValues {
     } else {
       TermsEnum enums[] = new TermsEnum[values.length];
       for (int i = 0; i < values.length; i++) {
-        enums[i] = new SortedSetDocValuesTermsEnum(values[i]);
+        enums[i] = values[i].termsEnum();
       }
       OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), enums);
       return new MultiSortedSetDocValues(values, starts, mapping);
@@ -114,4 +114,12 @@ public abstract class SortedDocValues extends BinaryDocValues {
 
     return -(low + 1); // key not found.
   }
+
+  /**
+   * Returns a {@link TermsEnum} over the values.
+   * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
+   */
+  public TermsEnum termsEnum() {
+    return new SortedDocValuesTermsEnum(this);
+  }
 }
@@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef;
 /** Implements a {@link TermsEnum} wrapping a provided
  * {@link SortedDocValues}. */
 
-public class SortedDocValuesTermsEnum extends TermsEnum {
+class SortedDocValuesTermsEnum extends TermsEnum {
   private final SortedDocValues values;
   private int currentOrd = -1;
   private final BytesRef term = new BytesRef();
@@ -64,6 +64,12 @@ public class SortedDocValuesTermsEnum extends TermsEnum {
   public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
     int ord = values.lookupTerm(text);
     if (ord >= 0) {
+      term.offset = 0;
+      // TODO: is there a cleaner way?
+      // term.bytes may be pointing to codec-private byte[]
+      // storage, so we must force new byte[] allocation:
+      term.bytes = new byte[text.length];
+      term.copyBytes(text);
       currentOrd = ord;
       return true;
     } else {
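The fix above copies the sought term into a freshly allocated byte[] instead of aliasing the caller's (or codec-private) storage. A small illustrative sketch of the same defensive-copy idea using only BytesRef utilities; it is not code from this patch:

    BytesRef incoming = new BytesRef("hello");
    BytesRef kept = BytesRef.deepCopyOf(incoming);    // fresh byte[], nothing shared
    incoming.bytes[incoming.offset] = (byte) 'j';     // caller later reuses its buffer...
    assert kept.utf8ToString().equals("hello");       // ...but the copied term is unaffected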
@@ -117,4 +117,12 @@ public abstract class SortedSetDocValues {
 
     return -(low + 1); // key not found.
   }
+
+  /**
+   * Returns a {@link TermsEnum} over the values.
+   * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
+   */
+  public TermsEnum termsEnum() {
+    return new SortedSetDocValuesTermsEnum(this);
+  }
 }
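A short sketch of how a document's ordinals and the new termsEnum() hook can be combined; illustrative only, with the reader variable, the field name "tags", and docID assumed:

    SortedSetDocValues ssdv = atomicReader.getSortedSetDocValues("tags");
    TermsEnum te = ssdv.termsEnum();                  // supports ord() and seekExact(long)
    ssdv.setDocument(docID);
    long ord;
    while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      te.seekExact(ord);                              // resolve the ordinal through the enum
      System.out.println(te.term().utf8ToString());
    }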
@@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef;
 /** Implements a {@link TermsEnum} wrapping a provided
  * {@link SortedSetDocValues}. */
 
-public class SortedSetDocValuesTermsEnum extends TermsEnum {
+class SortedSetDocValuesTermsEnum extends TermsEnum {
   private final SortedSetDocValues values;
   private long currentOrd = -1;
   private final BytesRef term = new BytesRef();
@@ -64,6 +64,12 @@ public class SortedSetDocValuesTermsEnum extends TermsEnum {
   public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
     long ord = values.lookupTerm(text);
     if (ord >= 0) {
+      term.offset = 0;
+      // TODO: is there a cleaner way?
+      // term.bytes may be pointing to codec-private byte[]
+      // storage, so we must force new byte[] allocation:
+      term.bytes = new byte[text.length];
+      term.copyBytes(text);
       currentOrd = ord;
       return true;
     } else {
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -98,7 +97,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
 
       @Override
       public TermsEnum iterator(TermsEnum reuse) {
-        return new SortedSetDocValuesTermsEnum(docTermOrds);
+        return docTermOrds.termsEnum();
       }
 
       @Override
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -98,7 +97,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
 
       @Override
       public TermsEnum iterator(TermsEnum reuse) {
-        return new SortedDocValuesTermsEnum(fcsi);
+        return fcsi.termsEnum();
       }
 
      @Override
@@ -219,7 +219,7 @@ public class TestFieldCache extends LuceneTestCase {
 
     int nTerms = termsIndex.getValueCount();
 
-    TermsEnum tenum = new SortedDocValuesTermsEnum(termsIndex);
+    TermsEnum tenum = termsIndex.termsEnum();
     BytesRef val = new BytesRef();
     for (int i=0; i<nTerms; i++) {
       BytesRef val1 = tenum.next();
@@ -24,9 +24,7 @@ import java.util.List;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
@@ -170,7 +168,7 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
 
     @Override
     protected SegmentResult createSegmentResult() throws IOException {
-      return new SegmentResult(segmentFacetCounts, segmentTotalCount, new SortedDocValuesTermsEnum(facetFieldTermsIndex), startFacetOrd, endFacetOrd);
+      return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.termsEnum(), startFacetOrd, endFacetOrd);
     }
 
     private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
@@ -289,7 +287,7 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
       if (facetFieldNumTerms == 0) {
         facetOrdTermsEnum = null;
       } else {
-        facetOrdTermsEnum = new SortedSetDocValuesTermsEnum(facetFieldDocTermOrds);
+        facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum();
       }
       // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
       segmentFacetCounts = new int[facetFieldNumTerms + 1];
@@ -45,6 +45,7 @@ import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldCache;
@@ -702,6 +703,77 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
     directory.close();
   }
+
+  public void testSortedTermsEnum() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new SortedDocValuesField("field", new BytesRef("world")));
+    iwriter.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new SortedDocValuesField("field", new BytesRef("beer")));
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+    assertEquals(3, dv.getValueCount());
+
+    TermsEnum termsEnum = dv.termsEnum();
+
+    // next()
+    assertEquals("beer", termsEnum.next().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+
+    // seekCeil()
+    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+
+    // seekExact()
+    assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
+    assertEquals(Codec.getDefault().toString(), "hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));
+
+    // seek(ord)
+    termsEnum.seekExact(0);
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    termsEnum.seekExact(1);
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    termsEnum.seekExact(2);
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    ireader.close();
+    directory.close();
+  }
+
 public void testEmptySortedBytes() throws IOException {
   Analyzer analyzer = new MockAnalyzer(random());
 
@@ -1658,6 +1730,71 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
     directory.close();
   }
+
+  public void testSortedSetTermsEnum() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
+    iwriter.addDocument(doc);
+
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(3, dv.getValueCount());
+
+    TermsEnum termsEnum = dv.termsEnum();
+
+    // next()
+    assertEquals("beer", termsEnum.next().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+
+    // seekCeil()
+    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+
+    // seekExact()
+    assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));
+
+    // seek(ord)
+    termsEnum.seekExact(0);
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    termsEnum.seekExact(1);
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    termsEnum.seekExact(2);
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    ireader.close();
+    directory.close();
+  }
+
 private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
   Directory dir = newDirectory();
   IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
 
@@ -23,7 +23,6 @@ import java.util.concurrent.*;
 
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -155,7 +154,7 @@ class PerSegmentSingleValuedFaceting {
       seg.pos = seg.startTermIndex;
     }
     if (seg.pos < seg.endTermIndex) {
-      seg.tenum = new SortedDocValuesTermsEnum(seg.si);
+      seg.tenum = seg.si.termsEnum();
       seg.tenum.seekExact(seg.pos);
       seg.tempBR = seg.tenum.term();
       queue.add(seg);