Use radix sort to sort postings when index sorting is enabled. (#12114)

This switches to LSBRadixSorter instead of TimSorter to sort postings whose
index options are `DOCS`. On a synthetic benchmark this yielded barely any
difference in the case when the index order is the same as the sort order, or
reverse, but almost a 3x speedup for writing postings in the case when the
index order is mostly random.
This commit is contained in:
Adrien Grand 2023-03-15 11:56:45 +01:00 committed by GitHub
parent d407edf4b8
commit 805eb0b613
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 342 additions and 185 deletions

View File

@ -35,9 +35,11 @@ import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntBlockPool; import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LSBRadixSorter;
import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.TimSorter; import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.packed.PackedInts;
final class FreqProxTermsWriter extends TermsHash { final class FreqProxTermsWriter extends TermsHash {
@ -153,13 +155,12 @@ final class FreqProxTermsWriter extends TermsHash {
@Override @Override
public TermsEnum iterator() throws IOException { public TermsEnum iterator() throws IOException {
return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions()); return new SortingTermsEnum(in.iterator(), docMap, indexOptions);
} }
@Override @Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
return new SortingTermsEnum( return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions);
in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions());
} }
} }
@ -167,20 +168,18 @@ final class FreqProxTermsWriter extends TermsHash {
final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
private final IndexOptions indexOptions; private final IndexOptions indexOptions;
private final boolean hasPositions;
SortingTermsEnum( SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions) {
final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) {
super(in); super(in);
this.docMap = docMap; this.docMap = docMap;
this.indexOptions = indexOptions; this.indexOptions = indexOptions;
this.hasPositions = hasPositions;
} }
@Override @Override
public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException { public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException {
if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) { if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
&& PostingsEnum.featureRequested(flags, PostingsEnum.FREQS)) {
final PostingsEnum inReuse; final PostingsEnum inReuse;
final SortingPostingsEnum wrapReuse; final SortingPostingsEnum wrapReuse;
if (reuse != null && reuse instanceof SortingPostingsEnum) { if (reuse != null && reuse instanceof SortingPostingsEnum) {
@ -194,14 +193,16 @@ final class FreqProxTermsWriter extends TermsHash {
} }
final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags); final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags);
// we ignore the fact that offsets may be stored but not asked for, // we ignore the fact that positions/offsets may be stored but not asked for,
// since this code is expected to be used during addIndexes which will // since this code is expected to be used during addIndexes which will
// ask for everything. if that assumption changes in the future, we can // ask for everything. if that assumption changes in the future, we can
// factor in whether 'flags' says offsets are not required. // factor in whether 'flags' says offsets are not required.
final boolean storePositions =
indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean storeOffsets = final boolean storeOffsets =
indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
return new SortingPostingsEnum( return new SortingPostingsEnum(
docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets); docMap.size(), wrapReuse, inDocsAndPositions, docMap, storePositions, storeOffsets);
} }
final PostingsEnum inReuse; final PostingsEnum inReuse;
@ -213,161 +214,53 @@ final class FreqProxTermsWriter extends TermsHash {
inReuse = wrapReuse.getWrapped(); inReuse = wrapReuse.getWrapped();
} else { } else {
wrapReuse = null; wrapReuse = null;
inReuse = reuse; inReuse = null;
} }
final PostingsEnum inDocs = in.postings(inReuse, flags); final PostingsEnum inDocs = in.postings(inReuse, flags);
final boolean withFreqs = return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, docMap);
indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
&& PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
} }
} }
static class SortingDocsEnum extends FilterLeafReader.FilterPostingsEnum { static class SortingDocsEnum extends PostingsEnum {
private static final class DocFreqSorter extends TimSorter { private final PostingsEnum in;
private final LSBRadixSorter sorter;
private int[] docs;
private int[] freqs;
private int[] tmpDocs;
private int[] tmpFreqs;
DocFreqSorter(int maxDoc) {
super(maxDoc / 8);
this.tmpDocs = IntsRef.EMPTY_INTS;
}
public void reset(int[] docs, int[] freqs) {
this.docs = docs;
this.freqs = freqs;
if (freqs != null && tmpFreqs == null) {
tmpFreqs = new int[tmpDocs.length];
}
}
@Override
protected int compare(int i, int j) {
return docs[i] - docs[j];
}
@Override
protected void swap(int i, int j) {
int tmpDoc = docs[i];
docs[i] = docs[j];
docs[j] = tmpDoc;
if (freqs != null) {
int tmpFreq = freqs[i];
freqs[i] = freqs[j];
freqs[j] = tmpFreq;
}
}
@Override
protected void copy(int src, int dest) {
docs[dest] = docs[src];
if (freqs != null) {
freqs[dest] = freqs[src];
}
}
@Override
protected void save(int i, int len) {
if (tmpDocs.length < len) {
tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
if (freqs != null) {
tmpFreqs = new int[tmpDocs.length];
}
}
System.arraycopy(docs, i, tmpDocs, 0, len);
if (freqs != null) {
System.arraycopy(freqs, i, tmpFreqs, 0, len);
}
}
@Override
protected void restore(int i, int j) {
docs[j] = tmpDocs[i];
if (freqs != null) {
freqs[j] = tmpFreqs[i];
}
}
@Override
protected int compareSaved(int i, int j) {
return tmpDocs[i] - docs[j];
}
}
private final int maxDoc;
private final DocFreqSorter sorter;
private int[] docs; private int[] docs;
private int[] freqs;
private int docIt = -1; private int docIt = -1;
private final int upto; private final int upTo;
private final boolean withFreqs;
SortingDocsEnum( SortingDocsEnum(
int maxDoc, int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, final Sorter.DocMap docMap)
SortingDocsEnum reuse,
final PostingsEnum in,
boolean withFreqs,
final Sorter.DocMap docMap)
throws IOException { throws IOException {
super(in);
this.maxDoc = maxDoc;
this.withFreqs = withFreqs;
if (reuse != null) { if (reuse != null) {
if (reuse.maxDoc == maxDoc) { sorter = reuse.sorter;
sorter = reuse.sorter;
} else {
sorter = new DocFreqSorter(maxDoc);
}
docs = reuse.docs; docs = reuse.docs;
freqs = reuse.freqs; // maybe null
} else { } else {
docs = new int[64]; sorter = new LSBRadixSorter();
sorter = new DocFreqSorter(maxDoc); docs = IntsRef.EMPTY_INTS;
} }
docIt = -1; this.in = in;
int i = 0; int i = 0;
int doc; for (int doc = in.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = in.nextDoc()) {
if (withFreqs) { if (docs.length <= i) {
if (freqs == null || freqs.length < docs.length) { docs = ArrayUtil.grow(docs);
freqs = new int[docs.length];
}
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (i >= docs.length) {
docs = ArrayUtil.grow(docs, docs.length + 1);
freqs = ArrayUtil.grow(freqs, freqs.length + 1);
}
docs[i] = docMap.oldToNew(doc);
freqs[i] = in.freq();
++i;
}
} else {
freqs = null;
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (i >= docs.length) {
docs = ArrayUtil.grow(docs, docs.length + 1);
}
docs[i++] = docMap.oldToNew(doc);
} }
docs[i++] = docMap.oldToNew(doc);
} }
// TimSort can save much time compared to other sorts in case of upTo = i;
// reverse sorting, or when sorting a concatenation of sorted readers if (docs.length == upTo) {
sorter.reset(docs, freqs); docs = ArrayUtil.grow(docs);
sorter.sort(0, i); }
upto = i; docs[upTo] = DocIdSetIterator.NO_MORE_DOCS;
final int numBits = PackedInts.bitsRequired(Math.max(0, maxDoc - 1));
// Even though LSBRadixSorter cannot take advantage of partial ordering like TimSorter it is
// often still faster for nearly-sorted inputs.
sorter.sort(numBits, docs, upTo);
} }
// for testing PostingsEnum getWrapped() {
boolean reused(PostingsEnum other) { return in;
if (other == null || !(other instanceof SortingDocsEnum)) {
return false;
}
return docs == ((SortingDocsEnum) other).docs;
} }
@Override @Override
@ -379,26 +272,23 @@ final class FreqProxTermsWriter extends TermsHash {
@Override @Override
public int docID() { public int docID() {
return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt]; return docIt < 0 ? -1 : docs[docIt];
}
@Override
public int freq() throws IOException {
return withFreqs && docIt < upto ? freqs[docIt] : 1;
} }
@Override @Override
public int nextDoc() throws IOException { public int nextDoc() throws IOException {
if (++docIt >= upto) return NO_MORE_DOCS; return docs[++docIt];
return docs[docIt];
} }
/** Returns the wrapped {@link PostingsEnum}. */ @Override
PostingsEnum getWrapped() { public long cost() {
return in; return upTo;
} }
// we buffer up docs/freqs only, don't forward any positions requests to underlying enum @Override
public int freq() throws IOException {
return 1;
}
@Override @Override
public int nextPosition() throws IOException { public int nextPosition() throws IOException {
@ -496,7 +386,7 @@ final class FreqProxTermsWriter extends TermsHash {
private final int upto; private final int upto;
private final ByteBuffersDataInput postingInput; private final ByteBuffersDataInput postingInput;
private final boolean storeOffsets; private final boolean storePositions, storeOffsets;
private int docIt = -1; private int docIt = -1;
private int pos; private int pos;
@ -512,10 +402,12 @@ final class FreqProxTermsWriter extends TermsHash {
SortingPostingsEnum reuse, SortingPostingsEnum reuse,
final PostingsEnum in, final PostingsEnum in,
Sorter.DocMap docMap, Sorter.DocMap docMap,
boolean storePositions,
boolean storeOffsets) boolean storeOffsets)
throws IOException { throws IOException {
super(in); super(in);
this.maxDoc = maxDoc; this.maxDoc = maxDoc;
this.storePositions = storePositions;
this.storeOffsets = storeOffsets; this.storeOffsets = storeOffsets;
if (reuse != null) { if (reuse != null) {
docs = reuse.docs; docs = reuse.docs;
@ -556,37 +448,31 @@ final class FreqProxTermsWriter extends TermsHash {
this.postingInput = buffer.toDataInput(); this.postingInput = buffer.toDataInput();
} }
// for testing
boolean reused(PostingsEnum other) {
if (other == null || !(other instanceof SortingPostingsEnum)) {
return false;
}
return docs == ((SortingPostingsEnum) other).docs;
}
private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException { private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException {
int freq = in.freq(); int freq = in.freq();
out.writeVInt(freq); out.writeVInt(freq);
int previousPosition = 0; if (storePositions) {
int previousEndOffset = 0; int previousPosition = 0;
for (int i = 0; i < freq; i++) { int previousEndOffset = 0;
final int pos = in.nextPosition(); for (int i = 0; i < freq; i++) {
final BytesRef payload = in.getPayload(); final int pos = in.nextPosition();
// The low-order bit of token is set only if there is a payload, the final BytesRef payload = in.getPayload();
// previous bits are the delta-encoded position. // The low-order bit of token is set only if there is a payload, the
final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1); // previous bits are the delta-encoded position.
out.writeVInt(token); final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
previousPosition = pos; out.writeVInt(token);
if (storeOffsets) { // don't encode offsets if they are not stored previousPosition = pos;
final int startOffset = in.startOffset(); if (storeOffsets) { // don't encode offsets if they are not stored
final int endOffset = in.endOffset(); final int startOffset = in.startOffset();
out.writeVInt(startOffset - previousEndOffset); final int endOffset = in.endOffset();
out.writeVInt(endOffset - startOffset); out.writeVInt(startOffset - previousEndOffset);
previousEndOffset = endOffset; out.writeVInt(endOffset - startOffset);
} previousEndOffset = endOffset;
if (payload != null) { }
out.writeVInt(payload.length); if (payload != null) {
out.writeBytes(payload.bytes, payload.offset, payload.length); out.writeVInt(payload.length);
out.writeBytes(payload.bytes, payload.offset, payload.length);
}
} }
} }
} }
@ -631,6 +517,9 @@ final class FreqProxTermsWriter extends TermsHash {
@Override @Override
public int nextPosition() throws IOException { public int nextPosition() throws IOException {
if (storePositions == false) {
return -1;
}
final int token = postingInput.readVInt(); final int token = postingInput.readVInt();
pos += token >>> 1; pos += token >>> 1;
if (storeOffsets) { if (storeOffsets) {

View File

@ -62,6 +62,7 @@ import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
@ -83,6 +84,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
@ -2905,4 +2907,270 @@ public class TestIndexSorting extends LuceneTestCase {
w.close(); w.close();
dir.close(); dir.close();
} }
/**
 * Adds five documents whose "sort" values are out of order, force-merges to a single segment and
 * checks the doc IDs returned by the postings of terms "a" and "b" of "field".
 */
public void testSortDocs() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
  IndexWriter writer = new IndexWriter(directory, iwc);

  // A single Document instance is reused; only the two field values change between adds.
  Document document = new Document();
  NumericDocValuesField sortField = new NumericDocValuesField("sort", 0L);
  document.add(sortField);
  StringField termField = new StringField("field", "a", Field.Store.NO);
  document.add(termField);
  writer.addDocument(document);

  // Remaining documents, in insertion order: (sort value, term).
  long[] laterSortValues = {1, -1, 2, 3};
  String[] laterTerms = {"b", "a", "a", "b"};
  for (int d = 0; d < laterSortValues.length; d++) {
    sortField.setLongValue(laterSortValues[d]);
    termField.setStringValue(laterTerms[d]);
    writer.addDocument(document);
  }

  writer.forceMerge(1);
  DirectoryReader directoryReader = DirectoryReader.open(writer);
  writer.close();

  LeafReader onlyLeaf = getOnlyLeafReader(directoryReader);
  TermsEnum termsEnum = onlyLeaf.terms("field").iterator();

  // Expected doc IDs per term, in the order the terms enum returns the terms.
  String[] expectedTerms = {"a", "b"};
  int[][] expectedDocs = {{0, 1, 3}, {2, 4}};

  // The enum obtained for the previous term is handed back as the reuse candidate.
  PostingsEnum postings = null;
  for (int t = 0; t < expectedTerms.length; t++) {
    assertEquals(new BytesRef(expectedTerms[t]), termsEnum.next());
    postings = termsEnum.postings(postings, PostingsEnum.ALL);
    for (int expectedDoc : expectedDocs[t]) {
      assertEquals(expectedDoc, postings.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  }
  assertNull(termsEnum.next());

  directoryReader.close();
  directory.close();
}
/**
 * Adds five documents with {@code DOCS_AND_FREQS} postings and out-of-order "sort" values,
 * force-merges to a single segment and checks doc IDs and frequencies for terms "a" and "b".
 */
public void testSortDocsAndFreqs() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
  IndexWriter writer = new IndexWriter(directory, iwc);

  FieldType docsAndFreqs = new FieldType();
  docsAndFreqs.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  docsAndFreqs.setTokenized(false);
  docsAndFreqs.freeze();

  // One entry per document, in insertion order: sort value, term, and how many times the
  // untokenized term is added to the document (which becomes its frequency).
  long[] sortValues = {0L, 1L, -1L, 2L, 3L};
  String[] terms = {"a", "b", "a", "a", "b"};
  int[] occurrences = {2, 1, 3, 1, 3};
  for (int d = 0; d < sortValues.length; d++) {
    Document document = new Document();
    document.add(new NumericDocValuesField("sort", sortValues[d]));
    for (int k = 0; k < occurrences[d]; k++) {
      document.add(new Field("field", terms[d], docsAndFreqs));
    }
    writer.addDocument(document);
  }

  writer.forceMerge(1);
  DirectoryReader directoryReader = DirectoryReader.open(writer);
  writer.close();

  LeafReader onlyLeaf = getOnlyLeafReader(directoryReader);
  TermsEnum termsEnum = onlyLeaf.terms("field").iterator();

  // Expected (doc ID, freq) pairs per term, in the order the terms enum returns the terms.
  String[] expectedTerms = {"a", "b"};
  int[][] expectedDocs = {{0, 1, 3}, {2, 4}};
  int[][] expectedFreqs = {{3, 2, 1}, {1, 3}};

  // The enum obtained for the previous term is handed back as the reuse candidate.
  PostingsEnum postings = null;
  for (int t = 0; t < expectedTerms.length; t++) {
    assertEquals(new BytesRef(expectedTerms[t]), termsEnum.next());
    postings = termsEnum.postings(postings, PostingsEnum.ALL);
    for (int i = 0; i < expectedDocs[t].length; i++) {
      assertEquals(expectedDocs[t][i], postings.nextDoc());
      assertEquals(expectedFreqs[t][i], postings.freq());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  }
  assertNull(termsEnum.next());

  directoryReader.close();
  directory.close();
}
/**
 * Adds five tokenized documents with {@code DOCS_AND_FREQS_AND_POSITIONS} postings and
 * out-of-order "sort" values, force-merges to a single segment and checks doc IDs, frequencies
 * and positions for terms "a" and "b".
 */
public void testSortDocsAndFreqsAndPositions() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
  IndexWriter writer = new IndexWriter(directory, iwc);

  FieldType withPositions = new FieldType();
  withPositions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  withPositions.setTokenized(true);
  withPositions.freeze();

  // One entry per document, in insertion order: sort value and field text.
  long[] sortValues = {0L, 1L, -1L, 2L, 3L};
  String[] texts = {"a a b", "b", "b a b b", "a", "b b"};
  for (int d = 0; d < sortValues.length; d++) {
    Document document = new Document();
    document.add(new NumericDocValuesField("sort", sortValues[d]));
    document.add(new Field("field", texts[d], withPositions));
    writer.addDocument(document);
  }

  writer.forceMerge(1);
  DirectoryReader directoryReader = DirectoryReader.open(writer);
  writer.close();

  LeafReader onlyLeaf = getOnlyLeafReader(directoryReader);
  TermsEnum termsEnum = onlyLeaf.terms("field").iterator();

  // Expected postings per term: expectedPositions[t][i] lists the positions of term t in the
  // document expectedDocs[t][i]; the expected frequency is the number of listed positions.
  String[] expectedTerms = {"a", "b"};
  int[][] expectedDocs = {{0, 1, 3}, {0, 1, 2, 4}};
  int[][][] expectedPositions = {
    {{1}, {0, 1}, {0}},
    {{0, 2, 3}, {2}, {0}, {0, 1}}
  };

  // The enum obtained for the previous term is handed back as the reuse candidate.
  PostingsEnum postings = null;
  for (int t = 0; t < expectedTerms.length; t++) {
    assertEquals(new BytesRef(expectedTerms[t]), termsEnum.next());
    postings = termsEnum.postings(postings, PostingsEnum.ALL);
    for (int i = 0; i < expectedDocs[t].length; i++) {
      assertEquals(expectedDocs[t][i], postings.nextDoc());
      int[] positions = expectedPositions[t][i];
      assertEquals(positions.length, postings.freq());
      for (int expectedPosition : positions) {
        assertEquals(expectedPosition, postings.nextPosition());
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  }
  assertNull(termsEnum.next());

  directoryReader.close();
  directory.close();
}
/**
 * Adds five tokenized documents with {@code DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} postings
 * and out-of-order "sort" values, force-merges to a single segment and checks doc IDs,
 * frequencies, positions and start/end offsets for terms "a" and "b".
 */
public void testSortDocsAndFreqsAndPositionsAndOffsets() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
  IndexWriter writer = new IndexWriter(directory, iwc);

  FieldType withOffsets = new FieldType();
  withOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  withOffsets.setTokenized(true);
  withOffsets.freeze();

  // One entry per document, in insertion order: sort value and field text.
  long[] sortValues = {0L, 1L, -1L, 2L, 3L};
  String[] texts = {"a a b", "b", "b a b b", "a", "b b"};
  for (int d = 0; d < sortValues.length; d++) {
    Document document = new Document();
    document.add(new NumericDocValuesField("sort", sortValues[d]));
    document.add(new Field("field", texts[d], withOffsets));
    writer.addDocument(document);
  }

  writer.forceMerge(1);
  DirectoryReader directoryReader = DirectoryReader.open(writer);
  writer.close();

  LeafReader onlyLeaf = getOnlyLeafReader(directoryReader);
  TermsEnum termsEnum = onlyLeaf.terms("field").iterator();

  // Expected postings per term: expectedPositions[t][i] lists the positions of term t in the
  // document expectedDocs[t][i]; the expected frequency is the number of listed positions.
  String[] expectedTerms = {"a", "b"};
  int[][] expectedDocs = {{0, 1, 3}, {0, 1, 2, 4}};
  int[][][] expectedPositions = {
    {{1}, {0, 1}, {0}},
    {{0, 2, 3}, {2}, {0}, {0, 1}}
  };

  // The enum obtained for the previous term is handed back as the reuse candidate.
  PostingsEnum postings = null;
  for (int t = 0; t < expectedTerms.length; t++) {
    assertEquals(new BytesRef(expectedTerms[t]), termsEnum.next());
    postings = termsEnum.postings(postings, PostingsEnum.ALL);
    for (int i = 0; i < expectedDocs[t].length; i++) {
      assertEquals(expectedDocs[t][i], postings.nextDoc());
      int[] positions = expectedPositions[t][i];
      assertEquals(positions.length, postings.freq());
      for (int expectedPosition : positions) {
        assertEquals(expectedPosition, postings.nextPosition());
        // In the expected data every token at position p has start offset 2p and end offset
        // 2p + 1 (all tokens in the texts above are one character, separated by one space).
        assertEquals(2 * expectedPosition, postings.startOffset());
        assertEquals(2 * expectedPosition + 1, postings.endOffset());
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  }
  assertNull(termsEnum.next());

  directoryReader.close();
  directory.close();
}
} }