LUCENE-10603: Migrate remaining SSDV iteration to use docValueCount in production code (#995)

This commit is contained in:
Greg Miller 2022-06-30 14:01:14 -07:00 committed by GitHub
parent e05b3ec7de
commit 5f2a4998a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 54 additions and 118 deletions

View File

@ -95,7 +95,7 @@ Improvements
* LUCENE-10585: Facet module code cleanup (copy/paste scrubbing, simplification and some very minor
optimization tweaks). (Greg Miller)
- * LUCENE-10603: Update SortedSetDocValues iteration within faceting implementations to use
+ * LUCENE-10603: Update SortedSetDocValues iteration within production code to use
SortedSetDocValues#docValueCount(). (Greg Miller)
* GITHUB#983: AbstractSortedSetDocValueFacetCounts internal code cleanup/refactoring. (Greg Miller)

View File

@ -959,11 +959,7 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
long numOrds = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
numDocsWithField++;
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
numOrds++;
}
numOrds += values.docValueCount();
}
if (numDocsWithField == numOrds) {
@ -1005,10 +1001,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
LegacyDirectWriter writer = LegacyDirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
writer.add(ord);
for (int i = 0; i < values.docValueCount(); i++) {
writer.add(values.nextOrd());
}
}
writer.finish();
@ -1026,11 +1020,7 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
addressesWriter.add(addr);
values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
values.nextOrd();
addr++;
while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
addr++;
}
addr += values.docValueCount();
addressesWriter.add(addr);
}
addressesWriter.finish();

View File

@ -434,13 +434,11 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
SortedSetDocValues values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
sb2.setLength(0);
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
for (int i = 0; i < values.docValueCount(); i++) {
if (sb2.length() > 0) {
sb2.append(",");
}
sb2.append(Long.toString(ord));
sb2.append(Long.toString(values.nextOrd()));
}
maxOrdListLength = Math.max(maxOrdListLength, sb2.length());
}
@ -490,13 +488,11 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
}
sb2.setLength(0);
if (values.docID() == i) {
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
for (int j = 0; j < values.docValueCount(); j++) {
if (sb2.length() > 0) {
sb2.append(",");
}
sb2.append(Long.toString(ord));
sb2.append(Long.toString(values.nextOrd()));
}
}
// now pad to fit: these are numbers so spaces work well. reader calls trim()

View File

@ -836,9 +836,8 @@ public abstract class DocValuesConsumer implements Closeable {
int docID;
while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
if (liveDocs.get(docID)) {
long ord;
while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
bitset.set(ord);
for (int i = 0; i < dv.docValueCount(); i++) {
bitset.set(dv.nextOrd());
}
}
}

View File

@ -739,9 +739,9 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
assert values.docID() == -1;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
final long firstOrd = values.nextOrd();
assert firstOrd != SortedSetDocValues.NO_MORE_ORDS;
if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
int docValueCount = values.docValueCount();
assert docValueCount > 0;
if (docValueCount > 1) {
return false;
}
}

View File

@ -176,9 +176,8 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
for (int i = 0; i < values.docValueCount(); i++) {
long ord = values.nextOrd();
if (ord < minOrd) {
continue;
}

View File

@ -3354,9 +3354,8 @@ public final class CheckIndex implements Closeable {
"advanceExact reports different value count: " + count + " != " + count2);
}
long lastOrd = -1;
long ord;
int ordCount = 0;
while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
for (int i = 0; i < count; i++) {
if (count != dv.docValueCount()) {
throw new CheckIndexException(
"value count changed from "
@ -3365,6 +3364,7 @@ public final class CheckIndex implements Closeable {
+ dv.docValueCount()
+ " during iterating over all values");
}
long ord = dv.nextOrd();
long ord2 = dv2.nextOrd();
if (ord != ord2) {
throw new CheckIndexException(
@ -3393,11 +3393,6 @@ public final class CheckIndex implements Closeable {
throw new CheckIndexException(
"dv for field: " + fieldName + " returned docID=" + docID + " yet has no ordinals");
}
long ord2 = dv2.nextOrd();
if (ord != ord2) {
throw new CheckIndexException(
"nextDoc and advanceExact report different ords: " + ord + " != " + ord2);
}
}
if (maxOrd != maxOrd2) {
throw new CheckIndexException(

View File

@ -16,7 +16,6 @@
*/
package org.apache.lucene.index;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
@ -450,10 +449,10 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
int newDocID = sortMap.oldToNew(docID);
long startOffset = ordOffset;
long ord;
while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) {
builder.add(ord);
ordOffset++;
int docValueCount = oldValues.docValueCount();
ordOffset += docValueCount;
for (int i = 0; i < docValueCount; i++) {
builder.add(oldValues.nextOrd());
}
docValueCounts.set(newDocID, ordOffset - startOffset);
if (startOffset != ordOffset) { // do we have any values?

View File

@ -169,10 +169,8 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public boolean matches() throws IOException {
for (long ord = fcsi.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = fcsi.nextOrd()) {
if (termSet.get(ord)) {
for (int i = 0; i < fcsi.docValueCount(); i++) {
if (termSet.get(fcsi.nextOrd())) {
return true;
}
}

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
/** Selects a value from the document's set to use as the representative value */
@ -226,13 +225,11 @@ public class SortedSetSelector {
private void setOrd() throws IOException {
if (docID() != NO_MORE_DOCS) {
while (true) {
long nextOrd = in.nextOrd();
if (nextOrd == NO_MORE_ORDS) {
break;
}
ord = (int) nextOrd;
int docValueCount = in.docValueCount();
for (int i = 0; i < docValueCount - 1; i++) {
in.nextOrd();
}
ord = (int) in.nextOrd();
} else {
ord = (int) NO_MORE_ORDS;
}
@ -243,7 +240,6 @@ public class SortedSetSelector {
static class MiddleMinValue extends SortedDocValues {
final SortedSetDocValues in;
private int ord;
private int[] ords = new int[8];
MiddleMinValue(SortedSetDocValues in) {
this.in = in;
@ -304,25 +300,12 @@ public class SortedSetSelector {
private void setOrd() throws IOException {
if (docID() != NO_MORE_DOCS) {
int upto = 0;
while (true) {
long nextOrd = in.nextOrd();
if (nextOrd == NO_MORE_ORDS) {
break;
}
if (upto == ords.length) {
ords = ArrayUtil.grow(ords);
}
ords[upto++] = (int) nextOrd;
}
if (upto == 0) {
// iterator should not have returned this docID if it has no ords:
assert false;
ord = (int) NO_MORE_ORDS;
} else {
ord = ords[(upto - 1) >>> 1];
int docValueCount = in.docValueCount();
int targetIdx = (docValueCount - 1) >>> 1;
for (int i = 0; i < targetIdx; i++) {
in.nextOrd();
}
ord = (int) in.nextOrd();
} else {
ord = (int) NO_MORE_ORDS;
}
@ -333,7 +316,6 @@ public class SortedSetSelector {
static class MiddleMaxValue extends SortedDocValues {
final SortedSetDocValues in;
private int ord;
private int[] ords = new int[8];
MiddleMaxValue(SortedSetDocValues in) {
this.in = in;
@ -394,25 +376,12 @@ public class SortedSetSelector {
private void setOrd() throws IOException {
if (docID() != NO_MORE_DOCS) {
int upto = 0;
while (true) {
long nextOrd = in.nextOrd();
if (nextOrd == NO_MORE_ORDS) {
break;
}
if (upto == ords.length) {
ords = ArrayUtil.grow(ords);
}
ords[upto++] = (int) nextOrd;
}
if (upto == 0) {
// iterator should not have returned this docID if it has no ords:
assert false;
ord = (int) NO_MORE_ORDS;
} else {
ord = ords[upto >>> 1];
int docValueCount = in.docValueCount();
int targetIdx = docValueCount >>> 1;
for (int i = 0; i < targetIdx; i++) {
in.nextOrd();
}
ord = (int) in.nextOrd();
} else {
ord = (int) NO_MORE_ORDS;
}

View File

@ -275,9 +275,8 @@ public abstract class TermGroupFacetCollector extends GroupFacetCollector {
}
boolean empty = true;
if (doc == facetFieldDocTermOrds.docID()) {
long ord;
while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
process(groupOrd, (int) ord);
for (int i = 0; i < facetFieldDocTermOrds.docValueCount(); i++) {
process(groupOrd, (int) facetFieldDocTermOrds.nextOrd());
empty = false;
}
}

View File

@ -62,13 +62,12 @@ abstract class TermsCollector<DV> extends DocValuesTermsCollector<DV> {
@Override
public void collect(int doc) throws IOException {
long ord;
if (doc > docValues.docID()) {
docValues.advance(doc);
}
if (doc == docValues.docID()) {
while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
final BytesRef term = docValues.lookupOrd(ord);
for (int i = 0; i < docValues.docValueCount(); i++) {
final BytesRef term = docValues.lookupOrd(docValues.nextOrd());
collectorTerms.add(term);
}
}

View File

@ -213,9 +213,8 @@ abstract class TermsWithScoreCollector<DV> extends DocValuesTermsCollector<DV>
@Override
public void collect(int doc) throws IOException {
if (docValues.advanceExact(doc)) {
long ord;
while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
int termID = collectedTerms.add(docValues.lookupOrd(ord));
for (int i = 0; i < docValues.docValueCount(); i++) {
int termID = collectedTerms.add(docValues.lookupOrd(docValues.nextOrd()));
if (termID < 0) {
termID = -termID - 1;
} else {
@ -260,9 +259,8 @@ abstract class TermsWithScoreCollector<DV> extends DocValuesTermsCollector<DV>
@Override
public void collect(int doc) throws IOException {
if (docValues.advanceExact(doc)) {
long ord;
while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
int termID = collectedTerms.add(docValues.lookupOrd(ord));
for (int i = 0; i < docValues.docValueCount(); i++) {
int termID = collectedTerms.add(docValues.lookupOrd(docValues.nextOrd()));
if (termID < 0) {
termID = -termID - 1;
} else {

View File

@ -143,9 +143,8 @@ final class DocValuesAdapter {
if (ssvalues.advanceExact(docid)) {
List<BytesRef> values = new ArrayList<>();
long ord;
while ((ord = ssvalues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
values.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ord)));
for (int i = 0; i < ssvalues.docValueCount(); i++) {
values.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ssvalues.nextOrd())));
}
DocValues dv = DocValues.of(dvType, values, Collections.emptyList());

View File

@ -403,9 +403,8 @@ public abstract class DocValuesStats<T> {
@Override
protected void doAccumulate(int count) throws IOException {
long ord;
while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
BytesRef val = ssdv.lookupOrd(ord);
for (int i = 0; i < ssdv.docValueCount(); i++) {
BytesRef val = ssdv.lookupOrd(ssdv.nextOrd());
if (max == null || val.compareTo(max) > 0) {
max = copyFrom(val, max);
}

View File

@ -210,10 +210,8 @@ public class DocValuesTermsQuery extends Query implements Accountable {
@Override
public boolean matches() throws IOException {
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
if (bits.get(ord)) {
for (int i = 0; i < values.docValueCount(); i++) {
if (bits.get(values.nextOrd())) {
return true;
}
}

View File

@ -811,9 +811,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
contexts = new HashSet<BytesRef>();
int targetDocID = fd.doc - leaves.get(segment).docBase;
if (contextsDV.advance(targetDocID) == targetDocID) {
long ord;
while ((ord = contextsDV.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(ord));
for (int j = 0; j < contextsDV.docValueCount(); j++) {
BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(contextsDV.nextOrd()));
contexts.add(context);
}
}