LUCENE-7400 - splitter should be able to group using sorted set doc values

This commit is contained in:
Tommaso Teofili 2016-07-30 07:58:02 +02:00
parent 9554719f12
commit 53a34b312e
1 changed file with 13 additions and 3 deletions

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
@ -82,11 +83,20 @@ public class DatasetSplitter {
// get the exact no. of existing classes // get the exact no. of existing classes
int noOfClasses = 0; int noOfClasses = 0;
for (LeafReaderContext leave : originalIndex.leaves()) { for (LeafReaderContext leave : originalIndex.leaves()) {
long valueCount = 0;
SortedDocValues classValues = leave.reader().getSortedDocValues(classFieldName); SortedDocValues classValues = leave.reader().getSortedDocValues(classFieldName);
if (classValues == null) { if (classValues != null) {
throw new IllegalStateException("the classFieldName \"" + classFieldName + "\" must index sorted doc values"); valueCount = classValues.getValueCount();
} else {
SortedSetDocValues sortedSetDocValues = leave.reader().getSortedSetDocValues(classFieldName);
if (sortedSetDocValues != null) {
valueCount = sortedSetDocValues.getValueCount();
}
} }
noOfClasses += classValues.getValueCount(); if (classValues == null) {
throw new IllegalStateException("field \"" + classFieldName + "\" must have sorted (set) doc values");
}
noOfClasses += valueCount;
} }
try { try {