mirror of https://github.com/apache/lucene.git
SOLR-13399: fix splitByPrefix test
This commit is contained in:
parent
b4ef1b279c
commit
5b76555dac
|
@ -47,6 +47,7 @@ import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
import org.apache.solr.update.SolrIndexSplitter;
|
import org.apache.solr.update.SolrIndexSplitter;
|
||||||
import org.apache.solr.update.SplitIndexCommand;
|
import org.apache.solr.update.SplitIndexCommand;
|
||||||
|
import org.apache.solr.util.RTimer;
|
||||||
import org.apache.solr.util.RefCounted;
|
import org.apache.solr.util.RefCounted;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -263,6 +264,11 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
|
||||||
// Orders RangeCount entries by their range alone (delegates to range.compareTo); the count is not compared.
public int compareTo(RangeCount o) {
|
public int compareTo(RangeCount o) {
|
||||||
return this.range.compareTo(o.range);
|
return this.range.compareTo(o.range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Renders this entry as "<range>=<count>", using the range's own toString() for the left-hand side.
@Override
|
||||||
|
public String toString() {
|
||||||
|
return range.toString() + "=" + count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -286,6 +292,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
|
||||||
|
|
||||||
// Returns a list of range counts sorted by the range lower bound
|
// Returns a list of range counts sorted by the range lower bound
|
||||||
static Collection<RangeCount> getHashHistogram(SolrIndexSearcher searcher, String prefixField, DocRouter router, DocCollection collection) throws IOException {
|
static Collection<RangeCount> getHashHistogram(SolrIndexSearcher searcher, String prefixField, DocRouter router, DocCollection collection) throws IOException {
|
||||||
|
RTimer timer = new RTimer();
|
||||||
TreeMap<DocRouter.Range,RangeCount> counts = new TreeMap<>();
|
TreeMap<DocRouter.Range,RangeCount> counts = new TreeMap<>();
|
||||||
|
|
||||||
Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), prefixField);
|
Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), prefixField);
|
||||||
|
@ -293,19 +300,30 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
|
||||||
return counts.values();
|
return counts.values();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int numPrefixes = 0;
|
||||||
|
int numTriLevel = 0;
|
||||||
|
int numCollisions = 0;
|
||||||
|
long sumBuckets = 0;
|
||||||
|
|
||||||
TermsEnum termsEnum = terms.iterator();
|
TermsEnum termsEnum = terms.iterator();
|
||||||
for (;;) {
|
for (;;) {
|
||||||
BytesRef term = termsEnum.next();
|
BytesRef term = termsEnum.next();
|
||||||
if (term == null) break;
|
if (term == null) break;
|
||||||
|
numPrefixes++;
|
||||||
|
|
||||||
String termStr = term.utf8ToString();
|
String termStr = term.utf8ToString();
|
||||||
int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
|
int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
|
||||||
// truncate to first separator since we don't support multiple levels currently
|
// truncate to first separator since we don't support multiple levels currently
|
||||||
|
// NOTE: this does not currently work for tri-level composite ids since the number of bits allocated to the first ID is 16 for a 2 part id
|
||||||
|
// and 8 for a 3 part id!
|
||||||
if (firstSep != termStr.length()-1 && firstSep > 0) {
|
if (firstSep != termStr.length()-1 && firstSep > 0) {
|
||||||
|
numTriLevel++;
|
||||||
termStr = termStr.substring(0, firstSep+1);
|
termStr = termStr.substring(0, firstSep+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
|
DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
|
||||||
int numDocs = termsEnum.docFreq();
|
int numDocs = termsEnum.docFreq();
|
||||||
|
sumBuckets += numDocs;
|
||||||
|
|
||||||
RangeCount rangeCount = new RangeCount(range, numDocs);
|
RangeCount rangeCount = new RangeCount(range, numDocs);
|
||||||
|
|
||||||
|
@ -313,16 +331,18 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
|
||||||
if (prev != null) {
|
if (prev != null) {
|
||||||
// we hit a hash collision or truncated a prefix to first level, so add the buckets together.
|
// we hit a hash collision or truncated a prefix to first level, so add the buckets together.
|
||||||
rangeCount.count += prev.count;
|
rangeCount.count += prev.count;
|
||||||
|
numCollisions++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info("Split histogram: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numTriLevel={} numCollisions={}", timer.getTime(), counts.size(), sumBuckets, numPrefixes, numTriLevel, numCollisions);
|
||||||
|
|
||||||
return counts.values();
|
return counts.values();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// returns the list of recommended splits, or null if there is not enough information
|
// returns the list of recommended splits, or null if there is not enough information
|
||||||
static Collection<DocRouter.Range> getSplits(Collection<RangeCount> rawCounts, DocRouter.Range currentRange) throws Exception {
|
static Collection<DocRouter.Range> getSplits(Collection<RangeCount> rawCounts, DocRouter.Range currentRange) throws Exception {
|
||||||
|
|
||||||
int totalCount = 0;
|
int totalCount = 0;
|
||||||
RangeCount biggest = null; // keep track of the largest in case we need to split it out into its own shard
|
RangeCount biggest = null; // keep track of the largest in case we need to split it out into its own shard
|
||||||
RangeCount last = null; // keep track of what the last range is
|
RangeCount last = null; // keep track of what the last range is
|
||||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase.BadApple;
|
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||||
|
@ -45,7 +44,6 @@ import org.slf4j.LoggerFactory;
|
||||||
// This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
|
// This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
|
||||||
// See SplitHandlerTest for random tests of lower-level split selection logic.
|
// See SplitHandlerTest for random tests of lower-level split selection logic.
|
||||||
//
|
//
|
||||||
@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13399")
|
|
||||||
public class SplitByPrefixTest extends SolrCloudTestCase {
|
public class SplitByPrefixTest extends SolrCloudTestCase {
|
||||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
|
|
||||||
|
@ -133,6 +131,7 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
|
||||||
SolrInputDocument getDoc(String prefix, String unique) {
|
SolrInputDocument getDoc(String prefix, String unique) {
|
||||||
String secondLevel = "";
|
String secondLevel = "";
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
|
prefix = prefix.substring(0, prefix.length()-1) + "/16!"; // change "foo!" into "foo/16!" to match 2 level compositeId
|
||||||
secondLevel="" + random().nextInt(2) + "!";
|
secondLevel="" + random().nextInt(2) + "!";
|
||||||
}
|
}
|
||||||
return sdoc("id", prefix + secondLevel + unique);
|
return sdoc("id", prefix + secondLevel + unique);
|
||||||
|
|
Loading…
Reference in New Issue