mirror of https://github.com/apache/lucene.git
SOLR-2272: pseudo-join
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096610 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
44ba0859db
commit
cf2b0d322b
|
@ -129,6 +129,8 @@ public class DocTermOrds {
|
||||||
protected BytesRef prefix;
|
protected BytesRef prefix;
|
||||||
protected int ordBase;
|
protected int ordBase;
|
||||||
|
|
||||||
|
protected DocsEnum docsEnum; //used while uninverting
|
||||||
|
|
||||||
public long ramUsedInBytes() {
|
public long ramUsedInBytes() {
|
||||||
// can cache the mem size since it shouldn't change
|
// can cache the mem size since it shouldn't change
|
||||||
if (memsz!=0) return memsz;
|
if (memsz!=0) return memsz;
|
||||||
|
@ -270,7 +272,7 @@ public class DocTermOrds {
|
||||||
// frequent terms ahead of time.
|
// frequent terms ahead of time.
|
||||||
|
|
||||||
int termNum = 0;
|
int termNum = 0;
|
||||||
DocsEnum docsEnum = null;
|
docsEnum = null;
|
||||||
|
|
||||||
// Loop begins with te positioned to first term (we call
|
// Loop begins with te positioned to first term (we call
|
||||||
// seek above):
|
// seek above):
|
||||||
|
|
|
@ -137,6 +137,12 @@ New Features
|
||||||
* SOLR-2383: /browse improvements: generalize range and date facet display
|
* SOLR-2383: /browse improvements: generalize range and date facet display
|
||||||
(Jan Høydahl via yonik)
|
(Jan Høydahl via yonik)
|
||||||
|
|
||||||
|
* SOLR-2272: Pseudo-join queries / filters. Examples:
|
||||||
|
To restrict to the set of parents with at least one blue-eyed child:
|
||||||
|
fq={!join from=parent to=name}eyes:blue
|
||||||
|
To restrict to the set of children with at least one blue-eyed parent:
|
||||||
|
fq={!join from=name to=parent}eyes:blue
|
||||||
|
(yonik)
|
||||||
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
|
@ -163,6 +163,25 @@ public class ResponseBuilder
|
||||||
debugInfo.add( name, val );
|
debugInfo.add( name, val );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addDebug(Object val, String... path) {
|
||||||
|
if( debugInfo == null ) {
|
||||||
|
debugInfo = new SimpleOrderedMap<Object>();
|
||||||
|
}
|
||||||
|
|
||||||
|
NamedList<Object> target = debugInfo;
|
||||||
|
for (int i=0; i<path.length-1; i++) {
|
||||||
|
String elem = path[i];
|
||||||
|
NamedList<Object> newTarget = (NamedList<Object>)debugInfo.get(elem);
|
||||||
|
if (newTarget == null) {
|
||||||
|
newTarget = new SimpleOrderedMap<Object>();
|
||||||
|
target.add(elem, newTarget);
|
||||||
|
}
|
||||||
|
target = newTarget;
|
||||||
|
}
|
||||||
|
|
||||||
|
target.add(path[path.length-1], val);
|
||||||
|
}
|
||||||
|
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.packed.Direct16;
|
import org.apache.lucene.util.packed.Direct16;
|
||||||
import org.apache.lucene.util.packed.Direct32;
|
import org.apache.lucene.util.packed.Direct32;
|
||||||
import org.apache.lucene.util.packed.Direct8;
|
import org.apache.lucene.util.packed.Direct8;
|
||||||
|
@ -682,14 +683,15 @@ public class SimpleFacets {
|
||||||
|
|
||||||
if (deState==null) {
|
if (deState==null) {
|
||||||
deState = new SolrIndexSearcher.DocsEnumState();
|
deState = new SolrIndexSearcher.DocsEnumState();
|
||||||
|
deState.fieldName = StringHelper.intern(field);
|
||||||
deState.deletedDocs = MultiFields.getDeletedDocs(r);
|
deState.deletedDocs = MultiFields.getDeletedDocs(r);
|
||||||
deState.termsEnum = termsEnum;
|
deState.termsEnum = termsEnum;
|
||||||
deState.reuse = docsEnum;
|
deState.docsEnum = docsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = searcher.numDocs(new TermQuery(t), docs, deState);
|
c = searcher.numDocs(docs, deState);
|
||||||
|
|
||||||
docsEnum = deState.reuse;
|
docsEnum = deState.docsEnum;
|
||||||
} else {
|
} else {
|
||||||
// iterate over TermDocs to calculate the intersection
|
// iterate over TermDocs to calculate the intersection
|
||||||
|
|
||||||
|
|
|
@ -17,11 +17,15 @@
|
||||||
|
|
||||||
package org.apache.solr.request;
|
package org.apache.solr.request;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.handler.component.ResponseBuilder;
|
import org.apache.solr.handler.component.ResponseBuilder;
|
||||||
import org.apache.solr.response.SolrQueryResponse;
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
public class SolrRequestInfo {
|
public class SolrRequestInfo {
|
||||||
|
@ -31,6 +35,8 @@ public class SolrRequestInfo {
|
||||||
protected SolrQueryResponse rsp;
|
protected SolrQueryResponse rsp;
|
||||||
protected Date now;
|
protected Date now;
|
||||||
protected ResponseBuilder rb;
|
protected ResponseBuilder rb;
|
||||||
|
protected List<Closeable> closeHooks;
|
||||||
|
|
||||||
|
|
||||||
public static SolrRequestInfo getRequestInfo() {
|
public static SolrRequestInfo getRequestInfo() {
|
||||||
return threadLocal.get();
|
return threadLocal.get();
|
||||||
|
@ -48,8 +54,21 @@ public class SolrRequestInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void clearRequestInfo() {
|
public static void clearRequestInfo() {
|
||||||
|
try {
|
||||||
|
SolrRequestInfo info = threadLocal.get();
|
||||||
|
if (info != null && info.closeHooks != null) {
|
||||||
|
for (Closeable hook : info.closeHooks) {
|
||||||
|
try {
|
||||||
|
hook.close();
|
||||||
|
} catch (Throwable throwable) {
|
||||||
|
SolrException.log(SolrCore.log, "Exception during close hook", throwable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
threadLocal.remove();
|
threadLocal.remove();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public SolrRequestInfo(SolrQueryRequest req, SolrQueryResponse rsp) {
|
public SolrRequestInfo(SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||||
this.req = req;
|
this.req = req;
|
||||||
|
@ -88,4 +107,14 @@ public class SolrRequestInfo {
|
||||||
public void setResponseBuilder(ResponseBuilder rb) {
|
public void setResponseBuilder(ResponseBuilder rb) {
|
||||||
this.rb = rb;
|
this.rb = rb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addCloseHook(Closeable hook) {
|
||||||
|
// is this better here, or on SolrQueryRequest?
|
||||||
|
synchronized (this) {
|
||||||
|
if (closeHooks == null) {
|
||||||
|
closeHooks = new LinkedList<Closeable>();
|
||||||
|
}
|
||||||
|
closeHooks.add(hook);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TermRangeQuery;
|
import org.apache.lucene.search.TermRangeQuery;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.noggit.CharArr;
|
import org.apache.noggit.CharArr;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
import org.apache.solr.common.params.FacetParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
@ -122,11 +123,15 @@ public class UnInvertedField extends DocTermOrds {
|
||||||
|
|
||||||
if (deState == null) {
|
if (deState == null) {
|
||||||
deState = new SolrIndexSearcher.DocsEnumState();
|
deState = new SolrIndexSearcher.DocsEnumState();
|
||||||
deState.termsEnum = te;
|
deState.fieldName = StringHelper.intern(field);
|
||||||
|
// deState.termsEnum = te.tenum;
|
||||||
|
deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
|
||||||
|
deState.docsEnum = docsEnum;
|
||||||
|
deState.minSetSizeCached = maxTermDocFreq;
|
||||||
}
|
}
|
||||||
|
docsEnum = deState.docsEnum;
|
||||||
maxTermCounts[termNum] = searcher.getDocSet(new TermQuery(new Term(field, topTerm.term)), deState).size();
|
DocSet set = searcher.getDocSet(deState);
|
||||||
//System.out.println(" big term termNum=" + termNum + " term=" + topTerm.term.utf8ToString() + " size=" + maxTermCounts[termNum] + " dF=" + te.docFreq());
|
maxTermCounts[termNum] = set.size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,10 +163,10 @@ public class UnInvertedField extends DocTermOrds {
|
||||||
super(field,
|
super(field,
|
||||||
// threshold, over which we use set intersections instead of counting
|
// threshold, over which we use set intersections instead of counting
|
||||||
// to (1) save memory, and (2) speed up faceting.
|
// to (1) save memory, and (2) speed up faceting.
|
||||||
// Add 1 for testing purposes so that there will always be some terms under
|
// Add 2 for testing purposes so that there will always be some terms under
|
||||||
// the threshold even when the index is very
|
// the threshold even when the index is very
|
||||||
// small.
|
// small.
|
||||||
searcher.maxDoc()/20 + 1,
|
searcher.maxDoc()/20 + 2,
|
||||||
DEFAULT_INDEX_INTERVAL_BITS);
|
DEFAULT_INDEX_INTERVAL_BITS);
|
||||||
//System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc());
|
//System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc());
|
||||||
|
|
||||||
|
|
|
@ -160,6 +160,16 @@ public class BitDocSet extends DocSetBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean intersects(DocSet other) {
|
||||||
|
if (other instanceof BitDocSet) {
|
||||||
|
return bits.intersects(((BitDocSet)other).bits);
|
||||||
|
} else {
|
||||||
|
// they had better not call us back!
|
||||||
|
return other.intersects(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int unionSize(DocSet other) {
|
public int unionSize(DocSet other) {
|
||||||
if (other instanceof BitDocSet) {
|
if (other instanceof BitDocSet) {
|
||||||
|
@ -183,6 +193,11 @@ public class BitDocSet extends DocSetBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setBitsOn(OpenBitSet target) {
|
||||||
|
target.union(bits);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocSet andNot(DocSet other) {
|
public DocSet andNot(DocSet other) {
|
||||||
OpenBitSet newbits = (OpenBitSet)(bits.clone());
|
OpenBitSet newbits = (OpenBitSet)(bits.clone());
|
||||||
|
@ -211,4 +226,9 @@ public class BitDocSet extends DocSetBase {
|
||||||
public long memSize() {
|
public long memSize() {
|
||||||
return (bits.getBits().length << 3) + 16;
|
return (bits.getBits().length << 3) + 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected BitDocSet clone() {
|
||||||
|
return new BitDocSet((OpenBitSet)bits.clone(), size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,6 +115,9 @@ public interface DocSet /* extends Collection<Integer> */ {
|
||||||
*/
|
*/
|
||||||
public int intersectionSize(DocSet other);
|
public int intersectionSize(DocSet other);
|
||||||
|
|
||||||
|
/** Returns true if these sets have any elements in common */
|
||||||
|
public boolean intersects(DocSet other);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the union of this set with another set. Neither set is modified - a new DocSet is
|
* Returns the union of this set with another set. Neither set is modified - a new DocSet is
|
||||||
* created and returned.
|
* created and returned.
|
||||||
|
@ -146,6 +149,14 @@ public interface DocSet /* extends Collection<Integer> */ {
|
||||||
* methods will be invoked with.
|
* methods will be invoked with.
|
||||||
*/
|
*/
|
||||||
public Filter getTopFilter();
|
public Filter getTopFilter();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the docs from this set and sets those bits on the target OpenBitSet.
|
||||||
|
* The target should be sized large enough to accommodate all of the documents before calling this method.
|
||||||
|
*/
|
||||||
|
public void setBitsOn(OpenBitSet target);
|
||||||
|
|
||||||
|
public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A base class that may be useful for implementing DocSets */
|
/** A base class that may be useful for implementing DocSets */
|
||||||
|
@ -213,6 +224,17 @@ abstract class DocSetBase implements DocSet {
|
||||||
return new BitDocSet(newbits);
|
return new BitDocSet(newbits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean intersects(DocSet other) {
|
||||||
|
// intersection is overloaded in the smaller DocSets to be more
|
||||||
|
// efficient, so dispatch off of it instead.
|
||||||
|
if (!(other instanceof BitDocSet)) {
|
||||||
|
return other.intersects(this);
|
||||||
|
}
|
||||||
|
// less efficient way: get the intersection size
|
||||||
|
return intersectionSize(other) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public DocSet union(DocSet other) {
|
public DocSet union(DocSet other) {
|
||||||
OpenBitSet newbits = (OpenBitSet)(this.getBits().clone());
|
OpenBitSet newbits = (OpenBitSet)(this.getBits().clone());
|
||||||
newbits.or(other.getBits());
|
newbits.or(other.getBits());
|
||||||
|
@ -295,6 +317,14 @@ abstract class DocSetBase implements DocSet {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setBitsOn(OpenBitSet target) {
|
||||||
|
DocIterator iter = iterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
target.fastSet(iter.nextDoc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
|
|
||||||
package org.apache.solr.search;
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <code>DocSlice</code> implements DocList as an array of docids and optional scores.
|
* <code>DocSlice</code> implements DocList as an array of docids and optional scores.
|
||||||
*
|
*
|
||||||
|
@ -141,4 +143,22 @@ public class DocSlice extends DocSetBase implements DocList {
|
||||||
HashDocSet h = new HashDocSet(docs,offset,len);
|
HashDocSet h = new HashDocSet(docs,offset,len);
|
||||||
return h.intersectionSize(other);
|
return h.intersectionSize(other);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean intersects(DocSet other) {
|
||||||
|
if (other instanceof SortedIntDocSet || other instanceof HashDocSet) {
|
||||||
|
return other.intersects(this);
|
||||||
|
}
|
||||||
|
HashDocSet h = new HashDocSet(docs,offset,len);
|
||||||
|
return h.intersects(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DocSlice clone() {
|
||||||
|
try {
|
||||||
|
// DocSlice is not currently mutable
|
||||||
|
DocSlice slice = (DocSlice) super.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,12 @@ public final class HashDocSet extends DocSetBase {
|
||||||
|
|
||||||
private final int mask;
|
private final int mask;
|
||||||
|
|
||||||
|
/**
 * Copy constructor: takes a clone of <code>set</code>'s table and reuses its
 * size and mask values.
 */
public HashDocSet(HashDocSet set) {
  this.mask = set.mask;
  this.size = set.size;
  this.table = set.table.clone();
}
|
||||||
|
|
||||||
/** Create a HashDocSet from a list of *unique* ids */
|
/** Create a HashDocSet from a list of *unique* ids */
|
||||||
public HashDocSet(int[] docs, int offset, int len) {
|
public HashDocSet(int[] docs, int offset, int len) {
|
||||||
this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR);
|
this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR);
|
||||||
|
@ -207,6 +213,31 @@ public final class HashDocSet extends DocSetBase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean intersects(DocSet other) {
|
||||||
|
if (other instanceof HashDocSet) {
|
||||||
|
// set "a" to the smallest doc set for the most efficient
|
||||||
|
// intersection.
|
||||||
|
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
|
||||||
|
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
|
||||||
|
|
||||||
|
for (int i=0; i<a.table.length; i++) {
|
||||||
|
int id=a.table[i];
|
||||||
|
if (id >= 0 && b.exists(id)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
for (int i=0; i<table.length; i++) {
|
||||||
|
int id=table[i];
|
||||||
|
if (id >= 0 && other.exists(id)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocSet andNot(DocSet other) {
|
public DocSet andNot(DocSet other) {
|
||||||
|
@ -249,6 +280,10 @@ public final class HashDocSet extends DocSetBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected HashDocSet clone() {
|
||||||
|
return new HashDocSet(this);
|
||||||
|
}
|
||||||
|
|
||||||
// don't implement andNotSize() and unionSize() on purpose... they are implemented
|
// don't implement andNotSize() and unionSize() on purpose... they are implemented
|
||||||
// in BaseDocSet in terms of intersectionSize().
|
// in BaseDocSet in terms of intersectionSize().
|
||||||
|
|
|
@ -0,0 +1,572 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.*;
|
||||||
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
|
import org.apache.solr.core.CoreContainer;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.handler.component.ResponseBuilder;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrRequestInfo;
|
||||||
|
import org.apache.solr.schema.TrieField;
|
||||||
|
import org.apache.solr.util.RefCounted;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
|
||||||
|
public class JoinQParserPlugin extends QParserPlugin {
|
||||||
|
public static String NAME = "join";
|
||||||
|
|
||||||
|
public void init(NamedList args) {
|
||||||
|
}
|
||||||
|
|
||||||
|
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
|
||||||
|
return new QParser(qstr, localParams, params, req) {
|
||||||
|
public Query parse() throws ParseException {
|
||||||
|
String fromField = getParam("from");
|
||||||
|
String fromIndex = getParam("fromIndex");
|
||||||
|
String toField = getParam("to");
|
||||||
|
String v = localParams.get("v");
|
||||||
|
QParser fromQueryParser = subQuery(v, "lucene");
|
||||||
|
Query fromQuery = fromQueryParser.getQuery();
|
||||||
|
JoinQuery jq = new JoinQuery(fromField, toField, fromIndex, fromQuery);
|
||||||
|
return jq;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class JoinQuery extends Query {
|
||||||
|
String fromField;
|
||||||
|
String toField;
|
||||||
|
String fromIndex;
|
||||||
|
Query q;
|
||||||
|
|
||||||
|
/**
 * Creates a join query.
 *
 * @param fromField stored as this query's <code>fromField</code>
 * @param toField   stored as this query's <code>toField</code>
 * @param fromIndex stored as this query's <code>fromIndex</code>; may be null
 * @param subQuery  stored as the wrapped query <code>q</code>
 */
public JoinQuery(String fromField, String toField, String fromIndex, Query subQuery) {
  this.q = subQuery;
  this.fromIndex = fromIndex;
  this.fromField = fromField;
  this.toField = toField;
}
|
||||||
|
|
||||||
|
/** Returns the wrapped sub-query. */
public Query getQuery() {
  return q;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
Query newQ = q.rewrite(reader);
|
||||||
|
if (newQ == q) return this;
|
||||||
|
JoinQuery nq = (JoinQuery)this.clone();
|
||||||
|
nq.q = newQ;
|
||||||
|
return nq;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void extractTerms(Set terms) {
|
||||||
|
q.extractTerms(terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Weight createWeight(IndexSearcher searcher) throws IOException {
|
||||||
|
return new JoinQueryWeight((SolrIndexSearcher)searcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class JoinQueryWeight extends Weight {
|
||||||
|
SolrIndexSearcher fromSearcher;
|
||||||
|
RefCounted<SolrIndexSearcher> fromRef;
|
||||||
|
SolrIndexSearcher toSearcher;
|
||||||
|
private Similarity similarity;
|
||||||
|
private float queryNorm;
|
||||||
|
private float queryWeight;
|
||||||
|
ResponseBuilder rb;
|
||||||
|
|
||||||
|
/**
 * Resolves the searchers for the "from" and "to" sides of the join.
 *
 * <p>The "to" side always uses <code>searcher</code>. The "from" side also uses
 * <code>searcher</code> unless <code>fromIndex</code> names a different core, in
 * which case a searcher is obtained from that core. Everything acquired from
 * the other core is released through close hooks registered on the current
 * SolrRequestInfo.
 *
 * @param searcher the searcher the query is being executed against
 * @throws SolrException if <code>fromIndex</code> is set but no SolrRequestInfo
 *         is available, or no core is found under that name
 * @throws IOException declared for obtaining the searcher of the other core
 */
public JoinQueryWeight(SolrIndexSearcher searcher) throws IOException {
  // default: join within the index of the searcher we were handed
  this.fromSearcher = searcher;
  this.toSearcher = searcher;

  SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
  if (info != null) {
    rb = info.getResponseBuilder();
  }

  if (fromIndex == null) {
    return;
  }

  if (info == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cross-core join must have SolrRequestInfo");
  }

  CoreContainer container = searcher.getCore().getCoreDescriptor().getCoreContainer();
  final SolrCore fromCore = container.getCore(fromIndex);

  if (fromCore == null) {
    // name the core so a misconfigured request can be diagnosed
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cross-core join: no such core " + fromIndex);
  }

  if (info.getReq().getCore() != fromCore) {
    // This could block if there is a static warming query with a join in it, and if useColdSearcher is true.
    // Deadlock could result if two cores both had useColdSearcher and had joins that used each other.
    // This would be very predictable though (should happen every time if misconfigured)
    boolean searcherAcquired = false;
    try {
      fromRef = fromCore.getSearcher(false, true, null);

      // be careful not to do anything with this searcher that requires the thread local
      // SolrRequestInfo in a manner that requires the core in the request to match
      fromSearcher = fromRef.get();
      searcherAcquired = true;
    } finally {
      if (!searcherAcquired) {
        // the close hook for fromCore is not registered yet, so release the
        // core handle here instead of leaking it
        fromCore.close();
      }
    }

    final RefCounted<SolrIndexSearcher> ref = fromRef;
    info.addCloseHook(new Closeable() {
      @Override
      public void close() throws IOException {
        ref.decref();
      }
    });
  }
  // else: this is the same core, so keep the searcher passed in... otherwise we could be
  // warming and get an older searcher from the core.

  info.addCloseHook(new Closeable() {
    @Override
    public void close() throws IOException {
      fromCore.close();
    }
  });
}
|
||||||
|
|
||||||
|
public Query getQuery() {
|
||||||
|
return JoinQuery.this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getValue() {
|
||||||
|
return getBoost();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float sumOfSquaredWeights() throws IOException {
|
||||||
|
queryWeight = getBoost();
|
||||||
|
return queryWeight * queryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void normalize(float norm) {
|
||||||
|
this.queryNorm = norm;
|
||||||
|
queryWeight *= this.queryNorm;
|
||||||
|
}
|
||||||
|
|
||||||
|
DocSet resultSet;
|
||||||
|
Filter filter;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(IndexReader.AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
|
||||||
|
if (filter == null) {
|
||||||
|
boolean debug = rb != null && rb.isDebug();
|
||||||
|
long start = debug ? System.currentTimeMillis() : 0;
|
||||||
|
resultSet = getDocSet();
|
||||||
|
long end = debug ? System.currentTimeMillis() : 0;
|
||||||
|
|
||||||
|
if (debug) {
|
||||||
|
SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
|
||||||
|
dbg.add("time", (end-start));
|
||||||
|
dbg.add("fromSetSize", fromSetSize); // the input
|
||||||
|
dbg.add("toSetSize", resultSet.size()); // the output
|
||||||
|
|
||||||
|
dbg.add("fromTermCount", fromTermCount);
|
||||||
|
dbg.add("fromTermTotalDf", fromTermTotalDf);
|
||||||
|
dbg.add("fromTermDirectCount", fromTermDirectCount);
|
||||||
|
dbg.add("fromTermHits", fromTermHits);
|
||||||
|
dbg.add("fromTermHitsTotalDf", fromTermHitsTotalDf);
|
||||||
|
dbg.add("toTermHits", toTermHits);
|
||||||
|
dbg.add("toTermHitsTotalDf", toTermHitsTotalDf);
|
||||||
|
dbg.add("toTermDirectCount", toTermDirectCount);
|
||||||
|
dbg.add("smallSetsDeferred", smallSetsDeferred);
|
||||||
|
dbg.add("toSetDocsAdded", resultListDocs);
|
||||||
|
|
||||||
|
// TODO: perhaps synchronize addDebug in the future...
|
||||||
|
rb.addDebug(dbg, "join", JoinQuery.this.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
filter = resultSet.getTopFilter();
|
||||||
|
}
|
||||||
|
|
||||||
|
DocIdSet readerSet = filter.getDocIdSet(context);
|
||||||
|
if (readerSet == null) readerSet=DocIdSet.EMPTY_DOCIDSET;
|
||||||
|
return new JoinScorer(this, readerSet.iterator());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int fromSetSize; // number of docs in the fromSet (that match the from query)
|
||||||
|
long resultListDocs; // total number of docs collected
|
||||||
|
int fromTermCount;
|
||||||
|
long fromTermTotalDf;
|
||||||
|
int fromTermDirectCount; // number of fromTerms that were too small to use the filter cache
|
||||||
|
int fromTermHits; // number of fromTerms that intersected the from query
|
||||||
|
long fromTermHitsTotalDf; // sum of the df of the matching terms
|
||||||
|
int toTermHits; // number of intersecting from terms that match a term in the to field
|
||||||
|
long toTermHitsTotalDf; // sum of the df for the toTermHits
|
||||||
|
int toTermDirectCount; // number of toTerms that we set directly on a bitset rather than doing set intersections
|
||||||
|
int smallSetsDeferred; // number of small sets collected to be used later to intersect w/ bitset or create another small set
|
||||||
|
|
||||||
|
|
||||||
|
public DocSet getDocSet() throws IOException {
|
||||||
|
OpenBitSet resultBits = null;
|
||||||
|
|
||||||
|
// minimum docFreq to use the cache
|
||||||
|
int minDocFreqFrom = Math.max(5, fromSearcher.maxDoc() >> 13);
|
||||||
|
int minDocFreqTo = Math.max(5, toSearcher.maxDoc() >> 13);
|
||||||
|
|
||||||
|
// use a smaller size than normal since we will need to sort and dedup the results
|
||||||
|
int maxSortedIntSize = Math.max(10, toSearcher.maxDoc() >> 10);
|
||||||
|
|
||||||
|
DocSet fromSet = fromSearcher.getDocSet(q);
|
||||||
|
fromSetSize = fromSet.size();
|
||||||
|
|
||||||
|
List<DocSet> resultList = new ArrayList<DocSet>(10);
|
||||||
|
|
||||||
|
// make sure we have a set that is fast for random access, if we will use it for that
|
||||||
|
DocSet fastForRandomSet = fromSet;
|
||||||
|
if (minDocFreqFrom>0 && fromSet instanceof SortedIntDocSet) {
|
||||||
|
SortedIntDocSet sset = (SortedIntDocSet)fromSet;
|
||||||
|
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
Fields fromFields = MultiFields.getFields(fromSearcher.getIndexReader());
|
||||||
|
Fields toFields = fromSearcher==toSearcher ? fromFields : MultiFields.getFields(toSearcher.getIndexReader());
|
||||||
|
if (fromFields == null) return DocSet.EMPTY;
|
||||||
|
Terms terms = fromFields.terms(fromField);
|
||||||
|
Terms toTerms = toFields.terms(toField);
|
||||||
|
if (terms == null || toTerms==null) return DocSet.EMPTY;
|
||||||
|
String prefixStr = TrieField.getMainValuePrefix(fromSearcher.getSchema().getFieldType(fromField));
|
||||||
|
BytesRef prefix = prefixStr == null ? null : new BytesRef(prefixStr);
|
||||||
|
|
||||||
|
BytesRef term = null;
|
||||||
|
TermsEnum termsEnum = terms.iterator();
|
||||||
|
TermsEnum toTermsEnum = toTerms.iterator();
|
||||||
|
SolrIndexSearcher.DocsEnumState fromDeState = null;
|
||||||
|
SolrIndexSearcher.DocsEnumState toDeState = null;
|
||||||
|
|
||||||
|
if (prefix == null) {
|
||||||
|
term = termsEnum.next();
|
||||||
|
} else {
|
||||||
|
if (termsEnum.seek(prefix, true) != TermsEnum.SeekStatus.END) {
|
||||||
|
term = termsEnum.term();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Bits fromDeletedDocs = MultiFields.getDeletedDocs(fromSearcher.getIndexReader());
|
||||||
|
Bits toDeletedDocs = fromSearcher == toSearcher ? fromDeletedDocs : MultiFields.getDeletedDocs(toSearcher.getIndexReader());
|
||||||
|
|
||||||
|
fromDeState = new SolrIndexSearcher.DocsEnumState();
|
||||||
|
fromDeState.fieldName = StringHelper.intern(fromField);
|
||||||
|
fromDeState.deletedDocs = fromDeletedDocs;
|
||||||
|
fromDeState.termsEnum = termsEnum;
|
||||||
|
fromDeState.docsEnum = null;
|
||||||
|
fromDeState.minSetSizeCached = minDocFreqFrom;
|
||||||
|
|
||||||
|
toDeState = new SolrIndexSearcher.DocsEnumState();
|
||||||
|
toDeState.fieldName = StringHelper.intern(toField);
|
||||||
|
toDeState.deletedDocs = toDeletedDocs;
|
||||||
|
toDeState.termsEnum = toTermsEnum;
|
||||||
|
toDeState.docsEnum = null;
|
||||||
|
toDeState.minSetSizeCached = minDocFreqTo;
|
||||||
|
|
||||||
|
while (term != null) {
|
||||||
|
if (prefix != null && !term.startsWith(prefix))
|
||||||
|
break;
|
||||||
|
|
||||||
|
fromTermCount++;
|
||||||
|
|
||||||
|
boolean intersects = false;
|
||||||
|
int freq = termsEnum.docFreq();
|
||||||
|
fromTermTotalDf++;
|
||||||
|
|
||||||
|
if (freq < minDocFreqFrom) {
|
||||||
|
fromTermDirectCount++;
|
||||||
|
// OK to skip deletedDocs, since we check for intersection with docs matching query
|
||||||
|
fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum);
|
||||||
|
DocsEnum docsEnum = fromDeState.docsEnum;
|
||||||
|
|
||||||
|
if (docsEnum instanceof MultiDocsEnum) {
|
||||||
|
MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
|
||||||
|
int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
|
||||||
|
outer: for (int subindex = 0; subindex<numSubs; subindex++) {
|
||||||
|
MultiDocsEnum.EnumWithSlice sub = subs[subindex];
|
||||||
|
if (sub.docsEnum == null) continue;
|
||||||
|
DocsEnum.BulkReadResult bulk = sub.docsEnum.getBulkResult();
|
||||||
|
int base = sub.slice.start;
|
||||||
|
for (;;) {
|
||||||
|
int nDocs = sub.docsEnum.read();
|
||||||
|
if (nDocs == 0) break;
|
||||||
|
int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
|
||||||
|
int end = bulk.docs.offset + nDocs;
|
||||||
|
for (int i=bulk.docs.offset; i<end; i++) {
|
||||||
|
if (fastForRandomSet.exists(docArr[i]+base)) {
|
||||||
|
intersects = true;
|
||||||
|
break outer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// this should be the same bulk result object if sharing of the docsEnum succeeded
|
||||||
|
DocsEnum.BulkReadResult bulk = docsEnum.getBulkResult();
|
||||||
|
|
||||||
|
outer: for (;;) {
|
||||||
|
int nDocs = docsEnum.read();
|
||||||
|
if (nDocs == 0) break;
|
||||||
|
int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
|
||||||
|
int end = bulk.docs.offset + nDocs;
|
||||||
|
for (int i=bulk.docs.offset; i<end; i++) {
|
||||||
|
if (fastForRandomSet.exists(docArr[i])) {
|
||||||
|
intersects = true;
|
||||||
|
break outer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// use the filter cache
|
||||||
|
DocSet fromTermSet = fromSearcher.getDocSet(fromDeState);
|
||||||
|
intersects = fromSet.intersects(fromTermSet);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intersects) {
|
||||||
|
fromTermHits++;
|
||||||
|
fromTermHitsTotalDf++;
|
||||||
|
TermsEnum.SeekStatus status = toTermsEnum.seek(term);
|
||||||
|
if (status == TermsEnum.SeekStatus.END) break;
|
||||||
|
if (status == TermsEnum.SeekStatus.FOUND) {
|
||||||
|
toTermHits++;
|
||||||
|
int df = toTermsEnum.docFreq();
|
||||||
|
toTermHitsTotalDf += df;
|
||||||
|
if (resultBits==null && df + resultListDocs > maxSortedIntSize && resultList.size() > 0) {
|
||||||
|
resultBits = new OpenBitSet(toSearcher.maxDoc());
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we don't have a bitset yet, or if the resulting set will be too large
|
||||||
|
// use the filterCache to get a DocSet
|
||||||
|
if (toTermsEnum.docFreq() >= minDocFreqTo || resultBits == null) {
|
||||||
|
// use filter cache
|
||||||
|
DocSet toTermSet = toSearcher.getDocSet(toDeState);
|
||||||
|
resultListDocs += toTermSet.size();
|
||||||
|
if (resultBits != null) {
|
||||||
|
toTermSet.setBitsOn(resultBits);
|
||||||
|
} else {
|
||||||
|
if (toTermSet instanceof BitDocSet) {
|
||||||
|
resultBits = (OpenBitSet)((BitDocSet)toTermSet).bits.clone();
|
||||||
|
} else {
|
||||||
|
resultList.add(toTermSet);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
toTermDirectCount++;
|
||||||
|
|
||||||
|
// need to use deletedDocs here so we don't map to any deleted ones
|
||||||
|
toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.deletedDocs, toDeState.docsEnum);
|
||||||
|
DocsEnum docsEnum = toDeState.docsEnum;
|
||||||
|
|
||||||
|
if (docsEnum instanceof MultiDocsEnum) {
|
||||||
|
MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
|
||||||
|
int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
|
||||||
|
for (int subindex = 0; subindex<numSubs; subindex++) {
|
||||||
|
MultiDocsEnum.EnumWithSlice sub = subs[subindex];
|
||||||
|
if (sub.docsEnum == null) continue;
|
||||||
|
DocsEnum.BulkReadResult bulk = sub.docsEnum.getBulkResult();
|
||||||
|
int base = sub.slice.start;
|
||||||
|
for (;;) {
|
||||||
|
int nDocs = sub.docsEnum.read();
|
||||||
|
if (nDocs == 0) break;
|
||||||
|
resultListDocs += nDocs;
|
||||||
|
int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
|
||||||
|
int end = bulk.docs.offset + nDocs;
|
||||||
|
for (int i=bulk.docs.offset; i<end; i++) {
|
||||||
|
resultBits.fastSet(docArr[i]+base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// this should be the same bulk result object if sharing of the docsEnum succeeded
|
||||||
|
DocsEnum.BulkReadResult bulk = docsEnum.getBulkResult();
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
int nDocs = docsEnum.read();
|
||||||
|
if (nDocs == 0) break;
|
||||||
|
resultListDocs += nDocs;
|
||||||
|
int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
|
||||||
|
int end = bulk.docs.offset + nDocs;
|
||||||
|
for (int i=bulk.docs.offset; i<end; i++) {
|
||||||
|
resultBits.fastSet(docArr[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
term = termsEnum.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
smallSetsDeferred = resultList.size();
|
||||||
|
|
||||||
|
if (resultBits != null) {
|
||||||
|
for (DocSet set : resultList) {
|
||||||
|
set.setBitsOn(resultBits);
|
||||||
|
}
|
||||||
|
return new BitDocSet(resultBits);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (resultList.size()==0) {
|
||||||
|
return DocSet.EMPTY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (resultList.size() == 1) {
|
||||||
|
return resultList.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int sz = resultList.size();
|
||||||
|
|
||||||
|
for (DocSet set : resultList)
|
||||||
|
sz += set.size();
|
||||||
|
|
||||||
|
int[] docs = new int[sz];
|
||||||
|
int pos = 0;
|
||||||
|
for (DocSet set : resultList) {
|
||||||
|
System.arraycopy(((SortedIntDocSet)set).getDocs(), 0, docs, pos, set.size());
|
||||||
|
pos += set.size();
|
||||||
|
}
|
||||||
|
Arrays.sort(docs);
|
||||||
|
int[] dedup = new int[sz];
|
||||||
|
pos = 0;
|
||||||
|
int last = -1;
|
||||||
|
for (int doc : docs) {
|
||||||
|
if (doc != last)
|
||||||
|
dedup[pos++] = doc;
|
||||||
|
last = doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pos != dedup.length) {
|
||||||
|
dedup = Arrays.copyOf(dedup, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SortedIntDocSet(dedup, dedup.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explain(IndexReader.AtomicReaderContext context, int doc) throws IOException {
|
||||||
|
Scorer scorer = scorer(context, null);
|
||||||
|
boolean exists = scorer.advance(doc) == doc;
|
||||||
|
|
||||||
|
ComplexExplanation result = new ComplexExplanation();
|
||||||
|
|
||||||
|
if (exists) {
|
||||||
|
result.setDescription(this.toString()
|
||||||
|
+ " , product of:");
|
||||||
|
result.setValue(queryWeight);
|
||||||
|
result.setMatch(Boolean.TRUE);
|
||||||
|
result.addDetail(new Explanation(getBoost(), "boost"));
|
||||||
|
result.addDetail(new Explanation(queryNorm,"queryNorm"));
|
||||||
|
} else {
|
||||||
|
result.setDescription(this.toString()
|
||||||
|
+ " doesn't match id " + doc);
|
||||||
|
result.setValue(0);
|
||||||
|
result.setMatch(Boolean.FALSE);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected static class JoinScorer extends Scorer {
|
||||||
|
final DocIdSetIterator iter;
|
||||||
|
final float score;
|
||||||
|
int doc = -1;
|
||||||
|
|
||||||
|
public JoinScorer(Weight w, DocIdSetIterator iter) throws IOException {
|
||||||
|
super(w);
|
||||||
|
score = w.getValue();
|
||||||
|
this.iter = iter==null ? DocIdSet.EMPTY_DOCIDSET.iterator() : iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return iter.nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return iter.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score() throws IOException {
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
return iter.advance(target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "{!join from="+fromField+" to="+toField
|
||||||
|
+ (fromIndex != null ? " fromIndex="+fromIndex : "")
|
||||||
|
+"}"+q.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (getClass() != o.getClass()) return false;
|
||||||
|
JoinQuery other = (JoinQuery)o;
|
||||||
|
return this.fromField.equals(other.fromField)
|
||||||
|
&& this.toField.equals(other.toField)
|
||||||
|
&& this.getBoost() == other.getBoost()
|
||||||
|
&& this.q.equals(other.q)
|
||||||
|
&& (this.fromIndex == other.fromIndex || this.fromIndex != null && this.fromIndex.equals(other.fromIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int h = q.hashCode();
|
||||||
|
h = h * 31 + fromField.hashCode();
|
||||||
|
h = h * 31 + toField.hashCode();
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -40,6 +40,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin {
|
||||||
FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class,
|
FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class,
|
||||||
SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class,
|
SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class,
|
||||||
SpatialBoxQParserPlugin.NAME, SpatialBoxQParserPlugin.class,
|
SpatialBoxQParserPlugin.NAME, SpatialBoxQParserPlugin.class,
|
||||||
|
JoinQParserPlugin.NAME, JoinQParserPlugin.class,
|
||||||
};
|
};
|
||||||
|
|
||||||
/** return a {@link QParser} */
|
/** return a {@link QParser} */
|
||||||
|
|
|
@ -28,12 +28,17 @@ import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.OpenBitSet;
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
import org.apache.solr.core.SolrConfig;
|
import org.apache.solr.core.SolrConfig;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.core.SolrInfoMBean;
|
import org.apache.solr.core.SolrInfoMBean;
|
||||||
|
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrRequestInfo;
|
||||||
import org.apache.solr.request.UnInvertedField;
|
import org.apache.solr.request.UnInvertedField;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -189,6 +194,10 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SolrCore getCore() {
|
||||||
|
return core;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Register sub-objects such as caches
|
/** Register sub-objects such as caches
|
||||||
*/
|
*/
|
||||||
|
@ -576,32 +585,6 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** lucene.internal */
|
|
||||||
public DocSet getDocSet(Query query, DocsEnumState deState) throws IOException {
|
|
||||||
// Get the absolute value (positive version) of this query. If we
|
|
||||||
// get back the same reference, we know it's positive.
|
|
||||||
Query absQ = QueryUtils.getAbs(query);
|
|
||||||
boolean positive = query==absQ;
|
|
||||||
|
|
||||||
if (filterCache != null) {
|
|
||||||
DocSet absAnswer = filterCache.get(absQ);
|
|
||||||
if (absAnswer!=null) {
|
|
||||||
if (positive) return absAnswer;
|
|
||||||
else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
DocSet absAnswer = getDocSetNC(absQ, null, deState);
|
|
||||||
DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery, deState).andNot(absAnswer);
|
|
||||||
|
|
||||||
if (filterCache != null) {
|
|
||||||
// cache negative queries as positive
|
|
||||||
filterCache.put(absQ, absAnswer);
|
|
||||||
}
|
|
||||||
|
|
||||||
return answer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// only handle positive (non negative) queries
|
// only handle positive (non negative) queries
|
||||||
DocSet getPositiveDocSet(Query q) throws IOException {
|
DocSet getPositiveDocSet(Query q) throws IOException {
|
||||||
DocSet answer;
|
DocSet answer;
|
||||||
|
@ -614,18 +597,6 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
// only handle positive (non negative) queries
|
|
||||||
DocSet getPositiveDocSet(Query q, DocsEnumState deState) throws IOException {
|
|
||||||
DocSet answer;
|
|
||||||
if (filterCache != null) {
|
|
||||||
answer = filterCache.get(q);
|
|
||||||
if (answer!=null) return answer;
|
|
||||||
}
|
|
||||||
answer = getDocSetNC(q,null,deState);
|
|
||||||
if (filterCache != null) filterCache.put(q,answer);
|
|
||||||
return answer;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
|
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -756,21 +727,31 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// query must be positive
|
/** lucene.internal */
|
||||||
protected DocSet getDocSetNC(Query query, DocSet filter, DocsEnumState deState) throws IOException {
|
public DocSet getDocSet(DocsEnumState deState) throws IOException {
|
||||||
if (filter != null) return getDocSetNC(query, filter, null);
|
int largestPossible = deState.termsEnum.docFreq();
|
||||||
|
boolean useCache = filterCache != null && largestPossible >= deState.minSetSizeCached;
|
||||||
|
TermQuery key = null;
|
||||||
|
|
||||||
|
if (useCache) {
|
||||||
|
key = new TermQuery(new Term(deState.fieldName, new BytesRef(deState.termsEnum.term()), false));
|
||||||
|
DocSet result = filterCache.get(key);
|
||||||
|
if (result != null) return result;
|
||||||
|
}
|
||||||
|
|
||||||
int smallSetSize = maxDoc()>>6;
|
int smallSetSize = maxDoc()>>6;
|
||||||
int largestPossible = deState.termsEnum.docFreq();
|
int scratchSize = Math.min(smallSetSize, largestPossible);
|
||||||
|
if (deState.scratch == null || deState.scratch.length < scratchSize)
|
||||||
|
deState.scratch = new int[scratchSize];
|
||||||
|
|
||||||
int[] docs = new int[Math.min(smallSetSize, largestPossible)];
|
final int[] docs = deState.scratch;
|
||||||
int upto = 0;
|
int upto = 0;
|
||||||
int bitsSet = 0;
|
int bitsSet = 0;
|
||||||
OpenBitSet obs = null;
|
OpenBitSet obs = null;
|
||||||
|
|
||||||
DocsEnum docsEnum = deState.termsEnum.docs(deState.deletedDocs, deState.reuse);
|
DocsEnum docsEnum = deState.termsEnum.docs(deState.deletedDocs, deState.docsEnum);
|
||||||
if (deState.reuse == null) {
|
if (deState.docsEnum == null) {
|
||||||
deState.reuse = docsEnum;
|
deState.docsEnum = docsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (docsEnum instanceof MultiDocsEnum) {
|
if (docsEnum instanceof MultiDocsEnum) {
|
||||||
|
@ -822,15 +803,22 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DocSet result;
|
||||||
if (obs != null) {
|
if (obs != null) {
|
||||||
for (int i=0; i<upto; i++) {
|
for (int i=0; i<upto; i++) {
|
||||||
obs.fastSet(docs[i]);
|
obs.fastSet(docs[i]);
|
||||||
}
|
}
|
||||||
bitsSet += upto;
|
bitsSet += upto;
|
||||||
return new BitDocSet(obs, bitsSet);
|
result = new BitDocSet(obs, bitsSet);
|
||||||
|
} else {
|
||||||
|
result = new SortedIntDocSet(Arrays.copyOf(docs, upto));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new SortedIntDocSet(docs, upto);
|
if (useCache) {
|
||||||
|
filterCache.put(key, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// query must be positive
|
// query must be positive
|
||||||
|
@ -1640,17 +1628,20 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @lucene.internal */
|
/** @lucene.internal */
|
||||||
public int numDocs(Query a, DocSet b, DocsEnumState deState) throws IOException {
|
public int numDocs(DocSet a, DocsEnumState deState) throws IOException {
|
||||||
// Negative query if absolute value different from original
|
// Negative query if absolute value different from original
|
||||||
Query absQ = QueryUtils.getAbs(a);
|
return a.intersectionSize(getDocSet(deState));
|
||||||
DocSet positiveA = getPositiveDocSet(absQ, deState);
|
|
||||||
return a==absQ ? b.intersectionSize(positiveA) : b.andNotSize(positiveA);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class DocsEnumState {
|
public static class DocsEnumState {
|
||||||
|
public String fieldName; // currently interned for as long as lucene requires it
|
||||||
public TermsEnum termsEnum;
|
public TermsEnum termsEnum;
|
||||||
public Bits deletedDocs;
|
public Bits deletedDocs;
|
||||||
public DocsEnum reuse;
|
public DocsEnum docsEnum;
|
||||||
|
|
||||||
|
public int minSetSizeCached;
|
||||||
|
|
||||||
|
public int[] scratch;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1706,9 +1697,29 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
boolean logme = log.isInfoEnabled();
|
boolean logme = log.isInfoEnabled();
|
||||||
long warmingStartTime = System.currentTimeMillis();
|
long warmingStartTime = System.currentTimeMillis();
|
||||||
// warm the caches in order...
|
// warm the caches in order...
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add("warming","true");
|
||||||
for (int i=0; i<cacheList.length; i++) {
|
for (int i=0; i<cacheList.length; i++) {
|
||||||
if (logme) log.info("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
|
if (logme) log.info("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
|
||||||
|
|
||||||
|
|
||||||
|
SolrQueryRequest req = new LocalSolrQueryRequest(core,params) {
|
||||||
|
@Override public SolrIndexSearcher getSearcher() { return SolrIndexSearcher.this; }
|
||||||
|
@Override public void close() { }
|
||||||
|
};
|
||||||
|
|
||||||
|
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||||
|
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
|
||||||
|
try {
|
||||||
this.cacheList[i].warm(this, old.cacheList[i]);
|
this.cacheList[i].warm(this, old.cacheList[i]);
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
req.close();
|
||||||
|
} finally {
|
||||||
|
SolrRequestInfo.clearRequestInfo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (logme) log.info("autowarming result for " + this + "\n\t" + this.cacheList[i]);
|
if (logme) log.info("autowarming result for " + this + "\n\t" + this.cacheList[i]);
|
||||||
}
|
}
|
||||||
warmupTime = System.currentTimeMillis() - warmingStartTime;
|
warmupTime = System.currentTimeMillis() - warmingStartTime;
|
||||||
|
|
|
@ -166,6 +166,59 @@ public class SortedIntDocSet extends DocSetBase {
|
||||||
return icount;
|
return icount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static boolean intersects(int[] smallerSortedList, int[] biggerSortedList) {
|
||||||
|
// see intersectionSize for more in-depth comments of this algorithm
|
||||||
|
|
||||||
|
final int a[] = smallerSortedList;
|
||||||
|
final int b[] = biggerSortedList;
|
||||||
|
|
||||||
|
int step = (b.length/a.length)+1;
|
||||||
|
|
||||||
|
step = step + step;
|
||||||
|
|
||||||
|
int low = 0;
|
||||||
|
int max = b.length-1;
|
||||||
|
|
||||||
|
for (int i=0; i<a.length; i++) {
|
||||||
|
int doca = a[i];
|
||||||
|
int high = max;
|
||||||
|
int probe = low + step;
|
||||||
|
if (probe<high) {
|
||||||
|
if (b[probe]>=doca) {
|
||||||
|
high=probe;
|
||||||
|
} else {
|
||||||
|
low=probe+1;
|
||||||
|
probe = low + step;
|
||||||
|
if (probe<high) {
|
||||||
|
if (b[probe]>=doca) {
|
||||||
|
high=probe;
|
||||||
|
} else {
|
||||||
|
low=probe+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (low <= high) {
|
||||||
|
int mid = (low+high) >>> 1;
|
||||||
|
int docb = b[mid];
|
||||||
|
|
||||||
|
if (docb < doca) {
|
||||||
|
low = mid+1;
|
||||||
|
}
|
||||||
|
else if (docb > doca) {
|
||||||
|
high = mid-1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public int intersectionSize(DocSet other) {
|
public int intersectionSize(DocSet other) {
|
||||||
if (!(other instanceof SortedIntDocSet)) {
|
if (!(other instanceof SortedIntDocSet)) {
|
||||||
// assume other implementations are better at random access than we are,
|
// assume other implementations are better at random access than we are,
|
||||||
|
@ -215,6 +268,49 @@ public class SortedIntDocSet extends DocSetBase {
|
||||||
return icount;
|
return icount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean intersects(DocSet other) {
|
||||||
|
if (!(other instanceof SortedIntDocSet)) {
|
||||||
|
// assume other implementations are better at random access than we are,
|
||||||
|
// true of BitDocSet and HashDocSet.
|
||||||
|
for (int i=0; i<docs.length; i++) {
|
||||||
|
if (other.exists(docs[i])) return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make "a" the smaller set.
|
||||||
|
int[] otherDocs = ((SortedIntDocSet)other).docs;
|
||||||
|
final int[] a = docs.length < otherDocs.length ? docs : otherDocs;
|
||||||
|
final int[] b = docs.length < otherDocs.length ? otherDocs : docs;
|
||||||
|
|
||||||
|
if (a.length==0) return false;
|
||||||
|
|
||||||
|
// if b is 8 times bigger than a, use the modified binary search.
|
||||||
|
if ((b.length>>3) >= a.length) {
|
||||||
|
return intersects(a,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if they are close in size, just do a linear walk of both.
|
||||||
|
int i=0,j=0;
|
||||||
|
int doca=a[i],docb=b[j];
|
||||||
|
for(;;) {
|
||||||
|
// switch on the sign bit somehow? Hopefull JVM is smart enough to just test once.
|
||||||
|
|
||||||
|
// Since set a is less dense then set b, doca is likely to be greater than docb so
|
||||||
|
// check that case first. This resulted in a 13% speedup.
|
||||||
|
if (doca > docb) {
|
||||||
|
if (++j >= b.length) break;
|
||||||
|
docb=b[j];
|
||||||
|
} else if (doca < docb) {
|
||||||
|
if (++i >= a.length) break;
|
||||||
|
doca=a[i];
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/** puts the intersection of a and b into the target array and returns the size */
|
/** puts the intersection of a and b into the target array and returns the size */
|
||||||
public static int intersection(int a[], int lena, int b[], int lenb, int[] target) {
|
public static int intersection(int a[], int lena, int b[], int lenb, int[] target) {
|
||||||
|
@ -463,6 +559,13 @@ public class SortedIntDocSet extends DocSetBase {
|
||||||
return new SortedIntDocSet(arr,sz);
|
return new SortedIntDocSet(arr,sz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setBitsOn(OpenBitSet target) {
|
||||||
|
for (int doc : docs) {
|
||||||
|
target.fastSet(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean exists(int doc) {
|
public boolean exists(int doc) {
|
||||||
// this could be faster by estimating where in the list the doc is likely to appear,
|
// this could be faster by estimating where in the list the doc is likely to appear,
|
||||||
|
@ -653,4 +756,8 @@ public class SortedIntDocSet extends DocSetBase {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected SortedIntDocSet clone() {
|
||||||
|
return new SortedIntDocSet(docs.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -722,6 +722,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final IRange ZERO_ONE = new IRange(0,1);
|
public static final IRange ZERO_ONE = new IRange(0,1);
|
||||||
|
public static final IRange ZERO_TWO = new IRange(0,2);
|
||||||
public static final IRange ONE_ONE = new IRange(1,1);
|
public static final IRange ONE_ONE = new IRange(1,1);
|
||||||
|
|
||||||
public static class Doc implements Comparable{
|
public static class Doc implements Comparable{
|
||||||
|
@ -1040,6 +1041,29 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
|
||||||
return out.toString();
|
return out.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return a Map from field value to a list of document ids */
|
||||||
|
Map<Comparable, List<Comparable>> invertField(Map<Comparable, Doc> model, String field) {
|
||||||
|
Map<Comparable, List<Comparable>> value_to_id = new HashMap<Comparable, List<Comparable>>();
|
||||||
|
|
||||||
|
// invert field
|
||||||
|
for (Comparable key : model.keySet()) {
|
||||||
|
Doc doc = model.get(key);
|
||||||
|
List<Comparable> vals = doc.getValues(field);
|
||||||
|
if (vals == null) continue;
|
||||||
|
for (Comparable val : vals) {
|
||||||
|
List<Comparable> ids = value_to_id.get(val);
|
||||||
|
if (ids == null) {
|
||||||
|
ids = new ArrayList<Comparable>(2);
|
||||||
|
value_to_id.put(val, ids);
|
||||||
|
}
|
||||||
|
ids.add(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return value_to_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Gets a resource from the context classloader as {@link File}. This method should only be used,
|
/** Gets a resource from the context classloader as {@link File}. This method should only be used,
|
||||||
* if a real file is needed. To get a stream, code should prefer
|
* if a real file is needed. To get a stream, code should prefer
|
||||||
* {@link Class#getResourceAsStream} using {@code this.getClass()}.
|
* {@link Class#getResourceAsStream} using {@code this.getClass()}.
|
||||||
|
|
|
@ -0,0 +1,216 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.noggit.JSONUtil;
|
||||||
|
import org.apache.noggit.ObjectBuilder;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.handler.JsonUpdateRequestHandler;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrRequestHandler;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.servlet.DirectSolrConnection;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class TestJoin extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeTests() throws Exception {
|
||||||
|
initCore("solrconfig.xml","schema12.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testJoin() throws Exception {
|
||||||
|
assertU(add(doc("id", "1","name", "john", "title", "Director", "dept_s","Engineering")));
|
||||||
|
assertU(add(doc("id", "2","name", "mark", "title", "VP", "dept_s","Marketing")));
|
||||||
|
assertU(add(doc("id", "3","name", "nancy", "title", "MTS", "dept_s","Sales")));
|
||||||
|
assertU(add(doc("id", "4","name", "dave", "title", "MTS", "dept_s","Support", "dept_s","Engineering")));
|
||||||
|
assertU(add(doc("id", "5","name", "tina", "title", "VP", "dept_s","Engineering")));
|
||||||
|
|
||||||
|
assertU(add(doc("id","10", "dept_id_s", "Engineering", "text","These guys develop stuff")));
|
||||||
|
assertU(add(doc("id","11", "dept_id_s", "Marketing", "text","These guys make you look good")));
|
||||||
|
assertU(add(doc("id","12", "dept_id_s", "Sales", "text","These guys sell stuff")));
|
||||||
|
assertU(add(doc("id","13", "dept_id_s", "Support", "text","These guys help customers")));
|
||||||
|
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
// test debugging
|
||||||
|
assertJQ(req("q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id", "debugQuery","true")
|
||||||
|
,"/debug/join/{!join from=dept_s to=dept_id_s}title:MTS=={'_MATCH_':'fromSetSize,toSetSize', 'fromSetSize':2, 'toSetSize':3}"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id")
|
||||||
|
,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// empty from
|
||||||
|
assertJQ(req("q","{!join from=noexist_s to=dept_id_s}*:*", "fl","id")
|
||||||
|
,"/response=={'numFound':0,'start':0,'docs':[]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// empty to
|
||||||
|
assertJQ(req("q","{!join from=dept_s to=noexist_s}*:*", "fl","id")
|
||||||
|
,"/response=={'numFound':0,'start':0,'docs':[]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// self join... return everyone with she same title as Dave
|
||||||
|
assertJQ(req("q","{!join from=title to=title}name:dave", "fl","id")
|
||||||
|
,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// find people that develop stuff
|
||||||
|
assertJQ(req("q","{!join from=dept_id_s to=dept_s}text:develop", "fl","id")
|
||||||
|
,"/response=={'numFound':3,'start':0,'docs':[{'id':'1'},{'id':'4'},{'id':'5'}]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// self join on multivalued text field
|
||||||
|
assertJQ(req("q","{!join from=title to=title}name:dave", "fl","id")
|
||||||
|
,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id", "debugQuery","true")
|
||||||
|
,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRandomJoin() throws Exception {
|
||||||
|
int indexIter=50 * RANDOM_MULTIPLIER;
|
||||||
|
int queryIter=50 * RANDOM_MULTIPLIER;
|
||||||
|
|
||||||
|
while (--indexIter >= 0) {
|
||||||
|
int indexSize = random.nextInt(20 * RANDOM_MULTIPLIER);
|
||||||
|
|
||||||
|
List<FldType> types = new ArrayList<FldType>();
|
||||||
|
types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4)));
|
||||||
|
types.add(new FldType("score_f",ONE_ONE, new FVal(1,100))); // field used to score
|
||||||
|
types.add(new FldType("small_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
|
||||||
|
types.add(new FldType("small2_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
|
||||||
|
types.add(new FldType("small2_ss",ZERO_TWO, new SVal('a',(char)('c'+indexSize/3),1,1)));
|
||||||
|
types.add(new FldType("small3_ss",new IRange(0,25), new SVal('A','z',1,1)));
|
||||||
|
types.add(new FldType("small_i",ZERO_ONE, new IRange(0,5+indexSize/3)));
|
||||||
|
types.add(new FldType("small2_i",ZERO_ONE, new IRange(0,5+indexSize/3)));
|
||||||
|
types.add(new FldType("small2_is",ZERO_TWO, new IRange(0,5+indexSize/3)));
|
||||||
|
types.add(new FldType("small3_is",new IRange(0,25), new IRange(0,100)));
|
||||||
|
|
||||||
|
clearIndex();
|
||||||
|
Map<Comparable, Doc> model = indexDocs(types, null, indexSize);
|
||||||
|
Map<String, Map<Comparable, Set<Comparable>>> pivots = new HashMap<String, Map<Comparable, Set<Comparable>>>();
|
||||||
|
|
||||||
|
for (int qiter=0; qiter<queryIter; qiter++) {
|
||||||
|
String fromField = types.get(random.nextInt(types.size())).fname;
|
||||||
|
String toField = types.get(random.nextInt(types.size())).fname;
|
||||||
|
|
||||||
|
Map<Comparable, Set<Comparable>> pivot = pivots.get(fromField+"/"+toField);
|
||||||
|
if (pivot == null) {
|
||||||
|
pivot = createJoinMap(model, fromField, toField);
|
||||||
|
pivots.put(fromField+"/"+toField, pivot);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collection<Doc> fromDocs = model.values();
|
||||||
|
Set<Comparable> docs = join(fromDocs, pivot);
|
||||||
|
List<Doc> docList = new ArrayList<Doc>(docs.size());
|
||||||
|
for (Comparable id : docs) docList.add(model.get(id));
|
||||||
|
Collections.sort(docList, createComparator("_docid_",true,false,false,false));
|
||||||
|
List sortedDocs = new ArrayList();
|
||||||
|
for (Doc doc : docList) {
|
||||||
|
if (sortedDocs.size() >= 10) break;
|
||||||
|
sortedDocs.add(doc.toObject(h.getCore().getSchema()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String,Object> resultSet = new LinkedHashMap<String,Object>();
|
||||||
|
resultSet.put("numFound", docList.size());
|
||||||
|
resultSet.put("start", 0);
|
||||||
|
resultSet.put("docs", sortedDocs);
|
||||||
|
|
||||||
|
// todo: use filters
|
||||||
|
|
||||||
|
SolrQueryRequest req = req("wt","json","indent","true", "echoParams","all",
|
||||||
|
"q","{!join from="+fromField+" to="+toField
|
||||||
|
+ (random.nextInt(4)==0 ? " fromIndex=collection1" : "")
|
||||||
|
+"}*:*"
|
||||||
|
);
|
||||||
|
|
||||||
|
String strResponse = h.query(req);
|
||||||
|
|
||||||
|
Object realResponse = ObjectBuilder.fromJSON(strResponse);
|
||||||
|
String err = JSONTestUtil.matchObj("/response", realResponse, resultSet);
|
||||||
|
if (err != null) {
|
||||||
|
log.error("GROUPING MISMATCH: " + err
|
||||||
|
+ "\n\trequest="+req
|
||||||
|
+ "\n\tresult="+strResponse
|
||||||
|
+ "\n\texpected="+ JSONUtil.toJSON(resultSet)
|
||||||
|
+ "\n\tmodel="+ JSONUtil.toJSON(model)
|
||||||
|
);
|
||||||
|
|
||||||
|
// re-execute the request... good for putting a breakpoint here for debugging
|
||||||
|
String rsp = h.query(req);
|
||||||
|
|
||||||
|
fail(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Map<Comparable, Set<Comparable>> createJoinMap(Map<Comparable, Doc> model, String fromField, String toField) {
|
||||||
|
Map<Comparable, Set<Comparable>> id_to_id = new HashMap<Comparable, Set<Comparable>>();
|
||||||
|
|
||||||
|
Map<Comparable, List<Comparable>> value_to_id = invertField(model, toField);
|
||||||
|
|
||||||
|
for (Comparable fromId : model.keySet()) {
|
||||||
|
Doc doc = model.get(fromId);
|
||||||
|
List<Comparable> vals = doc.getValues(fromField);
|
||||||
|
if (vals == null) continue;
|
||||||
|
for (Comparable val : vals) {
|
||||||
|
List<Comparable> toIds = value_to_id.get(val);
|
||||||
|
if (toIds == null) continue;
|
||||||
|
Set<Comparable> ids = id_to_id.get(fromId);
|
||||||
|
if (ids == null) {
|
||||||
|
ids = new HashSet<Comparable>();
|
||||||
|
id_to_id.put(fromId, ids);
|
||||||
|
}
|
||||||
|
for (Comparable toId : toIds)
|
||||||
|
ids.add(toId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return id_to_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Set<Comparable> join(Collection<Doc> input, Map<Comparable, Set<Comparable>> joinMap) {
|
||||||
|
Set<Comparable> ids = new HashSet<Comparable>();
|
||||||
|
for (Doc doc : input) {
|
||||||
|
Collection<Comparable> output = joinMap.get(doc.id);
|
||||||
|
if (output == null) continue;
|
||||||
|
ids.addAll(output);
|
||||||
|
}
|
||||||
|
return ids;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -26,6 +26,7 @@ import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.core.CoreContainer;
|
import org.apache.solr.core.CoreContainer;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.util.ExternalPaths;
|
import org.apache.solr.util.ExternalPaths;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -66,7 +67,7 @@ public abstract class MultiCoreExampleTestBase extends SolrExampleTestBase
|
||||||
protected abstract SolrServer getSolrAdmin();
|
protected abstract SolrServer getSolrAdmin();
|
||||||
protected abstract SolrServer getSolrCore(String name);
|
protected abstract SolrServer getSolrCore(String name);
|
||||||
|
|
||||||
|
@Test
|
||||||
public void testMultiCore() throws Exception
|
public void testMultiCore() throws Exception
|
||||||
{
|
{
|
||||||
UpdateRequest up = new UpdateRequest();
|
UpdateRequest up = new UpdateRequest();
|
||||||
|
@ -79,6 +80,8 @@ public abstract class MultiCoreExampleTestBase extends SolrExampleTestBase
|
||||||
// Add something to each core
|
// Add something to each core
|
||||||
SolrInputDocument doc = new SolrInputDocument();
|
SolrInputDocument doc = new SolrInputDocument();
|
||||||
doc.setField( "id", "AAA" );
|
doc.setField( "id", "AAA" );
|
||||||
|
doc.setField( "name", "AAA1" );
|
||||||
|
doc.setField( "type", "BBB1" );
|
||||||
doc.setField( "core0", "yup" );
|
doc.setField( "core0", "yup" );
|
||||||
|
|
||||||
// Add to core0
|
// Add to core0
|
||||||
|
@ -96,6 +99,8 @@ public abstract class MultiCoreExampleTestBase extends SolrExampleTestBase
|
||||||
|
|
||||||
// Add to core1
|
// Add to core1
|
||||||
doc.setField( "id", "BBB" );
|
doc.setField( "id", "BBB" );
|
||||||
|
doc.setField( "name", "BBB1" );
|
||||||
|
doc.setField( "type", "AAA1" );
|
||||||
doc.setField( "core1", "yup" );
|
doc.setField( "core1", "yup" );
|
||||||
doc.removeField( "core0" );
|
doc.removeField( "core0" );
|
||||||
up.add( doc );
|
up.add( doc );
|
||||||
|
@ -124,6 +129,12 @@ public abstract class MultiCoreExampleTestBase extends SolrExampleTestBase
|
||||||
assertEquals( 0, getSolrCore1().query( new SolrQuery( "id:AAA" ) ).getResults().size() );
|
assertEquals( 0, getSolrCore1().query( new SolrQuery( "id:AAA" ) ).getResults().size() );
|
||||||
assertEquals( 1, getSolrCore1().query( new SolrQuery( "id:BBB" ) ).getResults().size() );
|
assertEquals( 1, getSolrCore1().query( new SolrQuery( "id:BBB" ) ).getResults().size() );
|
||||||
|
|
||||||
|
// cross-core join
|
||||||
|
assertEquals( 0, getSolrCore0().query( new SolrQuery( "{!join from=type to=name}*:*" ) ).getResults().size() ); // normal join
|
||||||
|
assertEquals( 1, getSolrCore0().query( new SolrQuery( "{!join from=type to=name fromIndex=core1}id:BBB" ) ).getResults().size() );
|
||||||
|
assertEquals( 1, getSolrCore1().query( new SolrQuery( "{!join from=type to=name fromIndex=core0}id:AAA" ) ).getResults().size() );
|
||||||
|
|
||||||
|
|
||||||
// Now test reloading it should have a newer open time
|
// Now test reloading it should have a newer open time
|
||||||
String name = "core0";
|
String name = "core0";
|
||||||
SolrServer coreadmin = getSolrAdmin();
|
SolrServer coreadmin = getSolrAdmin();
|
||||||
|
|
Loading…
Reference in New Issue