LUCENE-3602: Added query time joining.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1232223 13f79535-47bb-0310-9956-ffa450edef68
Martijn van Groningen 2012-01-16 23:16:35 +00:00
parent 23bd21c968
commit 7cdb8028c1
6 changed files with 715 additions and 1 deletion


@@ -58,6 +58,8 @@ New Features
way as DirectSpellChecker. This can be used to merge top-N results from more than one
SpellChecker. (James Dyer via Robert Muir)
* LUCENE-3602: Added query time joining under the join module. (Martijn van Groningen, Michael McCandless)
API Changes
* LUCENE-2606: Changed RegexCapabilities interface to fix thread


@@ -0,0 +1,61 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import java.io.IOException;
/**
* Utility for query time joining using {@link TermsQuery} and {@link TermsCollector}.
*
* @lucene.experimental
*/
public final class JoinUtil {
// No instances allowed
private JoinUtil() {
}
/**
* Method for query time joining.
* <p/>
* Execute the returned query with an {@link IndexSearcher} to retrieve all documents whose terms in the
* to field match the terms collected from the from field of documents matching the specified fromQuery.
*
* @param fromField The from field to join from
* @param multipleValuesPerDocument Whether the from field has multiple terms per document
* @param toField The to field to join to
* @param fromQuery The query to match documents on the from side
* @param fromSearcher The searcher that executed the specified fromQuery
* @return a {@link Query} instance that can be used to join documents based on the
* terms in the from and to field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String fromField,
boolean multipleValuesPerDocument,
String toField,
Query fromQuery,
IndexSearcher fromSearcher) throws IOException {
// First pass: collect all terms in the from field from documents matching the from query.
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
fromSearcher.search(fromQuery, termsCollector);
// Second pass: the returned query matches documents whose to field contains any collected term.
return new TermsQuery(toField, termsCollector.getCollectorTerms());
}
}
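A brief usage sketch may help; the field names, queries, and searcher variables below are hypothetical:

// First pass: run fromQuery on the from side and collect all "authorId" terms.
Query fromQuery = new TermQuery(new Term("name", "john"));
Query joinQuery = JoinUtil.createJoinQuery("authorId", false, "authorRef", fromQuery, fromSearcher);
// Second pass: the returned query matches documents whose "authorRef" field contains a collected term.
TopDocs hits = toSearcher.search(joinQuery, 10); // toSearcher may be the same searcher as fromSearcher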


@@ -0,0 +1,123 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import java.io.IOException;
/**
* A collector that collects all terms from a specified field for the documents matching the query.
*
* @lucene.experimental
*/
abstract class TermsCollector extends Collector {
final String field;
final BytesRefHash collectorTerms = new BytesRefHash();
TermsCollector(String field) {
this.field = field;
}
public BytesRefHash getCollectorTerms() {
return collectorTerms;
}
public void setScorer(Scorer scorer) throws IOException {
}
public boolean acceptsDocsOutOfOrder() {
return true;
}
/**
* Chooses the right {@link TermsCollector} implementation.
*
* @param field The field to collect terms for
* @param multipleValuesPerDocument Whether the field to collect terms for has multiple values per document.
* @return a {@link TermsCollector} instance
*/
static TermsCollector create(String field, boolean multipleValuesPerDocument) {
return multipleValuesPerDocument ? new MV(field) : new SV(field);
}
// impl that works with multiple values per document
static class MV extends TermsCollector {
private DocTermOrds docTermOrds;
private TermsEnum docTermsEnum;
private DocTermOrds.TermOrdsIterator reuse;
MV(String field) {
super(field);
}
public void collect(int doc) throws IOException {
reuse = docTermOrds.lookup(doc, reuse);
int[] buffer = new int[5];
int chunk;
// Read this document's term ords in chunks, resolving each ord to its term.
do {
chunk = reuse.read(buffer);
if (chunk == 0) {
return;
}
for (int idx = 0; idx < chunk; idx++) {
int key = buffer[idx];
docTermsEnum.seekExact((long) key);
collectorTerms.add(docTermsEnum.term());
}
} while (chunk >= buffer.length); // a full buffer means there may be more ords to read
}
public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader, field);
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader);
reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
}
}
// impl that works with single value per document
static class SV extends TermsCollector {
final BytesRef spare = new BytesRef();
private FieldCache.DocTerms fromDocTerms;
SV(String field) {
super(field);
}
public void collect(int doc) throws IOException {
collectorTerms.add(fromDocTerms.getTerm(doc, spare));
}
public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field);
}
}
}
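For context, a sketch of how JoinUtil drives this collector; the field name and query are hypothetical:

// create() picks the SV or MV implementation based on the field's values-per-document.
TermsCollector collector = TermsCollector.create("authorId", false);
searcher.search(new TermQuery(new Term("name", "john")), collector);
BytesRefHash fromTerms = collector.getCollectorTerms(); // later wrapped in a TermsQuery on the to side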


@@ -0,0 +1,135 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import java.io.IOException;
import java.util.Comparator;
/**
* A query over a set of terms in a specific field. This query matches documents that have one or more terms in
* the specified field matching the terms in the set.
*
* @lucene.experimental
*/
class TermsQuery extends MultiTermQuery {
private final BytesRefHash terms;
/**
* @param field The field that should contain the terms specified in the terms parameter
* @param terms The terms that matching documents should have. The terms must be sorted by natural order.
*/
TermsQuery(String field, BytesRefHash terms) {
super(field);
this.terms = terms;
}
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (this.terms.size() == 0) {
return TermsEnum.EMPTY;
}
return new SeekingTermSetTermsEnum(terms.iterator(null), this.terms);
}
public String toString(String string) {
return "TermsQuery{" +
"field=" + field +
'}';
}
static class SeekingTermSetTermsEnum extends FilteredTermsEnum {
private final BytesRefHash terms;
private final int[] ords;
private final int lastElement;
private final BytesRef lastTerm;
private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comparator;
private BytesRef seekTerm;
private int upto = 0;
SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms) throws IOException {
super(tenum);
this.terms = terms;
lastElement = terms.size() - 1;
ords = terms.sort(comparator = tenum.getComparator());
lastTerm = terms.get(ords[lastElement], new BytesRef());
seekTerm = terms.get(ords[upto], spare);
}
@Override
protected BytesRef nextSeekTerm(BytesRef currentTerm) throws IOException {
BytesRef temp = seekTerm;
seekTerm = null;
return temp;
}
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
if (comparator.compare(term, lastTerm) > 0) {
return AcceptStatus.END;
}
BytesRef currentTerm = terms.get(ords[upto], spare);
if (comparator.compare(term, currentTerm) == 0) {
if (upto == lastElement) {
return AcceptStatus.YES;
} else {
seekTerm = terms.get(ords[++upto], spare);
return AcceptStatus.YES_AND_SEEK;
}
} else {
if (upto == lastElement) {
return AcceptStatus.NO;
} else { // Our current term doesn't match the given term.
int cmp;
do { // We may be behind the given term by more than one step. Keep incrementing until we're at the same term or higher.
if (upto == lastElement) {
return AcceptStatus.NO;
}
// Typically the terms dict is a superset of the query's terms, so it's unusual that we have to skip many of
// our terms; that's why we don't do a binary search here.
seekTerm = terms.get(ords[++upto], spare);
} while ((cmp = comparator.compare(seekTerm, term)) < 0);
if (cmp == 0) {
if (upto == lastElement) {
return AcceptStatus.YES;
}
seekTerm = terms.get(ords[++upto], spare);
return AcceptStatus.YES_AND_SEEK;
} else {
return AcceptStatus.NO_AND_SEEK;
}
}
}
}
}
}
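A sketch of how this (package-private) query is assembled; the field and terms are hypothetical, and in practice the BytesRefHash comes from TermsCollector.getCollectorTerms():

BytesRefHash fromTerms = new BytesRefHash();
fromTerms.add(new BytesRef("1"));
fromTerms.add(new BytesRef("4"));
// Matches documents whose "productId" field contains the term "1" or "4".
Query query = new TermsQuery("productId", fromTerms);

The enum above leapfrogs between the sorted query terms and the segment's terms dictionary instead of scanning either linearly, which is why SeekingTermSetTermsEnum sorts the BytesRefHash with the terms enum's comparator.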


@@ -1,7 +1,11 @@
<html>
<body>
<p>This module supports index-time joins while searching, where joined
<p>This module supports index-time and query-time joins.</p>
<h2>Index-time joins</h2>
<p>The index-time joining supports joins while searching, where joined
documents are indexed as a single document block using
{@link org.apache.lucene.index.IndexWriter#addDocuments}. This is useful for any normalized content (XML documents or database tables). In database terms, all rows for all
joined tables matching a single row of the primary table must be
@@ -34,5 +38,37 @@
org.apache.lucene.search.join.ToChildBlockJoinQuery}. This wraps
any query matching parent documents, creating the joined query
matching only child documents.
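<p>A minimal index-time sketch (field names hypothetical; it assumes the parent document is the last document in each block and that {@link org.apache.lucene.search.join.ToChildBlockJoinQuery} takes a parent query, a filter identifying parent documents, and a doScores flag):</p>
<pre class="prettyprint">
// Index one parent and its children as a single block; the parent comes last.
List&lt;Document&gt; block = new ArrayList&lt;Document&gt;();
block.add(childDoc1);
block.add(childDoc2);
block.add(parentDoc);
writer.addDocuments(block);

// Join from matching parent documents down to their children.
Filter parentsFilter = new CachingWrapperFilter(
    new QueryWrapperFilter(new TermQuery(new Term("docType", "parent"))));
Query parentQuery = new TermQuery(new Term("name", "name2"));
Query childJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, false);
</pre>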
<h2>Search-time joins</h2>
<p>
Query-time joining is term-based and implemented as a two-pass search. The first pass collects all terms from the fromField
of documents matching the fromQuery. The second pass returns all documents that have terms in the toField matching the terms
collected in the first pass.
</p>
<p>Query time joining has the following input:</p>
<ul>
<li><code>fromField</code>: The from field to join from.
<li><code>fromQuery</code>: The query executed to collect the from terms. This is usually the user specified query.
<li><code>multipleValuesPerDocument</code>: Whether the fromField contains more than one value per document.
<li><code>toField</code>: The to field to join to.
</ul>
<p>
Query-time joining is accessible through a single static method. The caller supplies this method
with the input described above and an <code>IndexSearcher</code> from which the from terms are collected. The returned
query can be executed with the same <code>IndexSearcher</code>, but also with a different <code>IndexSearcher</code>.
Example usage of {@link org.apache.lucene.search.join.JoinUtil#createJoinQuery(String, boolean, String, org.apache.lucene.search.Query, org.apache.lucene.search.IndexSearcher)}:
</p>
<pre class="prettyprint">
String fromField = "from"; // Name of the from field
boolean multipleValuesPerDocument = false; // Set to true only when your fromField has multiple values per document in your index
String toField = "to"; // Name of the to field
Query fromQuery = new TermQuery(new Term("content", searchTerm)); // Query executed to collect the from values to join with the to values
Query joinQuery = JoinUtil.createJoinQuery(fromField, multipleValuesPerDocument, toField, fromQuery, fromSearcher);
TopDocs topDocs = toSearcher.search(joinQuery, 10); // Note: toSearcher can be the same as the fromSearcher
// Render topDocs...
</pre>
</body>
</html>


@@ -0,0 +1,357 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.Test;
import java.io.IOException;
import java.util.*;
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
final String idField = "id";
final String toField = "productId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random,
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
doc.add(new Field("description", "random text", TextField.TYPE_STORED));
doc.add(new Field("name", "name1", TextField.TYPE_STORED));
doc.add(new Field(idField, "1", TextField.TYPE_STORED));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new Field("price", "10.0", TextField.TYPE_STORED));
doc.add(new Field(idField, "2", TextField.TYPE_STORED));
doc.add(new Field(toField, "1", TextField.TYPE_STORED));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new Field("price", "20.0", TextField.TYPE_STORED));
doc.add(new Field(idField, "3", TextField.TYPE_STORED));
doc.add(new Field(toField, "1", TextField.TYPE_STORED));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new Field("description", "more random text", TextField.TYPE_STORED));
doc.add(new Field("name", "name2", TextField.TYPE_STORED));
doc.add(new Field(idField, "4", TextField.TYPE_STORED));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new Field("price", "10.0", TextField.TYPE_STORED));
doc.add(new Field(idField, "5", TextField.TYPE_STORED));
doc.add(new Field(toField, "4", TextField.TYPE_STORED));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new Field("price", "20.0", TextField.TYPE_STORED));
doc.add(new Field(idField, "6", TextField.TYPE_STORED));
doc.add(new Field(toField, "4", TextField.TYPE_STORED));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Search for product
Query joinQuery =
JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
joinQuery = JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for offer
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher);
result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits);
assertEquals(3, result.scoreDocs[0].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
@Test
public void testSingleValueRandomJoin() throws Exception {
int maxIndexIter = _TestUtil.nextInt(random, 6, 12);
int maxSearchIter = _TestUtil.nextInt(random, 13, 26);
executeRandomJoin(false, maxIndexIter, maxSearchIter);
}
@Test
// This test takes more time, which is why the number of iterations is smaller.
public void testMultiValueRandomJoin() throws Exception {
int maxIndexIter = _TestUtil.nextInt(random, 3, 6);
int maxSearchIter = _TestUtil.nextInt(random, 6, 12);
executeRandomJoin(true, maxIndexIter, maxSearchIter);
}
private void executeRandomJoin(boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter) throws Exception {
for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
if (VERBOSE) {
System.out.println("indexIter=" + indexIter);
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random,
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())
);
int numberOfDocumentsToIndex = _TestUtil.nextInt(random, 87, 764);
IndexIterationContext context = createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument);
IndexReader topLevelReader = w.getReader();
w.close();
for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
if (VERBOSE) {
System.out.println("searchIter=" + searchIter);
}
IndexSearcher indexSearcher = newSearcher(topLevelReader);
int r = random.nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
FixedBitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
System.out.println("actualQuery=" + actualQuery);
}
Query joinQuery;
if (from) {
joinQuery = JoinUtil.createJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher);
} else {
joinQuery = JoinUtil.createJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher);
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
// We need to know all documents that match. TopDocs doesn't provide that, and using it would also mean testing TopDocsCollector...
final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
indexSearcher.search(joinQuery, new Collector() {
int docBase;
public void collect(int doc) throws IOException {
actualResult.set(doc + docBase);
}
public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
docBase = context.docBase;
}
public void setScorer(Scorer scorer) throws IOException {
}
public boolean acceptsDocsOutOfOrder() {
return true;
}
});
if (VERBOSE) {
System.out.println("expected cardinality:" + expectedResult.cardinality());
DocIdSetIterator iterator = expectedResult.iterator();
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format("Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
System.out.println("actual cardinality:" + actualResult.cardinality());
iterator = actualResult.iterator();
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format("Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
}
assertEquals(expectedResult, actualResult);
}
topLevelReader.close();
dir.close();
}
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter writer, boolean multipleValuesPerDocument) throws IOException {
return createContext(nDocs, writer, writer, multipleValuesPerDocument);
}
private IndexIterationContext createContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, boolean multipleValuesPerDocument) throws IOException {
IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / 2;
context.randomUniqueValues = new String[numRandomValues];
Set<String> trackSet = new HashSet<String>();
context.randomFrom = new boolean[numRandomValues];
for (int i = 0; i < numRandomValues; i++) {
String uniqueRandomValue;
do {
uniqueRandomValue = _TestUtil.randomRealisticUnicodeString(random);
// uniqueRandomValue = _TestUtil.randomSimpleString(random);
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
// Values must be unique; empty strings aren't allowed.
trackSet.add(uniqueRandomValue);
context.randomFrom[i] = random.nextBoolean();
context.randomUniqueValues[i] = uniqueRandomValue;
}
for (int i = 0; i < nDocs; i++) {
String id = Integer.toString(i);
int randomI = random.nextInt(context.randomUniqueValues.length);
String value = context.randomUniqueValues[randomI];
Document document = new Document();
document.add(newField(random, "id", id, TextField.TYPE_STORED));
document.add(newField(random, "value", value, TextField.TYPE_STORED));
boolean from = context.randomFrom[randomI];
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random.nextInt(10) : 1;
RandomDoc doc = new RandomDoc(id, numberOfLinkValues, value);
for (int j = 0; j < numberOfLinkValues; j++) {
String linkValue = context.randomUniqueValues[random.nextInt(context.randomUniqueValues.length)];
doc.linkValues.add(linkValue);
if (from) {
if (!context.fromDocuments.containsKey(linkValue)) {
context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
}
if (!context.randomValueFromDocs.containsKey(value)) {
context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
}
context.fromDocuments.get(linkValue).add(doc);
context.randomValueFromDocs.get(value).add(doc);
document.add(newField(random, "from", linkValue, TextField.TYPE_STORED));
} else {
if (!context.toDocuments.containsKey(linkValue)) {
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
}
if (!context.randomValueToDocs.containsKey(value)) {
context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
}
context.toDocuments.get(linkValue).add(doc);
context.randomValueToDocs.get(value).add(doc);
document.add(newField(random, "to", linkValue, TextField.TYPE_STORED));
}
}
final RandomIndexWriter w;
if (from) {
w = fromWriter;
} else {
w = toWriter;
}
w.addDocument(document);
if (random.nextInt(10) == 4) {
w.commit();
}
if (VERBOSE) {
System.out.println("Added document[" + i + "]: " + document);
}
}
return context;
}
private FixedBitSet createExpectedResult(String queryValue, boolean from, IndexReader topLevelReader, IndexIterationContext context) throws IOException {
final Map<String, List<RandomDoc>> randomValueDocs;
final Map<String, List<RandomDoc>> linkValueDocuments;
if (from) {
randomValueDocs = context.randomValueFromDocs;
linkValueDocuments = context.toDocuments;
} else {
randomValueDocs = context.randomValueToDocs;
linkValueDocuments = context.fromDocuments;
}
FixedBitSet expectedResult = new FixedBitSet(topLevelReader.maxDoc());
List<RandomDoc> matchingDocs = randomValueDocs.get(queryValue);
if (matchingDocs == null) {
return new FixedBitSet(topLevelReader.maxDoc());
}
for (RandomDoc matchingDoc : matchingDocs) {
for (String linkValue : matchingDoc.linkValues) {
List<RandomDoc> otherMatchingDocs = linkValueDocuments.get(linkValue);
if (otherMatchingDocs == null) {
continue;
}
for (RandomDoc otherSideDoc : otherMatchingDocs) {
DocsEnum docsEnum = MultiFields.getTermDocsEnum(topLevelReader, MultiFields.getLiveDocs(topLevelReader), "id", new BytesRef(otherSideDoc.id), false);
assert docsEnum != null;
int doc = docsEnum.nextDoc();
expectedResult.set(doc);
}
}
}
return expectedResult;
}
private static class IndexIterationContext {
String[] randomUniqueValues;
boolean[] randomFrom;
Map<String, List<RandomDoc>> fromDocuments = new HashMap<String, List<RandomDoc>>();
Map<String, List<RandomDoc>> toDocuments = new HashMap<String, List<RandomDoc>>();
Map<String, List<RandomDoc>> randomValueFromDocs = new HashMap<String, List<RandomDoc>>();
Map<String, List<RandomDoc>> randomValueToDocs = new HashMap<String, List<RandomDoc>>();
}
private static class RandomDoc {
final String id;
final List<String> linkValues;
final String value;
private RandomDoc(String id, int numberOfLinkValues, String value) {
this.id = id;
linkValues = new ArrayList<String>(numberOfLinkValues);
this.value = value;
}
}
}