SOLR-2950: Improve QEC performance by dropping field cache use and keeping a local smaller map

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1220983 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2011-12-19 22:39:01 +00:00
parent daa97d0be4
commit f42b2ffd63
10 changed files with 481 additions and 424 deletions

View File

@ -1,4 +1,6 @@
/**
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -15,8 +17,6 @@
* limitations under the License.
*/
package org.apache.lucene.search.grouping;
import java.util.Arrays;
/**

View File

@ -0,0 +1,48 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.Test;
/**
 * Exercises the basic {@link SentinelIntSet} operations used by this test:
 * {@code exists}, {@code put}, {@code size}, {@code find}, {@code clear},
 * {@code hash}, and the {@code rehashCount} value after the set has grown.
 */
public class TestSentinelIntSet extends LuceneTestCase {

  @Test
  public void test() throws Exception {
    final SentinelIntSet ints = new SentinelIntSet(10, -1);

    // A key is reported as present only after it has been put.
    final boolean presentBeforePut = ints.exists(50);
    assertFalse(presentBeforePut);
    ints.put(50);
    final boolean presentAfterPut = ints.exists(50);
    assertTrue(presentAfterPut);
    assertEquals(1, ints.size());

    // Looking up a key that was never added yields -11 here and leaves the size untouched.
    final int missingLookup = ints.find(10);
    assertEquals(-11, missingLookup);
    assertEquals(1, ints.size());

    // Clearing drops every entry.
    ints.clear();
    assertEquals(0, ints.size());

    // For the key 50 the hash is expected to be the key itself.
    assertEquals(50, ints.hash(50));

    // force a rehash by adding the keys 0..19
    int key = 0;
    while (key < 20) {
      ints.put(key);
      key++;
    }
    assertEquals(20, ints.size());
    assertEquals(24, ints.rehashCount);
  }
}

View File

@ -21,7 +21,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
import org.apache.lucene.search.grouping.SentinelIntSet;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.SentinelIntSet;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;

View File

@ -20,7 +20,7 @@ package org.apache.lucene.search.grouping.term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.SentinelIntSet;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;

View File

@ -20,7 +20,7 @@ package org.apache.lucene.search.grouping.term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
import org.apache.lucene.search.grouping.SentinelIntSet;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;

View File

@ -22,7 +22,7 @@ import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.SentinelIntSet;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;

View File

@ -234,6 +234,9 @@ Optimizations
DirectUpdateHandler2.numDocsPending stats attribute.
(Alexey Serba, Mark Miller)
* SOLR-2950: The QueryElevationComponent now avoids using the FieldCache and looking up
every document id (gsingers, yonik)
Bug Fixes
----------------------

View File

@ -17,6 +17,46 @@
package org.apache.solr.handler.component;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.QueryElevationParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.transform.EditorialMarkerFactory;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.VersionedFile;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@ -25,53 +65,12 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import org.apache.solr.common.params.QueryElevationParams;
import org.apache.solr.response.transform.EditorialMarkerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.solr.cloud.ZkController;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.VersionedFile;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.apache.solr.request.SolrQueryRequest;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
* A component to elevate some documents to the top of the result set.
*
*
* @since solr 1.3
*/
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware
{
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware {
private static Logger log = LoggerFactory.getLogger(QueryElevationComponent.class);
// Constants used in solrconfig.xml
@ -102,11 +101,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
final Map<BytesRef, Integer> priority;
final Set<String> ids;
// use singletons so hashCode/equals on Sort will just work
final FieldComparatorSource comparatorSource;
ElevationObj( String qstr, List<String> elevate, List<String> exclude ) throws IOException
{
ElevationObj(String qstr, List<String> elevate, List<String> exclude) throws IOException {
this.text = qstr;
this.analyzed = getAnalyzedQuery(this.text);
this.ids = new HashSet<String>();
@ -125,27 +120,22 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
if (exclude == null || exclude.isEmpty()) {
this.exclude = null;
}
else {
} else {
this.exclude = new BooleanClause[exclude.size()];
for (int i = 0; i < exclude.size(); i++) {
TermQuery tq = new TermQuery(new Term(idField, idSchemaFT.readableToIndexed(exclude.get(i))));
this.exclude[i] = new BooleanClause(tq, BooleanClause.Occur.MUST_NOT);
}
}
this.comparatorSource = new ElevationComparatorSource(priority);
}
}
@Override
public void init( NamedList args )
{
public void init(NamedList args) {
this.initArgs = SolrParams.toSolrParams(args);
}
public void inform(SolrCore core)
{
public void inform(SolrCore core) {
String a = initArgs.get(FIELD_TYPE);
if (a != null) {
FieldType ft = core.getSchema().getFieldTypes().get(a);
@ -215,15 +205,14 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
}
}
}
}
catch( Exception ex ) {
} catch (Exception ex) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Error initializing QueryElevationComponent.", ex, false);
}
}
//get the elevation map from the data dir
Map<String, ElevationObj> getElevationMap( IndexReader reader, SolrCore core ) throws Exception
{
Map<String, ElevationObj> getElevationMap(IndexReader reader, SolrCore core) throws Exception {
synchronized (elevationCache) {
Map<String, ElevationObj> map = elevationCache.get(null);
if (map != null) return map;
@ -245,9 +234,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
return map;
}
}
//load up the elevation map
private Map<String, ElevationObj> loadElevationMap( Config cfg ) throws IOException
{
private Map<String, ElevationObj> loadElevationMap(Config cfg) throws IOException {
XPath xpath = XPathFactory.newInstance().newXPath();
Map<String, ElevationObj> map = new HashMap<String, ElevationObj>();
NodeList nodes = (NodeList) cfg.evaluate("elevate/query", XPathConstants.NODESET);
@ -258,8 +247,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
NodeList children = null;
try {
children = (NodeList) xpath.evaluate("doc", node, XPathConstants.NODESET);
}
catch (XPathExpressionException e) {
} catch (XPathExpressionException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"query requires '<doc .../>' child");
}
@ -291,10 +279,10 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
/**
* Helpful for testing without loading config.xml
*
* @throws IOException
*/
void setTopQueryResults( IndexReader reader, String query, String[] ids, String[] ex ) throws IOException
{
void setTopQueryResults(IndexReader reader, String query, String[] ids, String[] ex) throws IOException {
if (ids == null) {
ids = new String[0];
}
@ -311,8 +299,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
elev.put(obj.analyzed, obj);
}
String getAnalyzedQuery( String query ) throws IOException
{
String getAnalyzedQuery(String query) throws IOException {
if (analyzer == null) {
return query;
}
@ -334,8 +321,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
//---------------------------------------------------------------------------------
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
public void prepare(ResponseBuilder rb) throws IOException {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
// A runtime param can skip
@ -358,8 +344,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
ElevationObj booster = null;
try {
booster = getElevationMap(reader, req.getCore()).get(qstr);
}
catch( Exception ex ) {
} catch (Exception ex) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Error loading elevation", ex);
}
@ -383,28 +368,28 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
rb.setQuery(newq);
}
ElevationComparatorSource comparator = new ElevationComparatorSource(booster);
// if the sort is 'score desc' use a custom sorting method to
// insert documents in their proper place
SortSpec sortSpec = rb.getSortSpec();
if (sortSpec.getSort() == null) {
sortSpec.setSort( new Sort(
new SortField(idField, booster.comparatorSource, false ),
new SortField(null, SortField.Type.SCORE, false)));
}
else {
sortSpec.setSort(new Sort(new SortField[]{
new SortField(idField, comparator, false),
new SortField(null, SortField.Type.SCORE, false)
}));
} else {
// Check if the sort is based on score
boolean modify = false;
SortField[] current = sortSpec.getSort().getSort();
ArrayList<SortField> sorts = new ArrayList<SortField>(current.length + 1);
// Perhaps force it to always sort by score
if (force && current[0].getType() != SortField.Type.SCORE) {
sorts.add( new SortField(idField, booster.comparatorSource, false ) );
sorts.add(new SortField(idField, comparator, false));
modify = true;
}
for (SortField sf : current) {
if (sf.getType() == SortField.Type.SCORE) {
sorts.add( new SortField(idField, booster.comparatorSource, sf.getReverse() ) );
sorts.add(new SortField(idField, comparator, sf.getReverse()));
modify = true;
}
sorts.add(sf);
@ -471,28 +456,30 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
return new URL[]{
new URL("http://wiki.apache.org/solr/QueryElevationComponent")
};
}
catch (MalformedURLException e) {
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
}
}
class ElevationComparatorSource extends FieldComparatorSource {
private final Map<BytesRef,Integer> priority;
private QueryElevationComponent.ElevationObj elevations;
private SentinelIntSet ordSet; //the key half of the map
private BytesRef[] termValues;//the value half of the map
public ElevationComparatorSource( final Map<BytesRef,Integer> boosts) {
this.priority = boosts;
public ElevationComparatorSource(final QueryElevationComponent.ElevationObj elevations) throws IOException {
this.elevations = elevations;
int size = elevations.ids.size();
ordSet = new SentinelIntSet(size, -1);
termValues = new BytesRef[ordSet.keys.length];
}
@Override
public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator<Integer>() {
FieldCache.DocTermsIndex idIndex;
private final int[] values = new int[numHits];
int bottomVal;
private final BytesRef tempBR = new BytesRef();
private int bottomVal;
private TermsEnum termsEnum;
private DocsEnum docsEnum;
Set<String> seen = new HashSet<String>(elevations.ids.size());
@Override
public int compare(int slot1, int slot2) {
@ -505,10 +492,16 @@ class ElevationComparatorSource extends FieldComparatorSource {
}
private int docVal(int doc) throws IOException {
BytesRef id = idIndex.getTerm(doc, tempBR);
Integer prio = priority.get(id);
if (ordSet.size() > 0) {
int slot = ordSet.find(doc);
if (slot >= 0) {
BytesRef id = termValues[slot];
Integer prio = elevations.priority.get(id);
return prio == null ? 0 : prio.intValue();
}
}
return 0;
}
@Override
public int compareBottom(int doc) throws IOException {
@ -522,7 +515,24 @@ class ElevationComparatorSource extends FieldComparatorSource {
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
idIndex = FieldCache.DEFAULT.getTermsIndex(context.reader, fieldname);
//convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have
ordSet.clear();
Fields fields = context.reader.fields();
Terms terms = fields.terms(fieldname);
termsEnum = terms.iterator(termsEnum);
BytesRef term = new BytesRef();
for (String id : elevations.ids) {
term.copyChars(id);
if (seen.contains(id) == false && termsEnum.seekExact(term, false)) {
docsEnum = termsEnum.docs(null, docsEnum, false);
if (docsEnum != null) {
int docId = docsEnum.nextDoc();
termValues[ordSet.put(docId)] = BytesRef.deepCopyOf(term);
seen.add(id);
}
}
}
return this;
}
@ -533,3 +543,6 @@ class ElevationComparatorSource extends FieldComparatorSource {
};
}
}
}

View File

@ -17,13 +17,6 @@
package org.apache.solr.handler.component;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
@ -36,11 +29,14 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.QueryElevationComponent.ElevationObj;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Map;
public class QueryElevationComponentTest extends SolrTestCaseJ4 {
@ -64,6 +60,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
File elevateDataFile = new File(dataDir, "elevate-data.xml");
FileUtils.copyFile(elevateFile, elevateDataFile);
initCore(config,schema);
clearIndex();
assertU(commit());
@ -141,8 +138,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
@Test
public void testInterface() throws Exception
{
public void testInterface() throws Exception {
try {
init("schema12.xml");
SolrCore core = h.getCore();
@ -242,8 +238,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
}
@Test
public void testSorting() throws Exception
{
public void testSorting() throws Exception {
try {
init("schema12.xml");
assertU(adoc("id", "a", "title", "ipod", "str_s1", "a"));
@ -361,8 +356,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
}
// write a test file to boost some docs
private void writeFile( File file, String query, String ... ids ) throws Exception
{
private void writeFile(File file, String query, String... ids) throws Exception {
PrintWriter out = new PrintWriter(new FileOutputStream(file));
out.println("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
out.println("<elevate>");
@ -379,8 +373,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
}
@Test
public void testElevationReloading() throws Exception
{
public void testElevationReloading() throws Exception {
try {
init("schema12.xml");
String testfile = "data-elevation.xml";