Merge branch 'apache-https-master' into jira/solr-8593

Kevin Risden 2016-11-28 11:26:16 -06:00
commit 05a6170d12
150 changed files with 4796 additions and 2314 deletions

View File

@ -6,7 +6,10 @@ as to the usefulness of the tools.
Description of dev-tools/ contents:
./size-estimator-lucene-solr.xls -- Spreadsheet for estimating memory and disk usage in Lucene/Solr
./eclipse -- Used to generate project descriptors for the Eclipse IDE.
./idea -- Similar to Eclipse, but for IntelliJ's IDEA IDE.
./maven -- Mavenizes the Lucene/Solr packages
./scripts -- Odds and ends for building releases, etc.
./doap/ -- Lucene and Solr project descriptors in DOAP RDF format.
./eclipse/ -- Used to generate project descriptors for the Eclipse IDE.
./git/ -- Git documentation and resources.
./idea/ -- Used to generate project descriptors for IntelliJ's IDEA IDE.
./maven/ -- Mavenizes the Lucene/Solr packages
./netbeans/ -- Used to generate project descriptors for the Netbeans IDE.
./scripts/ -- Odds and ends for building releases, etc.

View File

@ -2,4 +2,7 @@ This folder contains the DOAP[1] files for each project.
Upon release, these files should be updated to include new release details.
NOTE: If this folder's contents are moved elsewhere, the website .htaccess
file will need to be updated.
[1] DOAP: https://github.com/edumbill/doap

View File

@ -1,21 +1,35 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<rdf:RDF xml:lang="en"
xmlns="http://usefulinc.com/ns/doap#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:asfext="http://projects.apache.org/ns/asfext#"
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<!--
=======================================================================
This file's canonical URL is: http://lucene.apache.org/core/doap.rdf
Copyright (c) 2016 The Apache Software Foundation.
All rights reserved.
=======================================================================
Note that the canonical URL may redirect to other non-canonical locations.
-->
<Project rdf:about="http://lucene.apache.org/core/">
<created>2001-09-01</created>
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0.txt"/>
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
<name>Apache Lucene Core</name>
<homepage rdf:resource="http://lucene.apache.org/core/" />
<asfext:pmc rdf:resource="http://lucene.apache.org" />

View File

@ -1,21 +1,35 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<rdf:RDF xml:lang="en"
xmlns="http://usefulinc.com/ns/doap#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:asfext="http://projects.apache.org/ns/asfext#"
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<!--
=======================================================================
This file's canonical URL is: http://lucene.apache.org/solr/doap.rdf
Copyright (c) 2007 The Apache Software Foundation.
All rights reserved.
=======================================================================
Note that the canonical URL may redirect to other non-canonical locations.
-->
<Project rdf:about="http://lucene.apache.org/solr/">
<created>2006-01-17</created>
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0.txt"/>
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
<name>Apache Solr</name>
<homepage rdf:resource="http://lucene.apache.org/solr/" />
<asfext:pmc rdf:resource="http://lucene.apache.org" />
@ -348,28 +362,28 @@
</release>
<release>
<Version>
<name>lucene-4.0-BETA</name>
<name>solr-4.0-BETA</name>
<created>2012-08-13</created>
<revision>4.0-BETA</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-4.0-ALPHA</name>
<name>solr-4.0-ALPHA</name>
<created>2012-07-03</created>
<revision>4.0-ALPHA</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-3.6.2</name>
<name>solr-3.6.2</name>
<created>2012-12-25</created>
<revision>3.6.2</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-3.6.1</name>
<name>solr-3.6.1</name>
<created>2012-07-22</created>
<revision>3.6.1</revision>
</Version>

View File

@ -117,6 +117,11 @@ Improvements
control how text is analyzed and converted into a query (Matt Weber
via Mike McCandless)
Optimizations
* LUCENE-7568: Optimize merging when index sorting is used but the
index is already sorted (Jim Ferenczi via Mike McCandless)
Other
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

View File

@ -195,9 +195,10 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs
float maxScore = topDocs.getMaxScore();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
IndexableField storableField = indexSearcher.doc(scoreDoc.doc).getField(classFieldName);
if (storableField != null) {
BytesRef cl = new BytesRef(storableField.stringValue());
IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(classFieldName);
for (IndexableField singleStorableField : storableFields) {
if (singleStorableField != null) {
BytesRef cl = new BytesRef(singleStorableField.stringValue());
//update count
Integer count = classCounts.get(cl);
if (count != null) {
@ -213,6 +214,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
} else {
classBoosts.put(cl, singleBoost);
}
}
}
}
List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();

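Worth spelling out why the loop matters: Document.getField returns only the first value of a multi-valued field, so a document tagged with several classes used to contribute a single vote, while Document.getFields returns them all. A minimal sketch of that counting step; the field name "category" and the helper class are illustrative, not the committed code.

import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;

final class ClassVoteCounter {
  // One vote per stored class value, so multi-valued class fields count fully.
  static void countVotes(Document doc, Map<BytesRef, Integer> classCounts) {
    for (IndexableField field : doc.getFields("category")) {
      BytesRef cl = new BytesRef(field.stringValue());
      classCounts.merge(cl, 1, Integer::sum);
    }
  }
}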
View File

@ -109,6 +109,7 @@ public class KNearestNeighborDocumentClassifier extends KNearestNeighborClassifi
TopDocs knnResults = knnSearch(document);
List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults);
Collections.sort(assignedClasses);
max = Math.min(max, assignedClasses.size());
return assignedClasses.subList(0, max);
}
@ -130,15 +131,14 @@ public class KNearestNeighborDocumentClassifier extends KNearestNeighborClassifi
boost = field2boost[1];
}
String[] fieldValues = document.getValues(fieldName);
mlt.setBoost(true); // we want always to use the boost coming from TF * IDF of the term
if (boost != null) {
mlt.setBoost(true);
mlt.setBoostFactor(Float.parseFloat(boost));
mlt.setBoostFactor(Float.parseFloat(boost)); // this is an additional multiplicative boost coming from the field boost
}
mlt.setAnalyzer(field2analyzer.get(fieldName));
for (String fieldContent : fieldValues) {
mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD));
}
mlt.setBoost(false);
}
Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));

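The behavioural change above: TF*IDF term boosting is no longer forced onto every MoreLikeThis query; it is enabled only when a per-field boost factor was configured. A small sketch of the resulting pattern, assuming reader and boost are supplied by the caller:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;

final class MltBoostSketch {
  static MoreLikeThis configure(IndexReader reader, String boost) {
    MoreLikeThis mlt = new MoreLikeThis(reader);
    if (boost != null) {
      mlt.setBoost(true);                          // TF*IDF term boosting, now opt-in
      mlt.setBoostFactor(Float.parseFloat(boost)); // extra multiplicative field boost
    }
    // boost == null: query terms stay unboosted, the new default
    return mlt;
  }
}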
View File

@ -2503,21 +2503,26 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
-->
<macrodef name="build-changes">
<attribute name="changes.product"/>
<attribute name="doap.property.prefix" default="doap.@{changes.product}"/>
<attribute name="changes.src.file" default="CHANGES.txt"/>
<attribute name="changes.src.doap" default="${dev-tools.dir}/doap/@{changes.product}.rdf"/>
<attribute name="changes.version.dates" default="build/@{doap.property.prefix}.version.dates.csv"/>
<attribute name="changes.target.dir" default="${changes.target.dir}"/>
<attribute name="lucene.javadoc.url" default="${lucene.javadoc.url}"/>
<sequential>
<mkdir dir="@{changes.target.dir}"/>
<xmlproperty keeproot="false" file="@{changes.src.doap}" collapseAttributes="false" prefix="@{doap.property.prefix}"/>
<echo file="@{changes.version.dates}" append="false">${@{doap.property.prefix}.Project.release.Version.revision}&#xA;</echo>
<echo file="@{changes.version.dates}" append="true">${@{doap.property.prefix}.Project.release.Version.created}&#xA;</echo>
<exec executable="${perl.exe}" input="@{changes.src.file}" output="@{changes.target.dir}/Changes.html"
failonerror="true" logError="true">
<arg value="-CSD"/>
<arg value="${changes.src.dir}/changes2html.pl"/>
<arg value="@{changes.product}"/>
<arg value="@{changes.src.doap}"/>
<arg value="@{changes.version.dates}"/>
<arg value="@{lucene.javadoc.url}"/>
</exec>
<delete file="@{changes.target.dir}/jiraVersionList.json"/>
<delete file="@{changes.version.dates}"/>
<copy todir="@{changes.target.dir}">
<fileset dir="${changes.src.dir}" includes="*.css"/>
</copy>

View File

@ -198,7 +198,7 @@ public abstract class DocValuesConsumer implements Closeable {
}
}
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
final long finalCost = cost;
@ -296,7 +296,7 @@ public abstract class DocValuesConsumer implements Closeable {
}
}
final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
final long finalCost = cost;
return new BinaryDocValues() {
@ -397,7 +397,7 @@ public abstract class DocValuesConsumer implements Closeable {
final long finalCost = cost;
final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
return new SortedNumericDocValues() {
@ -555,7 +555,7 @@ public abstract class DocValuesConsumer implements Closeable {
final long finalCost = cost;
final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
return new SortedDocValues() {
private int docID = -1;
@ -721,7 +721,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
}
final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
final long finalCost = cost;

View File

@ -130,7 +130,7 @@ public abstract class NormsConsumer implements Closeable {
}
}
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
return new NumericDocValues() {
private int docID = -1;

View File

@ -117,7 +117,7 @@ public abstract class StoredFieldsWriter implements Closeable {
subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
}
final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
int docCount = 0;
while (true) {

View File

@ -205,7 +205,7 @@ public abstract class TermVectorsWriter implements Closeable {
subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
}
final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
int docCount = 0;
while (true) {

View File

@ -486,7 +486,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
@Override
public int merge(MergeState mergeState) throws IOException {
if (mergeState.segmentInfo.getIndexSort() != null) {
if (mergeState.needsIndexSort) {
// TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
// being copied over...?
return super.merge(mergeState);

View File

@ -730,7 +730,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
@Override
public int merge(MergeState mergeState) throws IOException {
if (mergeState.segmentInfo.getIndexSort() != null) {
if (mergeState.needsIndexSort) {
// TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
// being copied over...?
return super.merge(mergeState);

View File

@ -134,7 +134,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
@Override
public void merge(MergeState mergeState) throws IOException {
if (mergeState.segmentInfo.getIndexSort() != null) {
if (mergeState.needsIndexSort) {
// TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
// being copied over...?
super.merge(mergeState);

View File

@ -62,7 +62,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum {
for(int i=0;i<allSubs.length;i++) {
allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]);
}
this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.segmentInfo.getIndexSort() != null);
this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.needsIndexSort);
}
MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException {

View File

@ -42,7 +42,7 @@ public class MergeState {
/** Maps document IDs from old segments to document IDs in the new segment */
public final DocMap[] docMaps;
// Only used by IW when it must remap deletes that arrived against the merging segmetns while a merge was running:
// Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running:
final DocMap[] leafDocMaps;
/** {@link SegmentInfo} of the newly merged segment. */
@ -81,6 +81,9 @@ public class MergeState {
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
/** Indicates if the index needs to be sorted **/
public boolean needsIndexSort;
/** Sole constructor. */
MergeState(List<CodecReader> originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
@ -143,50 +146,58 @@ public class MergeState {
this.docMaps = buildDocMaps(readers, indexSort);
}
private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
// Remap docIDs around deletions
private DocMap[] buildDeletionDocMaps(List<CodecReader> readers) {
int totalDocs = 0;
int numReaders = readers.size();
DocMap[] docMaps = new DocMap[numReaders];
for (int i = 0; i < numReaders; i++) {
LeafReader reader = readers.get(i);
Bits liveDocs = reader.getLiveDocs();
final PackedLongValues delDocMap;
if (liveDocs != null) {
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
} else {
delDocMap = null;
}
final int docBase = totalDocs;
docMaps[i] = new DocMap() {
@Override
public int get(int docID) {
if (liveDocs == null) {
return docBase + docID;
} else if (liveDocs.get(docID)) {
return docBase + (int) delDocMap.get(docID);
} else {
return -1;
}
}
};
totalDocs += reader.numDocs();
}
return docMaps;
}
private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
if (indexSort == null) {
// no index sort ... we only must map around deletions, and rebase to the merged segment's docID space
int totalDocs = 0;
DocMap[] docMaps = new DocMap[numReaders];
// Remap docIDs around deletions:
for (int i = 0; i < numReaders; i++) {
LeafReader reader = readers.get(i);
Bits liveDocs = reader.getLiveDocs();
final PackedLongValues delDocMap;
if (liveDocs != null) {
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
} else {
delDocMap = null;
}
final int docBase = totalDocs;
docMaps[i] = new DocMap() {
@Override
public int get(int docID) {
if (liveDocs == null) {
return docBase + docID;
} else if (liveDocs.get(docID)) {
return docBase + (int) delDocMap.get(docID);
} else {
return -1;
}
}
};
totalDocs += reader.numDocs();
}
return docMaps;
return buildDeletionDocMaps(readers);
} else {
// do a merge sort of the incoming leaves:
long t0 = System.nanoTime();
DocMap[] result = MultiSorter.sort(indexSort, readers);
if (result == null) {
// already sorted so we can switch back to map around deletions
return buildDeletionDocMaps(readers);
} else {
needsIndexSort = true;
}
long t1 = System.nanoTime();
if (infoStream.isEnabled("SM")) {
infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
@ -233,6 +244,7 @@ public class MergeState {
if (infoStream.isEnabled("SM")) {
infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec));
}
needsIndexSort = true;
leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
leafDocMaps[readers.size()] = new DocMap() {
@Override

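Condensing the hunks above, the new docmap selection in MergeState reads roughly as follows; this is a paraphrase of the diff, with a hypothetical method name, not the verbatim source:

// Condensed paraphrase of MergeState's new docmap selection (method name
// chooseDocMaps is illustrative; timing and logging omitted).
private DocMap[] chooseDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
  if (indexSort == null) {
    return buildDeletionDocMaps(readers);      // only remap around deletions
  }
  DocMap[] sorted = MultiSorter.sort(indexSort, readers);
  if (sorted == null) {
    return buildDeletionDocMaps(readers);      // already in index-sort order
  }
  needsIndexSort = true;                       // consumers must do a sorting merge
  return sorted;
}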
View File

@ -33,7 +33,9 @@ import org.apache.lucene.util.packed.PackedLongValues;
final class MultiSorter {
/** Does a merge sort of the leaves of the incoming reader, returning {@link DocMap} to map each leaf's
* documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */
* documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort!
* Returns null if the merge sort is not needed (segments are already in index sort order).
**/
static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {
// TODO: optimize if only 1 reader is incoming, though that's a rare case
@ -80,8 +82,15 @@ final class MultiSorter {
// merge sort:
int mappedDocID = 0;
int lastReaderIndex = 0;
boolean isSorted = true;
while (queue.size() != 0) {
LeafAndDocID top = queue.top();
if (lastReaderIndex > top.readerIndex) {
// merge sort is needed
isSorted = false;
}
lastReaderIndex = top.readerIndex;
builders[top.readerIndex].add(mappedDocID);
if (top.liveDocs == null || top.liveDocs.get(top.docID)) {
mappedDocID++;
@ -97,6 +106,9 @@ final class MultiSorter {
queue.pop();
}
}
if (isSorted) {
return null;
}
MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount];
for(int i=0;i<leafCount;i++) {

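The isSorted flag in this hunk rewards a worked example: the queue pops documents in global sort order, and a merge sort is only needed if the pop order ever returns to an earlier reader. A self-contained illustration of that check:

final class SortedConcatenationCheck {
  // The merged order needs a real merge sort only if a document from an
  // earlier reader pops out after one from a later reader.
  static boolean isAlreadySorted(int[] readerIndexPopOrder) {
    int lastReaderIndex = 0;
    for (int readerIndex : readerIndexPopOrder) {
      if (lastReaderIndex > readerIndex) {
        return false; // interleaving detected
      }
      lastReaderIndex = readerIndex;
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(isAlreadySorted(new int[] {0, 0, 1, 1, 2})); // true: plain concatenation suffices
    System.out.println(isAlreadySorted(new int[] {0, 1, 0, 2}));    // false: reader 0 reappears, merge sort needed
  }
}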
View File

@ -28,6 +28,7 @@ import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@ -37,14 +38,19 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
@ -79,6 +85,190 @@ import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
public class TestIndexSorting extends LuceneTestCase {
static class AssertingNeedsIndexSortCodec extends FilterCodec {
boolean needsIndexSort;
int numCalls;
AssertingNeedsIndexSortCodec() {
super(TestUtil.getDefaultCodec().getName(), TestUtil.getDefaultCodec());
}
@Override
public PointsFormat pointsFormat() {
final PointsFormat pf = delegate.pointsFormat();
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
final PointsWriter writer = pf.fieldsWriter(state);
return new PointsWriter() {
@Override
public void merge(MergeState mergeState) throws IOException {
// For single segment merge we cannot infer if the segment is already sorted or not.
if (mergeState.docMaps.length > 1) {
assertEquals(needsIndexSort, mergeState.needsIndexSort);
}
++ numCalls;
writer.merge(mergeState);
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
writer.writeField(fieldInfo, values);
}
@Override
public void finish() throws IOException {
writer.finish();
}
@Override
public void close() throws IOException {
writer.close();
}
};
}
@Override
public PointsReader fieldsReader(SegmentReadState state) throws IOException {
return pf.fieldsReader(state);
}
};
}
}
private static void assertNeedsIndexSortMerge(SortField sortField, Consumer<Document> defaultValueConsumer, Consumer<Document> randomValueConsumer) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
iwc.setCodec(codec);
Sort indexSort = new Sort(sortField,
new SortField("id", SortField.Type.INT));
iwc.setIndexSort(indexSort);
LogMergePolicy policy = newLogMergePolicy();
// make sure that merge factor is always > 2
if (policy.getMergeFactor() <= 2) {
policy.setMergeFactor(3);
}
iwc.setMergePolicy(policy);
// add already sorted documents
codec.numCalls = 0;
codec.needsIndexSort = false;
IndexWriter w = new IndexWriter(dir, iwc);
boolean withValues = random().nextBoolean();
for (int i = 100; i < 200; i++) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
doc.add(new IntPoint("point", random().nextInt()));
if (withValues) {
defaultValueConsumer.accept(doc);
}
w.addDocument(doc);
if (i % 10 == 0) {
w.commit();
}
}
Set<Integer> deletedDocs = new HashSet<> ();
int num = random().nextInt(20);
for (int i = 0; i < num; i++) {
int nextDoc = random().nextInt(100);
w.deleteDocuments(new Term("id", Integer.toString(nextDoc)));
deletedDocs.add(nextDoc);
}
w.commit();
w.waitForMerges();
w.forceMerge(1);
assertTrue(codec.numCalls > 0);
// merge sort is needed
codec.numCalls = 0;
codec.needsIndexSort = true;
for (int i = 10; i >= 0; i--) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
doc.add(new IntPoint("point", random().nextInt()));
if (withValues) {
defaultValueConsumer.accept(doc);
}
w.addDocument(doc);
w.commit();
}
w.commit();
w.waitForMerges();
w.forceMerge(1);
assertTrue(codec.numCalls > 0);
// segment sort is needed
codec.needsIndexSort = true;
codec.numCalls = 0;
for (int i = 200; i < 300; i++) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
doc.add(new IntPoint("point", random().nextInt()));
randomValueConsumer.accept(doc);
w.addDocument(doc);
if (i % 10 == 0) {
w.commit();
}
}
w.commit();
w.waitForMerges();
w.forceMerge(1);
assertTrue(codec.numCalls > 0);
w.close();
dir.close();
}
public void testNumericAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortField("foo", SortField.Type.INT),
(doc) -> doc.add(new NumericDocValuesField("foo", 0)),
(doc) -> doc.add(new NumericDocValuesField("foo", random().nextInt())));
}
public void testStringAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortField("foo", SortField.Type.STRING),
(doc) -> doc.add(new SortedDocValuesField("foo", new BytesRef("default"))),
(doc) -> doc.add(new SortedDocValuesField("foo", TestUtil.randomBinaryTerm(random()))));
}
public void testMultiValuedNumericAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortedNumericSortField("foo", SortField.Type.INT),
(doc) -> {
doc.add(new SortedNumericDocValuesField("foo", Integer.MIN_VALUE));
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedNumericDocValuesField("foo", random().nextInt()));
}
},
(doc) -> {
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedNumericDocValuesField("foo", random().nextInt()));
}
});
}
public void testMultiValuedStringAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortedSetSortField("foo", false),
(doc) -> {
doc.add(new SortedSetDocValuesField("foo", new BytesRef("")));
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedSetDocValuesField("foo", TestUtil.randomBinaryTerm(random())));
}
},
(doc) -> {
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedSetDocValuesField("foo", TestUtil.randomBinaryTerm(random())));
}
});
}
public void testBasicString() throws Exception {
Directory dir = newDirectory();

View File

@ -13,9 +13,6 @@ com.carrotsearch.randomizedtesting.version = 2.4.0
/com.carrotsearch/hppc = 0.7.1
com.codahale.metrics.version = 3.0.1
/com.codahale.metrics/metrics-core = ${com.codahale.metrics.version}
/com.codahale.metrics/metrics-healthchecks = ${com.codahale.metrics.version}
/com.cybozu.labs/langdetect = 1.1-20120112
/com.drewnoakes/metadata-extractor = 2.8.1
@ -74,6 +71,10 @@ com.sun.jersey.version = 1.9
/hsqldb/hsqldb = 1.8.0.10
/io.airlift/slice = 0.10
io.dropwizard.metrics.version = 3.1.2
/io.dropwizard.metrics/metrics-core = ${io.dropwizard.metrics.version}
/io.dropwizard.metrics/metrics-healthchecks = ${io.dropwizard.metrics.version}
io.netty.netty-all.version = 4.0.36.Final
/io.netty/netty-all = ${io.netty.netty-all.version}
@ -250,7 +251,7 @@ org.codehaus.janino.version = 2.7.6
/org.codehaus.woodstox/woodstox-core-asl = 4.4.1
/org.easymock/easymock = 3.0
org.eclipse.jetty.version = 9.3.8.v20160314
org.eclipse.jetty.version = 9.3.14.v20161028
/org.eclipse.jetty/jetty-continuation = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-deploy = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-http = ${org.eclipse.jetty.version}

View File

@ -0,0 +1 @@
4ba272cee2e367766dfdc1901c960de352160d41

View File

@ -1 +0,0 @@
dec4dfc43617637694762822ef99c8373c944c98

View File

@ -0,0 +1 @@
ea3800883f79f757b2635a737bb71bb21e90cf19

View File

@ -1 +0,0 @@
0127feb7407f4137ff4295b5fa2895845db56710

View File

@ -0,0 +1 @@
52d796b58c3a997e59e6b47c4bf022cedcba3514

View File

@ -1 +0,0 @@
371e3c2b72d9a9737579ec0fdfd6a2a3ab8b8141

View File

@ -0,0 +1 @@
791df6c55ad62841ff518ba6634e905a95567260

View File

@ -1 +0,0 @@
da8366f602f35d4c3177cb081472e2fc4abe04ea

View File

@ -0,0 +1 @@
b5714a6005387b2a361d5b39a3a37d4df1892e62

View File

@ -1 +0,0 @@
ea5f25d3326d7745d9c21d405dcf6f878efbd5fb

View File

@ -0,0 +1 @@
fbf89f6f3b995992f82ec09104ab9a75d31d281b

View File

@ -1 +0,0 @@
01d53c7a7e7715e67d6f4edec6c5b328ee162e65

View File

@ -23,7 +23,6 @@
use strict;
use warnings;
use XML::Simple;
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
my $github_pull_request_prefix = 'https://github.com/apache/lucene-solr/pull/';
@ -823,26 +822,33 @@ sub get_release_date {
# Pulls release dates from the project DOAP file.
#
sub setup_release_dates {
my %release_dates;
my %release_dates = ();
my $file = shift;
print STDERR "file: $file\n";
open(FILE, "<$file") || die "could not open $file: $!";
my $version_list = <FILE>;
my $created_list = <FILE>;
close(FILE);
my $project_info = XMLin($file)->{Project};
my $version;
$version_list =~ s/^\s+|\s+$//g;
my @versions = split /\s*,\s*/, $version_list;
$created_list =~ s/^\s+|\s+$//g;
my @created = split /\s*,\s*/, $created_list;
if (scalar(@versions) != scalar(@created)) {
die $file . " contains" . scalar(@versions) . " versions but " . scalar(@created) . " creation dates.";
}
my $date;
for my $release (@{$project_info->{release}}) {
$version = $release->{Version};
if ($version->{created}) {
$date = normalize_date($version->{created});
my $version_name = $version->{revision};
$release_dates{$version->{revision}} = $date;
if ($version_name =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) {
my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0"
$release_dates{$padded_version_name} = $date;
} elsif ($version_name =~ /\.0(?=[^.0-9]|$)/) {
my $trimmed_version_name = $version_name;
$trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0"
$release_dates{$trimmed_version_name} = $date;
}
for my $pos (0..$#versions) {
$date = normalize_date($created[$pos]);
$release_dates{$versions[$pos]} = $date;
if ($versions[$pos] =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) {
my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0"
$release_dates{$padded_version_name} = $date;
} elsif ($versions[$pos] =~ /\.0(?=[^.0-9]|$)/) {
my $trimmed_version_name = $versions[$pos];
$trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0"
$release_dates{$trimmed_version_name} = $date;
}
}
return %release_dates;

View File

@ -156,7 +156,7 @@ public class MockRandomMergePolicy extends MergePolicy {
if (LuceneTestCase.VERBOSE) {
System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + reader);
}
return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {});
return SlowCodecReaderWrapper.wrap(new FilterLeafReader(new MergeReaderWrapper(reader)) {});
} else if (thingToDo == 1) {
// renumber fields
// NOTE: currently this only "blocks" bulk merges just by
@ -165,7 +165,7 @@ public class MockRandomMergePolicy extends MergePolicy {
if (LuceneTestCase.VERBOSE) {
System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader);
}
return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(reader, r));
return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(new MergeReaderWrapper(reader), r));
} else {
// otherwise, reader is unchanged
return reader;

View File

@ -74,7 +74,7 @@ Carrot2 3.15.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6
Jetty 9.3.8.v20160314
Jetty 9.3.14.v20161028
Detailed Change List
----------------------
@ -86,6 +86,25 @@ Upgrade Notes
consequence of this change is that you must be aware that some tuples will not have values if
there were none in the original document.
* SOLR-8785: Metrics related classes in org.apache.solr.util.stats have been removed in favor of
the dropwizard metrics library. Any custom plugins using these classes should be changed to use
the equivalent classes from the metrics library.
As part of this, the following changes were made to the output of Overseer Status API:
* The "totalTime" metric has been removed because it is no longer supported
* The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime"
and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime"
and so on for consistency with stats output in other parts of Solr.
* The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been
replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond"
and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
* SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It
might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the
original Highlighter. That said, some options aren't supported yet, notably hl.fragsize and
hl.requireFieldMatch=false. It will get more features in time, especially with your input. See HighlightParams.java
for a listing of highlight parameters annotated with which highlighters use them.
hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector.
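Since hl.method is an ordinary request parameter, trying the UnifiedHighlighter requires no solrconfig change. A minimal SolrJ sketch; the collection name "techproducts" and field "name" are illustrative:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class UnifiedHighlightDemo {
  public static void main(String[] args) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
      SolrQuery query = new SolrQuery("name:ipod");
      query.setHighlight(true);
      query.addHighlightField("name");    // field to highlight
      query.set("hl.method", "unified");  // select the highlighter per request
      QueryResponse rsp = client.query("techproducts", query);
      System.out.println(rsp.getHighlighting());
    }
  }
}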
New Features
----------------------
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
@ -123,6 +142,16 @@ New Features
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
(Gregory Chanan, Hrishikesh Gadre via yonik)
* SOLR-9721: javabin Tuple parser for streaming and other end points (noble)
* SOLR-9708: Added UnifiedSolrHighlighter, a highlighter adapter for Lucene's UnifiedHighlighter. The adapter is a
derivative of the PostingsSolrHighlighter, supporting mostly the same parameters with some differences.
Introduced "hl.method" parameter which can be set to original|fastVector|postings|unified to pick the highlighter at
runtime without the need to modify solrconfig from the default configuration. hl.useFastVectorHighlighter is now
considered deprecated in lieu of hl.method=fastVector. (Timothy Rodriguez, David Smiley)
* SOLR-9728: Ability to specify Key Store type in solr.in.sh file for SSL (Michael Suzuki, Kevin Risden)
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
@ -134,6 +163,16 @@ Optimizations
* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
comparator and only invalidate leafComparator. (John Call via yonik)
* SOLR-9786: FieldType has a new getSetQuery() method that can take a set of terms
and create a more efficient query (such as TermsQuery). The solr query parser has been
changed to use this method when appropriate. The parser also knows when it is being
used to parse a filter and will create TermsQueries from large lists of normal terms
or numbers, resulting in a query that will execute faster. This also acts to avoid
BooleanQuery maximum clause limit. Query parsing itself has also been optimized,
resulting in less produced garbage and 5-7% better performance.
(yonik)
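For reference, a hedged sketch of the kind of query the parser now prefers for large term lists; at this point in the codebase the class is org.apache.lucene.queries.TermsQuery, and the List<Term> constructor is assumed:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.Query;

final class TermsQuerySketch {
  // One TermsQuery instead of a BooleanQuery with a SHOULD clause per term,
  // which would trip the maximum clause limit on large lists.
  static Query idsFilter(List<String> ids) {
    List<Term> terms = new ArrayList<>();
    for (String id : ids) {
      terms.add(new Term("id", id));
    }
    return new TermsQuery(terms);
  }
}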
Bug Fixes
----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
@ -172,7 +211,11 @@ Bug Fixes
* SOLR-9729: JDBCStream improvements (Kevin Risden)
* SOLR-9626: new Admin UI now also highlights matched terms in the Analysis screen. (Alexandre Rafalovitch)
* SOLR-9512: CloudSolrClient's cluster state cache can break direct updates to leaders (noble)
* SOLR-5260: Facet search on a docvalue field in a multi shard collection (Trym Møller, Erick Erickson)
Other Changes
----------------------
@ -195,6 +238,13 @@ Other Changes
* SOLR-9609: Change hard-coded keysize from 512 to 1024 (Jeremy Martini via Erick Erickson)
* SOLR-8785: Use Dropwizard Metrics library for core metrics. The copied over code in
org.apache.solr.util.stats has been removed. (Jeff Wartes, Kelvin Wong, Christine Poerschke, shalin)
* SOLR-9784: Refactor CloudSolrClient to eliminate direct dependency on ZK (noble)
* SOLR-9801: Upgrade jetty to 9.3.14.v20161028 (shalin)
================== 6.3.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -5459,6 +5509,31 @@ Other Changes
* SOLR-6391: Improve message for CREATECOLLECTION failure due to missing
numShards (Anshum Gupta)
================== 4.9.1 ==================
Versions of Major Components
---------------------
Apache Tika 1.5 (with upgraded Apache POI 3.10.1)
Carrot2 3.9.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6
Detailed Change List
----------------------
Other Changes
---------------------
* SOLR-6503: Removed support for parsing netcdf files in Solr Cell because
of license issues. If you need support for this format, download the parser
JAR yourself (version 4.2) and add it to contrib/extraction/lib folder:
http://www.unidata.ucar.edu/software/thredds/current/netcdf-java/
(Uwe Schindler)
* SOLR-6388: Force upgrade of Apache POI dependency in Solr Cell to version
3.10.1 to fix CVE-2014-3529 and CVE-2014-3574. (Uwe Schindler)
================== 4.9.0 ==================
Versions of Major Components
@ -11846,6 +11921,122 @@ Documentation
* SOLR-1792: Documented peculiar behavior of TestHarness.LocalRequestFactory
(hossman)
================== Release 1.4.1 ==================
Release Date: See http://lucene.apache.org/solr for the official release date.
Upgrading from Solr 1.4
-----------------------
This is a bug fix release - no changes are required when upgrading from Solr 1.4.
However, a reindex is needed for some of the analysis fixes to take effect.
Versions of Major Components
----------------------------
Apache Lucene 2.9.3
Apache Tika 0.4
Carrot2 3.1.0
Lucene Information
----------------
Since Solr is built on top of Lucene, many people add customizations to Solr
that are dependent on Lucene. Please see http://lucene.apache.org/java/2_9_3/,
especially http://lucene.apache.org/java/2_9_3/changes/Changes.html for more
information on the version of Lucene used in Solr.
Bug Fixes
----------------------
* SOLR-1934: Upgrade to Apache Lucene 2.9.3 to obtain several bug
fixes from the previous 2.9.1. See the Lucene 2.9.3 release notes
for details. (hossman, Mark Miller)
* SOLR-1432: Make the new ValueSource.getValues(context,reader) delegate
to the original ValueSource.getValues(reader) so custom sources
will work. (yonik)
* SOLR-1572: FastLRUCache correctly implemented the LRU policy only
for the first 2B accesses. (yonik)
* SOLR-1595: StreamingUpdateSolrServer used the platform default character
set when streaming updates, rather than using UTF-8 as the HTTP headers
indicated, leading to an encoding mismatch. (hossman, yonik)
* SOLR-1660: CapitalizationFilter crashes if you use the maxWordCountOption
(Robert Muir via shalin)
* SOLR-1662: Added Javadocs in BufferedTokenStream and fixed incorrect cloning
in TestBufferedTokenStream (Robert Muir, Uwe Schindler via shalin)
* SOLR-1711: SolrJ - StreamingUpdateSolrServer had a race condition that
could halt the streaming of documents. The original patch to fix this
(never officially released) introduced another hanging bug due to
connections not being released. (Attila Babo, Erik Hetzner via yonik)
* SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers
retrieved from ContentStreams are not closed in various places, resulting
in file descriptor leaks.
(Christoff Brill, Mark Miller)
* SOLR-1580: Solr Configuration ignores 'mergeFactor' parameter, always
uses Lucene default. (Lance Norskog via Mark Miller)
* SOLR-1777: fieldTypes with sortMissingLast=true or sortMissingFirst=true can
result in incorrectly sorted results. (yonik)
* SOLR-1797: fix ConcurrentModificationException and potential memory
leaks in ResourceLoader. (yonik)
* SOLR-1798: Small memory leak (~100 bytes) in fastLRUCache for every
commit. (yonik)
* SOLR-1522: Show proper message if <script> tag is missing for DIH
ScriptTransformer (noble)
* SOLR-1538: Reordering of object allocations in ConcurrentLRUCache to eliminate
(an extremely small) potential for deadlock.
(gabriele renzi via hossman)
* SOLR-1558: QueryElevationComponent only works if the uniqueKey field is
implemented using StrField. In previous versions of Solr no warning or
error would be generated if you attempted to use QueryElevationComponent,
it would just fail in unexpected ways. This has been changed so that it
will fail with a clear error message on initialization. (hossman)
* SOLR-1563: Binary fields, including trie-based numeric fields, caused null
pointer exceptions in the luke request handler. (yonik)
* SOLR-1579: Fixes to XML escaping in stats.jsp
(David Bowen and hossman)
* SOLR-1582: copyField was ignored for BinaryField types (gsingers)
* SOLR-1596: A rollback operation followed by the shutdown of Solr
or the close of a core resulted in a warning:
"SEVERE: SolrIndexWriter was not closed prior to finalize()" although
there were no other consequences. (yonik)
* SOLR-1651: Fixed Incorrect dataimport handler package name in SolrResourceLoader
(Akshay Ukey via shalin)
* SOLR-1936: The JSON response format needed to escape unicode code point
U+2028 - 'LINE SEPARATOR' (Robert Hofstra, yonik)
* SOLR-1852: Fix WordDelimiterFilterFactory bug where position increments
were not being applied properly to subwords. (Peter Wolanin via Robert Muir)
* SOLR-1706: fixed WordDelimiterFilter for certain combinations of options
where it would output incorrect tokens. (Robert Muir, Chris Male)
* SOLR-1948: PatternTokenizerFactory should use parent's args (koji)
* SOLR-1870: Indexing documents using the 'javabin' format no longer
fails with a ClassCastException when SolrInputDocuments contain field
values which are Collections or other classes that implement
Iterable. (noble, hossman)
* SOLR-1769 Solr 1.4 Replication - Repeater throwing NullPointerException (noble)
================== Release 1.4.0 ==================
Release Date: See http://lucene.apache.org/solr for the official release date.

View File

@ -158,22 +158,74 @@ SOLR_SSL_OPTS=""
if [ -n "$SOLR_SSL_KEY_STORE" ]; then
SOLR_JETTY_CONFIG+=("--module=https")
SOLR_URL_SCHEME=https
SOLR_SSL_OPTS=" -Dsolr.jetty.keystore=$SOLR_SSL_KEY_STORE \
-Dsolr.jetty.keystore.password=$SOLR_SSL_KEY_STORE_PASSWORD \
-Dsolr.jetty.truststore=$SOLR_SSL_TRUST_STORE \
-Dsolr.jetty.truststore.password=$SOLR_SSL_TRUST_STORE_PASSWORD \
-Dsolr.jetty.ssl.needClientAuth=$SOLR_SSL_NEED_CLIENT_AUTH \
-Dsolr.jetty.ssl.wantClientAuth=$SOLR_SSL_WANT_CLIENT_AUTH"
SOLR_SSL_OPTS+=" -Dsolr.jetty.keystore=$SOLR_SSL_KEY_STORE"
if [ -n "$SOLR_SSL_KEY_STORE_PASSWORD" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.keystore.password=$SOLR_SSL_KEY_STORE_PASSWORD"
fi
if [ -n "$SOLR_SSL_KEY_STORE_TYPE" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.keystore.type=$SOLR_SSL_KEY_STORE_TYPE"
fi
if [ -n "$SOLR_SSL_TRUST_STORE" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.truststore=$SOLR_SSL_TRUST_STORE"
fi
if [ -n "$SOLR_SSL_TRUST_STORE_PASSWORD" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.truststore.password=$SOLR_SSL_TRUST_STORE_PASSWORD"
fi
if [ -n "$SOLR_SSL_TRUST_STORE_TYPE" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.truststore.type=$SOLR_SSL_TRUST_STORE_TYPE"
fi
if [ -n "$SOLR_SSL_NEED_CLIENT_AUTH" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.ssl.needClientAuth=$SOLR_SSL_NEED_CLIENT_AUTH"
fi
if [ -n "$SOLR_SSL_WANT_CLIENT_AUTH" ]; then
SOLR_SSL_OPTS+=" -Dsolr.jetty.ssl.wantClientAuth=$SOLR_SSL_WANT_CLIENT_AUTH"
fi
if [ -n "$SOLR_SSL_CLIENT_KEY_STORE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_CLIENT_KEY_STORE \
-Djavax.net.ssl.keyStorePassword=$SOLR_SSL_CLIENT_KEY_STORE_PASSWORD \
-Djavax.net.ssl.trustStore=$SOLR_SSL_CLIENT_TRUST_STORE \
-Djavax.net.ssl.trustStorePassword=$SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD"
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_CLIENT_KEY_STORE"
if [ -n "$SOLR_SSL_CLIENT_KEY_STORE_PASSWORD" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStorePassword=$SOLR_SSL_CLIENT_KEY_STORE_PASSWORD"
fi
if [ -n "$SOLR_SSL_CLIENT_KEY_STORE_TYPE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStoreType=$SOLR_SSL_CLIENT_KEY_STORE_TYPE"
fi
else
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_KEY_STORE \
-Djavax.net.ssl.keyStorePassword=$SOLR_SSL_KEY_STORE_PASSWORD \
-Djavax.net.ssl.trustStore=$SOLR_SSL_TRUST_STORE \
-Djavax.net.ssl.trustStorePassword=$SOLR_SSL_TRUST_STORE_PASSWORD"
if [ -n "$SOLR_SSL_KEY_STORE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_KEY_STORE"
fi
if [ -n "$SOLR_SSL_KEY_STORE_PASSWORD" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStorePassword=$SOLR_SSL_KEY_STORE_PASSWORD"
fi
if [ -n "$SOLR_SSL_KEY_STORE_TYPE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStoreType=$SOLR_SSL_KEY_STORE_TYPE"
fi
fi
if [ -n "$SOLR_SSL_CLIENT_TRUST_STORE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStore=$SOLR_SSL_CLIENT_TRUST_STORE"
if [ -n "$SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStorePassword=$SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD"
fi
if [ -n "$SOLR_SSL_CLIENT_TRUST_STORE_TYPE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStoreType=$SOLR_SSL_CLIENT_TRUST_STORE_TYPE"
fi
else
if [ -n "$SOLR_SSL_TRUST_STORE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStore=$SOLR_SSL_TRUST_STORE"
fi
if [ -n "$SOLR_SSL_TRUST_STORE_PASSWORD" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStorePassword=$SOLR_SSL_TRUST_STORE_PASSWORD"
fi
if [ -n "$SOLR_SSL_TRUST_STORE_TYPE" ]; then
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStoreType=$SOLR_SSL_TRUST_STORE_TYPE"
fi
fi
else
SOLR_JETTY_CONFIG+=("--module=http")

View File

@ -45,12 +45,72 @@ set "SOLR_SSL_OPTS= "
IF DEFINED SOLR_SSL_KEY_STORE (
set "SOLR_JETTY_CONFIG=--module=https"
set SOLR_URL_SCHEME=https
set "SCRIPT_ERROR=Solr server directory %SOLR_SERVER_DIR% not found!"
set "SOLR_SSL_OPTS=-Dsolr.jetty.keystore=%SOLR_SSL_KEY_STORE% -Dsolr.jetty.keystore.password=%SOLR_SSL_KEY_STORE_PASSWORD% -Dsolr.jetty.truststore=%SOLR_SSL_TRUST_STORE% -Dsolr.jetty.truststore.password=%SOLR_SSL_TRUST_STORE_PASSWORD% -Dsolr.jetty.ssl.needClientAuth=%SOLR_SSL_NEED_CLIENT_AUTH% -Dsolr.jetty.ssl.wantClientAuth=%SOLR_SSL_WANT_CLIENT_AUTH%"
IF DEFINED SOLR_SSL_CLIENT_KEY_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_CLIENT_KEY_STORE% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_CLIENT_KEY_STORE_PASSWORD% -Djavax.net.ssl.trustStore=%SOLR_SSL_CLIENT_TRUST_STORE% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD%"
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.keystore=%SOLR_SSL_KEY_STORE%"
IF DEFINED SOLR_SSL_KEY_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.keystore.password=%SOLR_SSL_KEY_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_KEY_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.keystore.type=%SOLR_SSL_KEY_STORE_TYPE%"
)
IF DEFINED SOLR_SSL_TRUST_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.truststore=%SOLR_SSL_TRUST_STORE%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.truststore.password=%SOLR_SSL_TRUST_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.truststore.type=%SOLR_SSL_TRUST_STORE_TYPE%"
)
IF DEFINED SOLR_SSL_NEED_CLIENT_AUTH (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.ssl.needClientAuth=%SOLR_SSL_NEED_CLIENT_AUTH%"
)
IF DEFINED SOLR_SSL_WANT_CLIENT_AUTH (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.ssl.wantClientAuth=%SOLR_SSL_WANT_CLIENT_AUTH%"
)
IF DEFINED SOLR_SSL_CLIENT_KEY_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_CLIENT_KEY_STORE%"
IF DEFINED SOLR_SSL_CLIENT_KEY_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_CLIENT_KEY_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_CLIENT_KEY_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStoreType=%SOLR_SSL_CLIENT_KEY_STORE_TYPE%"
)
) ELSE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_KEY_STORE% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_KEY_STORE_PASSWORD% -Djavax.net.ssl.trustStore=%SOLR_SSL_TRUST_STORE% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_TRUST_STORE_PASSWORD%"
IF DEFINED SOLR_SSL_KEY_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_KEY_STORE%"
)
IF DEFINED SOLR_SSL_KEY_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_KEY_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_KEY_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStoreType=%SOLR_SSL_KEY_STORE_TYPE%"
)
)
IF DEFINED SOLR_SSL_CLIENT_TRUST_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStore=%SOLR_SSL_CLIENT_TRUST_STORE%"
IF DEFINED SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_CLIENT_TRUST_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStoreType=%SOLR_SSL_CLIENT_TRUST_STORE_TYPE%"
)
) ELSE (
IF DEFINED SOLR_SSL_TRUST_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStore=%SOLR_SSL_TRUST_STORE%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_TRUST_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStoreType=%SOLR_SSL_TRUST_STORE_TYPE%"
)
)
) ELSE (
set SOLR_SSL_OPTS=
@ -1612,4 +1672,4 @@ REM Safe echo which does not mess with () in strings
set "eout=%1"
set eout=%eout:"=%
echo !eout!
GOTO :eof
GOTO :eof

View File

@ -86,8 +86,10 @@ REM Uncomment to set SSL-related system properties
REM Be sure to update the paths to the correct keystore for your environment
REM set SOLR_SSL_KEY_STORE=etc/solr-ssl.keystore.jks
REM set SOLR_SSL_KEY_STORE_PASSWORD=secret
REM set SOLR_SSL_KEY_STORE_TYPE=JKS
REM set SOLR_SSL_TRUST_STORE=etc/solr-ssl.keystore.jks
REM set SOLR_SSL_TRUST_STORE_PASSWORD=secret
REM set SOLR_SSL_TRUST_STORE_TYPE=JKS
REM set SOLR_SSL_NEED_CLIENT_AUTH=false
REM set SOLR_SSL_WANT_CLIENT_AUTH=false
@ -95,8 +97,10 @@ REM Uncomment if you want to override previously defined SSL values for HTTP cli
REM otherwise keep them commented and the above values will automatically be set for HTTP clients
REM set SOLR_SSL_CLIENT_KEY_STORE=
REM set SOLR_SSL_CLIENT_KEY_STORE_PASSWORD=
REM set SOLR_SSL_CLIENT_KEY_STORE_TYPE=
REM set SOLR_SSL_CLIENT_TRUST_STORE=
REM setSOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
REM set SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
REM set SOLR_SSL_CLIENT_TRUST_STORE_TYPE=
REM Settings for authentication
REM set SOLR_AUTHENTICATION_CLIENT_BUILDER=

View File

@ -98,8 +98,10 @@
# Be sure to update the paths to the correct keystore for your environment
#SOLR_SSL_KEY_STORE=/home/shalin/work/oss/shalin-lusolr/solr/server/etc/solr-ssl.keystore.jks
#SOLR_SSL_KEY_STORE_PASSWORD=secret
#SOLR_SSL_KEY_STORE_TYPE=JKS
#SOLR_SSL_TRUST_STORE=/home/shalin/work/oss/shalin-lusolr/solr/server/etc/solr-ssl.keystore.jks
#SOLR_SSL_TRUST_STORE_PASSWORD=secret
#SOLR_SSL_TRUST_STORE_TYPE=JKS
#SOLR_SSL_NEED_CLIENT_AUTH=false
#SOLR_SSL_WANT_CLIENT_AUTH=false
@ -107,8 +109,10 @@
# otherwise keep them commented and the above values will automatically be set for HTTP clients
#SOLR_SSL_CLIENT_KEY_STORE=
#SOLR_SSL_CLIENT_KEY_STORE_PASSWORD=
#SOLR_SSL_CLIENT_KEY_STORE_TYPE=
#SOLR_SSL_CLIENT_TRUST_STORE=
#SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
#SOLR_SSL_CLIENT_TRUST_STORE_TYPE=
# Settings for authentication
#SOLR_AUTHENTICATION_CLIENT_BUILDER=

View File

@ -18,11 +18,10 @@ package org.apache.solr.analytics.plugin;
import java.util.concurrent.atomic.AtomicLong;
import com.codahale.metrics.Timer;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.util.stats.Snapshot;
import org.apache.solr.util.stats.Timer;
import org.apache.solr.util.stats.TimerContext;
import org.apache.solr.util.stats.TimerUtils;
public class AnalyticsStatisticsCollector {
private final AtomicLong numRequests;
@ -35,7 +34,7 @@ public class AnalyticsStatisticsCollector {
private final AtomicLong numQueries;
private final Timer requestTimes;
public TimerContext currentTimer;
public Timer.Context currentTimer;
public AnalyticsStatisticsCollector() {
numRequests = new AtomicLong();
@ -88,7 +87,6 @@ public class AnalyticsStatisticsCollector {
public NamedList<Object> getStatistics() {
NamedList<Object> lst = new SimpleOrderedMap<>();
Snapshot snapshot = requestTimes.getSnapshot();
lst.add("requests", numRequests.longValue());
lst.add("analyticsRequests", numAnalyticsRequests.longValue());
lst.add("statsRequests", numStatsRequests.longValue());
@ -97,17 +95,7 @@ public class AnalyticsStatisticsCollector {
lst.add("rangeFacets", numRangeFacets.longValue());
lst.add("queryFacets", numQueryFacets.longValue());
lst.add("queriesInQueryFacets", numQueries.longValue());
lst.add("totalTime", requestTimes.getSum());
lst.add("avgRequestsPerSecond", requestTimes.getMeanRate());
lst.add("5minRateReqsPerSecond", requestTimes.getFiveMinuteRate());
lst.add("15minRateReqsPerSecond", requestTimes.getFifteenMinuteRate());
lst.add("avgTimePerRequest", requestTimes.getMean());
lst.add("medianRequestTime", snapshot.getMedian());
lst.add("75thPcRequestTime", snapshot.get75thPercentile());
lst.add("95thPcRequestTime", snapshot.get95thPercentile());
lst.add("99thPcRequestTime", snapshot.get99thPercentile());
lst.add("999thPcRequestTime", snapshot.get999thPercentile());
TimerUtils.addMetrics(lst, requestTimes);
return lst;
}
}

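The mechanical side of the SOLR-8785 migration is visible in this hunk: the copied-in Timer, TimerContext and Snapshot classes give way to Dropwizard's com.codahale.metrics.Timer, whose context object both times and records a request. A minimal sketch of the replacement pattern:

import com.codahale.metrics.Timer;

public class TimerPatternDemo {
  private final Timer requestTimes = new Timer();

  void handleRequest(Runnable work) {
    final Timer.Context ctx = requestTimes.time(); // replaces the removed TimerContext
    try {
      work.run();
    } finally {
      ctx.stop(); // records elapsed time into the timer's histogram and rates
    }
  }
}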
View File

@ -34,8 +34,8 @@
<dependency org="org.kitesdk" name="kite-morphlines-avro" rev="${/org.kitesdk/kite-morphlines-avro}" conf="compile" />
<dependency org="com.codahale.metrics" name="metrics-core" rev="${/com.codahale.metrics/metrics-core}" conf="compile" />
<dependency org="com.codahale.metrics" name="metrics-healthchecks" rev="${/com.codahale.metrics/metrics-healthchecks}" conf="compile" />
<dependency org="io.dropwizard.metrics" name="metrics-core" rev="${/io.dropwizard.metrics/metrics-core}" conf="compile" />
<dependency org="io.dropwizard.metrics" name="metrics-healthchecks" rev="${/io.dropwizard.metrics/metrics-healthchecks}" conf="compile" />
<dependency org="com.typesafe" name="config" rev="${/com.typesafe/config}" conf="compile" />
<!-- Test Dependencies -->

View File

@ -16,22 +16,12 @@
*/
package org.apache.solr.uima.processor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.UpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
@ -47,6 +37,11 @@ import org.junit.Test;
@Slow
public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
public static final String UIMA_CHAIN = "uima";
public static final String UIMA_MULTI_MAP_CHAIN = "uima-multi-map";
public static final String UIMA_IGNORE_ERRORS_CHAIN = "uima-ignoreErrors";
public static final String UIMA_NOT_IGNORE_ERRORS_CHAIN = "uima-not-ignoreErrors";
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml", "schema.xml", getFile("uima/solr").getAbsolutePath());
@ -63,7 +58,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
@Test
public void testProcessorConfiguration() {
SolrCore core = h.getCore();
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima");
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(UIMA_CHAIN);
assertNotNull(chained);
UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory)chained.getProcessors().get(0);
assertNotNull(factory);
@ -74,7 +69,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
@Test
public void testMultiMap() {
SolrCore core = h.getCore();
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima-multi-map");
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(UIMA_MULTI_MAP_CHAIN);
assertNotNull(chained);
UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory)chained.getProcessors().get(0);
assertNotNull(factory);
@ -90,7 +85,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
@Test
public void testProcessing() throws Exception {
addDoc("uima", adoc(
addDoc(adoc(
"id",
"2312312321312",
"text",
@ -98,7 +93,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
+ " Last Lucene European Conference has been held in Prague."));
+ " Last Lucene European Conference has been held in Prague."), UIMA_CHAIN);
assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='1']");
assertQ(req("sentiment:*"), "//*[@numFound='0']");
@ -108,16 +103,16 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
@Test
public void testTwoUpdates() throws Exception {
addDoc("uima", adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
+ "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern "
+ "Hemisphere!"));
+ "Hemisphere!"), UIMA_CHAIN);
assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='1']");
addDoc("uima", adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
+ "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be"
+ " attendee-driven, facilitated by members of the Apache community and will "
+ "focus on the Apache..."));
+ "focus on the Apache..."), UIMA_CHAIN);
assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='2']");
@ -129,7 +124,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
public void testErrorHandling() throws Exception {
try{
addDoc("uima-not-ignoreErrors", adoc(
addDoc(adoc(
"id",
"2312312321312",
"text",
@ -137,14 +132,14 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
+ " Last Lucene European Conference has been held in Prague."));
+ " Last Lucene European Conference has been held in Prague."), UIMA_NOT_IGNORE_ERRORS_CHAIN);
fail("exception shouldn't be ignored");
}
catch(RuntimeException expected){}
assertU(commit());
assertQ(req("*:*"), "//*[@numFound='0']");
addDoc("uima-ignoreErrors", adoc(
addDoc(adoc(
"id",
"2312312321312",
"text",
@ -152,16 +147,16 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
+ " Last Lucene European Conference has been held in Prague."));
+ " Last Lucene European Conference has been held in Prague."), UIMA_IGNORE_ERRORS_CHAIN);
assertU(commit());
assertQ(req("*:*"), "//*[@numFound='1']");
try{
addDoc("uima-not-ignoreErrors", adoc(
addDoc(adoc(
"id",
"2312312321312",
"text",
"SpellCheckComponent got improvement related to recent Lucene changes."));
"SpellCheckComponent got improvement related to recent Lucene changes."), UIMA_NOT_IGNORE_ERRORS_CHAIN);
fail("exception shouldn't be ignored");
}
catch(StringIndexOutOfBoundsException e){ // SOLR-2579
@ -170,11 +165,11 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
catch(SolrException expected){}
try{
addDoc("uima-ignoreErrors", adoc(
addDoc(adoc(
"id",
"2312312321312",
"text",
"SpellCheckComponent got improvement related to recent Lucene changes."));
"SpellCheckComponent got improvement related to recent Lucene changes."), UIMA_IGNORE_ERRORS_CHAIN);
}
catch(StringIndexOutOfBoundsException e){ // SOLR-2579
fail("exception shouldn't be raised");
@ -188,19 +183,4 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
}
}
private void addDoc(String chain, String doc) throws Exception {
Map<String, String[]> params = new HashMap<>();
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) {
};
UpdateRequestHandler handler = new UpdateRequestHandler();
handler.init(null);
ArrayList<ContentStream> streams = new ArrayList<>(2);
streams.add(new ContentStreamBase.StringStream(doc));
req.setContentStreams(streams);
handler.handleRequestBody(req, new SolrQueryResponse());
}
}
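
No replacement helper is added in this file, and the supporting imports are removed with it; the updated call sites match an addDoc(String doc, String updateRequestProcessorChain) helper inherited from SolrTestCaseJ4, which presumably performs the same steps as the deleted private method. A sketch of that presumed shared helper, mirroring the deleted body:

// Presumed shape of the inherited helper; the actual SolrTestCaseJ4
// implementation is not part of this diff.
public static void addDoc(String doc, String updateRequestProcessorChain) throws Exception {
  Map<String, String[]> params = new HashMap<>();
  params.put(UpdateParams.UPDATE_CHAIN, new String[] { updateRequestProcessorChain });
  SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), new MultiMapSolrParams(params)) {};
  UpdateRequestHandler handler = new UpdateRequestHandler();
  handler.init(null);
  ArrayList<ContentStream> streams = new ArrayList<>(2);
  streams.add(new ContentStreamBase.StringStream(doc));
  req.setContentStreams(streams);
  handler.handleRequestBody(req, new SolrQueryResponse());
}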

View File

@ -50,6 +50,7 @@
<dependency org="log4j" name="log4j" rev="${/log4j/log4j}" conf="compile"/>
<dependency org="org.slf4j" name="slf4j-log4j12" rev="${/org.slf4j/slf4j-log4j12}" conf="compile"/>
<dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="compile"/>
<dependency org="io.dropwizard.metrics" name="metrics-core" rev="${/io.dropwizard.metrics/metrics-core}" conf="compile" />
<dependency org="org.easymock" name="easymock" rev="${/org.easymock/easymock}" conf="test"/>
<dependency org="cglib" name="cglib-nodep" rev="${/cglib/cglib-nodep}" conf="test"/>

View File

@ -27,6 +27,7 @@ import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Predicate;
import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.solr.common.SolrException;
@ -34,7 +35,6 @@ import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCmdExecutor;
import org.apache.solr.common.util.Pair;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
@ -119,7 +119,7 @@ public class DistributedQueue {
* @return data at the first element of the queue, or null.
*/
public byte[] peek() throws KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_peek");
Timer.Context time = stats.time(dir + "_peek");
try {
return firstElement();
} finally {
@ -147,7 +147,7 @@ public class DistributedQueue {
*/
public byte[] peek(long wait) throws KeeperException, InterruptedException {
Preconditions.checkArgument(wait > 0);
TimerContext time;
Timer.Context time;
if (wait == Long.MAX_VALUE) {
time = stats.time(dir + "_peek_wait_forever");
} else {
@ -177,7 +177,7 @@ public class DistributedQueue {
* @return Head of the queue or null.
*/
public byte[] poll() throws KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_poll");
Timer.Context time = stats.time(dir + "_poll");
try {
return removeFirst();
} finally {
@ -191,7 +191,7 @@ public class DistributedQueue {
* @return The former head of the queue
*/
public byte[] remove() throws NoSuchElementException, KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_remove");
Timer.Context time = stats.time(dir + "_remove");
try {
byte[] result = removeFirst();
if (result == null) {
@ -210,7 +210,7 @@ public class DistributedQueue {
*/
public byte[] take() throws KeeperException, InterruptedException {
// Same as for element. Should refactor this.
TimerContext timer = stats.time(dir + "_take");
Timer.Context timer = stats.time(dir + "_take");
updateLock.lockInterruptibly();
try {
while (true) {
@ -234,7 +234,7 @@ public class DistributedQueue {
* element to become visible.
*/
public void offer(byte[] data) throws KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_offer");
Timer.Context time = stats.time(dir + "_offer");
try {
while (true) {
try {
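
All of these call sites follow the same Dropwizard idiom: Timer.time() hands back a Timer.Context, and Context.stop() records the elapsed time into the timer's histogram and rate meters. The complete pattern, with stats.time(...) being the Overseer.Stats helper shown later in this commit:

Timer.Context time = stats.time(dir + "_peek");
try {
  return firstElement();
} finally {
  time.stop(); // records elapsed nanoseconds; safe to call exactly once per context
}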

View File

@ -26,9 +26,9 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import com.codahale.metrics.Timer;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.cloud.overseer.ClusterStateMutator;
import org.apache.solr.cloud.overseer.CollectionMutator;
@ -49,9 +49,6 @@ import org.apache.solr.core.CloudConfig;
import org.apache.solr.handler.admin.CollectionsHandler;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.solr.util.stats.Clock;
import org.apache.solr.util.stats.Timer;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
@ -254,7 +251,7 @@ public class Overseer implements Closeable {
private ClusterState processQueueItem(ZkNodeProps message, ClusterState clusterState, ZkStateWriter zkStateWriter, boolean enableBatching, ZkStateWriter.ZkWriteCallback callback) throws Exception {
final String operation = message.getStr(QUEUE_OPERATION);
List<ZkWriteCommand> zkWriteCommands = null;
final TimerContext timerContext = stats.time(operation);
final Timer.Context timerContext = stats.time(operation);
try {
zkWriteCommands = processMessage(clusterState, message, operation);
stats.success(operation);
@ -392,7 +389,7 @@ public class Overseer implements Closeable {
}
private LeaderStatus amILeader() {
TimerContext timerContext = stats.time("am_i_leader");
Timer.Context timerContext = stats.time("am_i_leader");
boolean success = true;
try {
ZkNodeProps props = ZkNodeProps.load(zkClient.getData(
@ -795,7 +792,7 @@ public class Overseer implements Closeable {
stat.errors.incrementAndGet();
}
public TimerContext time(String operation) {
public Timer.Context time(String operation) {
String op = operation.toLowerCase(Locale.ROOT);
Stat stat = stats.get(op);
if (stat == null) {
@ -853,7 +850,7 @@ public class Overseer implements Closeable {
public Stat() {
this.success = new AtomicInteger();
this.errors = new AtomicInteger();
this.requestTime = new Timer(TimeUnit.MILLISECONDS, TimeUnit.MINUTES, Clock.defaultClock());
this.requestTime = new Timer();
this.failureDetails = new LinkedList<>();
}
}
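
Note that the no-arg constructor here is Dropwizard's: it defaults to an exponentially decaying reservoir with nanosecond-precision measurements, so the old Timer(TimeUnit.MILLISECONDS, TimeUnit.MINUTES, Clock.defaultClock()) configuration needs no direct replacement:

this.requestTime = new Timer(); // equivalent to new Timer(new ExponentiallyDecayingReservoir())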

View File

@ -23,14 +23,14 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.codahale.metrics.Timer;
import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.util.stats.Snapshot;
import org.apache.solr.util.stats.Timer;
import org.apache.solr.util.stats.TimerUtils;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -100,17 +100,7 @@ public class OverseerStatusCmd implements Cmd {
lst.add("errors", errors);
}
Timer timer = entry.getValue().requestTime;
Snapshot snapshot = timer.getSnapshot();
lst.add("totalTime", timer.getSum());
lst.add("avgRequestsPerMinute", timer.getMeanRate());
lst.add("5minRateRequestsPerMinute", timer.getFiveMinuteRate());
lst.add("15minRateRequestsPerMinute", timer.getFifteenMinuteRate());
lst.add("avgTimePerRequest", timer.getMean());
lst.add("medianRequestTime", snapshot.getMedian());
lst.add("75thPctlRequestTime", snapshot.get75thPercentile());
lst.add("95thPctlRequestTime", snapshot.get95thPercentile());
lst.add("99thPctlRequestTime", snapshot.get99thPercentile());
lst.add("999thPctlRequestTime", snapshot.get999thPercentile());
TimerUtils.addMetrics(lst, timer);
}
results.add("overseer_operations", overseerStats);
results.add("collection_operations", collectionStats);

View File

@ -30,6 +30,7 @@ import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import com.codahale.metrics.Timer;
import com.google.common.collect.ImmutableSet;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrResponse;
@ -43,7 +44,6 @@ import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
@ -380,7 +380,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
protected LeaderStatus amILeader() {
String statsName = "collection_am_i_leader";
TimerContext timerContext = stats.time(statsName);
Timer.Context timerContext = stats.time(statsName);
boolean success = true;
try {
ZkNodeProps props = ZkNodeProps.load(zkStateReader.getZkClient().getData(
@ -451,7 +451,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
public void run() {
String statsName = messageHandler.getTimerName(operation);
final TimerContext timerContext = stats.time(statsName);
final Timer.Context timerContext = stats.time(statsName);
boolean success = false;
final String asyncId = message.getStr(ASYNC);

View File

@ -22,10 +22,10 @@ import java.util.List;
import java.util.TreeSet;
import java.util.function.Predicate;
import com.codahale.metrics.Timer;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.util.Pair;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
@ -85,7 +85,7 @@ public class OverseerTaskQueue extends DistributedQueue {
*/
public void remove(QueueEvent event) throws KeeperException,
InterruptedException {
TimerContext time = stats.time(dir + "_remove_event");
Timer.Context time = stats.time(dir + "_remove_event");
try {
String path = event.getId();
String responsePath = dir + "/" + response_prefix
@ -181,7 +181,7 @@ public class OverseerTaskQueue extends DistributedQueue {
*/
public QueueEvent offer(byte[] data, long timeout) throws KeeperException,
InterruptedException {
TimerContext time = stats.time(dir + "_offer");
Timer.Context time = stats.time(dir + "_offer");
try {
// Create and watch the response node before creating the request node;
// otherwise we may miss the response.
@ -227,7 +227,7 @@ public class OverseerTaskQueue extends DistributedQueue {
ArrayList<QueueEvent> topN = new ArrayList<>();
LOG.debug("Peeking for top {} elements. ExcludeSet: {}", n, excludeSet);
TimerContext time;
Timer.Context time;
if (waitMillis == Long.MAX_VALUE) time = stats.time(dir + "_peekTopN_wait_forever");
else time = stats.time(dir + "_peekTopN_wait" + waitMillis);

View File

@ -21,12 +21,12 @@ import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.codahale.metrics.Timer;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
@ -210,7 +210,7 @@ public class ZkStateWriter {
throw new IllegalStateException("ZkStateWriter has seen a tragic error, this instance can no longer be used");
}
if (!hasPendingUpdates()) return clusterState;
TimerContext timerContext = stats.time("update_state");
Timer.Context timerContext = stats.time("update_state");
boolean success = false;
try {
if (!updates.isEmpty()) {

View File

@ -51,10 +51,13 @@ import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.BinaryResponseWriter;
import org.apache.solr.response.JSONResponseWriter;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType;
@ -125,8 +128,14 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
}
public void write(OutputStream os) throws IOException {
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
QueryResponseWriter rw = req.getCore().getResponseWriters().get(wt);
if (rw instanceof BinaryResponseWriter) {
//todo add support for other writers after testing
writer = new JavaBinCodec(os, null);
} else {
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
}
Exception exception = res.getException();
if (exception != null) {
if (!(exception instanceof IgnoreException)) {
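
The effect is that the export handler can now stream JavaBin when the request's wt resolves to a binary response writer; an illustrative request (parameter values made up):

/export?q=*:*&sort=id+asc&fl=id&wt=javabin

Any other wt keeps the existing JSON push-writer path.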

View File

@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.concurrent.atomic.LongAdder;
import com.codahale.metrics.Timer;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@ -33,9 +34,7 @@ import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.stats.Snapshot;
import org.apache.solr.util.stats.Timer;
import org.apache.solr.util.stats.TimerContext;
import org.apache.solr.util.stats.TimerUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -144,7 +143,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
@Override
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests.increment();
TimerContext timer = requestTimes.time();
Timer.Context timer = requestTimes.time();
try {
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
SolrPluginUtils.setDefaults(this, req, defaults, appends, invariants);
@ -268,26 +267,16 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
@Override
public NamedList<Object> getStatistics() {
NamedList<Object> lst = new SimpleOrderedMap<>();
Snapshot snapshot = requestTimes.getSnapshot();
lst.add("handlerStart",handlerStart);
lst.add("requests", numRequests.longValue());
lst.add("errors", numServerErrors.longValue() + numClientErrors.longValue());
lst.add("serverErrors", numServerErrors.longValue());
lst.add("clientErrors", numClientErrors.longValue());
lst.add("timeouts", numTimeouts.longValue());
lst.add("totalTime", requestTimes.getSum());
lst.add("avgRequestsPerSecond", requestTimes.getMeanRate());
lst.add("5minRateReqsPerSecond", requestTimes.getFiveMinuteRate());
lst.add("15minRateReqsPerSecond", requestTimes.getFifteenMinuteRate());
lst.add("avgTimePerRequest", requestTimes.getMean());
lst.add("medianRequestTime", snapshot.getMedian());
lst.add("75thPcRequestTime", snapshot.get75thPercentile());
lst.add("95thPcRequestTime", snapshot.get95thPercentile());
lst.add("99thPcRequestTime", snapshot.get99thPercentile());
lst.add("999thPcRequestTime", snapshot.get999thPercentile());
TimerUtils.addMetrics(lst, requestTimes);
return lst;
}
}

View File

@ -16,6 +16,14 @@
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Stream;
import com.google.common.base.Objects;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
@ -29,6 +37,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.highlight.DefaultSolrHighlighter;
import org.apache.solr.highlight.PostingsSolrHighlighter;
import org.apache.solr.highlight.SolrHighlighter;
import org.apache.solr.highlight.UnifiedSolrHighlighter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
@ -38,9 +47,7 @@ import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import static java.util.stream.Collectors.toMap;
/**
* TODO!
@ -50,13 +57,50 @@ import java.util.List;
*/
public class HighlightComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware
{
public static final String COMPONENT_NAME = "highlight";
private PluginInfo info = PluginInfo.EMPTY_INFO;
private SolrHighlighter highlighter;
public enum HighlightMethod {
UNIFIED("unified"),
FAST_VECTOR("fastVector"),
POSTINGS("postings"),
ORIGINAL("original");
private static final Map<String, HighlightMethod> METHODS = Collections.unmodifiableMap(Stream.of(values())
.collect(toMap(HighlightMethod::getMethodName, Function.identity())));
private final String methodName;
HighlightMethod(String method) {
this.methodName = method;
}
public String getMethodName() {
return methodName;
}
public static HighlightMethod parse(String method) {
return METHODS.get(method);
}
}
public static final String COMPONENT_NAME = "highlight";
private PluginInfo info = PluginInfo.EMPTY_INFO;
@Deprecated // DWS: in 7.0 lets restructure the abstractions/relationships
private SolrHighlighter solrConfigHighlighter;
/**
* @deprecated instead depend on {@link #process(ResponseBuilder)} to choose the highlighter based on
* {@link HighlightParams#METHOD}
*/
@Deprecated
public static SolrHighlighter getHighlighter(SolrCore core) {
HighlightComponent hl = (HighlightComponent) core.getSearchComponents().get(HighlightComponent.COMPONENT_NAME);
return hl==null ? null: hl.getHighlighter();
return hl==null ? null: hl.getHighlighter();
}
@Deprecated
public SolrHighlighter getHighlighter() {
return solrConfigHighlighter;
}
@Override
@ -67,7 +111,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
rb.doHighlights = highlighter.isHighlightingEnabled(params);
rb.doHighlights = solrConfigHighlighter.isHighlightingEnabled(params);
if(rb.doHighlights){
rb.setNeedDocList(true);
String hlq = params.get(HighlightParams.Q);
@ -90,26 +134,28 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
if(children.isEmpty()) {
PluginInfo pluginInfo = core.getSolrConfig().getPluginInfo(SolrHighlighter.class.getName()); //TODO deprecated configuration remove later
if (pluginInfo != null) {
highlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
solrConfigHighlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
} else {
DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core);
defHighlighter.init(PluginInfo.EMPTY_INFO);
highlighter = defHighlighter;
solrConfigHighlighter = defHighlighter;
}
} else {
highlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
solrConfigHighlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
}
}
@Override
public void process(ResponseBuilder rb) throws IOException {
if (rb.doHighlights) {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String[] defaultHighlightFields; //TODO: get from builder by default?
SolrHighlighter highlighter = getHighlighter(params);
String[] defaultHighlightFields; //TODO: get from builder by default?
if (rb.getQparser() != null) {
defaultHighlightFields = rb.getQparser().getDefaultHighlightFields();
} else {
@ -130,14 +176,8 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
rb.setHighlightQuery( highlightQuery );
}
}
if(highlightQuery != null) {
boolean rewrite = (highlighter instanceof PostingsSolrHighlighter == false) && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
highlightQuery = rewrite ? highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
}
// No highlighting if there is no query -- consider q.alt="*:*
// No highlighting if there is no query -- consider q.alt=*:*
if( highlightQuery != null ) {
NamedList sumData = highlighter.doHighlighting(
rb.getResults().docList,
@ -152,6 +192,36 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
}
}
protected SolrHighlighter getHighlighter(SolrParams params) {
HighlightMethod method = HighlightMethod.parse(params.get(HighlightParams.METHOD));
if (method == null) {
return solrConfigHighlighter;
}
switch (method) {
case UNIFIED:
if (solrConfigHighlighter instanceof UnifiedSolrHighlighter) {
return solrConfigHighlighter;
}
return new UnifiedSolrHighlighter(); // TODO cache one?
case POSTINGS:
if (solrConfigHighlighter instanceof PostingsSolrHighlighter) {
return solrConfigHighlighter;
}
return new PostingsSolrHighlighter(); // TODO cache one?
case FAST_VECTOR: // fall-through
case ORIGINAL:
if (solrConfigHighlighter instanceof DefaultSolrHighlighter) {
return solrConfigHighlighter;
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"In order to use " + HighlightParams.METHOD + "=" + method.getMethodName() + " the configured" +
" highlighter in solrconfig must be " + DefaultSolrHighlighter.class);
}
default: throw new AssertionError();
}
}
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doHighlights) return;
@ -195,10 +265,6 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
}
}
public SolrHighlighter getHighlighter() {
return highlighter;
}
////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
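
A request-level illustration of the new per-request selection (the query and field are made up; the parameter names come from HighlightParams):

import org.apache.solr.common.params.ModifiableSolrParams;

ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "text:lucene");     // hypothetical query
params.set("hl", true);
params.set("hl.fl", "text");
params.set("hl.method", "unified"); // unified | postings | fastVector | original

As the switch above shows, unified and postings fall back to a fresh highlighter instance when solrconfig configured something else, while fastVector and original insist on a configured DefaultSolrHighlighter and otherwise raise the SolrException.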

View File

@ -199,10 +199,11 @@ public class QueryComponent extends SearchComponent
if (fqs!=null && fqs.length!=0) {
List<Query> filters = rb.getFilters();
// if filters already exist, make a copy instead of modifying the original
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, req);
fqp.setIsFilter(true);
filters.add(fqp.getQuery());
}
}
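
Marking fq parsers with setIsFilter(true) is what later lets SolrQueryParserBase rewrite large disjunctions into a single set query; for example, a filter such as (made-up field and values)

fq=id:(doc1 doc2 doc3 ... doc20)

can collapse its optional term clauses into one TermsQuery once the clause count exceeds TERMS_QUERY_THRESHOLD.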

View File

@ -66,6 +66,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
@ -373,6 +374,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
if (!isHighlightingEnabled(params)) // also returns early if no unique key field
return null;
boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
if (rewrite) {
query = query.rewrite(req.getSearcher().getIndexReader());
}
SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema();
@ -463,8 +471,11 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
* Determines if we should use the FastVectorHighlighter for this field.
*/
protected boolean useFastVectorHighlighter(SolrParams params, SchemaField schemaField) {
boolean useFvhParam = params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
if (!useFvhParam) return false;
boolean methodFvh =
HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
|| params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
if (!methodFvh) return false;
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
if (!termPosOff) {
log.warn("Solr will use the standard Highlighter instead of FastVectorHighlighter because the {} field " +
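
With this check, the fastVector method can also be requested per field via Solr's f.&lt;field&gt;.&lt;param&gt; convention, e.g. (field name made up):

hl=true&hl.fl=title&f.title.hl.method=fastVector

which behaves like the legacy f.title.hl.useFastVectorHighlighter=true, provided the field stores term positions and offsets.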

View File

@ -50,8 +50,9 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* <p>
* Example configuration:
* <pre class="prettyprint">
* &lt;requestHandler name="standard" class="solr.StandardRequestHandler"&gt;
* &lt;requestHandler name="/select" class="solr.SearchHandler"&gt;
* &lt;lst name="defaults"&gt;
* &lt;str name="hl.method"&gt;postings&lt;/str&gt;
* &lt;int name="hl.snippets"&gt;1&lt;/int&gt;
* &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
@ -71,12 +72,6 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* &lt;/lst&gt;
* &lt;/requestHandler&gt;
* </pre>
* ...
* <pre class="prettyprint">
* &lt;searchComponent class="solr.HighlightComponent" name="highlight"&gt;
* &lt;highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/&gt;
* &lt;/searchComponent&gt;
* </pre>
* <p>
* Notes:
* <ul>

View File

@ -0,0 +1,365 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RTimerTree;
import org.apache.solr.util.plugin.PluginInfoInitialized;
/**
* Highlighter impl that uses {@link UnifiedHighlighter}
* <p>
* Example configuration with default values:
* <pre class="prettyprint">
* &lt;requestHandler name="/select" class="solr.SearchHandler"&gt;
* &lt;lst name="defaults"&gt;
* &lt;str name="hl.method"&gt;unified&lt;/str&gt;
* &lt;int name="hl.snippets"&gt;1&lt;/int&gt;
* &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.simple.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.simple.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.tag.ellipsis"&gt;... &lt;/str&gt;
* &lt;bool name="hl.defaultSummary"&gt;true&lt;/bool&gt;
* &lt;str name="hl.encoder"&gt;simple&lt;/str&gt;
* &lt;float name="hl.score.k1"&gt;1.2&lt;/float&gt;
* &lt;float name="hl.score.b"&gt;0.75&lt;/float&gt;
* &lt;float name="hl.score.pivot"&gt;87&lt;/float&gt;
* &lt;str name="hl.bs.language"&gt;&lt;/str&gt;
* &lt;str name="hl.bs.country"&gt;&lt;/str&gt;
* &lt;str name="hl.bs.variant"&gt;&lt;/str&gt;
* &lt;str name="hl.bs.type"&gt;SENTENCE&lt;/str&gt;
* &lt;int name="hl.maxAnalyzedChars"&gt;10000&lt;/int&gt;
* &lt;bool name="hl.highlightMultiTerm"&gt;true&lt;/bool&gt;
* &lt;bool name="hl.usePhraseHighlighter"&gt;true&lt;/bool&gt;
* &lt;int name="hl.cacheFieldValCharsThreshold"&gt;524288&lt;/int&gt;
* &lt;str name="hl.offsetSource"&gt;&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/requestHandler&gt;
* </pre>
* <p>
* Notes:
* <ul>
* <li>hl.q (string) can specify the query
* <li>hl.fl (string) specifies the field list.
* <li>hl.snippets (int) specifies how many snippets to return.
* <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
* <li>hl.tag.post (string) specifies text which appears after a highlighted term.
* <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer hl.tag.pre)
* <li>hl.simple.post (string) specifies text which appears after a highlighted term. (prefer hl.tag.post)
* <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is to retain each
* value in a list without joining them.
* <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading text.
* <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
* <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
* <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
* <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
* <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHAR, WHOLE]
* <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
* <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
* <li>hl.bs.variant (string) specifies variant code for BreakIterator. default is empty string (root locale)
* <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document for any one field.
* <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries at some cost. default is true
* <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
* <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached. default is 524288 (1MB in 2 byte chars)
* <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will use what's available if not specified
* </ul>
*
* @lucene.experimental
*/
public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
protected static final String SNIPPET_SEPARATOR = "\u0000";
private static final String[] ZERO_LEN_STR_ARRAY = new String[0];
@Override
public void init(PluginInfo info) {
}
@Override
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
final SolrParams params = req.getParams();
// if highlighting isn't enabled, then why call doHighlighting?
if (!isHighlightingEnabled(params))
return null;
int[] docIDs = toDocIDs(docs);
// fetch the unique keys
String[] keys = getUniqueKeys(req.getSearcher(), docIDs);
// query-time parameters
String[] fieldNames = getHighlightFields(query, req, defaultFields);
int maxPassages[] = new int[fieldNames.length];
for (int i = 0; i < fieldNames.length; i++) {
maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
}
UnifiedHighlighter highlighter = getHighlighter(req);
Map<String, String[]> snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages);
return encodeSnippets(keys, fieldNames, snippets);
}
/**
* Creates an instance of the Lucene {@link UnifiedHighlighter}. Provided for subclass extension so that
* a subclass can return a subclass of {@link SolrExtendedUnifiedHighlighter}.
*/
protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
return new SolrExtendedUnifiedHighlighter(req);
}
/**
* Encodes the resulting snippets into a namedlist
*
* @param keys the document unique keys
* @param fieldNames field names to highlight in the order
* @param snippets map from field name to snippet array for the docs
* @return encoded namedlist of summaries
*/
protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String, String[]> snippets) {
NamedList<Object> list = new SimpleOrderedMap<>();
for (int i = 0; i < keys.length; i++) {
NamedList<Object> summary = new SimpleOrderedMap<>();
for (String field : fieldNames) {
String snippet = snippets.get(field)[i];
if (snippet == null) {
//TODO reuse logic of DefaultSolrHighlighter.alternateField
summary.add(field, ZERO_LEN_STR_ARRAY);
} else {
// we used a special snippet separator char and we can now split on it.
summary.add(field, snippet.split(SNIPPET_SEPARATOR));
}
}
list.add(keys[i], summary);
}
return list;
}
/**
* Converts solr's DocList to the int[] docIDs
*/
protected int[] toDocIDs(DocList docs) {
int[] docIDs = new int[docs.size()];
DocIterator iterator = docs.iterator();
for (int i = 0; i < docIDs.length; i++) {
if (!iterator.hasNext()) {
throw new AssertionError();
}
docIDs[i] = iterator.nextDoc();
}
if (iterator.hasNext()) {
throw new AssertionError();
}
return docIDs;
}
/**
* Retrieves the unique keys for the topdocs to key the results
*/
protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
IndexSchema schema = searcher.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
if (keyField != null) {
Set<String> selector = Collections.singleton(keyField.getName());
String[] uniqueKeys = new String[docIDs.length];
for (int i = 0; i < docIDs.length; i++) {
int docid = docIDs[i];
Document doc = searcher.doc(docid, selector);
String id = schema.printableUniqueKey(doc);
uniqueKeys[i] = id;
}
return uniqueKeys;
} else {
return new String[docIDs.length];
}
}
/**
* From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}.
*/
protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter {
protected final SolrParams params;
protected final IndexSchema schema;
protected final RTimerTree loadFieldValuesTimer;
public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) {
super(req.getSearcher(), req.getSchema().getIndexAnalyzer());
this.params = req.getParams();
this.schema = req.getSchema();
this.setMaxLength(
params.getInt(HighlightParams.MAX_CHARS, UnifiedHighlighter.DEFAULT_MAX_LENGTH));
this.setCacheFieldValCharsThreshold(
params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD));
// SolrRequestInfo is a thread-local singleton providing access to the ResponseBuilder to code that
// otherwise can't get it in a nicer way.
SolrQueryRequest request = SolrRequestInfo.getRequestInfo().getReq();
final RTimerTree timerTree;
if (request.getRequestTimer() != null) { //It may be null if not used in a search context.
timerTree = request.getRequestTimer();
} else {
timerTree = new RTimerTree(); // since null checks are annoying
}
loadFieldValuesTimer = timerTree.sub("loadFieldValues"); // we assume a new timer, state of STARTED
loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED.
}
@Override
protected OffsetSource getOffsetSource(String field) {
String sourceStr = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE);
if (sourceStr != null) {
return OffsetSource.valueOf(sourceStr.toUpperCase(Locale.ROOT));
} else {
return super.getOffsetSource(field);
}
}
@Override
public int getMaxNoHighlightPassages(String field) {
boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false);
if (defaultSummary) {
return -1;// signifies return first hl.snippets passages worth of the content
} else {
return 0;// will return null
}
}
@Override
protected PassageFormatter getFormatter(String fieldName) {
String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE,
params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>")
);
String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST,
params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>")
);
String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR);
String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple");
return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
}
@Override
protected PassageScorer getScorer(String fieldName) {
float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f);
float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f);
float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f);
return new PassageScorer(k1, b, pivot);
}
@Override
protected BreakIterator getBreakIterator(String field) {
String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE);
String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY);
String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
Locale locale = parseLocale(language, country, variant);
String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
return parseBreakIterator(type, locale);
}
/**
* parse a break iterator type for the specified locale
*/
protected BreakIterator parseBreakIterator(String type, Locale locale) {
if (type == null || "SENTENCE".equals(type)) {
return BreakIterator.getSentenceInstance(locale);
} else if ("LINE".equals(type)) {
return BreakIterator.getLineInstance(locale);
} else if ("WORD".equals(type)) {
return BreakIterator.getWordInstance(locale);
} else if ("CHARACTER".equals(type)) {
return BreakIterator.getCharacterInstance(locale);
} else if ("WHOLE".equals(type)) {
return new WholeBreakIterator();
} else {
throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
}
}
/**
* parse a locale from a language+country+variant spec
*/
protected Locale parseLocale(String language, String country, String variant) {
if (language == null && country == null && variant == null) {
return Locale.ROOT;
} else if (language == null) {
throw new IllegalArgumentException("language is required if country or variant is specified");
} else if (country == null && variant != null) {
throw new IllegalArgumentException("To specify variant, country is required");
} else if (country != null && variant != null) {
return new Locale(language, country, variant);
} else if (country != null) {
return new Locale(language, country);
} else {
return new Locale(language);
}
}
@Override
protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int
cacheCharsThreshold) throws IOException {
// Time loading field values. It can be an expensive part of highlighting.
loadFieldValuesTimer.resume();
try {
return super.loadFieldValues(fields, docIter, cacheCharsThreshold);
} finally {
loadFieldValuesTimer.pause(); // note: doesn't need to be "stopped"; pause is fine.
}
}
@Override
protected boolean shouldHandleMultiTermQuery(String field) {
return params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true);
}
@Override
protected boolean shouldHighlightPhrasesStrictly(String field) {
return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true);
}
}
}
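
A minimal sketch of the extension point the class documents (the subclass name and the overridden setting are hypothetical):

import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.solr.request.SolrQueryRequest;

public class MyUnifiedSolrHighlighter extends UnifiedSolrHighlighter {
  @Override
  protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
    SolrExtendedUnifiedHighlighter highlighter = new SolrExtendedUnifiedHighlighter(req);
    highlighter.setMaxLength(50_000); // e.g. override the param-derived analysis budget
    return highlighter;
  }
}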

View File

@ -4,11 +4,12 @@ package org.apache.solr.parser;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError;
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
@ -135,9 +136,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
addClause(clauses, conj, mods, q);
}
if (clauses.size() == 1 && firstQuery != null)
{if (true) return firstQuery;}
{if (true) return rawToNormal(firstQuery);}
else {
{if (true) return getBooleanQuery(clauses);}
{if (true) return getBooleanQuery(clauses);}
}
throw new Error("Missing return statement in function");
}
@ -146,6 +147,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
Query q;
Token fieldToken=null, boost=null;
Token localParams=null;
int flags = 0;
if (jj_2_1(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
@ -195,6 +197,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
break;
case FILTER:
jj_consume_token(FILTER);
flags=startFilter();
q = Query(field);
jj_consume_token(RPAREN);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@ -206,7 +209,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_la1[7] = jj_gen;
;
}
q=getFilter(q);
q=getFilter(q); restoreFlags(flags);
break;
case LPARAMS:
localParams = jj_consume_token(LPARAMS);

View File

@ -190,9 +190,9 @@ Query Query(String field) throws SyntaxError :
)*
{
if (clauses.size() == 1 && firstQuery != null)
return firstQuery;
return rawToNormal(firstQuery);
else {
return getBooleanQuery(clauses);
return getBooleanQuery(clauses);
}
}
}
@ -201,6 +201,7 @@ Query Clause(String field) throws SyntaxError : {
Query q;
Token fieldToken=null, boost=null;
Token localParams=null;
int flags = 0;
}
{
@ -216,7 +217,7 @@ Query Clause(String field) throws SyntaxError : {
(
q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
| (<FILTER> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); } )
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } )
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
)
{ return handleBoost(q, boost); }

View File

@ -17,6 +17,7 @@
package org.apache.solr.parser;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
@ -61,6 +62,7 @@ import org.apache.solr.search.SyntaxError;
*/
public abstract class SolrQueryParserBase extends QueryBuilder {
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
static final int CONJ_NONE = 0;
static final int CONJ_AND = 1;
@ -89,7 +91,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
boolean autoGeneratePhraseQueries = false;
int flags;
protected IndexSchema schema;
protected QParser parser;
@ -125,6 +127,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
}
}
// internal: A simple raw fielded query
public static class RawQuery extends Query {
final SchemaField sfield;
final String externalVal;
public RawQuery(SchemaField sfield, String externalVal) {
this.sfield = sfield;
this.externalVal = externalVal;
}
@Override
public String toString(String field) {
return "RAW(" + field + "," + externalVal + ")";
}
@Override
public boolean equals(Object obj) {
return false;
}
@Override
public int hashCode() {
return 0;
}
}
// So the generated QueryParser(CharStream) won't error out
protected SolrQueryParserBase() {
@ -138,10 +165,22 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
public void init(Version matchVersion, String defaultField, QParser parser) {
this.schema = parser.getReq().getSchema();
this.parser = parser;
this.flags = parser.getFlags();
this.defaultField = defaultField;
setAnalyzer(schema.getQueryAnalyzer());
}
// Turn on the "filter" bit and return the previous flags for the caller to save
int startFilter() {
int oldFlags = flags;
flags |= QParser.FLAG_FILTER;
return oldFlags;
}
void restoreFlags(int flagsToRestore) {
flags = flagsToRestore;
}
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
*/
@ -381,7 +420,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
*/
protected Query getFieldQuery(String field, String queryText, int slop)
throws SyntaxError {
Query query = getFieldQuery(field, queryText, true);
Query query = getFieldQuery(field, queryText, true, false);
// only set slop if the phrase query was a result of this parser
// and not a sub-parser.
@ -492,11 +531,77 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
if (clauses.size()==0) {
return null; // all clause words were filtered away by the analyzer.
}
BooleanQuery.Builder query = newBooleanQuery();
for(final BooleanClause clause: clauses) {
query.add(clause);
SchemaField sfield = null;
List<String> fieldValues = null;
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD;
int clausesAdded = 0;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
Map<SchemaField, List<String>> fmap = new HashMap<>();
for (BooleanClause clause : clauses) {
Query subq = clause.getQuery();
if (subq instanceof RawQuery) {
if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
// We only collect optional terms for set queries. Since this isn't optional,
// convert the raw query to a normal query and handle as usual.
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
} else {
// Optional raw query.
RawQuery rawq = (RawQuery) subq;
// only look up fmap and type info on a field change
if (sfield != rawq.sfield) {
sfield = rawq.sfield;
fieldValues = fmap.get(sfield);
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
// We are currently relying on things like PointField not being marked as indexed in order to bypass
// the "useTermsQuery" check.
if (fieldValues == null && useTermsQuery || !sfield.indexed()) {
fieldValues = new ArrayList<>(2);
fmap.put(sfield, fieldValues);
}
}
if (fieldValues != null) {
fieldValues.add(rawq.externalVal);
continue;
}
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
}
}
clausesAdded++;
booleanBuilder.add(clause);
}
return query.build();
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) {
sfield = entry.getKey();
fieldValues = entry.getValue();
FieldType ft = sfield.getType();
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) {
// use boolean query instead
for (String externalVal : fieldValues) {
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal);
clausesAdded++;
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
} else {
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues);
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query
clausesAdded++;
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
}
return booleanBuilder.build();
}
@ -526,7 +631,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
q = getFuzzyQuery(qfield, termImage, fms);
} else {
String termImage=discardEscapeChar(term.image);
q = getFieldQuery(qfield, termImage, false);
q = getFieldQuery(qfield, termImage, false, true);
}
return q;
}
@ -540,10 +645,15 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
}
catch (Exception ignored) { }
}
return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
String raw = discardEscapeChar(term.image.substring(1, term.image.length()-1));
return getFieldQuery(qfield, raw, s);
}
// called from parser
// Called from parser
// Raw queries are transformed to normal queries before wrapping in a BoostQuery
Query handleBoost(Query q, Token boost) {
// q==null check is to avoid boosting null queries, such as those caused by stop words
if (boost == null || boost.image.length()==0 || q == null) {
@ -556,14 +666,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
if (q instanceof ConstantScoreQuery || q instanceof SolrConstantScoreQuery) {
// skip
} else {
newQ = new ConstantScoreQuery(q);
newQ = new ConstantScoreQuery( rawToNormal(q) );
}
return new BoostQuery(newQ, val);
}
float boostVal = Float.parseFloat(boost.image);
return new BoostQuery(q, boostVal);
return new BoostQuery( rawToNormal(q), boostVal);
}
@ -577,17 +687,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
*
*/
String discardEscapeChar(String input) throws SyntaxError {
int start = input.indexOf('\\');
if (start < 0) return input;
// Create char array to hold unescaped char sequence
char[] output = new char[input.length()];
input.getChars(0, start, output, 0);
// The length of the output can be less than the input
// due to discarded escape chars. This variable holds
// the actual length of the output
int length = 0;
int length = start;
// We remember whether the last processed character was
// an escape character
boolean lastCharWasEscapeChar = false;
boolean lastCharWasEscapeChar = true;
// The multiplier the current unicode digit must be multiplied with.
// E. g. the first digit must be multiplied with 16^3, the second with 16^2...
@ -596,7 +710,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// Used to calculate the codepoint of the escaped unicode character
int codePoint = 0;
for (int i = 0; i < input.length(); i++) {
// start after the first escape char
for (int i = start+1; i < input.length(); i++) {
char curChar = input.charAt(i);
if (codePointMultiplier > 0) {
codePoint += hexToInt(curChar) * codePointMultiplier;
@ -715,25 +830,57 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
private QParser subQParser = null;
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
Query rawToNormal(Query q) {
if (!(q instanceof RawQuery)) return q;
RawQuery rq = (RawQuery)q;
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal);
}
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
return getFieldQuery(field, queryText, quoted, false);
}
// private use for getFieldQuery
private String lastFieldName;
private SchemaField lastField;
// if raw==true, then it's possible for this method to return a RawQuery that will need to be transformed
// further before using.
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError {
checkNullField(field);
// intercept magic field name of "_" to use as a hook for our
// own functions.
if (field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(queryText, magic.subParser);
return subQParser.getQuery();
SchemaField sf;
if (field.equals(lastFieldName)) {
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
sf = lastField;
} else {
// intercept magic field name of "_" to use as a hook for our
// own functions.
if (field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(queryText, magic.subParser);
return subQParser.getQuery();
}
}
lastFieldName = field;
sf = lastField = schema.getFieldOrNull(field);
}
SchemaField sf = schema.getFieldOrNull(field);
if (sf != null) {
FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) {
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries()));
} else {
return sf.getType().getFieldQuery(parser, sf, queryText);
if (raw) {
return new RawQuery(sf, queryText);
} else {
return sf.getType().getFieldQuery(parser, sf, queryText);
}
}
}
@@ -742,6 +889,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
}
// called from parser
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
checkNullField(field);
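For context, a minimal sketch of how the new raw-query path above is meant to compose. The names come from this diff; the surrounding parser plumbing is assumed:

// Ask for a raw query first; for non-tokenized fields this may return a
// RawQuery placeholder holding (SchemaField, externalVal) instead of a
// fully built query.
Query q = getFieldQuery("foo_i", "42", /*quoted=*/false, /*raw=*/true);
// Normalize only once the final context (boost, constant-score, set query)
// is known; rawToNormal() delegates to sf.getType().getFieldQuery(...).
Query normal = rawToNormal(q);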

View File

@@ -252,8 +252,8 @@ final class NumericFacets {
}
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
if (!sf.indexed()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
if (!sf.indexed() && !sf.hasDocValues()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
}
// Add zeros until there are limit results
final Set<String> alreadySeen = new HashSet<>();

View File

@@ -19,6 +19,7 @@ package org.apache.solr.schema;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -38,7 +39,10 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.DocValuesRewriteMethod;
import org.apache.lucene.search.MultiTermQuery;
@@ -56,8 +60,8 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
@@ -743,7 +747,27 @@ public abstract class FieldType extends FieldProperties {
return new TermQuery(new Term(field.getName(), br));
}
}
/** @lucene.experimental */
public Query getSetQuery(QParser parser, SchemaField field, Collection<String> externalVals) {
if (!field.indexed()) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String externalVal : externalVals) {
Query subq = getFieldQuery(parser, field, externalVal);
builder.add(subq, BooleanClause.Occur.SHOULD);
}
return builder.build();
}
List<BytesRef> lst = new ArrayList<>(externalVals.size());
BytesRefBuilder br = new BytesRefBuilder();
for (String externalVal : externalVals) {
readableToIndexed(externalVal, br);
lst.add( br.toBytesRef() );
}
return new TermsQuery(field.getName() , lst);
}
/**
* Expert: Returns the rewrite method for multiterm queries such as wildcards.
* @param parser The {@link org.apache.solr.search.QParser} calling the method
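The point of the new getSetQuery() is to collapse large disjunctions such as foo_i:(1 2 3 4 5) into a single TermsQuery over the indexed terms instead of one TermQuery clause per value. A hedged sketch of a caller, assuming parser and field are already in hand (java.util.Arrays assumed imported):

Collection<String> vals = Arrays.asList("1", "2", "3");
Query q = field.getType().getSetQuery(parser, field, vals);
// indexed field     -> one TermsQuery("foo_i", [1, 2, 3])
// non-indexed field -> BooleanQuery of SHOULD clauses, one per value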

View File

@@ -1032,8 +1032,8 @@ public class ExtendedDismaxQParser extends QParser {
}
@Override
protected Query getFieldQuery(String field, String val, boolean quoted) throws SyntaxError {
this.type = QType.FIELD;
protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError {
this.type = quoted ? QType.PHRASE : QType.FIELD;
this.field = field;
this.val = val;
this.slop = getPhraseSlop(); // unspecified
@@ -1212,7 +1212,7 @@ public class ExtendedDismaxQParser extends QParser {
switch (type) {
case FIELD: // fallthrough
case PHRASE:
Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
// Boolean query on a whitespace-separated string
// If these were synonyms we would have a SynonymQuery
if (query instanceof BooleanQuery) {
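With the extra parameters, a quoted value is now tagged as a phrase at parse time rather than re-derived later. A hypothetical trace of the dispatch above:

// user query: title:"solr rocks"
// getFieldQuery("title", "solr rocks", quoted=true, raw=false)
//   -> type = QType.PHRASE, slop = getPhraseSlop()
// later, getAliasedQuery() replays it as
//   super.getFieldQuery(field, val, type == QType.PHRASE, false)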

View File

@@ -46,7 +46,9 @@ public class FunctionRangeQParserPlugin extends QParserPlugin {
@Override
public Query parse() throws SyntaxError {
funcStr = localParams.get(QueryParsing.V, null);
Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).getQuery();
QParser subParser = subQuery(funcStr, FunctionQParserPlugin.NAME);
subParser.setIsFilter(false); // the range can be based on the relevancy score of embedded queries.
Query funcQ = subParser.getQuery();
if (funcQ instanceof FunctionQuery) {
vs = ((FunctionQuery)funcQ).getValueSource();
} else {
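frange clears the filter flag because the wrapped function may depend on relevancy scores, e.g. via the query() function. A sketch of the kind of request this protects; the parameter names are invented for illustration:

// Example request (hypothetical params):
//   fq={!frange l=1.0}query($qq)   -- the range is evaluated over the
//   qq=title:(solr rocks)          -- score of this embedded query
// Hence the sub-parser must not inherit filter context:
QParser subParser = subQuery(funcStr, FunctionQParserPlugin.NAME);
subParser.setIsFilter(false); // scores matter inside the function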

View File

@@ -110,6 +110,7 @@ public class JoinQParserPlugin extends QParserPlugin {
} else {
coreName = null;
QParser fromQueryParser = subQuery(v, null);
fromQueryParser.setIsFilter(true);
fromQuery = fromQueryParser.getQuery();
}

View File

@@ -32,12 +32,18 @@ import java.util.*;
*
*/
public abstract class QParser {
/** @lucene.experimental */
public static final int FLAG_FILTER = 0x01;
protected String qstr;
protected SolrParams params;
protected SolrParams localParams;
protected SolrQueryRequest req;
protected int recurseCount;
/** @lucene.experimental */
protected int flags;
protected Query query;
protected String stringIncludingLocalParams; // the original query string including any local params
@@ -83,6 +89,28 @@ public abstract class QParser {
this.req = req;
}
/** @lucene.experimental */
public void setFlags(int flags) {
this.flags = flags;
}
/** @lucene.experimental */
public int getFlags() {
return flags;
}
/** @lucene.experimental Query is in the context of a filter, where scores don't matter */
public boolean isFilter() {
return (flags & FLAG_FILTER) != 0;
}
/** @lucene.experimental */
public void setIsFilter(boolean isFilter) {
if (isFilter)
flags |= FLAG_FILTER;
else
flags &= ~FLAG_FILTER;
}
private static void addTag(Map<Object,Collection<Object>> tagMap, Object key, Object val) {
Collection<Object> lst = tagMap.get(key);
@@ -201,6 +229,7 @@ public abstract class QParser {
defaultType = localParams.get(QueryParsing.DEFTYPE);
}
QParser nestedParser = getParser(q, defaultType, getReq());
nestedParser.flags = this.flags; // TODO: this would be better passed in to the constructor... change to a ParserContext object?
nestedParser.recurseCount = recurseCount;
recurseCount--;
return nestedParser;
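The filter context travels as a bit in an int so further context flags can be added later without new fields, and, as shown above, subQuery() copies the flags into nested parsers. A standalone restatement of the bit operations:

int flags = 0;
flags |= QParser.FLAG_FILTER;                         // setIsFilter(true)
boolean filter = (flags & QParser.FLAG_FILTER) != 0;  // isFilter()
flags &= ~QParser.FLAG_FILTER;                        // setIsFilter(false)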

View File

@@ -98,6 +98,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
QParser parser = null;
try {
parser = QParser.getParser((String)rawFilter, fcontext.req);
parser.setIsFilter(true);
Query symbolicFilter = parser.getQuery();
qlist.add(symbolicFilter);
} catch (SyntaxError syntaxError) {
@@ -134,6 +135,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
QParser parser = null;
try {
parser = QParser.getParser((String) qstring, fcontext.req);
parser.setIsFilter(true);
Query symbolicFilter = parser.getQuery();
qlist.add(symbolicFilter);
} catch (SyntaxError syntaxError) {
@@ -237,6 +239,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
Query parentQuery;
try {
QParser parser = QParser.getParser(parentStr, fcontext.req);
parser.setIsFilter(true);
parentQuery = parser.getQuery();
} catch (SyntaxError err) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);

View File

@@ -568,6 +568,7 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
if (qstring != null) {
QParser parser = QParser.getParser(qstring, getSolrRequest());
parser.setIsFilter(true);
facet.q = parser.getQuery();
}

View File

@@ -19,6 +19,7 @@ package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
@@ -33,6 +34,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.ClassificationUpdateProcessorFactory.Algorithm;
/**
* This Class is a Request Update Processor to classify the document in input and add a field
@@ -42,43 +44,54 @@ import org.apache.solr.update.AddUpdateCommand;
class ClassificationUpdateProcessor
extends UpdateRequestProcessor {
private String classFieldName; // the field to index the assigned class
private final String trainingClassField;
private final String predictedClassField;
private final int maxOutputClasses;
private DocumentClassifier<BytesRef> classifier;
/**
* Sole constructor
*
* @param inputFieldNames fields to be used as classifier's inputs
* @param classFieldName field to be used as classifier's output
* @param minDf setting for {@link org.apache.lucene.queries.mlt.MoreLikeThis#minDocFreq}, in case algorithm is {@code "knn"}
* @param minTf setting for {@link org.apache.lucene.queries.mlt.MoreLikeThis#minTermFreq}, in case algorithm is {@code "knn"}
* @param k setting for k nearest neighbors to analyze, in case algorithm is {@code "knn"}
* @param algorithm the name of the classifier to use
* @param classificationParams classification advanced params
* @param next next update processor in the chain
* @param indexReader index reader
* @param schema schema
*/
public ClassificationUpdateProcessor(String[] inputFieldNames, String classFieldName, int minDf, int minTf, int k, String algorithm,
UpdateRequestProcessor next, IndexReader indexReader, IndexSchema schema) {
public ClassificationUpdateProcessor(ClassificationUpdateProcessorParams classificationParams, UpdateRequestProcessor next, IndexReader indexReader, IndexSchema schema) {
super(next);
this.classFieldName = classFieldName;
Map<String, Analyzer> field2analyzer = new HashMap<String, Analyzer>();
this.trainingClassField = classificationParams.getTrainingClassField();
this.predictedClassField = classificationParams.getPredictedClassField();
this.maxOutputClasses = classificationParams.getMaxPredictedClasses();
String[] inputFieldNamesWithBoost = classificationParams.getInputFieldNames();
Algorithm classificationAlgorithm = classificationParams.getAlgorithm();
Map<String, Analyzer> field2analyzer = new HashMap<>();
String[] inputFieldNames = this.removeBoost(inputFieldNamesWithBoost);
for (String fieldName : inputFieldNames) {
SchemaField fieldFromSolrSchema = schema.getField(fieldName);
Analyzer indexAnalyzer = fieldFromSolrSchema.getType().getQueryAnalyzer();
field2analyzer.put(fieldName, indexAnalyzer);
}
switch (algorithm) {
case "knn":
classifier = new KNearestNeighborDocumentClassifier(indexReader, null, null, k, minDf, minTf, classFieldName, field2analyzer, inputFieldNames);
switch (classificationAlgorithm) {
case KNN:
classifier = new KNearestNeighborDocumentClassifier(indexReader, null, classificationParams.getTrainingFilterQuery(), classificationParams.getK(), classificationParams.getMinDf(), classificationParams.getMinTf(), trainingClassField, field2analyzer, inputFieldNamesWithBoost);
break;
case "bayes":
classifier = new SimpleNaiveBayesDocumentClassifier(indexReader, null, classFieldName, field2analyzer, inputFieldNames);
case BAYES:
classifier = new SimpleNaiveBayesDocumentClassifier(indexReader, null, trainingClassField, field2analyzer, inputFieldNamesWithBoost);
break;
}
}
private String[] removeBoost(String[] inputFieldNamesWithBoost) {
String[] inputFieldNames = new String[inputFieldNamesWithBoost.length];
for (int i = 0; i < inputFieldNamesWithBoost.length; i++) {
String singleFieldNameWithBoost = inputFieldNamesWithBoost[i];
String[] fieldName2boost = singleFieldNameWithBoost.split("\\^");
inputFieldNames[i] = fieldName2boost[0];
}
return inputFieldNames;
}
/**
* @param cmd the update command in input containing the Document to classify
* @throws IOException If there is a low-level I/O error
@@ -89,12 +102,14 @@ class ClassificationUpdateProcessor
SolrInputDocument doc = cmd.getSolrInputDocument();
Document luceneDocument = cmd.getLuceneDocument();
String assignedClass;
Object documentClass = doc.getFieldValue(classFieldName);
Object documentClass = doc.getFieldValue(trainingClassField);
if (documentClass == null) {
ClassificationResult<BytesRef> classificationResult = classifier.assignClass(luceneDocument);
if (classificationResult != null) {
assignedClass = classificationResult.getAssignedClass().utf8ToString();
doc.addField(classFieldName, assignedClass);
List<ClassificationResult<BytesRef>> assignedClassifications = classifier.getClasses(luceneDocument, maxOutputClasses);
if (assignedClassifications != null) {
for (ClassificationResult<BytesRef> singleClassification : assignedClassifications) {
assignedClass = singleClassification.getAssignedClass().utf8ToString();
doc.addField(predictedClassField, assignedClass);
}
}
}
super.processAdd(cmd);

View File

@@ -17,13 +17,20 @@
package org.apache.solr.update.processor;
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.LuceneQParser;
import org.apache.solr.search.SyntaxError;
import static org.apache.solr.update.processor.ClassificationUpdateProcessorFactory.Algorithm.KNN;
/**
* This class implements an UpdateProcessorFactory for the Classification Update Processor.
@@ -33,49 +40,67 @@ public class ClassificationUpdateProcessorFactory extends UpdateRequestProcessor
// Update Processor Config params
private static final String INPUT_FIELDS_PARAM = "inputFields";
private static final String CLASS_FIELD_PARAM = "classField";
private static final String TRAINING_CLASS_FIELD_PARAM = "classField";
private static final String PREDICTED_CLASS_FIELD_PARAM = "predictedClassField";
private static final String MAX_CLASSES_TO_ASSIGN_PARAM = "predictedClass.maxCount";
private static final String ALGORITHM_PARAM = "algorithm";
private static final String KNN_MIN_TF_PARAM = "knn.minTf";
private static final String KNN_MIN_DF_PARAM = "knn.minDf";
private static final String KNN_K_PARAM = "knn.k";
private static final String KNN_FILTER_QUERY = "knn.filterQuery";
public enum Algorithm {KNN, BAYES}
//Update Processor Defaults
private static final int DEFAULT_MAX_CLASSES_TO_ASSIGN = 1;
private static final int DEFAULT_MIN_TF = 1;
private static final int DEFAULT_MIN_DF = 1;
private static final int DEFAULT_K = 10;
private static final String DEFAULT_ALGORITHM = "knn";
private static final Algorithm DEFAULT_ALGORITHM = KNN;
private String[] inputFieldNames; // the array of fields to be sent to the Classifier
private String classFieldName; // the field containing the class for the Document
private String algorithm; // the Classification Algorithm to use - currently 'knn' or 'bayes'
private int minTf; // knn specific - the minimum Term Frequency for considering a term
private int minDf; // knn specific - the minimum Document Frequency for considering a term
private int k; // knn specific - the window of top results to evaluate, when assigning the class
private SolrParams params;
private ClassificationUpdateProcessorParams classificationParams;
@Override
public void init(final NamedList args) {
if (args != null) {
SolrParams params = SolrParams.toSolrParams(args);
params = SolrParams.toSolrParams(args);
classificationParams = new ClassificationUpdateProcessorParams();
String fieldNames = params.get(INPUT_FIELDS_PARAM);// must be a comma separated list of fields
checkNotNull(INPUT_FIELDS_PARAM, fieldNames);
inputFieldNames = fieldNames.split("\\,");
classificationParams.setInputFieldNames(fieldNames.split("\\,"));
classFieldName = params.get(CLASS_FIELD_PARAM);
checkNotNull(CLASS_FIELD_PARAM, classFieldName);
String trainingClassField = (params.get(TRAINING_CLASS_FIELD_PARAM));
checkNotNull(TRAINING_CLASS_FIELD_PARAM, trainingClassField);
classificationParams.setTrainingClassField(trainingClassField);
algorithm = params.get(ALGORITHM_PARAM);
if (algorithm == null)
algorithm = DEFAULT_ALGORITHM;
String predictedClassField = (params.get(PREDICTED_CLASS_FIELD_PARAM));
if (predictedClassField == null || predictedClassField.isEmpty()) {
predictedClassField = trainingClassField;
}
classificationParams.setPredictedClassField(predictedClassField);
minTf = getIntParam(params, KNN_MIN_TF_PARAM, DEFAULT_MIN_TF);
minDf = getIntParam(params, KNN_MIN_DF_PARAM, DEFAULT_MIN_DF);
k = getIntParam(params, KNN_K_PARAM, DEFAULT_K);
classificationParams.setMaxPredictedClasses(getIntParam(params, MAX_CLASSES_TO_ASSIGN_PARAM, DEFAULT_MAX_CLASSES_TO_ASSIGN));
String algorithmString = params.get(ALGORITHM_PARAM);
Algorithm classificationAlgorithm;
try {
if (algorithmString == null || Algorithm.valueOf(algorithmString.toUpperCase(Locale.ROOT)) == null) {
classificationAlgorithm = DEFAULT_ALGORITHM;
} else {
classificationAlgorithm = Algorithm.valueOf(algorithmString.toUpperCase(Locale.ROOT));
}
} catch (IllegalArgumentException e) {
throw new SolrException
(SolrException.ErrorCode.SERVER_ERROR,
"Classification UpdateProcessor Algorithm: '" + algorithmString + "' not supported");
}
classificationParams.setAlgorithm(classificationAlgorithm);
classificationParams.setMinTf(getIntParam(params, KNN_MIN_TF_PARAM, DEFAULT_MIN_TF));
classificationParams.setMinDf(getIntParam(params, KNN_MIN_DF_PARAM, DEFAULT_MIN_DF));
classificationParams.setK(getIntParam(params, KNN_K_PARAM, DEFAULT_K));
}
}
@@ -108,116 +133,34 @@ public class ClassificationUpdateProcessorFactory extends UpdateRequestProcessor
@Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
String trainingFilterQueryString = (params.get(KNN_FILTER_QUERY));
try {
if (trainingFilterQueryString != null && !trainingFilterQueryString.isEmpty()) {
Query trainingFilterQuery = this.parseFilterQuery(trainingFilterQueryString, params, req);
classificationParams.setTrainingFilterQuery(trainingFilterQuery);
}
} catch (SyntaxError | RuntimeException syntaxError) {
throw new SolrException
(SolrException.ErrorCode.SERVER_ERROR,
"Classification UpdateProcessor Training Filter Query: '" + trainingFilterQueryString + "' is not supported", syntaxError);
}
IndexSchema schema = req.getSchema();
IndexReader indexReader = req.getSearcher().getIndexReader();
return new ClassificationUpdateProcessor(inputFieldNames, classFieldName, minDf, minTf, k, algorithm, next, indexReader, schema);
return new ClassificationUpdateProcessor(classificationParams, next, indexReader, schema);
}
/**
* get field names used as classifier's inputs
*
* @return the input field names
*/
public String[] getInputFieldNames() {
return inputFieldNames;
private Query parseFilterQuery(String trainingFilterQueryString, SolrParams params, SolrQueryRequest req) throws SyntaxError {
LuceneQParser parser = new LuceneQParser(trainingFilterQueryString, null, params, req);
return parser.parse();
}
/**
* set field names used as classifier's inputs
*
* @param inputFieldNames the input field names
*/
public void setInputFieldNames(String[] inputFieldNames) {
this.inputFieldNames = inputFieldNames;
public ClassificationUpdateProcessorParams getClassificationParams() {
return classificationParams;
}
/**
* get field names used as classifier's output
*
* @return the output field name
*/
public String getClassFieldName() {
return classFieldName;
}
/**
* set field names used as classifier's output
*
* @param classFieldName the output field name
*/
public void setClassFieldName(String classFieldName) {
this.classFieldName = classFieldName;
}
/**
* get the name of the classifier algorithm used
*
* @return the classifier algorithm used
*/
public String getAlgorithm() {
return algorithm;
}
/**
* set the name of the classifier algorithm used
*
* @param algorithm the classifier algorithm used
*/
public void setAlgorithm(String algorithm) {
this.algorithm = algorithm;
}
/**
* get the min term frequency value to be used in case algorithm is {@code "knn"}
*
* @return the min term frequency
*/
public int getMinTf() {
return minTf;
}
/**
* set the min term frequency value to be used in case algorithm is {@code "knn"}
*
* @param minTf the min term frequency
*/
public void setMinTf(int minTf) {
this.minTf = minTf;
}
/**
* get the min document frequency value to be used in case algorithm is {@code "knn"}
*
* @return the min document frequency
*/
public int getMinDf() {
return minDf;
}
/**
* set the min document frequency value to be used in case algorithm is {@code "knn"}
*
* @param minDf the min document frequency
*/
public void setMinDf(int minDf) {
this.minDf = minDf;
}
/**
* get the no. of nearest neighbors to analyze, to be used in case algorithm is {@code "knn"}
*
* @return the no. of neighbors to analyze
*/
public int getK() {
return k;
}
/**
* set the no. of nearest neighbors to analyze, to be used in case algorithm is {@code "knn"}
*
* @param k the no. of neighbors to analyze
*/
public void setK(int k) {
this.k = k;
public void setClassificationParams(ClassificationUpdateProcessorParams classificationParams) {
this.classificationParams = classificationParams;
}
}
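A hedged sketch of wiring the factory up programmatically; the parameter keys are the ones defined above, while the values and the surrounding setup are invented for illustration:

ClassificationUpdateProcessorFactory factory = new ClassificationUpdateProcessorFactory();
NamedList<String> args = new NamedList<>();
args.add("inputFields", "title,content");          // required, comma separated
args.add("classField", "cat");                     // required training class field
args.add("predictedClassField", "predicted_cat");  // optional, defaults to classField
args.add("algorithm", "knn");                      // parsed case-insensitively to Algorithm.KNN
args.add("knn.k", "5");
args.add("knn.filterQuery", "category:[* TO *]");  // parsed lazily in getInstance()
factory.init(args);
// factory.getInstance(req, rsp, next) then parses knn.filterQuery and
// builds the ClassificationUpdateProcessor from the collected params.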

View File

@@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.lucene.search.Query;
public class ClassificationUpdateProcessorParams {
private String[] inputFieldNames; // the array of fields to be sent to the Classifier
private Query trainingFilterQuery; // a filter query to reduce the training set to a subset
private String trainingClassField; // the field containing the class for the Document
private String predictedClassField; // the field that will contain the predicted class
private int maxPredictedClasses; // the max number of classes to assign
private ClassificationUpdateProcessorFactory.Algorithm algorithm; // the Classification Algorithm to use - currently 'knn' or 'bayes'
private int minTf; // knn specific - the minimum Term Frequency for considering a term
private int minDf; // knn specific - the minimum Document Frequency for considering a term
private int k; // knn specific - the window of top results to evaluate, when assigning the class
public String[] getInputFieldNames() {
return inputFieldNames;
}
public void setInputFieldNames(String[] inputFieldNames) {
this.inputFieldNames = inputFieldNames;
}
public Query getTrainingFilterQuery() {
return trainingFilterQuery;
}
public void setTrainingFilterQuery(Query trainingFilterQuery) {
this.trainingFilterQuery = trainingFilterQuery;
}
public String getTrainingClassField() {
return trainingClassField;
}
public void setTrainingClassField(String trainingClassField) {
this.trainingClassField = trainingClassField;
}
public String getPredictedClassField() {
return predictedClassField;
}
public void setPredictedClassField(String predictedClassField) {
this.predictedClassField = predictedClassField;
}
public int getMaxPredictedClasses() {
return maxPredictedClasses;
}
public void setMaxPredictedClasses(int maxPredictedClasses) {
this.maxPredictedClasses = maxPredictedClasses;
}
public ClassificationUpdateProcessorFactory.Algorithm getAlgorithm() {
return algorithm;
}
public void setAlgorithm(ClassificationUpdateProcessorFactory.Algorithm algorithm) {
this.algorithm = algorithm;
}
public int getMinTf() {
return minTf;
}
public void setMinTf(int minTf) {
this.minTf = minTf;
}
public int getMinDf() {
return minDf;
}
public void setMinDf(int minDf) {
this.minDf = minDf;
}
public int getK() {
return k;
}
public void setK(int k) {
this.k = k;
}
}

View File

@@ -91,6 +91,7 @@ import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
import org.apache.solr.client.solrj.impl.ZkClientClusterStateProvider;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
@@ -1514,7 +1515,7 @@ public class SolrCLI {
echo("Uploading " + confPath.toAbsolutePath().toString() +
" for config " + confname + " to ZooKeeper at " + cloudSolrClient.getZkHost());
cloudSolrClient.uploadConfig(confPath, confname);
((ZkClientClusterStateProvider) cloudSolrClient.getClusterStateProvider()).uploadConfig(confPath, confname);
}
// since creating a collection is a heavy-weight operation, check for existence first

View File

@@ -907,7 +907,7 @@ public class SolrPluginUtils {
* aliases should work)
*/
@Override
protected Query getFieldQuery(String field, String queryText, boolean quoted)
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw)
throws SyntaxError {
if (aliases.containsKey(field)) {
@@ -917,7 +917,7 @@ public class SolrPluginUtils {
List<Query> disjuncts = new ArrayList<>();
for (String f : a.fields.keySet()) {
Query sub = getFieldQuery(f,queryText,quoted);
Query sub = getFieldQuery(f,queryText,quoted, false);
if (null != sub) {
if (null != a.fields.get(f)) {
sub = new BoostQuery(sub, a.fields.get(f));
@@ -931,7 +931,7 @@ public class SolrPluginUtils {
} else {
try {
return super.getFieldQuery(field, queryText, quoted);
return super.getFieldQuery(field, queryText, quoted, raw);
} catch (Exception e) {
return null;
}

View File

@@ -1,84 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
import org.apache.solr.common.util.SuppressForbidden;
/**
* An abstraction for how time passes. It is passed to {@link Timer} to track timing.
*/
public abstract class Clock {
/**
* Returns the current time tick.
*
* @return time tick in nanoseconds
*/
public abstract long getTick();
/**
* Returns the current time in milliseconds.
*
* @return time in milliseconds
*/
@SuppressForbidden(reason = "Need currentTimeMillis, API used by ExponentiallyDecayingSample for suspect reasons")
public long getTime() {
return System.currentTimeMillis();
}
private static final Clock DEFAULT = new UserTimeClock();
/**
* The default clock to use.
*
* @return the default {@link Clock} instance
*
* @see UserTimeClock
*/
public static Clock defaultClock() {
return DEFAULT;
}
/**
* A clock implementation which returns the current time in epoch nanoseconds.
*/
public static class UserTimeClock extends Clock {
@Override
public long getTick() {
return System.nanoTime();
}
}
/**
* A clock implementation which returns the current thread's CPU time.
*/
public static class CpuTimeClock extends Clock {
private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean();
@Override
public long getTick() {
return THREAD_MX_BEAN.getCurrentThreadCpuTime();
}
}
}

View File

@@ -1,126 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import static java.lang.Math.exp;
/**
* An exponentially-weighted moving average.
*
* @see <a href="http://www.teamquest.com/pdfs/whitepaper/ldavg1.pdf">UNIX Load Average Part 1: How
* It Works</a>
* @see <a href="http://www.teamquest.com/pdfs/whitepaper/ldavg2.pdf">UNIX Load Average Part 2: Not
* Your Average Average</a>
*/
public class EWMA {
private static final int INTERVAL = 5;
private static final double SECONDS_PER_MINUTE = 60.0;
private static final int ONE_MINUTE = 1;
private static final int FIVE_MINUTES = 5;
private static final int FIFTEEN_MINUTES = 15;
private static final double M1_ALPHA = 1 - exp(-INTERVAL / SECONDS_PER_MINUTE / ONE_MINUTE);
private static final double M5_ALPHA = 1 - exp(-INTERVAL / SECONDS_PER_MINUTE / FIVE_MINUTES);
private static final double M15_ALPHA = 1 - exp(-INTERVAL / SECONDS_PER_MINUTE / FIFTEEN_MINUTES);
private volatile boolean initialized = false;
private volatile double rate = 0.0;
private final AtomicLong uncounted = new AtomicLong();
private final double alpha, interval;
/**
* Creates a new EWMA which is equivalent to the UNIX one minute load average and which expects
* to be ticked every 5 seconds.
*
* @return a one-minute EWMA
*/
public static EWMA oneMinuteEWMA() {
return new EWMA(M1_ALPHA, INTERVAL, TimeUnit.SECONDS);
}
/**
* Creates a new EWMA which is equivalent to the UNIX five minute load average and which expects
* to be ticked every 5 seconds.
*
* @return a five-minute EWMA
*/
public static EWMA fiveMinuteEWMA() {
return new EWMA(M5_ALPHA, INTERVAL, TimeUnit.SECONDS);
}
/**
* Creates a new EWMA which is equivalent to the UNIX fifteen minute load average and which
* expects to be ticked every 5 seconds.
*
* @return a fifteen-minute EWMA
*/
public static EWMA fifteenMinuteEWMA() {
return new EWMA(M15_ALPHA, INTERVAL, TimeUnit.SECONDS);
}
/**
* Create a new EWMA with a specific smoothing constant.
*
* @param alpha the smoothing constant
* @param interval the expected tick interval
* @param intervalUnit the time unit of the tick interval
*/
public EWMA(double alpha, long interval, TimeUnit intervalUnit) {
this.interval = intervalUnit.toNanos(interval);
this.alpha = alpha;
}
/**
* Update the moving average with a new value.
*
* @param n the new value
*/
public void update(long n) {
uncounted.addAndGet(n);
}
/**
* Mark the passage of time and decay the current rate accordingly.
*/
public void tick() {
final long count = uncounted.getAndSet(0);
final double instantRate = count / interval;
if (initialized) {
rate += (alpha * (instantRate - rate));
} else {
rate = instantRate;
initialized = true;
}
}
/**
* Returns the rate in the given units of time.
*
* @param rateUnit the unit of time
* @return the rate
*/
public double getRate(TimeUnit rateUnit) {
return rate * (double) rateUnit.toNanos(1);
}
}
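The alpha constants above are the standard UNIX load-average smoothing factors for a 5-second tick; a quick check of the arithmetic, assuming the 5s tick the class expects:

alpha(1m)  = 1 - e^(-5/60/1)  ~= 0.0800
alpha(5m)  = 1 - e^(-5/60/5)  ~= 0.0165
alpha(15m) = 1 - e^(-5/60/15) ~= 0.00554

Each tick() then folds the instantaneous rate into the average, rate += alpha * (instantRate - rate), so a burst decays geometrically by a factor of (1 - alpha) per 5-second interval.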

View File

@@ -1,218 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import static java.lang.Math.exp;
import static java.lang.Math.min;
/**
* An exponentially-decaying random sample of {@code long}s. Uses Cormode et al's forward-decaying
* priority reservoir sampling method to produce a statistically representative sample,
* exponentially biased towards newer entries.
*
* See <a href="http://www.research.att.com/people/Cormode_Graham/library/publications/CormodeShkapenyukSrivastavaXu09.pdf">
* Cormode et al. Forward Decay: A Practical Time Decay Model for Streaming Systems. ICDE '09: Proceedings of the 2009 IEEE International Conference on Data Engineering (2009)</a>
*/
public class ExponentiallyDecayingSample implements Sample {
private static final long RESCALE_THRESHOLD = TimeUnit.HOURS.toNanos(1);
private final ConcurrentSkipListMap<Double, Long> values;
private final ReentrantReadWriteLock lock;
private final double alpha;
private final int reservoirSize;
private final AtomicLong count = new AtomicLong(0);
private volatile long startTime;
private final AtomicLong nextScaleTime = new AtomicLong(0);
private final Clock clock;
// TODO: Maybe replace this with a Mersenne Twister?
private final Random random = new Random();
/**
* Creates a new {@link ExponentiallyDecayingSample}.
*
* @param reservoirSize the number of samples to keep in the sampling reservoir
* @param alpha the exponential decay factor; the higher this is, the more biased the
* sample will be towards newer values
*/
public ExponentiallyDecayingSample(int reservoirSize, double alpha) {
this(reservoirSize, alpha, Clock.defaultClock());
}
/**
* Creates a new {@link ExponentiallyDecayingSample}.
*
* @param reservoirSize the number of samples to keep in the sampling reservoir
* @param alpha the exponential decay factor; the higher this is, the more biased the
* sample will be towards newer values
*/
public ExponentiallyDecayingSample(int reservoirSize, double alpha, Clock clock) {
this.values = new ConcurrentSkipListMap<>();
this.lock = new ReentrantReadWriteLock();
this.alpha = alpha;
this.reservoirSize = reservoirSize;
this.clock = clock;
clear();
}
@Override
public void clear() {
lockForRescale();
try {
values.clear();
count.set(0);
this.startTime = currentTimeInSeconds();
nextScaleTime.set(clock.getTick() + RESCALE_THRESHOLD);
} finally {
unlockForRescale();
}
}
@Override
public int size() {
return (int) min(reservoirSize, count.get());
}
@Override
public void update(long value) {
update(value, currentTimeInSeconds());
}
/**
* Adds an old value with a fixed timestamp to the sample.
*
* @param value the value to be added
* @param timestamp the epoch timestamp of {@code value} in seconds
*/
public void update(long value, long timestamp) {
rescaleIfNeeded();
lockForRegularUsage();
try {
final double priority = weight(timestamp - startTime) / random.nextDouble();
final long newCount = count.incrementAndGet();
if (newCount <= reservoirSize) {
values.put(priority, value);
} else {
Double first = values.firstKey();
if (first < priority) {
if (values.putIfAbsent(priority, value) == null) {
// ensure we always remove an item
while (values.remove(first) == null) {
first = values.firstKey();
}
}
}
}
} finally {
unlockForRegularUsage();
}
}
private void rescaleIfNeeded() {
final long now = clock.getTick();
final long next = nextScaleTime.get();
if (now >= next) {
rescale(now, next);
}
}
@Override
public Snapshot getSnapshot() {
lockForRegularUsage();
try {
return new Snapshot(values.values());
} finally {
unlockForRegularUsage();
}
}
private long currentTimeInSeconds() {
return TimeUnit.MILLISECONDS.toSeconds(clock.getTime());
}
private double weight(long t) {
return exp(alpha * t);
}
/* "A common feature of the above techniques—indeed, the key technique that
* allows us to track the decayed weights efficiently—is that they maintain
* counts and other quantities based on g(ti - L), and only scale by g(t - L)
* at query time. But while g(ti - L)/g(t - L) is guaranteed to lie between zero
* and one, the intermediate values of g(ti - L) could become very large. For
* polynomial functions, these values should not grow too large, and should be
* effectively represented in practice by floating point values without loss of
* precision. For exponential functions, these values could grow quite large as
* new values of (ti - L) become large, and potentially exceed the capacity of
* common floating point types. However, since the values stored by the
* algorithms are linear combinations of g values (scaled sums), they can be
* rescaled relative to a new landmark. That is, by the analysis of exponential
* decay in Section III-A, the choice of L does not affect the final result. We
* can therefore multiply each value based on L by a factor of exp(-α(L' - L)),
* and obtain the correct value as if we had instead computed relative to a new
* landmark L' (and then use this new L' at query time). This can be done with
* a linear pass over whatever data structure is being used."
*/
private void rescale(long now, long next) {
if (nextScaleTime.compareAndSet(next, now + RESCALE_THRESHOLD)) {
lockForRescale();
try {
final long oldStartTime = startTime;
this.startTime = currentTimeInSeconds();
final ArrayList<Double> keys = new ArrayList<>(values.keySet());
for (Double key : keys) {
final Long value = values.remove(key);
values.put(key * exp(-alpha * (startTime - oldStartTime)), value);
}
// make sure the counter is in sync with the number of stored samples.
count.set(values.size());
} finally {
unlockForRescale();
}
}
}
private void unlockForRescale() {
lock.writeLock().unlock();
}
private void lockForRescale() {
lock.writeLock().lock();
}
private void lockForRegularUsage() {
lock.readLock().lock();
}
private void unlockForRegularUsage() {
lock.readLock().unlock();
}
}
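Restating the reservoir math from the code above in one place: each incoming value receives the priority

p = exp(alpha * (t - L)) / u,   with u drawn uniformly from (0, 1]

where L is the current landmark (startTime), and only the reservoirSize entries with the largest priorities are kept. Rescaling to a new landmark L' multiplies every stored key by exp(-alpha * (L' - L)), which scales all priorities uniformly and therefore never changes their relative order.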

View File

@@ -1,238 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import static java.lang.Math.sqrt;
/**
* A metric which calculates the distribution of a value.
*
* @see <a href="http://www.johndcook.com/standard_deviation.html">Accurately computing running
* variance</a>
*/
public class Histogram {
private static final int DEFAULT_SAMPLE_SIZE = 1028;
private static final double DEFAULT_ALPHA = 0.015;
/**
* The type of sampling the histogram should be performing.
*/
enum SampleType {
/**
* Uses a uniform sample of 1028 elements, which offers a 99.9% confidence level with a 5%
* margin of error assuming a normal distribution.
*/
UNIFORM {
@Override
public Sample newSample() {
return new UniformSample(DEFAULT_SAMPLE_SIZE);
}
},
/**
* Uses an exponentially decaying sample of 1028 elements, which offers a 99.9% confidence
* level with a 5% margin of error assuming a normal distribution, and an alpha factor of
* 0.015, which heavily biases the sample to the past 5 minutes of measurements.
*/
BIASED {
@Override
public Sample newSample() {
return new ExponentiallyDecayingSample(DEFAULT_SAMPLE_SIZE, DEFAULT_ALPHA);
}
};
public abstract Sample newSample();
}
private final Sample sample;
private final AtomicLong min = new AtomicLong();
private final AtomicLong max = new AtomicLong();
private final AtomicLong sum = new AtomicLong();
// These are for the Welford algorithm for calculating running variance
// without floating-point doom.
private final AtomicReference<double[]> variance =
new AtomicReference<>(new double[]{-1, 0}); // M, S
private final AtomicLong count = new AtomicLong();
/**
* Creates a new {@link Histogram} with the given sample type.
*
* @param type the type of sample to use
*/
Histogram(SampleType type) {
this(type.newSample());
}
/**
* Creates a new {@link Histogram} with the given sample.
*
* @param sample the sample to create a histogram from
*/
Histogram(Sample sample) {
this.sample = sample;
clear();
}
/**
* Clears all recorded values.
*/
public void clear() {
sample.clear();
count.set(0);
max.set(Long.MIN_VALUE);
min.set(Long.MAX_VALUE);
sum.set(0);
variance.set(new double[]{ -1, 0 });
}
/**
* Adds a recorded value.
*
* @param value the length of the value
*/
public void update(int value) {
update((long) value);
}
/**
* Adds a recorded value.
*
* @param value the length of the value
*/
public void update(long value) {
count.incrementAndGet();
sample.update(value);
setMax(value);
setMin(value);
sum.getAndAdd(value);
updateVariance(value);
}
/**
* Returns the number of values recorded.
*
* @return the number of values recorded
*/
public long getCount() {
return count.get();
}
/* (non-Javadoc)
* @see com.yammer.metrics.core.Summarizable#max()
*/
public double getMax() {
if (getCount() > 0) {
return max.get();
}
return 0.0;
}
/* (non-Javadoc)
* @see com.yammer.metrics.core.Summarizable#min()
*/
public double getMin() {
if (getCount() > 0) {
return min.get();
}
return 0.0;
}
/* (non-Javadoc)
* @see com.yammer.metrics.core.Summarizable#mean()
*/
public double getMean() {
if (getCount() > 0) {
return sum.get() / (double) getCount();
}
return 0.0;
}
/* (non-Javadoc)
* @see com.yammer.metrics.core.Summarizable#stdDev()
*/
public double getStdDev() {
if (getCount() > 0) {
return sqrt(getVariance());
}
return 0.0;
}
/* (non-Javadoc)
* @see com.yammer.metrics.core.Summarizable#sum()
*/
public double getSum() {
return (double) sum.get();
}
public Snapshot getSnapshot() {
return sample.getSnapshot();
}
private double getVariance() {
if (getCount() <= 1) {
return 0.0;
}
return variance.get()[1] / (getCount() - 1);
}
private void setMax(long potentialMax) {
boolean done = false;
while (!done) {
final long currentMax = max.get();
done = currentMax >= potentialMax || max.compareAndSet(currentMax, potentialMax);
}
}
private void setMin(long potentialMin) {
boolean done = false;
while (!done) {
final long currentMin = min.get();
done = currentMin <= potentialMin || min.compareAndSet(currentMin, potentialMin);
}
}
private void updateVariance(long value) {
while (true) {
final double[] oldValues = variance.get();
final double[] newValues = new double[2];
if (oldValues[0] == -1) {
newValues[0] = value;
newValues[1] = 0;
} else {
final double oldM = oldValues[0];
final double oldS = oldValues[1];
final double newM = oldM + ((value - oldM) / getCount());
final double newS = oldS + ((value - oldM) * (value - newM));
newValues[0] = newM;
newValues[1] = newS;
}
if (variance.compareAndSet(oldValues, newValues)) {
return;
}
}
}
}
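The (M, S) pair updated in updateVariance() is Welford's online algorithm; a hand check with the stream 1, 2, 3, whose sample variance is 1:

x=1: M = 1.0,                      S = 0.0
x=2: M = 1.0 + (2-1.0)/2 = 1.5,    S = 0.0 + (2-1.0)*(2-1.5) = 0.5
x=3: M = 1.5 + (3-1.5)/3 = 2.0,    S = 0.5 + (3-1.5)*(3-2.0) = 2.0

getVariance() = S / (count - 1) = 2.0 / 2 = 1.0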

View File

@@ -1,143 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
/**
* A meter metric which measures mean throughput and one-, five-, and fifteen-minute
* exponentially-weighted moving average throughputs.
*
* @see <a href="http://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average">EMA</a>
*/
public class Meter {
private static final long TICK_INTERVAL = TimeUnit.SECONDS.toNanos(5);
private final EWMA m1Rate = EWMA.oneMinuteEWMA();
private final EWMA m5Rate = EWMA.fiveMinuteEWMA();
private final EWMA m15Rate = EWMA.fifteenMinuteEWMA();
private final AtomicLong count = new AtomicLong();
private final long startTime;
private final AtomicLong lastTick;
private final TimeUnit rateUnit;
private final String eventType;
private final Clock clock;
/**
* Creates a new {@link Meter}.
*
* @param eventType the plural name of the event the meter is measuring (e.g., {@code
* "requests"})
* @param rateUnit the rate unit of the new meter
* @param clock the clock to use for the meter ticks
*/
Meter(String eventType, TimeUnit rateUnit, Clock clock) {
this.rateUnit = rateUnit;
this.eventType = eventType;
this.clock = clock;
this.startTime = this.clock.getTick();
this.lastTick = new AtomicLong(startTime);
}
public TimeUnit getRateUnit() {
return rateUnit;
}
public String getEventType() {
return eventType;
}
/**
* Updates the moving averages.
*/
void tick() {
m1Rate.tick();
m5Rate.tick();
m15Rate.tick();
}
/**
* Mark the occurrence of an event.
*/
public void mark() {
mark(1);
}
/**
* Mark the occurrence of a given number of events.
*
* @param n the number of events
*/
public void mark(long n) {
tickIfNecessary();
count.addAndGet(n);
m1Rate.update(n);
m5Rate.update(n);
m15Rate.update(n);
}
private void tickIfNecessary() {
final long oldTick = lastTick.get();
final long newTick = clock.getTick();
final long age = newTick - oldTick;
if (age > TICK_INTERVAL && lastTick.compareAndSet(oldTick, newTick)) {
final long requiredTicks = age / TICK_INTERVAL;
for (long i = 0; i < requiredTicks; i++) {
tick();
}
}
}
public long getCount() {
return count.get();
}
public double getFifteenMinuteRate() {
tickIfNecessary();
return m15Rate.getRate(rateUnit);
}
public double getFiveMinuteRate() {
tickIfNecessary();
return m5Rate.getRate(rateUnit);
}
public double getMeanRate() {
if (getCount() == 0) {
return 0.0;
} else {
final long elapsed = (clock.getTick() - startTime);
return convertNsRate(getCount() / (double) elapsed);
}
}
public double getOneMinuteRate() {
tickIfNecessary();
return m1Rate.getRate(rateUnit);
}
private double convertNsRate(double ratePerNs) {
return ratePerNs * (double) rateUnit.toNanos(1);
}
}

View File

@@ -1,52 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
/**
* A statistically representative sample of a data stream.
*/
public interface Sample {
/**
* Clears all recorded values.
*/
void clear();
/**
* Returns the number of values recorded.
*
* @return the number of values recorded
*/
int size();
/**
* Adds a new recorded value to the sample.
*
* @param value a new recorded value
*/
void update(long value);
/**
* Returns a snapshot of the sample's values.
*
* @return a snapshot of the sample's values
*/
Snapshot getSnapshot();
}

View File

@@ -1,168 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.Arrays;
import java.util.Collection;
import static java.lang.Math.floor;
/**
* A statistical snapshot of a {@link Snapshot}.
*/
public class Snapshot {
private static final double MEDIAN_Q = 0.5;
private static final double P75_Q = 0.75;
private static final double P95_Q = 0.95;
private static final double P98_Q = 0.98;
private static final double P99_Q = 0.99;
private static final double P999_Q = 0.999;
private final double[] values;
/**
* Create a new {@link Snapshot} with the given values.
*
* @param values an unordered set of values in the sample
*/
public Snapshot(Collection<Long> values) {
final Object[] copy = values.toArray();
this.values = new double[copy.length];
for (int i = 0; i < copy.length; i++) {
this.values[i] = (Long) copy[i];
}
Arrays.sort(this.values);
}
/**
* Create a new {@link Snapshot} with the given values.
*
* @param values an unordered set of values in the sample
*/
public Snapshot(double[] values) {
this.values = new double[values.length];
System.arraycopy(values, 0, this.values, 0, values.length);
Arrays.sort(this.values);
}
/**
* Returns the value at the given quantile.
*
* @param quantile a given quantile, in {@code [0..1]}
* @return the value in the distribution at {@code quantile}
*/
public double getValue(double quantile) {
if (quantile < 0.0 || quantile > 1.0) {
throw new IllegalArgumentException(quantile + " is not in [0..1]");
}
if (values.length == 0) {
return 0.0;
}
final double pos = quantile * (values.length + 1);
if (pos < 1) {
return values[0];
}
if (pos >= values.length) {
return values[values.length - 1];
}
final double lower = values[(int) pos - 1];
final double upper = values[(int) pos];
return lower + (pos - floor(pos)) * (upper - lower);
}
/**
* Returns the number of values in the snapshot.
*
* @return the number of values in the snapshot
*/
public int size() {
return values.length;
}
/**
* Returns the median value in the distribution.
*
* @return the median value in the distribution
*/
public double getMedian() {
return getValue(MEDIAN_Q);
}
/**
* Returns the value at the 75th percentile in the distribution.
*
* @return the value at the 75th percentile in the distribution
*/
public double get75thPercentile() {
return getValue(P75_Q);
}
/**
* Returns the value at the 95th percentile in the distribution.
*
* @return the value at the 95th percentile in the distribution
*/
public double get95thPercentile() {
return getValue(P95_Q);
}
/**
* Returns the value at the 98th percentile in the distribution.
*
* @return the value at the 98th percentile in the distribution
*/
public double get98thPercentile() {
return getValue(P98_Q);
}
/**
* Returns the value at the 99th percentile in the distribution.
*
* @return the value at the 99th percentile in the distribution
*/
public double get99thPercentile() {
return getValue(P99_Q);
}
/**
* Returns the value at the 99.9th percentile in the distribution.
*
* @return the value at the 99.9th percentile in the distribution
*/
public double get999thPercentile() {
return getValue(P999_Q);
}
/**
* Returns the entire set of values in the snapshot.
*
* @return the entire set of values in the snapshot
*/
public double[] getValues() {
return Arrays.copyOf(values, values.length);
}
}
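getValue() above is the interpolated quantile estimate over pos = quantile * (n + 1); worked against the sorted sample [1, 2, 3, 4, 5]:

q = 0.50: pos = 3.0  -> values[2] = 3.0 (the median)
q = 0.75: pos = 4.5  -> 4 + 0.5 * (5 - 4) = 4.5
q = 0.99: pos = 5.94 -> clamped to values[4] = 5.0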

View File

@@ -1,203 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import org.apache.solr.util.stats.Histogram.SampleType;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
/**
* A timer metric which aggregates timing durations and provides duration statistics, plus
* throughput statistics via {@link Meter}.
*/
public class Timer {
private final TimeUnit durationUnit, rateUnit;
private final Meter meter;
private final Histogram histogram = new Histogram(SampleType.BIASED);
private final Clock clock;
public Timer() {
this(TimeUnit.MILLISECONDS, TimeUnit.SECONDS, Clock.defaultClock());
}
/**
* Creates a new {@link Timer}.
*
* @param durationUnit the scale unit for this timer's duration metrics
* @param rateUnit the scale unit for this timer's rate metrics
* @param clock the clock used to calculate duration
*/
public Timer(TimeUnit durationUnit, TimeUnit rateUnit, Clock clock) {
this.durationUnit = durationUnit;
this.rateUnit = rateUnit;
this.meter = new Meter("calls", rateUnit, clock);
this.clock = clock;
clear();
}
/**
* Returns the timer's duration scale unit.
*
* @return the timer's duration scale unit
*/
public TimeUnit getDurationUnit() {
return durationUnit;
}
public TimeUnit getRateUnit() {
return rateUnit;
}
/**
* Clears all recorded durations.
*/
public void clear() {
histogram.clear();
}
/**
* Adds a recorded duration.
*
* @param duration the length of the duration
* @param unit the scale unit of {@code duration}
*/
public void update(long duration, TimeUnit unit) {
update(unit.toNanos(duration));
}
/**
* Times and records the duration of event.
*
* @param event a {@link Callable} whose {@link Callable#call()} method implements a process
* whose duration should be timed
* @param <T> the type of the value returned by {@code event}
* @return the value returned by {@code event}
* @throws Exception if {@code event} throws an {@link Exception}
*/
public <T> T time(Callable<T> event) throws Exception {
final long startTime = clock.getTick();
try {
return event.call();
} finally {
update(clock.getTick() - startTime);
}
}
/**
* Returns a timing {@link TimerContext}, which measures an elapsed time in nanoseconds.
*
* @return a new {@link TimerContext}
*/
public TimerContext time() {
return new TimerContext(this, clock);
}
public long getCount() {
return histogram.getCount();
}
public double getFifteenMinuteRate() {
return meter.getFifteenMinuteRate();
}
public double getFiveMinuteRate() {
return meter.getFiveMinuteRate();
}
public double getMeanRate() {
return meter.getMeanRate();
}
public double getOneMinuteRate() {
return meter.getOneMinuteRate();
}
/**
* Returns the longest recorded duration.
*
* @return the longest recorded duration
*/
public double getMax() {
return convertFromNS(histogram.getMax());
}
/**
* Returns the shortest recorded duration.
*
* @return the shortest recorded duration
*/
public double getMin() {
return convertFromNS(histogram.getMin());
}
/**
* Returns the arithmetic mean of all recorded durations.
*
* @return the arithmetic mean of all recorded durations
*/
public double getMean() {
return convertFromNS(histogram.getMean());
}
/**
* Returns the standard deviation of all recorded durations.
*
* @return the standard deviation of all recorded durations
*/
public double getStdDev() {
return convertFromNS(histogram.getStdDev());
}
/**
* Returns the sum of all recorded durations.
*
* @return the sum of all recorded durations
*/
public double getSum() {
return convertFromNS(histogram.getSum());
}
public Snapshot getSnapshot() {
final double[] values = histogram.getSnapshot().getValues();
final double[] converted = new double[values.length];
for (int i = 0; i < values.length; i++) {
converted[i] = convertFromNS(values[i]);
}
return new Snapshot(converted);
}
public String getEventType() {
return meter.getEventType();
}
private void update(long duration) {
if (duration >= 0) {
histogram.update(duration);
meter.mark();
}
}
private double convertFromNS(double ns) {
return ns / TimeUnit.NANOSECONDS.convert(1, durationUnit);
}
}
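This forked Timer is one of the classes the commit removes in favor of the Dropwizard com.codahale.metrics equivalents (see TimerUtils further down). For orientation, a minimal sketch of how the removed API was driven; doWork() is a hypothetical workload, and time(...) rethrows whatever it throws:

Timer timer = new Timer();                  // defaults: ms durations, per-second rates
String out = timer.time(() -> doWork());    // time a Callable and return its result
timer.update(42, TimeUnit.MILLISECONDS);    // or record an externally measured duration
double meanMs = timer.getMean();            // mean duration in milliseconds
double perSec = timer.getMeanRate();        // mean throughput in events per second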


@ -1,55 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.concurrent.TimeUnit;
/**
* A timing context.
*
* @see Timer#time()
*/
public class TimerContext {
private final Timer timer;
private final Clock clock;
private final long startTime;
/**
* Creates a new {@link TimerContext} with the current time as its starting value and with the
* given {@link Timer}.
*
* @param timer the {@link Timer} to report the elapsed time to
*/
TimerContext(Timer timer, Clock clock) {
this.timer = timer;
this.clock = clock;
this.startTime = clock.getTick();
}
/**
* Stops recording the elapsed time, updates the timer, and returns the elapsed time in nanoseconds.
*/
public long stop() {
final long elapsedNanos = clock.getTick() - startTime;
timer.update(elapsedNanos, TimeUnit.NANOSECONDS);
return elapsedNanos;
}
}
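The manual form of the same timing went through this context; a sketch, again with a hypothetical doWork(), where try/finally guarantees the sample is recorded even if the work throws:

TimerContext ctx = timer.time();
try {
    doWork();
} finally {
    long elapsedNs = ctx.stop();  // updates the timer and returns elapsed nanoseconds
}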


@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.stats;
import java.util.concurrent.TimeUnit;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.Timer;
import org.apache.solr.common.util.NamedList;
/**
* Solr specific {@link Timer} utility functions.
*/
public class TimerUtils {
/**
* Adds metrics from a Timer to a NamedList, using well-known names.
* @param lst The NamedList to add the metrics data to
* @param timer The Timer to extract the metrics from
*/
public static void addMetrics(NamedList<Object> lst, Timer timer) {
Snapshot snapshot = timer.getSnapshot();
lst.add("avgRequestsPerSecond", timer.getMeanRate());
lst.add("5minRateRequestsPerSecond", timer.getFiveMinuteRate());
lst.add("15minRateRequestsPerSecond", timer.getFifteenMinuteRate());
lst.add("avgTimePerRequest", nsToMs(snapshot.getMean()));
lst.add("medianRequestTime", nsToMs(snapshot.getMedian()));
lst.add("75thPcRequestTime", nsToMs(snapshot.get75thPercentile()));
lst.add("95thPcRequestTime", nsToMs(snapshot.get95thPercentile()));
lst.add("99thPcRequestTime", nsToMs(snapshot.get99thPercentile()));
lst.add("999thPcRequestTime", nsToMs(snapshot.get999thPercentile()));
}
/**
* Converts a double representing nanoseconds to a double representing milliseconds.
*
* @param ns the amount of time in nanoseconds
* @return the amount of time in milliseconds
*/
static double nsToMs(double ns) {
return ns / TimeUnit.MILLISECONDS.toNanos(1);
}
}
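A sketch of the intended call pattern for this new utility; handleRequest() is hypothetical, the rest is the Dropwizard API plus the methods above. Note the Timer snapshot holds nanoseconds, which addMetrics converts to milliseconds via nsToMs:

Timer requests = new Timer();               // com.codahale.metrics.Timer
try (Timer.Context ctx = requests.time()) { // Context is Closeable; close() stops it
    handleRequest();
}
requests.update(12, TimeUnit.MILLISECONDS); // or record a duration directly
NamedList<Object> stats = new NamedList<>();
TimerUtils.addMetrics(stats, requests);     // now holds avgTimePerRequest (ms), rates, percentiles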


@ -1,108 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Forked from https://github.com/codahale/metrics
*/
package org.apache.solr.util.stats;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicLongArray;
/**
* A random sample of a stream of {@code long}s. Uses Vitter's Algorithm R to produce a
* statistically representative sample.
*
* @see <a href="http://www.cs.umd.edu/~samir/498/vitter.pdf">Random Sampling with a Reservoir</a>
*/
public class UniformSample implements Sample {
private static final int BITS_PER_LONG = 63;
private final AtomicLong count = new AtomicLong();
private final AtomicLongArray values;
//TODO: Maybe replace with a Mersenne twister for better distribution
private static final Random random = new Random();
/**
* Creates a new {@link UniformSample}.
*
* @param reservoirSize the number of samples to keep in the sampling reservoir
*/
public UniformSample(int reservoirSize) {
this.values = new AtomicLongArray(reservoirSize);
clear();
}
@Override
public void clear() {
for (int i = 0; i < values.length(); i++) {
values.set(i, 0);
}
count.set(0);
}
@Override
public int size() {
final long c = count.get();
if (c > values.length()) {
return values.length();
}
return (int) c;
}
@Override
public void update(long value) {
final long c = count.incrementAndGet();
if (c <= values.length()) {
values.set((int) c - 1, value);
} else {
final long r = nextLong(c);
if (r < values.length()) {
values.set((int) r, value);
}
}
}
/**
* Get a pseudo-random long uniformly between 0 and n-1. Stolen from
* {@link java.util.Random#nextInt()}.
*
* @param n the bound
* @return a value selected randomly from the range {@code [0..n)}.
*/
private static long nextLong(long n) {
long bits, val;
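// Mask off the sign bit so bits is uniform in [0, 2^63), then reject values from
// the final, incomplete block of size n (detected when bits - val + (n - 1)
// overflows); without the retry, bits % n would be biased toward smaller values.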
do {
bits = random.nextLong() & (~(1L << BITS_PER_LONG));
val = bits % n;
} while (bits - val + (n - 1) < 0L);
return val;
}
@Override
public Snapshot getSnapshot() {
final int s = size();
final List<Long> copy = new ArrayList<>(s);
for (int i = 0; i < s; i++) {
copy.add(values.get(i));
}
return new Snapshot(copy);
}
}
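update() above is Vitter's Algorithm R: the first k values fill the reservoir, and the n-th value (n > k) overwrites a uniformly chosen index in [0, n) only when that index lands inside the reservoir, i.e. with probability k/n, which leaves every value seen so far with the same k/n chance of being retained. A minimal single-threaded sketch of the same idea (illustrative names, not part of this patch):

import java.util.concurrent.ThreadLocalRandom;

class ReservoirSketch {
    private final long[] reservoir;
    private long seen;

    ReservoirSketch(int k) {
        reservoir = new long[k];
    }

    void offer(long v) {
        seen++;
        if (seen <= reservoir.length) {
            reservoir[(int) (seen - 1)] = v;                        // fill phase: keep everything
        } else {
            long slot = ThreadLocalRandom.current().nextLong(seen); // uniform in [0, seen)
            if (slot < reservoir.length) {
                reservoir[(int) slot] = v;                          // overwrite with probability k/n
            }
        }
    }
}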


@ -0,0 +1,46 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Test schema file for UnifiedHighlighter -->
<schema name="unifiedhighlight" version="1.0">
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- basic text field: no offsets! -->
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- text field with offsets -->
<fieldType name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text_offsets" indexed="true" stored="true"/>
<field name="text2" type="text" indexed="true" stored="true"/>
<field name="text3" type="text_offsets" indexed="true" stored="true"/>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>


@ -47,6 +47,21 @@
<str name="knn.minTf">1</str>
<str name="knn.minDf">1</str>
<str name="knn.k">5</str>
<str name="knn.filterQuery">cat:(class1 OR class2)</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="classification-unsupported-filterQuery">
<processor class="solr.ClassificationUpdateProcessorFactory">
<str name="inputFields">title,content,author</str>
<str name="classField">cat</str>
<!-- Knn algorithm specific -->
<str name="algorithm">knn</str>
<str name="knn.minTf">1</str>
<str name="knn.minDf">1</str>
<str name="knn.k">5</str>
<str name="knn.filterQuery">not valid ( lucene query</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>


@ -0,0 +1,527 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.Group;
import org.apache.solr.client.solrj.response.GroupCommand;
import org.apache.solr.client.solrj.response.GroupResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;
import static org.apache.solr.client.solrj.request.schema.SchemaRequest.*;
public class DocValuesNotIndexedTest extends SolrCloudTestCase {
@Rule
public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule());
static final String COLLECTION = "dv_coll";
static List<FieldProps> fieldsToTestSingle = null;
static List<FieldProps> fieldsToTestMulti = null;
static List<FieldProps> fieldsToTestGroupSortFirst = null;
static List<FieldProps> fieldsToTestGroupSortLast = null;
@BeforeClass
public static void createCluster() throws Exception {
System.setProperty("managed.schema.mutable", "true");
configureCluster(2)
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-managed").resolve("conf"))
.configure();
// Need enough shards that we have some shards that don't have any docs on them.
CollectionAdminRequest.createCollection(COLLECTION, "conf1", 4, 1)
.setMaxShardsPerNode(2)
.process(cluster.getSolrClient());
fieldsToTestSingle =
Collections.unmodifiableList(Stream.of(
new FieldProps("intField", "int", 1),
new FieldProps("longField", "long", 1),
new FieldProps("doubleField", "double", 1),
new FieldProps("floatField", "float", 1),
new FieldProps("dateField", "date", 1),
new FieldProps("stringField", "string", 1),
new FieldProps("boolField", "boolean", 1)
).collect(Collectors.toList()));
fieldsToTestMulti =
Collections.unmodifiableList(Stream.of(
new FieldProps("intFieldMulti", "int", 5),
new FieldProps("longFieldMulti", "long", 5),
new FieldProps("doubleFieldMulti", "double", 5),
new FieldProps("floatFieldMulti", "float", 5),
new FieldProps("dateFieldMulti", "date", 5),
new FieldProps("stringFieldMulti", "string", 5),
new FieldProps("boolFieldMulti", "boolean", 2)
).collect(Collectors.toList()));
// Fields to test for grouping and sorting with sortMissingFirst/Last.
fieldsToTestGroupSortFirst =
Collections.unmodifiableList(Stream.of(
new FieldProps("intGSF", "int"),
new FieldProps("longGSF", "long"),
new FieldProps("doubleGSF", "double"),
new FieldProps("floatGSF", "float"),
new FieldProps("dateGSF", "date"),
new FieldProps("stringGSF", "string"),
new FieldProps("boolGSF", "boolean")
).collect(Collectors.toList()));
fieldsToTestGroupSortLast =
Collections.unmodifiableList(Stream.of(
new FieldProps("intGSL", "int"),
new FieldProps("longGSL", "long"),
new FieldProps("doubleGSL", "double"),
new FieldProps("floatGSL", "float"),
new FieldProps("dateGSL", "date"),
new FieldProps("stringGSL", "string"),
new FieldProps("boolGSL", "boolean")
).collect(Collectors.toList()));
List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
fieldsToTestMulti.size() + fieldsToTestGroupSortFirst.size() + fieldsToTestGroupSortLast.size() +
4);
updateList.add(getType("name", "float", "class", "solr.TrieFloatField", "precisionStep", "0"));
updateList.add(getType("name", "double", "class", "solr.TrieDoubleField", "precisionStep", "0"));
updateList.add(getType("name", "date", "class", "solr.TrieDateField", "precisionStep", "0"));
updateList.add(getType("name", "boolean", "class", "solr.BoolField"));
// Add a field for each of the types we want to the schema.
defineFields(updateList, fieldsToTestSingle, false);
defineFields(updateList, fieldsToTestMulti, true);
defineFields(updateList, fieldsToTestGroupSortFirst, false, "sortMissingFirst", "true");
defineFields(updateList, fieldsToTestGroupSortLast, false, "sortMissingLast", "true");
MultiUpdate multiUpdateRequest = new MultiUpdate(updateList);
SchemaResponse.UpdateResponse multipleUpdatesResponse = multiUpdateRequest.process(cluster.getSolrClient(), COLLECTION);
assertNull("Error adding fields", multipleUpdatesResponse.getResponse().get("errors"));
cluster.getSolrClient().setDefaultCollection(COLLECTION);
}
@Before
public void before() throws IOException, SolrServerException {
CloudSolrClient client = cluster.getSolrClient();
client.deleteByQuery("*:*");
client.commit();
resetFieldBases(fieldsToTestSingle);
resetFieldBases(fieldsToTestMulti);
resetFieldBases(fieldsToTestGroupSortFirst);
resetFieldBases(fieldsToTestGroupSortLast);
}
private void resetFieldBases(List<FieldProps> props) {
// Resetting the bases matters little for the int and long fields, but any test that
// counts on docs being indexed so they sort in a particular order needs the boolean
// and string fields, in particular, reset to a known state.
for (FieldProps prop : props) {
prop.resetBase();
}
}
@Test
public void testDistribFaceting() throws IOException, SolrServerException {
// For this test, I want to ensure that there are shards that do _not_ have a doc with any of the DV-only
// fields, see SOLR-5260. So I'll add exactly 1 document to a 4-shard collection.
CloudSolrClient client = cluster.getSolrClient();
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "1");
for (FieldProps prop : fieldsToTestSingle) {
doc.addField(prop.getName(), prop.getValue(true));
}
for (FieldProps prop : fieldsToTestMulti) {
for (int idx = 0; idx < 5; ++idx) {
doc.addField(prop.getName(), prop.getValue(true));
}
}
new UpdateRequest()
.add(doc)
.commit(client, COLLECTION);
final SolrQuery solrQuery = new SolrQuery("q", "*:*", "rows", "0");
solrQuery.setFacet(true);
for (FieldProps prop : fieldsToTestSingle) {
solrQuery.addFacetField(prop.getName());
}
for (FieldProps prop : fieldsToTestMulti) {
solrQuery.addFacetField(prop.getName());
}
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
for (FieldProps props : fieldsToTestSingle) {
testFacet(props, rsp);
}
for (FieldProps props : fieldsToTestMulti) {
testFacet(props, rsp);
}
}
// We should be able to sort on things with missing first/last that are _NOT_ present at all on one server.
@Test
public void testGroupingSorting() throws IOException, SolrServerException {
CloudSolrClient client = cluster.getSolrClient();
// The point of these is to have at least one shard w/o the value.
// While getting values for each of these fields starts _out_ random, each successive
// _value_ increases.
List<SolrInputDocument> docs = new ArrayList<>(3);
docs.add(makeGSDoc(2, fieldsToTestGroupSortFirst, fieldsToTestGroupSortLast));
docs.add(makeGSDoc(1, fieldsToTestGroupSortFirst, fieldsToTestGroupSortLast));
docs.add(makeGSDoc(3, fieldsToTestGroupSortFirst, fieldsToTestGroupSortLast));
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", 4);
docs.add(doc);
new UpdateRequest()
.add(docs)
.commit(client, COLLECTION);
checkSortOrder(client, fieldsToTestGroupSortFirst, "asc", new String[]{"4", "2", "1", "3"}, new String[]{"4", "1", "2", "3"});
checkSortOrder(client, fieldsToTestGroupSortFirst, "desc", new String[]{"3", "1", "2", "4"}, new String[]{"2", "3", "1", "4"});
checkSortOrder(client, fieldsToTestGroupSortLast, "asc", new String[]{"4", "2", "1", "3"}, new String[]{"4", "1", "2", "3"});
checkSortOrder(client, fieldsToTestGroupSortLast, "desc", new String[]{"3", "1", "2", "4"}, new String[]{"2", "3", "1", "4"});
}
private void checkSortOrder(CloudSolrClient client, List<FieldProps> props, String sortDir, String[] order, String[] orderBool) throws IOException, SolrServerException {
for (FieldProps prop : props) {
final SolrQuery solrQuery = new SolrQuery("q", "*:*", "rows", "100");
solrQuery.setSort(prop.getName(), "asc".equals(sortDir) ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);
solrQuery.addSort("id", SolrQuery.ORDER.asc);
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
SolrDocumentList res = rsp.getResults();
assertEquals("Should have exactly " + order.length + " documents returned", order.length, res.getNumFound());
String expected;
for (int idx = 0; idx < res.size(); ++idx) {
if (prop.getName().startsWith("bool")) expected = orderBool[idx];
else expected = order[idx];
assertEquals("Documents in wrong order for field: " + prop.getName(),
expected, res.get(idx).get("id"));
}
}
}
@Test
public void testGroupingDocAbsent() throws IOException, SolrServerException {
List<SolrInputDocument> docs = new ArrayList<>(3);
docs.add(makeGSDoc(2, fieldsToTestGroupSortFirst, null));
docs.add(makeGSDoc(1, fieldsToTestGroupSortFirst, null));
docs.add(makeGSDoc(3, fieldsToTestGroupSortFirst, null));
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", 4);
docs.add(doc);
CloudSolrClient client = cluster.getSolrClient();
new UpdateRequest()
.add(docs)
.commit(client, COLLECTION);
// when grouping on any of these DV-only (not indexed) fields we expect exactly 4 groups except for Boolean.
for (FieldProps prop : fieldsToTestGroupSortFirst) {
// Special handling until SOLR-9802 is fixed
if (prop.getName().startsWith("date")) continue;
// SOLR-9802 to here
final SolrQuery solrQuery = new SolrQuery("q", "*:*",
"group", "true",
"group.field", prop.getName());
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
GroupResponse groupResponse = rsp.getGroupResponse();
List<GroupCommand> commands = groupResponse.getValues();
GroupCommand fieldCommand = commands.get(0);
int expected = 4;
if (prop.getName().startsWith("bool")) expected = 3; //true, false and null
List<Group> fieldCommandGroups = fieldCommand.getValues();
assertEquals("Did not find the expected number of groups!", expected, fieldCommandGroups.size());
}
}
@Test
// Verify that we actually form groups that are "expected". Most of the processing takes some care to
// make sure all the values for each field are unique. We need to have docs that have values that are _not_
// unique.
public void testGroupingDVOnly() throws IOException, SolrServerException {
List<SolrInputDocument> docs = new ArrayList<>(50);
for (int idx = 0; idx < 49; ++idx) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", idx);
boolean doInc = ((idx % 7) == 0);
for (FieldProps prop : fieldsToTestGroupSortFirst) {
doc.addField(prop.getName(), prop.getValue(doInc));
}
docs.add(doc);
if ((idx % 5) == 0) {
doc = new SolrInputDocument();
doc.addField("id", idx + 100);
docs.add(doc);
}
}
CloudSolrClient client = cluster.getSolrClient();
new UpdateRequest()
.add(docs)
.commit(client, COLLECTION);
// OK, we should have one group with 10 entries for null and 7 groups with 7 entries each (booleans instead collapse into two non-null groups)
for (FieldProps prop : fieldsToTestGroupSortFirst) {
// Special handling until SOLR-9802 is fixed
if (prop.getName().startsWith("date")) continue;
// SOLR-9802 to here
final SolrQuery solrQuery = new SolrQuery(
"q", "*:*",
"rows", "100",
"group", "true",
"group.field", prop.getName(),
"group.limit", "100");
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
GroupResponse groupResponse = rsp.getGroupResponse();
List<GroupCommand> commands = groupResponse.getValues();
int nullCount = 0;
int sevenCount = 0;
int boolCount = 0;
for (int idx = 0; idx < commands.size(); ++idx) {
GroupCommand fieldCommand = commands.get(idx);
for (Group grp : fieldCommand.getValues()) {
switch (grp.getResult().size()) {
case 7:
++sevenCount;
assertNotNull("Every group with 7 entries should have a group value.", grp.getGroupValue());
break;
case 10:
++nullCount;
assertNull("This should be the null group", grp.getGroupValue());
break;
case 25:
case 24:
++boolCount;
assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName());
break;
default:
fail("Unexpected number of elements in the group for " + prop.getName() + ": " + grp.getResult().size());
}
}
}
assertEquals("Should be exactly one group with 1 entry of 10 for null for field " + prop.getName(), 1, nullCount);
if (prop.getName().startsWith("bool")) {
assertEquals("Should be exactly 2 groups with non-null Boolean types " + prop.getName(), 2, boolCount);
assertEquals("Should be no seven count groups for Boolean types " + prop.getName(), 0, sevenCount);
} else {
assertEquals("Should be exactly 7 groups with seven entries for field " + prop.getName(), 7, sevenCount);
assertEquals("Should be no gropus with 24 or 25 entries for field " + prop.getName(), 0, boolCount);
}
}
}
private SolrInputDocument makeGSDoc(int id, List<FieldProps> p1, List<FieldProps> p2, String... args) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", id);
for (FieldProps prop : p1) {
doc.addField(prop.getName(), prop.getValue(true));
}
if (p2 != null) {
for (FieldProps prop : p2) {
doc.addField(prop.getName(), prop.getValue(true));
}
}
for (int idx = 0; idx < args.length; idx += 2) {
doc.addField(args[idx], args[idx + 1]);
}
return doc;
}
private static void defineFields(List<Update> updateList, List<FieldProps> props, boolean multi, String... extras) {
for (FieldProps prop : props) {
Map<String, Object> fieldAttributes = new LinkedHashMap<>();
fieldAttributes.put("name", prop.getName());
fieldAttributes.put("type", prop.getType());
fieldAttributes.put("indexed", "false");
fieldAttributes.put("multiValued", multi ? "true" : "false");
fieldAttributes.put("docValues", "true");
// Apply any extra attribute pairs the caller passed (e.g. sortMissingFirst=true).
for (int idx = 0; idx < extras.length; idx += 2) {
fieldAttributes.put(extras[idx], extras[idx + 1]);
}
updateList.add(new AddField(fieldAttributes));
}
}
private static AddFieldType getType(String... args) {
FieldTypeDefinition ftd = new FieldTypeDefinition();
Map<String, Object> ftas = new LinkedHashMap<>();
for (int idx = 0; idx < args.length; idx += 2) {
ftas.put(args[idx], args[idx + 1]);
}
ftd.setAttributes(ftas);
return new SchemaRequest.AddFieldType(ftd);
}
private void testFacet(FieldProps props, QueryResponse rsp) {
String name = props.getName();
final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
long expectedCount = props.getExpectedCount();
long foundCount = getCount(counts);
assertEquals("Field " + name + " should have a count of " + expectedCount, expectedCount, foundCount);
}
private long getCount(final List<FacetField.Count> counts) {
return counts.stream().mapToLong(FacetField.Count::getCount).sum();
}
}
class FieldProps {
private final String name;
private final String type;
private final int expectedCount;
private Object base;
private int counter = 0;
static SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
FieldProps(String name, String type, int expectedCount) {
this.name = name;
this.type = type;
this.expectedCount = expectedCount;
resetBase();
}
void resetBase() {
if (name.startsWith("int")) {
base = Math.abs(DocValuesNotIndexedTest.random().nextInt());
} else if (name.startsWith("long")) {
base = Math.abs(DocValuesNotIndexedTest.random().nextLong());
} else if (name.startsWith("float")) {
base = Math.abs(DocValuesNotIndexedTest.random().nextFloat());
} else if (name.startsWith("double")) {
base = Math.abs(DocValuesNotIndexedTest.random().nextDouble());
} else if (name.startsWith("date")) {
base = Math.abs(DocValuesNotIndexedTest.random().nextLong());
} else if (name.startsWith("bool")) {
base = true; // Must start with a known value since bools only have two values.
} else if (name.startsWith("string")) {
base = "base_string_" + DocValuesNotIndexedTest.random().nextInt(1_000_000) + "_";
} else {
throw new RuntimeException("Should have found a prefix for the field before now!");
}
}
FieldProps(String name, String type) {
this(name, type, -1);
}
String getName() {
return name;
}
String getType() {
return type;
}
int getExpectedCount() {
return expectedCount;
}
public String getValue(boolean incrementCounter) {
if (incrementCounter) {
counter += DocValuesNotIndexedTest.random().nextInt(10) + 100;
}
if (name.startsWith("int")) {
return Integer.toString((int) base + counter);
}
if (name.startsWith("long")) {
return Long.toString((long) base + counter);
}
if (name.startsWith("float")) {
return Float.toString((float) base + counter);
}
if (name.startsWith("double")) {
return Double.toString((double) base + counter);
}
if (name.startsWith("date")) {
return format.format(985_847_645 + (long) base + counter);
}
if (name.startsWith("bool")) {
String ret = Boolean.toString((boolean) base);
base = !((boolean) base);
return ret;
}
if (name.startsWith("string")) {
return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
}
throw new RuntimeException("Should have found a prefix for the field before now!");
}
}


@ -34,6 +34,8 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.Timer;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.cloud.overseer.OverseerAction;
@ -52,9 +54,6 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.solr.update.UpdateShardHandlerConfig;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.stats.Snapshot;
import org.apache.solr.util.stats.Timer;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
@ -1027,7 +1026,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
q.offer(Utils.toJSON(m));
Timer t = new Timer();
TimerContext context = t.time();
Timer.Context context = t.time();
try {
overseerClient = electNewOverseer(server.getZkAddress());
assertTrue(overseers.size() > 0);
@ -1072,16 +1071,19 @@ public class OverseerTest extends SolrTestCaseJ4 {
private void printTimingStats(Timer timer) {
Snapshot snapshot = timer.getSnapshot();
log.info("\t totalTime: {}", timer.getSum());
log.info("\t avgRequestsPerMinute: {}", timer.getMeanRate());
log.info("\t 5minRateRequestsPerMinute: {}", timer.getFiveMinuteRate());
log.info("\t 15minRateRequestsPerMinute: {}", timer.getFifteenMinuteRate());
log.info("\t avgTimePerRequest: {}", timer.getMean());
log.info("\t medianRequestTime: {}", snapshot.getMedian());
log.info("\t 75thPctlRequestTime: {}", snapshot.get75thPercentile());
log.info("\t 95thPctlRequestTime: {}", snapshot.get95thPercentile());
log.info("\t 99thPctlRequestTime: {}", snapshot.get99thPercentile());
log.info("\t 999thPctlRequestTime: {}", snapshot.get999thPercentile());
log.info("\t avgRequestsPerSecond: {}", timer.getMeanRate());
log.info("\t 5minRateRequestsPerSecond: {}", timer.getFiveMinuteRate());
log.info("\t 15minRateRequestsPerSecond: {}", timer.getFifteenMinuteRate());
log.info("\t avgTimePerRequest: {}", nsToMs(snapshot.getMean()));
log.info("\t medianRequestTime: {}", nsToMs(snapshot.getMedian()));
log.info("\t 75thPcRequestTime: {}", nsToMs(snapshot.get75thPercentile()));
log.info("\t 95thPcRequestTime: {}", nsToMs(snapshot.get95thPercentile()));
log.info("\t 99thPcRequestTime: {}", nsToMs(snapshot.get99thPercentile()));
log.info("\t 999thPcRequestTime: {}", nsToMs(snapshot.get999thPercentile()));
}
private static long nsToMs(double ns) {
return (long) (ns / TimeUnit.MILLISECONDS.toNanos(1));
}
private void close(MockZKController mockController) {


@ -108,8 +108,8 @@ public class RequestHandlersTest extends SolrTestCaseJ4 {
NamedList updateStats = updateHandler.getStatistics();
NamedList termStats = termHandler.getStatistics();
Double updateTime = (Double) updateStats.get("totalTime");
Double termTime = (Double) termStats.get("totalTime");
Double updateTime = (Double) updateStats.get("avgTimePerRequest");
Double termTime = (Double) termStats.get("avgTimePerRequest");
assertFalse("RequestHandlers should not share statistics!", updateTime.equals(termTime));
}


@ -25,6 +25,7 @@ import java.util.Map;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.util.Constants;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
@ -283,6 +284,7 @@ public class CoreAdminHandlerTest extends SolrTestCaseJ4 {
@Test
public void testDeleteInstanceDirAfterCreateFailure() throws Exception {
assumeFalse("Ignore test on windows because it does not delete data directory immediately after unload", Constants.WINDOWS);
File solrHomeDirectory = new File(initCoreDataDir, getClass().getName() + "-corex-"
+ System.nanoTime());
solrHomeDirectory.mkdirs();


@ -70,7 +70,12 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
args.put("hl", "true");
args.put("hl.fl", "tv_text");
args.put("hl.snippets", "2");
args.put("hl.useFastVectorHighlighter", "true");
args.put("hl.tag.pre", "<fvpre>"); //... and let post default to </em>. This is just a test.
if (random().nextBoolean()) {
args.put("hl.useFastVectorHighlighter", "true"); // old way
} else {
args.put("hl.method", "fastVector"); // the new way
}
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard",0,200,args);
@ -81,7 +86,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
assertQ("Basic summarization",
sumLRF.makeRequest("tv_text:vector"),
"//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <em>vector</em> highlighter test']"
"//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <fvpre>vector</em> highlighter test']"
);
}
}


@ -43,10 +43,6 @@ import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Tests some basic functionality of Solr while demonstrating good
* Best Practices for using AbstractSolrTestCase
*/
public class HighlighterTest extends SolrTestCaseJ4 {
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
@ -90,6 +86,25 @@ public class HighlighterTest extends SolrTestCaseJ4 {
assertTrue(regex instanceof RegexFragmenter);
}
@Test
public void testMethodPostings() {
String field = "t_text";
assertU(adoc(field, LONG_TEXT,
"id", "1"));
assertU(commit());
try {
assertQ("Tried PostingsSolrHighlighter but failed due to offsets not in postings",
req("q", "long", "hl.method", "postings", "df", field, "hl", "true"));
fail("Did not encounter exception for no offsets");
} catch (Exception e) {
assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
}
// note: the default schema.xml has no offsets in postings to test the PostingsHighlighter. Leave that for another
// test class.
}
@Test
public void testMergeContiguous() throws Exception {
HashMap<String,String> args = new HashMap<>();
@ -99,6 +114,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
args.put(HighlightParams.SNIPPETS, String.valueOf(4));
args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
args.put(HighlightParams.METHOD, "original"); // test works; no complaints
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
@ -763,7 +779,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
);
// Prove fallback highlighting works also with FVH
args.put("hl.useFastVectorHighlighter", "true");
args.put("hl.method", "fastVector");
args.put("hl.tag.pre", "<fvhpre>");
args.put("hl.tag.post", "</fvhpost>");
args.put("f.t_text.hl.maxAlternateFieldLength", "18");


@ -52,7 +52,7 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
public void testSimple() {
assertQ("simplest test",
req("q", "text:document", "sort", "id asc", "hl", "true"),
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.method", "postings"), // test hl.method is happy too
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");


@ -0,0 +1,229 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.junit.BeforeClass;
/** Tests for the UnifiedHighlighter Solr plugin **/
public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml", "schema-unifiedhighlight.xml");
// test our config is sane, just to be sure:
// 'text' and 'text3' should have offsets, 'text2' should not
IndexSchema schema = h.getCore().getLatestSchema();
assertTrue(schema.getField("text").storeOffsetsWithPositions());
assertTrue(schema.getField("text3").storeOffsetsWithPositions());
assertFalse(schema.getField("text2").storeOffsetsWithPositions());
}
@Override
public void setUp() throws Exception {
super.setUp();
clearIndex();
assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
assertU(commit());
}
public static SolrQueryRequest req(String... params) {
return SolrTestCaseJ4.req(params, "hl.method", "unified");
}
public void testSimple() {
assertQ("simplest test",
req("q", "text:document", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
}
public void testImpossibleOffsetSource() {
try {
assertQ("impossible offset source",
req("q", "text2:document", "hl.offsetSource", "postings", "hl.fl", "text2", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
fail("Did not encounter exception for no offsets");
} catch (Exception e) {
assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
}
}
public void testMultipleSnippetsReturned() {
clearIndex();
assertU(adoc("text", "Document snippet one. Intermediate sentence. Document snippet two.",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multiple snippets test",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
}
public void testStrictPhrasesEnabledByDefault() {
clearIndex();
assertU(adoc("text", "Strict phrases should be enabled for phrases",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("strict phrase handling",
req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be enabled for phrases'");
}
public void testStrictPhrasesCanBeDisabled() {
clearIndex();
assertU(adoc("text", "Strict phrases should be disabled for phrases",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("strict phrase handling",
req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true", "hl.usePhraseHighlighter", "false"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be disabled for <em>phrases</em>'");
}
public void testMultiTermQueryEnabledByDefault() {
clearIndex();
assertU(adoc("text", "Aviary Avenue document",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multi term query handling",
req("q", "text:av*", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Aviary</em> <em>Avenue</em> document'");
}
public void testMultiTermQueryCanBeDisabled() {
clearIndex();
assertU(adoc("text", "Aviary Avenue document",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multi term query handling",
req("q", "text:av*", "sort", "id asc", "hl", "true", "hl.highlightMultiTerm", "false"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=0");
}
public void testPagination() {
assertQ("pagination test",
req("q", "text:document", "sort", "id asc", "hl", "true", "rows", "1", "start", "1"),
"count(//lst[@name='highlighting']/*)=1",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
}
public void testEmptySnippet() {
assertQ("null snippet test",
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
"count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
}
public void testDefaultSummary() {
assertQ("null snippet test",
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true", "hl.defaultSummary", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'");
}
public void testDifferentField() {
assertQ("highlighting text3",
req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
}
public void testTwoFields() {
assertQ("highlighting text and text3",
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
}
public void testTags() {
assertQ("different pre/post tags",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.tag.pre", "[", "hl.tag.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
}
public void testUsingSimplePrePostTags() {
assertQ("different pre/post tags",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.simple.pre", "[", "hl.simple.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
}
public void testUsingSimplePrePostTagsPerField() {
assertQ("different pre/post tags",
req("q", "text:document", "sort", "id asc", "hl", "true", "f.text.hl.simple.pre", "[", "f.text.hl.simple.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
}
public void testTagsPerField() {
assertQ("highlighting text and text3",
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3", "f.text3.hl.tag.pre", "[", "f.text3.hl.tag.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy [document]'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
}
public void testBreakIterator() {
assertQ("different breakiterator",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'");
}
public void testBreakIterator2() {
assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
assertU(commit());
assertQ("different breakiterator",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
}
public void testEncoder() {
assertU(adoc("text", "Document one has a first <i>sentence</i>.", "id", "103"));
assertU(commit());
assertQ("html escaped",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
}
}


@ -147,13 +147,8 @@ public class SmileWriterTest extends SolrTestCaseJ4 {
@Test
public void test10Docs() throws IOException {
SolrDocumentList l = new SolrDocumentList();
for(int i=0;i<10; i++){
l.add(sampleDoc(random(), i));
}
SolrQueryResponse response = new SolrQueryResponse();
response.getValues().add("results", l);
SolrDocumentList l = constructSolrDocList(response);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new SmileResponseWriter().write(baos, new LocalSolrQueryRequest(null, new ModifiableSolrParams()), response);
@ -171,6 +166,16 @@ public class SmileWriterTest extends SolrTestCaseJ4 {
}
public static SolrDocumentList constructSolrDocList(SolrQueryResponse response) {
SolrDocumentList l = new SolrDocumentList();
for(int i=0;i<10; i++){
l.add(sampleDoc(random(), i));
}
response.getValues().add("results", l);
return l;
}
public static SolrDocument sampleDoc(Random r, int bufnum) {
SolrDocument sdoc = new SolrDocument();
sdoc.put("id", "my_id_" + bufnum);


@ -0,0 +1,195 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.stream.JavabinTupleStreamParser;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.client.solrj.io.stream.TupleStream;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.Utils;
import static org.apache.solr.response.SmileWriterTest.constructSolrDocList;
public class TestJavabinTupleStreamParser extends SolrTestCaseJ4 {
public void testKnown() throws IOException {
String payload = "{\n" +
" \"responseHeader\":{\n" +
" \"zkConnected\":true,\n" +
" \"status\":0,\n" +
" \"QTime\":46},\n" +
" \"response\":{\n" +
" \"numFound\":2,\n" +
" \"start\":0,\n" +
" \"docs\":[\n" +
" {\n" +
" \"id\":\"2\",\n" +
" \"a_s\":\"hello2\",\n" +
" \"a_i\":2,\n" +
" \"a_f\":0.0},\n" +
" {\n" +
" \"id\":\"3\",\n" +
" \"a_s\":\"hello3\",\n" +
" \"a_i\":3,\n" +
" \"a_f\":3.0}]}}";
SimpleOrderedMap nl = convert2OrderedMap((Map) Utils.fromJSONString(payload));
byte[] bytes = serialize(nl);
JavabinTupleStreamParser parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), true);
Map<String, Object> map = parser.next();
assertEquals("2", map.get("id"));
map = parser.next();
assertEquals("3", map.get("id"));
map = parser.next();
assertNull(map);
}
public SimpleOrderedMap convert2OrderedMap(Map m) {
SimpleOrderedMap result = new SimpleOrderedMap<>();
m.forEach((k, v) -> {
if (v instanceof List) v = ((List) v).iterator();
if (v instanceof Map) v = convert2OrderedMap((Map) v);
result.add((String) k, v);
});
return result;
}
public void testSimple() throws IOException {
List<Map<String, Object>> l = new ArrayList<>();
l.add(Utils.makeMap("id", 1, "f", 1.0f, "s", "Some str 1"));
l.add(Utils.makeMap("id", 2, "f", 2.0f, "s", "Some str 2"));
l.add(Utils.makeMap("id", 3, "f", 1.0f, "s", "Some str 3"));
l.add(Utils.makeMap("EOF", true, "RESPONSE_TIME", 206, "sleepMillis", 1000));
Iterator<Map<String, Object>> iterator = l.iterator();
TupleStream tupleStream = new TupleStream() {
@Override
public void setStreamContext(StreamContext context) {
}
@Override
public List<TupleStream> children() {
return null;
}
@Override
public void open() throws IOException {
}
@Override
public void close() throws IOException {
}
@Override
public Tuple read() throws IOException {
if (iterator.hasNext()) return new Tuple(iterator.next());
else return null;
}
@Override
public StreamComparator getStreamSort() {
return null;
}
@Override
public Explanation toExplanation(StreamFactory factory) throws IOException {
return new StreamExplanation(getStreamNodeId().toString())
.withFunctionName("Dummy")
.withImplementingClass(this.getClass().getName())
.withExpressionType(Explanation.ExpressionType.STREAM_SOURCE)
.withExpression("--non-expressible--");
}
};
byte[] bytes = serialize(tupleStream);
JavabinTupleStreamParser parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), true);
Map m = parser.next();
assertEquals(1L, m.get("id"));
assertEquals(1.0, (Double) m.get("f"), 0.01);
m = parser.next();
assertEquals(2L, m.get("id"));
assertEquals(2.0, (Double) m.get("f"), 0.01);
m = parser.next();
assertEquals(3L, m.get("id"));
assertEquals(1.0, (Double) m.get("f"), 0.01);
m = parser.next();
assertEquals(Boolean.TRUE, m.get("EOF"));
parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), false);
m = parser.next();
assertEquals(1, m.get("id"));
assertEquals(1.0, (Float) m.get("f"), 0.01);
m = parser.next();
assertEquals(2, m.get("id"));
assertEquals(2.0, (Float) m.get("f"), 0.01);
m = parser.next();
assertEquals(3, m.get("id"));
assertEquals(1.0, (Float) m.get("f"), 0.01);
m = parser.next();
assertEquals(Boolean.TRUE, m.get("EOF"));
}
public void testSolrDocumentList() throws IOException {
SolrQueryResponse response = new SolrQueryResponse();
SolrDocumentList l = constructSolrDocList(response);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new JavaBinCodec().marshal(response.getValues(), baos);
byte[] bytes = serialize(response.getValues());
Object o = new JavaBinCodec().unmarshal(new ByteArrayInputStream(bytes));
List list = new ArrayList<>();
Map m = null;
JavabinTupleStreamParser parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), false);
while ((m = parser.next()) != null) {
list.add(m);
}
assertEquals(l.size(), list.size());
for(int i =0;i<list.size();i++){
compareSolrDocument(l.get(i),new SolrDocument((Map<String, Object>) list.get(i)));
}
}
public static byte[] serialize(Object o) throws IOException {
SolrQueryResponse response = new SolrQueryResponse();
response.getValues().add("results", o);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new JavaBinCodec().marshal(response.getValues(), baos);
return baos.toByteArray();
}
}


@ -1453,11 +1453,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
@Override
protected Query getFieldQuery(String field,
String val, boolean quoted) throws SyntaxError {
String val, boolean quoted, boolean raw) throws SyntaxError {
if(frequentlyMisspelledWords.contains(val)) {
return getFuzzyQuery(field, val, 0.75F);
}
return super.getFieldQuery(field, val, quoted);
return super.getFieldQuery(field, val, quoted, raw);
}
}
}


@ -16,11 +16,20 @@
*/
package org.apache.solr.search;
import java.util.Locale;
import java.util.Random;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.query.FilterQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -37,9 +46,9 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
public static void createIndex() {
String v;
v = "how now brown cow";
assertU(adoc("id", "1", "text", v, "text_np", v));
assertU(adoc("id", "1", "text", v, "text_np", v, "foo_i","11"));
v = "now cow";
assertU(adoc("id", "2", "text", v, "text_np", v));
assertU(adoc("id", "2", "text", v, "text_np", v, "foo_i","12"));
assertU(adoc("id", "3", "foo_s", "a ' \" \\ {! ) } ( { z")); // A value filled with special chars
assertU(adoc("id", "10", "qqq_s", "X"));
@@ -184,6 +193,92 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
req.close();
}
// automatically use TermsQuery when appropriate
@Test
public void testAutoTerms() throws Exception {
SolrQueryRequest req = req();
QParser qParser;
Query q,qq;
// relevance query should not be a filter
qParser = QParser.getParser("foo_s:(a b c)", req);
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery)q).clauses().size());
// small filter query should still use BooleanQuery
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
}
// large relevancy query should use BooleanQuery
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
q = qParser.getQuery();
assertEquals(26, ((BooleanQuery)q).clauses().size());
// large filter query should use TermsQuery
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(26, ((TermsQuery)q).getTermData().size());
// large numeric filter query should use TermsQuery (for trie fields)
qParser = QParser.getParser("foo_i:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(20, ((TermsQuery)q).getTermData().size());
// a filter() clause inside a relevancy query should be able to use a TermsQuery
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
if (qq instanceof TermQuery) {
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
}
if (qq instanceof FilterQuery) {
qq = ((FilterQuery)qq).getQuery();
}
assertEquals(26, ((TermsQuery)qq).getTermData().size());
// test mixed boolean query, including quotes (which shouldn't matter)
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(4, ((BooleanQuery)q).clauses().size());
qq = null;
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
if (qq instanceof TermsQuery) break;
}
assertEquals(26, ((TermsQuery)qq).getTermData().size());
req.close();
}
@Test
public void testManyClauses() throws Exception {
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
StringBuilder sb = new StringBuilder("id:(");
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
sb.append('z').append(i).append(' ');
}
sb.append(a);
sb.append(")");
String q = sb.toString();
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
assertJQ(req("q","*:*", "fq", q)
,"/response/numFound==6");
}
@Test
public void testComments() throws Exception {
assertJQ(req("q", "id:1 id:2 /* *:* */ id:3")
@@ -317,4 +412,103 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
}
// parsing performance test
// Run from command line with ant test -Dtestcase=TestSolrQueryParser -Dtestmethod=testParsingPerformance -Dtests.asserts=false 2>/dev/null | grep QPS
@Test
public void testParsingPerformance() throws Exception {
String[] args = {"-queries","100" ,"-iter","1000", "-clauses","100", "-format","term%d", "-seed","0"};
args = new String[] {"-queries","1000" ,"-iter","2000", "-clauses","10", "-format","term%d", "-seed","0"};
// args = new String[] {"-queries","1000" ,"-iter","1000000000", "-clauses","10", "-format","term%d", "-seed","0"};
boolean assertOn = false;
assert assertOn = true;
if (assertOn) {
// System.out.println("WARNING! Assertions are enabled!!!! Will only execute small run. Change with -Dtests.asserts=false");
args = new String[]{"-queries","10" ,"-iter","2", "-clauses","20", "-format","term%d", "-seed","0"};
}
int iter = 1000;
int numQueries = 100;
int maxClauses = 5;
int maxTerm = 10000000;
String format = "term%d";
String field = "foo_s";
long seed = 0;
boolean isFilter = true;
boolean rewrite = false;
String otherStuff = "";
for (int i = 0; i < args.length; i++) {
String a = args[i];
if ("-queries".equals(a)) {
numQueries = Integer.parseInt(args[++i]);
} else if ("-iter".equals(a)) {
iter = Integer.parseInt(args[++i]);
} else if ("-clauses".equals(a)) {
maxClauses = Integer.parseInt(args[++i]);
} else if ("-format".equals(a)) {
format = args[++i];
} else if ("-seed".equals(a)) {
seed = Long.parseLong(args[++i]);
} else {
otherStuff = otherStuff + " " + a;
}
}
Random r = new Random(seed);
String[] queries = new String[numQueries];
for (int i = 0; i < queries.length; i++) {
StringBuilder sb = new StringBuilder();
boolean explicitField = r.nextInt(5) == 0;
if (!explicitField) {
sb.append(field + ":(");
}
sb.append(otherStuff).append(" ");
int nClauses = r.nextInt(maxClauses) + 1; // TODO: the query parser can't parse () for some reason???
for (int c = 0; c<nClauses; c++) {
String termString = String.format(Locale.US, format, r.nextInt(maxTerm));
if (explicitField) {
sb.append(field).append(':');
}
sb.append(termString);
sb.append(' ');
}
if (!explicitField) {
sb.append(")");
}
queries[i] = sb.toString();
// System.out.println(queries[i]);
}
SolrQueryRequest req = req();
long start = System.nanoTime();
int ret = 0;
for (int i=0; i<iter; i++) {
for (String qStr : queries) {
QParser parser = QParser.getParser(qStr,req);
parser.setIsFilter(isFilter);
Query q = parser.getQuery();
if (rewrite) {
// TODO: do rewrite
}
ret += q.getClass().hashCode(); // use the query somehow
}
}
long end = System.nanoTime();
System.out.println((assertOn ? "WARNING, assertions enabled. " : "") + "ret=" + ret + " Parser QPS:" + ((long)numQueries * iter)*1000000000/(end-start));
req.close();
}
}
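
The tests above all pivot on the same call pattern: flag the parser as a filter before asking for the query, and a large disjunction may come back as a single TermsQuery instead of a many-clause BooleanQuery. A condensed sketch, using only the QParser calls exercised in the tests; the wrapper class and method names are illustrative.

import org.apache.lucene.search.Query;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;

public class FilterParseSketch {
  // Parses qStr as a filter clause; past QueryParser.TERMS_QUERY_THRESHOLD
  // clauses the result can be a TermsQuery, as testAutoTerms asserts.
  public static Query parseAsFilter(String qStr, SolrQueryRequest req) throws Exception {
    QParser parser = QParser.getParser(qStr, req);
    parser.setIsFilter(true); // filters don't need per-clause scoring
    return parser.getQuery();
  }
}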

View File

@@ -14,71 +14,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.UpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.hamcrest.core.Is.is;
import static org.mockito.Mockito.mock;
/**
* Tests for {@link ClassificationUpdateProcessor} and {@link ClassificationUpdateProcessorFactory}
* Tests for {@link ClassificationUpdateProcessorFactory}
*/
public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
// field names are used in accordance with the solrconfig and schema supplied
private static final String ID = "id";
private static final String TITLE = "title";
private static final String CONTENT = "content";
private static final String AUTHOR = "author";
private static final String CLASS = "cat";
private static final String CHAIN = "classification";
private ClassificationUpdateProcessorFactory cFactoryToTest = new ClassificationUpdateProcessorFactory();
private NamedList args = new NamedList<String>();
@BeforeClass
public static void beforeClass() throws Exception {
System.setProperty("enable.update.log", "false");
initCore("solrconfig-classification.xml", "schema-classification.xml");
}
@Override
@Before
public void setUp() throws Exception {
super.setUp();
clearIndex();
assertU(commit());
}
@Before
public void initArgs() {
args.add("inputFields", "inputField1,inputField2");
args.add("classField", "classField1");
args.add("predictedClassField", "classFieldX");
args.add("algorithm", "bayes");
args.add("knn.k", "9");
args.add("knn.minDf", "8");
@@ -86,22 +46,23 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
}
@Test
public void testFullInit() {
public void init_fullArgs_shouldInitFullClassificationParams() {
cFactoryToTest.init(args);
ClassificationUpdateProcessorParams classificationParams = cFactoryToTest.getClassificationParams();
String[] inputFieldNames = cFactoryToTest.getInputFieldNames();
String[] inputFieldNames = classificationParams.getInputFieldNames();
assertEquals("inputField1", inputFieldNames[0]);
assertEquals("inputField2", inputFieldNames[1]);
assertEquals("classField1", cFactoryToTest.getClassFieldName());
assertEquals("bayes", cFactoryToTest.getAlgorithm());
assertEquals(8, cFactoryToTest.getMinDf());
assertEquals(10, cFactoryToTest.getMinTf());
assertEquals(9, cFactoryToTest.getK());
assertEquals("classField1", classificationParams.getTrainingClassField());
assertEquals("classFieldX", classificationParams.getPredictedClassField());
assertEquals(ClassificationUpdateProcessorFactory.Algorithm.BAYES, classificationParams.getAlgorithm());
assertEquals(8, classificationParams.getMinDf());
assertEquals(10, classificationParams.getMinTf());
assertEquals(9, classificationParams.getK());
}
@Test
public void testInitEmptyInputField() {
public void init_emptyInputFields_shouldThrowExceptionWithDetailedMessage() {
args.removeAll("inputFields");
try {
cFactoryToTest.init(args);
@@ -111,7 +72,7 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
}
@Test
public void testInitEmptyClassField() {
public void init_emptyClassField_shouldThrowExceptionWithDetailedMessage() {
args.removeAll("classField");
try {
cFactoryToTest.init(args);
@@ -121,114 +82,53 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
}
@Test
public void testDefaults() {
public void init_emptyPredictedClassField_shouldDefaultToTrainingClassField() {
args.removeAll("predictedClassField");
cFactoryToTest.init(args);
ClassificationUpdateProcessorParams classificationParams = cFactoryToTest.getClassificationParams();
assertThat(classificationParams.getPredictedClassField(), is("classField1"));
}
@Test
public void init_unsupportedAlgorithm_shouldThrowExceptionWithDetailedMessage() {
args.removeAll("algorithm");
args.add("algorithm", "unsupported");
try {
cFactoryToTest.init(args);
} catch (SolrException e) {
assertEquals("Classification UpdateProcessor Algorithm: 'unsupported' not supported", e.getMessage());
}
}
@Test
public void init_unsupportedFilterQuery_shouldThrowExceptionWithDetailedMessage() {
UpdateRequestProcessor mockProcessor = mock(UpdateRequestProcessor.class);
SolrQueryRequest mockRequest = mock(SolrQueryRequest.class);
SolrQueryResponse mockResponse = mock(SolrQueryResponse.class);
args.add("knn.filterQuery", "not supported query");
try {
cFactoryToTest.init(args);
/* the parsing failure happens because of the mocks; that's fine, as we only check proper exception propagation */
cFactoryToTest.getInstance(mockRequest, mockResponse, mockProcessor);
} catch (SolrException e) {
assertEquals("Classification UpdateProcessor Training Filter Query: 'not supported query' is not supported", e.getMessage());
}
}
@Test
public void init_emptyArgs_shouldDefaultClassificationParams() {
args.removeAll("algorithm");
args.removeAll("knn.k");
args.removeAll("knn.minDf");
args.removeAll("knn.minTf");
cFactoryToTest.init(args);
assertEquals("knn", cFactoryToTest.getAlgorithm());
assertEquals(1, cFactoryToTest.getMinDf());
assertEquals(1, cFactoryToTest.getMinTf());
assertEquals(10, cFactoryToTest.getK());
}
ClassificationUpdateProcessorParams classificationParams = cFactoryToTest.getClassificationParams();
@Test
public void testBasicClassification() throws Exception {
prepareTrainedIndex();
// To be classified, we index documents without a class and verify the expected one is returned
addDoc(adoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 ",
AUTHOR, "Name1 Surname1"));
addDoc(adoc(ID, "11",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "Name Surname"));
addDoc(commit());
Document doc10 = getDoc("10");
assertEquals("class2", doc10.get(CLASS));
Document doc11 = getDoc("11");
assertEquals("class1", doc11.get(CLASS));
}
/**
* Index some example documents with a class manually assigned.
* This will be our trained model.
*
* @throws Exception If there is a low-level I/O error
*/
private void prepareTrainedIndex() throws Exception {
//class1
addDoc(adoc(ID, "1",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "Name Surname",
CLASS, "class1"));
addDoc(adoc(ID, "2",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "Name Surname",
CLASS, "class1"));
addDoc(adoc(ID, "3",
TITLE, "word1 word1 word1",
CONTENT, "word2",
AUTHOR, "Name Surname",
CLASS, "class1"));
addDoc(adoc(ID, "4",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "Name Surname",
CLASS, "class1"));
//class2
addDoc(adoc(ID, "5",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5",
AUTHOR, "Name1 Surname1",
CLASS, "class2"));
addDoc(adoc(ID, "6",
TITLE, "word4 word4",
CONTENT, "word5",
AUTHOR, "Name1 Surname1",
CLASS, "class2"));
addDoc(adoc(ID, "7",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 word5",
AUTHOR, "Name1 Surname1",
CLASS, "class2"));
addDoc(adoc(ID, "8",
TITLE, "word4",
CONTENT, "word5 word5 word5 word5",
AUTHOR, "Name1 Surname1",
CLASS, "class2"));
addDoc(commit());
}
private Document getDoc(String id) throws IOException {
try (SolrQueryRequest req = req()) {
SolrIndexSearcher searcher = req.getSearcher();
TermQuery query = new TermQuery(new Term(ID, id));
TopDocs doc1 = searcher.search(query, 1);
ScoreDoc scoreDoc = doc1.scoreDocs[0];
return searcher.doc(scoreDoc.doc);
}
}
static void addDoc(String doc) throws Exception {
Map<String, String[]> params = new HashMap<>();
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
params.put(UpdateParams.UPDATE_CHAIN, new String[]{CHAIN});
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
(SolrParams) mmparams) {
};
UpdateRequestHandler handler = new UpdateRequestHandler();
handler.init(null);
ArrayList<ContentStream> streams = new ArrayList<>(2);
streams.add(new ContentStreamBase.StringStream(doc));
req.setContentStreams(streams);
handler.handleRequestBody(req, new SolrQueryResponse());
req.close();
assertEquals(ClassificationUpdateProcessorFactory.Algorithm.KNN, classificationParams.getAlgorithm());
assertEquals(1, classificationParams.getMinDf());
assertEquals(1, classificationParams.getMinTf());
assertEquals(10, classificationParams.getK());
}
}
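
For reference, a minimal sketch of initializing the factory outside a test, built only from the parameter names and defaults the assertions above establish; the package declaration, class name, and field values are illustrative assumptions.

package org.apache.solr.update.processor; // assumed, to match the factory's package

import org.apache.solr.common.util.NamedList;

public class ClassificationInitSketch {
  public static ClassificationUpdateProcessorFactory newKnnFactory() {
    NamedList<String> args = new NamedList<>();
    args.add("inputFields", "title,content"); // feature fields (illustrative values)
    args.add("classField", "cat");            // training class field
    args.add("algorithm", "knn");             // "bayes" is the other supported value
    args.add("knn.k", "10");
    ClassificationUpdateProcessorFactory factory = new ClassificationUpdateProcessorFactory();
    factory.init(args); // omitted knn.minDf/knn.minTf default to 1, as tested above
    return factory;
  }
}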

View File

@@ -0,0 +1,192 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.hamcrest.core.Is.is;
/**
* Tests for {@link ClassificationUpdateProcessor} and {@link ClassificationUpdateProcessorFactory}
*/
public class ClassificationUpdateProcessorIntegrationTest extends SolrTestCaseJ4 {
/* field names are used in accordance with the solrconfig and schema supplied */
private static final String ID = "id";
private static final String TITLE = "title";
private static final String CONTENT = "content";
private static final String AUTHOR = "author";
private static final String CLASS = "cat";
private static final String CHAIN = "classification";
private static final String BROKEN_CHAIN_FILTER_QUERY = "classification-unsupported-filterQuery";
private ClassificationUpdateProcessorFactory cFactoryToTest = new ClassificationUpdateProcessorFactory();
private NamedList args = new NamedList<String>();
@BeforeClass
public static void beforeClass() throws Exception {
System.setProperty("enable.update.log", "false");
initCore("solrconfig-classification.xml", "schema-classification.xml");
}
@Override
@Before
public void setUp() throws Exception {
super.setUp();
clearIndex();
assertU(commit());
}
@Test
public void classify_fullConfiguration_shouldAutoClassify() throws Exception {
indexTrainingSet();
// To be classified, we index documents without a class and verify the expected one is returned
addDoc(adoc(ID, "22",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 ",
AUTHOR, "Name1 Surname1"), CHAIN);
addDoc(adoc(ID, "21",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "Name Surname"), CHAIN);
addDoc(commit());
Document doc22 = getDoc("22");
assertThat(doc22.get(CLASS),is("class2"));
Document doc21 = getDoc("21");
assertThat(doc21.get(CLASS),is("class1"));
}
@Test
public void classify_unsupportedFilterQueryConfiguration_shouldThrowExceptionWithDetailedMessage() throws Exception {
indexTrainingSet();
try {
addDoc(adoc(ID, "21",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 ",
AUTHOR, "Name1 Surname1"), BROKEN_CHAIN_FILTER_QUERY);
addDoc(adoc(ID, "22",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "Name Surname"), BROKEN_CHAIN_FILTER_QUERY);
addDoc(commit());
} catch (SolrException e) {
assertEquals("Classification UpdateProcessor Training Filter Query: 'not valid ( lucene query' is not supported", e.getMessage());
}
}
/**
* Index some example documents with a class manually assigned.
* This will be our trained model.
*
* @throws Exception If there is a low-level I/O error
*/
private void indexTrainingSet() throws Exception {
//class1
addDoc(adoc(ID, "1",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "Name Surname",
CLASS, "class1"), CHAIN);
addDoc(adoc(ID, "2",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "Name Surname",
CLASS, "class1"), CHAIN);
addDoc(adoc(ID, "3",
TITLE, "word1 word1 word1",
CONTENT, "word2",
AUTHOR, "Name Surname",
CLASS, "class1"), CHAIN);
addDoc(adoc(ID, "4",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "Name Surname",
CLASS, "class1"), CHAIN);
//class2
addDoc(adoc(ID, "5",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5",
AUTHOR, "Name Surname",
CLASS, "class2"), CHAIN);
addDoc(adoc(ID, "6",
TITLE, "word4 word4",
CONTENT, "word5",
AUTHOR, "Name Surname",
CLASS, "class2"), CHAIN);
addDoc(adoc(ID, "7",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 word5",
AUTHOR, "Name Surname",
CLASS, "class2"), CHAIN);
addDoc(adoc(ID, "8",
TITLE, "word4",
CONTENT, "word5 word5 word5 word5",
AUTHOR, "Name Surname",
CLASS, "class2"), CHAIN);
//class3
addDoc(adoc(ID, "9",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5",
AUTHOR, "Name1 Surname1",
CLASS, "class3"), CHAIN);
addDoc(adoc(ID, "10",
TITLE, "word4 word4",
CONTENT, "word5",
AUTHOR, "Name1 Surname1",
CLASS, "class3"), CHAIN);
addDoc(adoc(ID, "11",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 word5",
AUTHOR, "Name1 Surname1",
CLASS, "class3"), CHAIN);
addDoc(adoc(ID, "12",
TITLE, "word4",
CONTENT, "word5 word5 word5 word5",
AUTHOR, "Name1 Surname1",
CLASS, "class3"), CHAIN);
addDoc(commit());
}
private Document getDoc(String id) throws IOException {
try (SolrQueryRequest req = req()) {
SolrIndexSearcher searcher = req.getSearcher();
TermQuery query = new TermQuery(new Term(ID, id));
TopDocs doc1 = searcher.search(query, 1);
ScoreDoc scoreDoc = doc1.scoreDocs[0];
return searcher.doc(scoreDoc.doc);
}
}
private void addDoc(String doc) throws Exception {
addDoc(doc, CHAIN);
}
}
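
The two-argument addDoc(doc, chain) this class delegates to is not shown in the excerpt; a plausible reconstruction, modeled on the helper removed from the factory test above but with the update chain name parameterized:

// Hypothetical reconstruction; imports match those of the removed helper.
private void addDoc(String doc, String chain) throws Exception {
  Map<String, String[]> params = new HashMap<>();
  params.put(UpdateParams.UPDATE_CHAIN, new String[]{chain});
  MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
  SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), mmparams) {
  };
  try {
    UpdateRequestHandler handler = new UpdateRequestHandler();
    handler.init(null);
    ArrayList<ContentStream> streams = new ArrayList<>(1);
    streams.add(new ContentStreamBase.StringStream(doc));
    req.setContentStreams(streams);
    handler.handleRequestBody(req, new SolrQueryResponse());
  } finally {
    req.close();
  }
}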

View File

@@ -0,0 +1,506 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.update.AddUpdateCommand;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.hamcrest.core.Is.is;
import static org.mockito.Mockito.mock;
/**
* Tests for {@link ClassificationUpdateProcessor}
*/
public class ClassificationUpdateProcessorTest extends SolrTestCaseJ4 {
/* field names are used in accordance with the solrconfig and schema supplied */
private static final String ID = "id";
private static final String TITLE = "title";
private static final String CONTENT = "content";
private static final String AUTHOR = "author";
private static final String TRAINING_CLASS = "cat";
private static final String PREDICTED_CLASS = "predicted";
public static final String KNN = "knn";
protected Directory directory;
protected IndexReader reader;
protected IndexSearcher searcher;
protected Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
private ClassificationUpdateProcessor updateProcessorToTest;
@BeforeClass
public static void beforeClass() throws Exception {
System.setProperty("enable.update.log", "false");
initCore("solrconfig-classification.xml", "schema-classification.xml");
}
@Override
public void setUp() throws Exception {
super.setUp();
}
@Override
public void tearDown() throws Exception {
reader.close();
directory.close();
analyzer.close();
super.tearDown();
}
@Test
public void classificationMonoClass_predictedClassFieldSet_shouldAssignClassInPredictedClassField() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMonoClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params = initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
params.setPredictedClassField(PREDICTED_CLASS);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
assertThat(unseenDocument1.getFieldValue(PREDICTED_CLASS),is("class1"));
}
@Test
public void knnMonoClass_sampleParams_shouldAssignCorrectClass() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMonoClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params = initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class1"));
}
@Test
public void knnMonoClass_boostFields_shouldAssignCorrectClass() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMonoClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params = initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
params.setInputFieldNames(new String[]{TITLE + "^1.5", CONTENT + "^0.5", AUTHOR + "^2.5"});
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class2"));
}
@Test
public void bayesMonoClass_sampleParams_shouldAssignCorrectClass() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMonoClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class1"));
}
@Test
public void knnMonoClass_contextQueryFiltered_shouldAssignCorrectClass() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMonoClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "a");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
Query class3DocsChunk=new TermQuery(new Term(TITLE,"word6"));
params.setTrainingFilterQuery(class3DocsChunk);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class3"));
}
@Test
public void bayesMonoClass_boostFields_shouldAssignCorrectClass() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMonoClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
params.setInputFieldNames(new String[]{TITLE+"^1.5",CONTENT+"^0.5",AUTHOR+"^2.5"});
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class2"));
}
@Test
public void knnClassification_maxOutputClassesGreaterThanAvailable_shouldAssignCorrectClass() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMultiClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
params.setMaxPredictedClasses(100);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
assertThat(assignedClasses.get(0),is("class2"));
assertThat(assignedClasses.get(1),is("class1"));
}
@Test
public void knnMultiClass_maxOutputClasses2_shouldAssignMax2Classes() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMultiClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
params.setMaxPredictedClasses(2);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
assertThat(assignedClasses.size(),is(2));
assertThat(assignedClasses.get(0),is("class2"));
assertThat(assignedClasses.get(1),is("class1"));
}
@Test
public void bayesMultiClass_maxOutputClasses2_shouldAssignMax2Classes() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMultiClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
params.setMaxPredictedClasses(2);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
assertThat(assignedClasses.size(),is(2));
assertThat(assignedClasses.get(0),is("class2"));
assertThat(assignedClasses.get(1),is("class1"));
}
@Test
public void knnMultiClass_boostFieldsMaxOutputClasses2_shouldAssignMax2Classes() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMultiClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
params.setInputFieldNames(new String[]{TITLE+"^1.5",CONTENT+"^0.5",AUTHOR+"^2.5"});
params.setMaxPredictedClasses(2);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
assertThat(assignedClasses.size(),is(2));
assertThat(assignedClasses.get(0),is("class4"));
assertThat(assignedClasses.get(1),is("class6"));
}
@Test
public void bayesMultiClass_boostFieldsMaxOutputClasses2_shouldAssignMax2Classes() throws Exception {
UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
prepareTrainedIndexMultiClass();
AddUpdateCommand update=new AddUpdateCommand(req());
SolrInputDocument unseenDocument1 = sdoc(ID, "10",
TITLE, "word4 word4 word4",
CONTENT, "word2 word2 ",
AUTHOR, "unseenAuthor");
update.solrDoc=unseenDocument1;
ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
params.setInputFieldNames(new String[]{TITLE+"^1.5",CONTENT+"^0.5",AUTHOR+"^2.5"});
params.setMaxPredictedClasses(2);
updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
updateProcessorToTest.processAdd(update);
ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
assertThat(assignedClasses.size(),is(2));
assertThat(assignedClasses.get(0),is("class4"));
assertThat(assignedClasses.get(1),is("class6"));
}
private ClassificationUpdateProcessorParams initParams(ClassificationUpdateProcessorFactory.Algorithm classificationAlgorithm) {
ClassificationUpdateProcessorParams params= new ClassificationUpdateProcessorParams();
params.setInputFieldNames(new String[]{TITLE,CONTENT,AUTHOR});
params.setTrainingClassField(TRAINING_CLASS);
params.setPredictedClassField(TRAINING_CLASS);
params.setMinTf(1);
params.setMinDf(1);
params.setK(5);
params.setAlgorithm(classificationAlgorithm);
params.setMaxPredictedClasses(1);
return params;
}
/**
* Index some example documents with a class manually assigned.
* This will be our trained model.
*
* @throws Exception If there is a low-level I/O error
*/
private void prepareTrainedIndexMonoClass() throws Exception {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
//class1
addDoc(writer, buildLuceneDocument(ID, "1",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "a",
TRAINING_CLASS, "class1"));
addDoc(writer, buildLuceneDocument(ID, "2",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "a",
TRAINING_CLASS, "class1"));
addDoc(writer, buildLuceneDocument(ID, "3",
TITLE, "word1 word1 word1",
CONTENT, "word2",
AUTHOR, "a",
TRAINING_CLASS, "class1"));
addDoc(writer, buildLuceneDocument(ID, "4",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "a",
TRAINING_CLASS, "class1"));
//class2
addDoc(writer, buildLuceneDocument(ID, "5",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5",
AUTHOR, "c",
TRAINING_CLASS, "class2"));
addDoc(writer, buildLuceneDocument(ID, "6",
TITLE, "word4 word4",
CONTENT, "word5",
AUTHOR, "c",
TRAINING_CLASS, "class2"));
addDoc(writer, buildLuceneDocument(ID, "7",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 word5",
AUTHOR, "c",
TRAINING_CLASS, "class2"));
addDoc(writer, buildLuceneDocument(ID, "8",
TITLE, "word4",
CONTENT, "word5 word5 word5 word5",
AUTHOR, "c",
TRAINING_CLASS, "class2"));
//class3
addDoc(writer, buildLuceneDocument(ID, "9",
TITLE, "word6",
CONTENT, "word7",
AUTHOR, "a",
TRAINING_CLASS, "class3"));
addDoc(writer, buildLuceneDocument(ID, "10",
TITLE, "word6",
CONTENT, "word7",
AUTHOR, "a",
TRAINING_CLASS, "class3"));
addDoc(writer, buildLuceneDocument(ID, "11",
TITLE, "word6",
CONTENT, "word7",
AUTHOR, "a",
TRAINING_CLASS, "class3"));
addDoc(writer, buildLuceneDocument(ID, "12",
TITLE, "word6",
CONTENT, "word7",
AUTHOR, "a",
TRAINING_CLASS, "class3"));
reader = writer.getReader();
writer.close();
searcher = newSearcher(reader);
}
private void prepareTrainedIndexMultiClass() throws Exception {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
//class1
addDoc(writer, buildLuceneDocument(ID, "1",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "Name Surname",
TRAINING_CLASS, "class1",
TRAINING_CLASS, "class2"
));
addDoc(writer, buildLuceneDocument(ID, "2",
TITLE, "word1 word1",
CONTENT, "word2 word2",
AUTHOR, "Name Surname",
TRAINING_CLASS, "class3",
TRAINING_CLASS, "class2"
));
addDoc(writer, buildLuceneDocument(ID, "3",
TITLE, "word1 word1 word1",
CONTENT, "word2",
AUTHOR, "Name Surname",
TRAINING_CLASS, "class1",
TRAINING_CLASS, "class2"
));
addDoc(writer, buildLuceneDocument(ID, "4",
TITLE, "word1 word1 word1",
CONTENT, "word2 word2 word2",
AUTHOR, "Name Surname",
TRAINING_CLASS, "class1",
TRAINING_CLASS, "class2"
));
//class2
addDoc(writer, buildLuceneDocument(ID, "5",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5",
AUTHOR, "Name1 Surname1",
TRAINING_CLASS, "class6",
TRAINING_CLASS, "class4"
));
addDoc(writer, buildLuceneDocument(ID, "6",
TITLE, "word4 word4",
CONTENT, "word5",
AUTHOR, "Name1 Surname1",
TRAINING_CLASS, "class5",
TRAINING_CLASS, "class4"
));
addDoc(writer, buildLuceneDocument(ID, "7",
TITLE, "word4 word4 word4",
CONTENT, "word5 word5 word5",
AUTHOR, "Name1 Surname1",
TRAINING_CLASS, "class6",
TRAINING_CLASS, "class4"
));
addDoc(writer, buildLuceneDocument(ID, "8",
TITLE, "word4",
CONTENT, "word5 word5 word5 word5",
AUTHOR, "Name1 Surname1",
TRAINING_CLASS, "class6",
TRAINING_CLASS, "class4"
));
reader = writer.getReader();
writer.close();
searcher = newSearcher(reader);
}
public static Document buildLuceneDocument(Object... fieldsAndValues) {
Document luceneDoc = new Document();
for (int i=0; i<fieldsAndValues.length; i+=2) {
luceneDoc.add(newTextField((String)fieldsAndValues[i], (String)fieldsAndValues[i+1], Field.Store.YES));
}
return luceneDoc;
}
private int addDoc(RandomIndexWriter writer, Document doc) throws IOException {
writer.addDocument(doc);
return writer.numDocs() - 1;
}
}
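
Condensed, every test above performs the same three steps: build the params, wrap the processor around a downstream delegate, and push one document through processAdd. A sketch under the assumption that the caller supplies the reader and schema; the class name and field values are illustrative.

package org.apache.solr.update.processor; // assumed, to match the classes under test

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.AddUpdateCommand;

public class ClassificationRunSketch {
  public static void classify(AddUpdateCommand cmd, UpdateRequestProcessor next,
                              IndexReader reader, IndexSchema schema) throws IOException {
    ClassificationUpdateProcessorParams params = new ClassificationUpdateProcessorParams();
    params.setInputFieldNames(new String[]{"title", "content", "author"});
    params.setTrainingClassField("cat");
    params.setPredictedClassField("cat"); // predict straight into the training field
    params.setMinTf(1);
    params.setMinDf(1);
    params.setK(5);
    params.setAlgorithm(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    params.setMaxPredictedClasses(1);
    ClassificationUpdateProcessor processor =
        new ClassificationUpdateProcessor(params, next, reader, schema);
    processor.processAdd(cmd); // the predicted class lands in cmd.solrDoc
  }
}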

Some files were not shown because too many files have changed in this diff.