mirror of https://github.com/apache/lucene.git
Merge branch 'apache-https-master' into jira/solr-8593
This commit is contained in:
commit
05a6170d12
|
@ -6,7 +6,10 @@ as to the usefulness of the tools.
|
|||
Description of dev-tools/ contents:
|
||||
|
||||
./size-estimator-lucene-solr.xls -- Spreadsheet for estimating memory and disk usage in Lucene/Solr
|
||||
./eclipse -- Used to generate project descriptors for the Eclipse IDE.
|
||||
./idea -- Similar to Eclipse, but for IntelliJ's IDEA IDE.
|
||||
./maven -- Mavenizes the Lucene/Solr packages
|
||||
./scripts -- Odds and ends for building releases, etc.
|
||||
./doap/ -- Lucene and Solr project descriptors in DOAP RDF format.
|
||||
./eclipse/ -- Used to generate project descriptors for the Eclipse IDE.
|
||||
./git/ -- Git documentation and resources.
|
||||
./idea/ -- Used to generate project descriptors for IntelliJ's IDEA IDE.
|
||||
./maven/ -- Mavenizes the Lucene/Solr packages
|
||||
./netbeans/ -- Used to generate project descriptors for the Netbeans IDE.
|
||||
./scripts/ -- Odds and ends for building releases, etc.
|
||||
|
|
|
@ -2,4 +2,7 @@ This folder contains the DOAP[1] files for each project.
|
|||
|
||||
Upon release, these files should be updated to include new release details.
|
||||
|
||||
NOTE: If this folder's contents are moved elsewhere, the website .htaccess
|
||||
file will need to be updated.
|
||||
|
||||
[1] DOAP: https://github.com/edumbill/doap
|
||||
|
|
|
@ -1,21 +1,35 @@
|
|||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<rdf:RDF xml:lang="en"
|
||||
xmlns="http://usefulinc.com/ns/doap#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:asfext="http://projects.apache.org/ns/asfext#"
|
||||
xmlns:foaf="http://xmlns.com/foaf/0.1/">
|
||||
<!--
|
||||
=======================================================================
|
||||
This file's canonical URL is: http://lucene.apache.org/core/doap.rdf
|
||||
|
||||
Copyright (c) 2016 The Apache Software Foundation.
|
||||
All rights reserved.
|
||||
|
||||
=======================================================================
|
||||
Note that the canonical URL may redirect to other non-canonical locations.
|
||||
-->
|
||||
<Project rdf:about="http://lucene.apache.org/core/">
|
||||
<created>2001-09-01</created>
|
||||
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0.txt"/>
|
||||
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
|
||||
<name>Apache Lucene Core</name>
|
||||
<homepage rdf:resource="http://lucene.apache.org/core/" />
|
||||
<asfext:pmc rdf:resource="http://lucene.apache.org" />
|
||||
|
|
|
@ -1,21 +1,35 @@
|
|||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<rdf:RDF xml:lang="en"
|
||||
xmlns="http://usefulinc.com/ns/doap#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:asfext="http://projects.apache.org/ns/asfext#"
|
||||
xmlns:foaf="http://xmlns.com/foaf/0.1/">
|
||||
<!--
|
||||
=======================================================================
|
||||
This file's canonical URL is: http://lucene.apache.org/solr/doap.rdf
|
||||
|
||||
Copyright (c) 2007 The Apache Software Foundation.
|
||||
All rights reserved.
|
||||
|
||||
=======================================================================
|
||||
Note that the canonical URL may redirect to other non-canonical locations.
|
||||
-->
|
||||
<Project rdf:about="http://lucene.apache.org/solr/">
|
||||
<created>2006-01-17</created>
|
||||
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0.txt"/>
|
||||
<license rdf:resource="http://www.apache.org/licenses/LICENSE-2.0"/>
|
||||
<name>Apache Solr</name>
|
||||
<homepage rdf:resource="http://lucene.apache.org/solr/" />
|
||||
<asfext:pmc rdf:resource="http://lucene.apache.org" />
|
||||
|
@ -348,28 +362,28 @@
|
|||
</release>
|
||||
<release>
|
||||
<Version>
|
||||
<name>lucene-4.0-BETA</name>
|
||||
<name>solr-4.0-BETA</name>
|
||||
<created>2012-08-13</created>
|
||||
<revision>4.0-BETA</revision>
|
||||
</Version>
|
||||
</release>
|
||||
<release>
|
||||
<Version>
|
||||
<name>lucene-4.0-ALPHA</name>
|
||||
<name>solr-4.0-ALPHA</name>
|
||||
<created>2012-07-03</created>
|
||||
<revision>4.0-ALPHA</revision>
|
||||
</Version>
|
||||
</release>
|
||||
<release>
|
||||
<Version>
|
||||
<name>lucene-3.6.2</name>
|
||||
<name>solr-3.6.2</name>
|
||||
<created>2012-12-25</created>
|
||||
<revision>3.6.2</revision>
|
||||
</Version>
|
||||
</release>
|
||||
<release>
|
||||
<Version>
|
||||
<name>lucene-3.6.1</name>
|
||||
<name>solr-3.6.1</name>
|
||||
<created>2012-07-22</created>
|
||||
<revision>3.6.1</revision>
|
||||
</Version>
|
||||
|
|
|
@ -117,6 +117,11 @@ Improvements
|
|||
control how text is analyzed and converted into a query (Matt Weber
|
||||
via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-7568: Optimize merging when index sorting is used but the
|
||||
index is already sorted (Jim Ferenczi via Mike McCandless)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
|
||||
|
|
|
@ -195,9 +195,10 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
|
|||
Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs
|
||||
float maxScore = topDocs.getMaxScore();
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
IndexableField storableField = indexSearcher.doc(scoreDoc.doc).getField(classFieldName);
|
||||
if (storableField != null) {
|
||||
BytesRef cl = new BytesRef(storableField.stringValue());
|
||||
IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(classFieldName);
|
||||
for (IndexableField singleStorableField : storableFields) {
|
||||
if (singleStorableField != null) {
|
||||
BytesRef cl = new BytesRef(singleStorableField.stringValue());
|
||||
//update count
|
||||
Integer count = classCounts.get(cl);
|
||||
if (count != null) {
|
||||
|
@ -213,6 +214,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
|
|||
} else {
|
||||
classBoosts.put(cl, singleBoost);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
|
||||
|
|
|
@ -109,6 +109,7 @@ public class KNearestNeighborDocumentClassifier extends KNearestNeighborClassifi
|
|||
TopDocs knnResults = knnSearch(document);
|
||||
List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults);
|
||||
Collections.sort(assignedClasses);
|
||||
max = Math.min(max, assignedClasses.size());
|
||||
return assignedClasses.subList(0, max);
|
||||
}
|
||||
|
||||
|
@ -130,15 +131,14 @@ public class KNearestNeighborDocumentClassifier extends KNearestNeighborClassifi
|
|||
boost = field2boost[1];
|
||||
}
|
||||
String[] fieldValues = document.getValues(fieldName);
|
||||
mlt.setBoost(true); // we want always to use the boost coming from TF * IDF of the term
|
||||
if (boost != null) {
|
||||
mlt.setBoost(true);
|
||||
mlt.setBoostFactor(Float.parseFloat(boost));
|
||||
mlt.setBoostFactor(Float.parseFloat(boost)); // this is an additional multiplicative boost coming from the field boost
|
||||
}
|
||||
mlt.setAnalyzer(field2analyzer.get(fieldName));
|
||||
for (String fieldContent : fieldValues) {
|
||||
mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD));
|
||||
}
|
||||
mlt.setBoost(false);
|
||||
}
|
||||
Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
|
||||
mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
|
||||
|
|
|
@ -2503,21 +2503,26 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
|
|||
-->
|
||||
<macrodef name="build-changes">
|
||||
<attribute name="changes.product"/>
|
||||
<attribute name="doap.property.prefix" default="doap.@{changes.product}"/>
|
||||
<attribute name="changes.src.file" default="CHANGES.txt"/>
|
||||
<attribute name="changes.src.doap" default="${dev-tools.dir}/doap/@{changes.product}.rdf"/>
|
||||
<attribute name="changes.version.dates" default="build/@{doap.property.prefix}.version.dates.csv"/>
|
||||
<attribute name="changes.target.dir" default="${changes.target.dir}"/>
|
||||
<attribute name="lucene.javadoc.url" default="${lucene.javadoc.url}"/>
|
||||
<sequential>
|
||||
<mkdir dir="@{changes.target.dir}"/>
|
||||
<xmlproperty keeproot="false" file="@{changes.src.doap}" collapseAttributes="false" prefix="@{doap.property.prefix}"/>
|
||||
<echo file="@{changes.version.dates}" append="false">${@{doap.property.prefix}.Project.release.Version.revision}
</echo>
|
||||
<echo file="@{changes.version.dates}" append="true">${@{doap.property.prefix}.Project.release.Version.created}
</echo>
|
||||
<exec executable="${perl.exe}" input="@{changes.src.file}" output="@{changes.target.dir}/Changes.html"
|
||||
failonerror="true" logError="true">
|
||||
<arg value="-CSD"/>
|
||||
<arg value="${changes.src.dir}/changes2html.pl"/>
|
||||
<arg value="@{changes.product}"/>
|
||||
<arg value="@{changes.src.doap}"/>
|
||||
<arg value="@{changes.version.dates}"/>
|
||||
<arg value="@{lucene.javadoc.url}"/>
|
||||
</exec>
|
||||
<delete file="@{changes.target.dir}/jiraVersionList.json"/>
|
||||
<delete file="@{changes.version.dates}"/>
|
||||
<copy todir="@{changes.target.dir}">
|
||||
<fileset dir="${changes.src.dir}" includes="*.css"/>
|
||||
</copy>
|
||||
|
|
|
@ -198,7 +198,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
final long finalCost = cost;
|
||||
|
||||
|
@ -296,7 +296,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
final long finalCost = cost;
|
||||
|
||||
return new BinaryDocValues() {
|
||||
|
@ -397,7 +397,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
|
||||
final long finalCost = cost;
|
||||
|
||||
final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
return new SortedNumericDocValues() {
|
||||
|
||||
|
@ -555,7 +555,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
|
||||
final long finalCost = cost;
|
||||
|
||||
final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
return new SortedDocValues() {
|
||||
private int docID = -1;
|
||||
|
@ -721,7 +721,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
|
||||
}
|
||||
|
||||
final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
final long finalCost = cost;
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ public abstract class NormsConsumer implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
return new NumericDocValues() {
|
||||
private int docID = -1;
|
||||
|
|
|
@ -117,7 +117,7 @@ public abstract class StoredFieldsWriter implements Closeable {
|
|||
subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
|
||||
}
|
||||
|
||||
final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
int docCount = 0;
|
||||
while (true) {
|
||||
|
|
|
@ -205,7 +205,7 @@ public abstract class TermVectorsWriter implements Closeable {
|
|||
subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
|
||||
}
|
||||
|
||||
final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
|
||||
final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
|
||||
|
||||
int docCount = 0;
|
||||
while (true) {
|
||||
|
|
|
@ -486,7 +486,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
|||
|
||||
@Override
|
||||
public int merge(MergeState mergeState) throws IOException {
|
||||
if (mergeState.segmentInfo.getIndexSort() != null) {
|
||||
if (mergeState.needsIndexSort) {
|
||||
// TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
|
||||
// being copied over...?
|
||||
return super.merge(mergeState);
|
||||
|
|
|
@ -730,7 +730,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
|
|||
|
||||
@Override
|
||||
public int merge(MergeState mergeState) throws IOException {
|
||||
if (mergeState.segmentInfo.getIndexSort() != null) {
|
||||
if (mergeState.needsIndexSort) {
|
||||
// TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
|
||||
// being copied over...?
|
||||
return super.merge(mergeState);
|
||||
|
|
|
@ -134,7 +134,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
|
|||
|
||||
@Override
|
||||
public void merge(MergeState mergeState) throws IOException {
|
||||
if (mergeState.segmentInfo.getIndexSort() != null) {
|
||||
if (mergeState.needsIndexSort) {
|
||||
// TODO: can we gain back some optos even if index is sorted? E.g. if sort results in large chunks of contiguous docs from one sub
|
||||
// being copied over...?
|
||||
super.merge(mergeState);
|
||||
|
|
|
@ -62,7 +62,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum {
|
|||
for(int i=0;i<allSubs.length;i++) {
|
||||
allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]);
|
||||
}
|
||||
this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.segmentInfo.getIndexSort() != null);
|
||||
this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.needsIndexSort);
|
||||
}
|
||||
|
||||
MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException {
|
||||
|
|
|
@ -42,7 +42,7 @@ public class MergeState {
|
|||
/** Maps document IDs from old segments to document IDs in the new segment */
|
||||
public final DocMap[] docMaps;
|
||||
|
||||
// Only used by IW when it must remap deletes that arrived against the merging segmetns while a merge was running:
|
||||
// Only used by IW when it must remap deletes that arrived against the merging segments while a merge was running:
|
||||
final DocMap[] leafDocMaps;
|
||||
|
||||
/** {@link SegmentInfo} of the newly merged segment. */
|
||||
|
@ -81,6 +81,9 @@ public class MergeState {
|
|||
/** InfoStream for debugging messages. */
|
||||
public final InfoStream infoStream;
|
||||
|
||||
/** Indicates if the index needs to be sorted **/
|
||||
public boolean needsIndexSort;
|
||||
|
||||
/** Sole constructor. */
|
||||
MergeState(List<CodecReader> originalReaders, SegmentInfo segmentInfo, InfoStream infoStream) throws IOException {
|
||||
|
||||
|
@ -143,50 +146,58 @@ public class MergeState {
|
|||
this.docMaps = buildDocMaps(readers, indexSort);
|
||||
}
|
||||
|
||||
private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
|
||||
// Remap docIDs around deletions
|
||||
private DocMap[] buildDeletionDocMaps(List<CodecReader> readers) {
|
||||
|
||||
int totalDocs = 0;
|
||||
int numReaders = readers.size();
|
||||
DocMap[] docMaps = new DocMap[numReaders];
|
||||
|
||||
for (int i = 0; i < numReaders; i++) {
|
||||
LeafReader reader = readers.get(i);
|
||||
Bits liveDocs = reader.getLiveDocs();
|
||||
|
||||
final PackedLongValues delDocMap;
|
||||
if (liveDocs != null) {
|
||||
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
|
||||
} else {
|
||||
delDocMap = null;
|
||||
}
|
||||
|
||||
final int docBase = totalDocs;
|
||||
docMaps[i] = new DocMap() {
|
||||
@Override
|
||||
public int get(int docID) {
|
||||
if (liveDocs == null) {
|
||||
return docBase + docID;
|
||||
} else if (liveDocs.get(docID)) {
|
||||
return docBase + (int) delDocMap.get(docID);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
};
|
||||
totalDocs += reader.numDocs();
|
||||
}
|
||||
|
||||
return docMaps;
|
||||
}
|
||||
|
||||
private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {
|
||||
|
||||
if (indexSort == null) {
|
||||
// no index sort ... we only must map around deletions, and rebase to the merged segment's docID space
|
||||
|
||||
int totalDocs = 0;
|
||||
DocMap[] docMaps = new DocMap[numReaders];
|
||||
|
||||
// Remap docIDs around deletions:
|
||||
for (int i = 0; i < numReaders; i++) {
|
||||
LeafReader reader = readers.get(i);
|
||||
Bits liveDocs = reader.getLiveDocs();
|
||||
|
||||
final PackedLongValues delDocMap;
|
||||
if (liveDocs != null) {
|
||||
delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
|
||||
} else {
|
||||
delDocMap = null;
|
||||
}
|
||||
|
||||
final int docBase = totalDocs;
|
||||
docMaps[i] = new DocMap() {
|
||||
@Override
|
||||
public int get(int docID) {
|
||||
if (liveDocs == null) {
|
||||
return docBase + docID;
|
||||
} else if (liveDocs.get(docID)) {
|
||||
return docBase + (int) delDocMap.get(docID);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
};
|
||||
totalDocs += reader.numDocs();
|
||||
}
|
||||
|
||||
return docMaps;
|
||||
|
||||
return buildDeletionDocMaps(readers);
|
||||
} else {
|
||||
// do a merge sort of the incoming leaves:
|
||||
long t0 = System.nanoTime();
|
||||
DocMap[] result = MultiSorter.sort(indexSort, readers);
|
||||
if (result == null) {
|
||||
// already sorted so we can switch back to map around deletions
|
||||
return buildDeletionDocMaps(readers);
|
||||
} else {
|
||||
needsIndexSort = true;
|
||||
}
|
||||
long t1 = System.nanoTime();
|
||||
if (infoStream.isEnabled("SM")) {
|
||||
infoStream.message("SM", String.format(Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1-t0)/1000000.0));
|
||||
|
@ -233,6 +244,7 @@ public class MergeState {
|
|||
if (infoStream.isEnabled("SM")) {
|
||||
infoStream.message("SM", String.format(Locale.ROOT, "segment %s is not sorted; wrapping for sort %s now (%.2f msec to sort)", leaf, indexSort, msec));
|
||||
}
|
||||
needsIndexSort = true;
|
||||
leaf = SlowCodecReaderWrapper.wrap(SortingLeafReader.wrap(new MergeReaderWrapper(leaf), sortDocMap));
|
||||
leafDocMaps[readers.size()] = new DocMap() {
|
||||
@Override
|
||||
|
|
|
@ -33,7 +33,9 @@ import org.apache.lucene.util.packed.PackedLongValues;
|
|||
final class MultiSorter {
|
||||
|
||||
/** Does a merge sort of the leaves of the incoming reader, returning {@link DocMap} to map each leaf's
|
||||
* documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort! */
|
||||
* documents into the merged segment. The documents for each incoming leaf reader must already be sorted by the same sort!
|
||||
* Returns null if the merge sort is not needed (segments are already in index sort order).
|
||||
**/
|
||||
static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {
|
||||
|
||||
// TODO: optimize if only 1 reader is incoming, though that's a rare case
|
||||
|
@ -80,8 +82,15 @@ final class MultiSorter {
|
|||
|
||||
// merge sort:
|
||||
int mappedDocID = 0;
|
||||
int lastReaderIndex = 0;
|
||||
boolean isSorted = true;
|
||||
while (queue.size() != 0) {
|
||||
LeafAndDocID top = queue.top();
|
||||
if (lastReaderIndex > top.readerIndex) {
|
||||
// merge sort is needed
|
||||
isSorted = false;
|
||||
}
|
||||
lastReaderIndex = top.readerIndex;
|
||||
builders[top.readerIndex].add(mappedDocID);
|
||||
if (top.liveDocs == null || top.liveDocs.get(top.docID)) {
|
||||
mappedDocID++;
|
||||
|
@ -97,6 +106,9 @@ final class MultiSorter {
|
|||
queue.pop();
|
||||
}
|
||||
}
|
||||
if (isSorted) {
|
||||
return null;
|
||||
}
|
||||
|
||||
MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount];
|
||||
for(int i=0;i<leafCount;i++) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.Random;
|
|||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
|
@ -37,14 +38,19 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.PointsWriter;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.BinaryPoint;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoubleDocValuesField;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.FloatDocValuesField;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
|
@ -79,6 +85,190 @@ import org.apache.lucene.util.TestUtil;
|
|||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
public class TestIndexSorting extends LuceneTestCase {
|
||||
static class AssertingNeedsIndexSortCodec extends FilterCodec {
|
||||
boolean needsIndexSort;
|
||||
int numCalls;
|
||||
|
||||
AssertingNeedsIndexSortCodec() {
|
||||
super(TestUtil.getDefaultCodec().getName(), TestUtil.getDefaultCodec());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointsFormat pointsFormat() {
|
||||
final PointsFormat pf = delegate.pointsFormat();
|
||||
return new PointsFormat() {
|
||||
@Override
|
||||
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
final PointsWriter writer = pf.fieldsWriter(state);
|
||||
return new PointsWriter() {
|
||||
@Override
|
||||
public void merge(MergeState mergeState) throws IOException {
|
||||
// For single segment merge we cannot infer if the segment is already sorted or not.
|
||||
if (mergeState.docMaps.length > 1) {
|
||||
assertEquals(needsIndexSort, mergeState.needsIndexSort);
|
||||
}
|
||||
++ numCalls;
|
||||
writer.merge(mergeState);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
|
||||
writer.writeField(fieldInfo, values);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() throws IOException {
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return pf.fieldsReader(state);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertNeedsIndexSortMerge(SortField sortField, Consumer<Document> defaultValueConsumer, Consumer<Document> randomValueConsumer) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
|
||||
iwc.setCodec(codec);
|
||||
Sort indexSort = new Sort(sortField,
|
||||
new SortField("id", SortField.Type.INT));
|
||||
iwc.setIndexSort(indexSort);
|
||||
LogMergePolicy policy = newLogMergePolicy();
|
||||
// make sure that merge factor is always > 2
|
||||
if (policy.getMergeFactor() <= 2) {
|
||||
policy.setMergeFactor(3);
|
||||
}
|
||||
iwc.setMergePolicy(policy);
|
||||
|
||||
// add already sorted documents
|
||||
codec.numCalls = 0;
|
||||
codec.needsIndexSort = false;
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
boolean withValues = random().nextBoolean();
|
||||
for (int i = 100; i < 200; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("id", Integer.toString(i), Store.YES));
|
||||
doc.add(new NumericDocValuesField("id", i));
|
||||
doc.add(new IntPoint("point", random().nextInt()));
|
||||
if (withValues) {
|
||||
defaultValueConsumer.accept(doc);
|
||||
}
|
||||
w.addDocument(doc);
|
||||
if (i % 10 == 0) {
|
||||
w.commit();
|
||||
}
|
||||
}
|
||||
Set<Integer> deletedDocs = new HashSet<> ();
|
||||
int num = random().nextInt(20);
|
||||
for (int i = 0; i < num; i++) {
|
||||
int nextDoc = random().nextInt(100);
|
||||
w.deleteDocuments(new Term("id", Integer.toString(nextDoc)));
|
||||
deletedDocs.add(nextDoc);
|
||||
}
|
||||
w.commit();
|
||||
w.waitForMerges();
|
||||
w.forceMerge(1);
|
||||
assertTrue(codec.numCalls > 0);
|
||||
|
||||
|
||||
// merge sort is needed
|
||||
codec.numCalls = 0;
|
||||
codec.needsIndexSort = true;
|
||||
for (int i = 10; i >= 0; i--) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("id", Integer.toString(i), Store.YES));
|
||||
doc.add(new NumericDocValuesField("id", i));
|
||||
doc.add(new IntPoint("point", random().nextInt()));
|
||||
if (withValues) {
|
||||
defaultValueConsumer.accept(doc);
|
||||
}
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
}
|
||||
w.commit();
|
||||
w.waitForMerges();
|
||||
w.forceMerge(1);
|
||||
assertTrue(codec.numCalls > 0);
|
||||
|
||||
// segment sort is needed
|
||||
codec.needsIndexSort = true;
|
||||
codec.numCalls = 0;
|
||||
for (int i = 200; i < 300; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("id", Integer.toString(i), Store.YES));
|
||||
doc.add(new NumericDocValuesField("id", i));
|
||||
doc.add(new IntPoint("point", random().nextInt()));
|
||||
randomValueConsumer.accept(doc);
|
||||
w.addDocument(doc);
|
||||
if (i % 10 == 0) {
|
||||
w.commit();
|
||||
}
|
||||
}
|
||||
w.commit();
|
||||
w.waitForMerges();
|
||||
w.forceMerge(1);
|
||||
assertTrue(codec.numCalls > 0);
|
||||
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNumericAlreadySorted() throws Exception {
|
||||
assertNeedsIndexSortMerge(new SortField("foo", SortField.Type.INT),
|
||||
(doc) -> doc.add(new NumericDocValuesField("foo", 0)),
|
||||
(doc) -> doc.add(new NumericDocValuesField("foo", random().nextInt())));
|
||||
}
|
||||
|
||||
public void testStringAlreadySorted() throws Exception {
|
||||
assertNeedsIndexSortMerge(new SortField("foo", SortField.Type.STRING),
|
||||
(doc) -> doc.add(new SortedDocValuesField("foo", new BytesRef("default"))),
|
||||
(doc) -> doc.add(new SortedDocValuesField("foo", TestUtil.randomBinaryTerm(random()))));
|
||||
}
|
||||
|
||||
public void testMultiValuedNumericAlreadySorted() throws Exception {
|
||||
assertNeedsIndexSortMerge(new SortedNumericSortField("foo", SortField.Type.INT),
|
||||
(doc) -> {
|
||||
doc.add(new SortedNumericDocValuesField("foo", Integer.MIN_VALUE));
|
||||
int num = random().nextInt(5);
|
||||
for (int j = 0; j < num; j++) {
|
||||
doc.add(new SortedNumericDocValuesField("foo", random().nextInt()));
|
||||
}
|
||||
},
|
||||
(doc) -> {
|
||||
int num = random().nextInt(5);
|
||||
for (int j = 0; j < num; j++) {
|
||||
doc.add(new SortedNumericDocValuesField("foo", random().nextInt()));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void testMultiValuedStringAlreadySorted() throws Exception {
|
||||
assertNeedsIndexSortMerge(new SortedSetSortField("foo", false),
|
||||
(doc) -> {
|
||||
doc.add(new SortedSetDocValuesField("foo", new BytesRef("")));
|
||||
int num = random().nextInt(5);
|
||||
for (int j = 0; j < num; j++) {
|
||||
doc.add(new SortedSetDocValuesField("foo", TestUtil.randomBinaryTerm(random())));
|
||||
}
|
||||
},
|
||||
(doc) -> {
|
||||
int num = random().nextInt(5);
|
||||
for (int j = 0; j < num; j++) {
|
||||
doc.add(new SortedSetDocValuesField("foo", TestUtil.randomBinaryTerm(random())));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void testBasicString() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
|
|
@ -13,9 +13,6 @@ com.carrotsearch.randomizedtesting.version = 2.4.0
|
|||
|
||||
/com.carrotsearch/hppc = 0.7.1
|
||||
|
||||
com.codahale.metrics.version = 3.0.1
|
||||
/com.codahale.metrics/metrics-core = ${com.codahale.metrics.version}
|
||||
/com.codahale.metrics/metrics-healthchecks = ${com.codahale.metrics.version}
|
||||
|
||||
/com.cybozu.labs/langdetect = 1.1-20120112
|
||||
/com.drewnoakes/metadata-extractor = 2.8.1
|
||||
|
@ -74,6 +71,10 @@ com.sun.jersey.version = 1.9
|
|||
/hsqldb/hsqldb = 1.8.0.10
|
||||
/io.airlift/slice = 0.10
|
||||
|
||||
io.dropwizard.metrics.version = 3.1.2
|
||||
/io.dropwizard.metrics/metrics-core = ${io.dropwizard.metrics.version}
|
||||
/io.dropwizard.metrics/metrics-healthchecks = ${io.dropwizard.metrics.version}
|
||||
|
||||
io.netty.netty-all.version = 4.0.36.Final
|
||||
/io.netty/netty-all = ${io.netty.netty-all.version}
|
||||
|
||||
|
@ -250,7 +251,7 @@ org.codehaus.janino.version = 2.7.6
|
|||
/org.codehaus.woodstox/woodstox-core-asl = 4.4.1
|
||||
/org.easymock/easymock = 3.0
|
||||
|
||||
org.eclipse.jetty.version = 9.3.8.v20160314
|
||||
org.eclipse.jetty.version = 9.3.14.v20161028
|
||||
/org.eclipse.jetty/jetty-continuation = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-deploy = ${org.eclipse.jetty.version}
|
||||
/org.eclipse.jetty/jetty-http = ${org.eclipse.jetty.version}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
4ba272cee2e367766dfdc1901c960de352160d41
|
|
@ -1 +0,0 @@
|
|||
dec4dfc43617637694762822ef99c8373c944c98
|
|
@ -0,0 +1 @@
|
|||
ea3800883f79f757b2635a737bb71bb21e90cf19
|
|
@ -1 +0,0 @@
|
|||
0127feb7407f4137ff4295b5fa2895845db56710
|
|
@ -0,0 +1 @@
|
|||
52d796b58c3a997e59e6b47c4bf022cedcba3514
|
|
@ -1 +0,0 @@
|
|||
371e3c2b72d9a9737579ec0fdfd6a2a3ab8b8141
|
|
@ -0,0 +1 @@
|
|||
791df6c55ad62841ff518ba6634e905a95567260
|
|
@ -1 +0,0 @@
|
|||
da8366f602f35d4c3177cb081472e2fc4abe04ea
|
|
@ -0,0 +1 @@
|
|||
b5714a6005387b2a361d5b39a3a37d4df1892e62
|
|
@ -1 +0,0 @@
|
|||
ea5f25d3326d7745d9c21d405dcf6f878efbd5fb
|
|
@ -0,0 +1 @@
|
|||
fbf89f6f3b995992f82ec09104ab9a75d31d281b
|
|
@ -1 +0,0 @@
|
|||
01d53c7a7e7715e67d6f4edec6c5b328ee162e65
|
|
@ -23,7 +23,6 @@
|
|||
|
||||
use strict;
|
||||
use warnings;
|
||||
use XML::Simple;
|
||||
|
||||
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
|
||||
my $github_pull_request_prefix = 'https://github.com/apache/lucene-solr/pull/';
|
||||
|
@ -823,26 +822,33 @@ sub get_release_date {
|
|||
# Pulls release dates from the project DOAP file.
|
||||
#
|
||||
sub setup_release_dates {
|
||||
my %release_dates;
|
||||
my %release_dates = ();
|
||||
my $file = shift;
|
||||
print STDERR "file: $file\n";
|
||||
open(FILE, "<$file") || die "could not open $file: $!";
|
||||
my $version_list = <FILE>;
|
||||
my $created_list = <FILE>;
|
||||
close(FILE);
|
||||
|
||||
my $project_info = XMLin($file)->{Project};
|
||||
my $version;
|
||||
$version_list =~ s/^\s+|\s+$//g;
|
||||
my @versions = split /\s*,\s*/, $version_list;
|
||||
$created_list =~ s/^\s+|\s+$//g;
|
||||
my @created = split /\s*,\s*/, $created_list;
|
||||
|
||||
if (scalar(@versions) != scalar(@created)) {
|
||||
die $file . " contains" . scalar(@versions) . " versions but " . scalar(@created) . " creation dates.";
|
||||
}
|
||||
my $date;
|
||||
for my $release (@{$project_info->{release}}) {
|
||||
$version = $release->{Version};
|
||||
if ($version->{created}) {
|
||||
$date = normalize_date($version->{created});
|
||||
my $version_name = $version->{revision};
|
||||
$release_dates{$version->{revision}} = $date;
|
||||
if ($version_name =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) {
|
||||
my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0"
|
||||
$release_dates{$padded_version_name} = $date;
|
||||
} elsif ($version_name =~ /\.0(?=[^.0-9]|$)/) {
|
||||
my $trimmed_version_name = $version_name;
|
||||
$trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0"
|
||||
$release_dates{$trimmed_version_name} = $date;
|
||||
}
|
||||
for my $pos (0..$#versions) {
|
||||
$date = normalize_date($created[$pos]);
|
||||
$release_dates{$versions[$pos]} = $date;
|
||||
if ($versions[$pos] =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) {
|
||||
my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0"
|
||||
$release_dates{$padded_version_name} = $date;
|
||||
} elsif ($versions[$pos] =~ /\.0(?=[^.0-9]|$)/) {
|
||||
my $trimmed_version_name = $versions[$pos];
|
||||
$trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0"
|
||||
$release_dates{$trimmed_version_name} = $date;
|
||||
}
|
||||
}
|
||||
return %release_dates;
|
||||
|
|
|
@ -156,7 +156,7 @@ public class MockRandomMergePolicy extends MergePolicy {
|
|||
if (LuceneTestCase.VERBOSE) {
|
||||
System.out.println("NOTE: MockRandomMergePolicy now swaps in a SlowCodecReaderWrapper for merging reader=" + reader);
|
||||
}
|
||||
return SlowCodecReaderWrapper.wrap(new FilterLeafReader(reader) {});
|
||||
return SlowCodecReaderWrapper.wrap(new FilterLeafReader(new MergeReaderWrapper(reader)) {});
|
||||
} else if (thingToDo == 1) {
|
||||
// renumber fields
|
||||
// NOTE: currently this only "blocks" bulk merges just by
|
||||
|
@ -165,7 +165,7 @@ public class MockRandomMergePolicy extends MergePolicy {
|
|||
if (LuceneTestCase.VERBOSE) {
|
||||
System.out.println("NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader);
|
||||
}
|
||||
return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(reader, r));
|
||||
return SlowCodecReaderWrapper.wrap(new MismatchedLeafReader(new MergeReaderWrapper(reader), r));
|
||||
} else {
|
||||
// otherwise, reader is unchanged
|
||||
return reader;
|
||||
|
|
195
solr/CHANGES.txt
195
solr/CHANGES.txt
|
@ -74,7 +74,7 @@ Carrot2 3.15.0
|
|||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.6
|
||||
Jetty 9.3.8.v20160314
|
||||
Jetty 9.3.14.v20161028
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
@ -86,6 +86,25 @@ Upgrade Notes
|
|||
consequence of this change is that you must be aware that some tuples will not have values if
|
||||
there were none in the original document.
|
||||
|
||||
* SOLR-8785: Metrics related classes in org.apache.solr.util.stats have been removed in favor of
|
||||
the dropwizard metrics library. Any custom plugins using these classes should be changed to use
|
||||
the equivalent classes from the metrics library.
|
||||
As part of this, the following changes were made to the output of Overseer Status API:
|
||||
* The "totalTime" metric has been removed because it is no longer supported
|
||||
* The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime"
|
||||
and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime"
|
||||
and so on for consistency with stats output in other parts of Solr.
|
||||
* The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been
|
||||
replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond"
|
||||
and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
|
||||
|
||||
* SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It
|
||||
might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the
|
||||
original Highlighter. That said, some options aren't supported yet, notably hl.fragsize and
|
||||
hl.requireFieldMatch=false. It will get more features in time, especially with your input. See HighlightParams.java
|
||||
for a listing of highlight parameters annotated with which highlighters use them.
|
||||
hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector.
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
|
||||
|
@ -123,6 +142,16 @@ New Features
|
|||
* SOLR-9324: Support Secure Impersonation / Proxy User for solr authentication
|
||||
(Gregory Chanan, Hrishikesh Gadre via yonik)
|
||||
|
||||
* SOLR-9721: javabin Tuple parser for streaming and other end points (noble)
|
||||
|
||||
* SOLR-9708: Added UnifiedSolrHighlighter, a highlighter adapter for Lucene's UnifiedHighlighter. The adapter is a
|
||||
derivative of the PostingsSolrHighlighter, supporting mostly the same parameters with some differences.
|
||||
Introduced "hl.method" parameter which can be set to original|fastVector|postings|unified to pick the highlighter at
|
||||
runtime without the need to modify solrconfig from the default configuration. hl.useFastVectorHighlighter is now
|
||||
considered deprecated in lieu of hl.method=fastVector. (Timothy Rodriguez, David Smiley)
|
||||
|
||||
* SOLR-9728: Ability to specify Key Store type in solr.in.sh file for SSL (Michael Suzuki, Kevin Risden)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
||||
|
@ -134,6 +163,16 @@ Optimizations
|
|||
* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
|
||||
comparator and only invalidate leafComparator. (John Call via yonik)
|
||||
|
||||
* SOLR-9786: FieldType has a new getSetQuery() method that can take a set of terms
|
||||
and create a more efficient query (such as TermsQuery). The solr query parser has been
|
||||
changed to use this method when appropriate. The parser also knows when it is being
|
||||
used to parse a filter and will create TermsQueries from large lists of normal terms
|
||||
or numbers, resulting in a query that will execute faster. This also acts to avoid
|
||||
BooleanQuery maximum clause limit. Query parsing itself has also been optimized,
|
||||
resulting in less produced garbage and 5-7% better performance.
|
||||
(yonik)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
|
||||
|
@ -172,7 +211,11 @@ Bug Fixes
|
|||
* SOLR-9729: JDBCStream improvements (Kevin Risden)
|
||||
|
||||
* SOLR-9626: new Admin UI now also highlights matched terms in the Analysis screen. (Alexandre Rafalovitch)
|
||||
|
||||
|
||||
* SOLR-9512: CloudSolrClient's cluster state cache can break direct updates to leaders (noble)
|
||||
|
||||
* SOLR-5260: Facet search on a docvalue field in a multi shard collection (Trym Møller, Erick Erickson)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -195,6 +238,13 @@ Other Changes
|
|||
|
||||
* SOLR-9609: Change hard-coded keysize from 512 to 1024 (Jeremy Martini via Erick Erickson)
|
||||
|
||||
* SOLR-8785: Use Dropwizard Metrics library for core metrics. The copied over code in
|
||||
org.apache.solr.util.stats has been removed. (Jeff Wartes, Kelvin Wong, Christine Poerschke, shalin)
|
||||
|
||||
* SOLR-9784: Refactor CloudSolrClient to eliminate direct dependency on ZK (noble)
|
||||
|
||||
* SOLR-9801: Upgrade jetty to 9.3.14.v20161028 (shalin)
|
||||
|
||||
================== 6.3.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
@ -5459,6 +5509,31 @@ Other Changes
|
|||
* SOLR-6391: Improve message for CREATECOLLECTION failure due to missing
|
||||
numShards (Anshum Gupta)
|
||||
|
||||
================== 4.9.1 ==================
|
||||
|
||||
Versions of Major Components
|
||||
---------------------
|
||||
Apache Tika 1.5 (with upgraded Apache POI 3.10.1)
|
||||
Carrot2 3.9.0
|
||||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.6
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
* SOLR-6503: Removed support for parsing netcdf files in Solr Cell because
|
||||
of license issues. If you need support for this format, download the parser
|
||||
JAR yourself (version 4.2) and add it to contrib/extraction/lib folder:
|
||||
http://www.unidata.ucar.edu/software/thredds/current/netcdf-java/
|
||||
(Uwe Schindler)
|
||||
|
||||
* SOLR-6388: Force upgrade of Apache POI dependency in Solr Cell to version
|
||||
3.10.1 to fix CVE-2014-3529 and CVE-2014-3574. (Uwe Schindler)
|
||||
|
||||
================== 4.9.0 ==================
|
||||
|
||||
Versions of Major Components
|
||||
|
@ -11846,6 +11921,122 @@ Documentation
|
|||
* SOLR-1792: Documented peculiar behavior of TestHarness.LocalRequestFactory
|
||||
(hossman)
|
||||
|
||||
================== Release 1.4.1 ==================
|
||||
Release Date: See http://lucene.apache.org/solr for the official release date.
|
||||
|
||||
Upgrading from Solr 1.4
|
||||
-----------------------
|
||||
|
||||
This is a bug fix release - no changes are required when upgrading from Solr 1.4.
|
||||
However, a reindex is needed for some of the analysis fixes to take effect.
|
||||
|
||||
Versions of Major Components
|
||||
----------------------------
|
||||
Apache Lucene 2.9.3
|
||||
Apache Tika 0.4
|
||||
Carrot2 3.1.0
|
||||
|
||||
Lucene Information
|
||||
----------------
|
||||
|
||||
Since Solr is built on top of Lucene, many people add customizations to Solr
|
||||
that are dependent on Lucene. Please see http://lucene.apache.org/java/2_9_3/,
|
||||
especially http://lucene.apache.org/java/2_9_3/changes/Changes.html for more
|
||||
information on the version of Lucene used in Solr.
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
* SOLR-1934: Upgrade to Apache Lucene 2.9.3 to obtain several bug
|
||||
fixes from the previous 2.9.1. See the Lucene 2.9.3 release notes
|
||||
for details. (hossman, Mark Miller)
|
||||
|
||||
* SOLR-1432: Make the new ValueSource.getValues(context,reader) delegate
|
||||
to the original ValueSource.getValues(reader) so custom sources
|
||||
will work. (yonik)
|
||||
|
||||
* SOLR-1572: FastLRUCache correctly implemented the LRU policy only
|
||||
for the first 2B accesses. (yonik)
|
||||
|
||||
* SOLR-1595: StreamingUpdateSolrServer used the platform default character
|
||||
set when streaming updates, rather than using UTF-8 as the HTTP headers
|
||||
indicated, leading to an encoding mismatch. (hossman, yonik)
|
||||
|
||||
* SOLR-1660: CapitalizationFilter crashes if you use the maxWordCountOption
|
||||
(Robert Muir via shalin)
|
||||
|
||||
* SOLR-1662: Added Javadocs in BufferedTokenStream and fixed incorrect cloning
|
||||
in TestBufferedTokenStream (Robert Muir, Uwe Schindler via shalin)
|
||||
|
||||
* SOLR-1711: SolrJ - StreamingUpdateSolrServer had a race condition that
|
||||
could halt the streaming of documents. The original patch to fix this
|
||||
(never officially released) introduced another hanging bug due to
|
||||
connections not being released. (Attila Babo, Erik Hetzner via yonik)
|
||||
|
||||
* SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers
|
||||
retrieved from ContentStreams are not closed in various places, resulting
|
||||
in file descriptor leaks.
|
||||
(Christoff Brill, Mark Miller)
|
||||
|
||||
* SOLR-1580: Solr Configuration ignores 'mergeFactor' parameter, always
|
||||
uses Lucene default. (Lance Norskog via Mark Miller)
|
||||
|
||||
* SOLR-1777: fieldTypes with sortMissingLast=true or sortMissingFirst=true can
|
||||
result in incorrectly sorted results. (yonik)
|
||||
|
||||
* SOLR-1797: fix ConcurrentModificationException and potential memory
|
||||
leaks in ResourceLoader. (yonik)
|
||||
|
||||
* SOLR-1798: Small memory leak (~100 bytes) in fastLRUCache for every
|
||||
commit. (yonik)
|
||||
|
||||
* SOLR-1522: Show proper message if <script> tag is missing for DIH
|
||||
ScriptTransformer (noble)
|
||||
|
||||
* SOLR-1538: Reordering of object allocations in ConcurrentLRUCache to eliminate
|
||||
(an extremely small) potential for deadlock.
|
||||
(gabriele renzi via hossman)
|
||||
|
||||
* SOLR-1558: QueryElevationComponent only works if the uniqueKey field is
|
||||
implemented using StrField. In previous versions of Solr no warning or
|
||||
error would be generated if you attempted to use QueryElevationComponent,
|
||||
it would just fail in unexpected ways. This has been changed so that it
|
||||
will fail with a clear error message on initialization. (hossman)
|
||||
|
||||
* SOLR-1563: Binary fields, including trie-based numeric fields, caused null
|
||||
pointer exceptions in the luke request handler. (yonik)
|
||||
|
||||
* SOLR-1579: Fixes to XML escaping in stats.jsp
|
||||
(David Bowen and hossman)
|
||||
|
||||
* SOLR-1582: copyField was ignored for BinaryField types (gsingers)
|
||||
|
||||
* SOLR-1596: A rollback operation followed by the shutdown of Solr
|
||||
or the close of a core resulted in a warning:
|
||||
"SEVERE: SolrIndexWriter was not closed prior to finalize()" although
|
||||
there were no other consequences. (yonik)
|
||||
|
||||
* SOLR-1651: Fixed Incorrect dataimport handler package name in SolrResourceLoader
|
||||
(Akshay Ukey via shalin)
|
||||
|
||||
* SOLR-1936: The JSON response format needed to escape unicode code point
|
||||
U+2028 - 'LINE SEPARATOR' (Robert Hofstra, yonik)
|
||||
|
||||
* SOLR-1852: Fix WordDelimiterFilterFactory bug where position increments
|
||||
were not being applied properly to subwords. (Peter Wolanin via Robert Muir)
|
||||
|
||||
* SOLR-1706: fixed WordDelimiterFilter for certain combinations of options
|
||||
where it would output incorrect tokens. (Robert Muir, Chris Male)
|
||||
|
||||
* SOLR-1948: PatternTokenizerFactory should use parent's args (koji)
|
||||
|
||||
* SOLR-1870: Indexing documents using the 'javabin' format no longer
|
||||
fails with a ClassCastException whenSolrInputDocuments contain field
|
||||
values which are Collections or other classes that implement
|
||||
Iterable. (noble, hossman)
|
||||
|
||||
* SOLR-1769 Solr 1.4 Replication - Repeater throwing NullPointerException (noble)
|
||||
|
||||
================== Release 1.4.0 ==================
|
||||
Release Date: See http://lucene.apache.org/solr for the official release date.
|
||||
|
||||
|
|
|
@ -158,22 +158,74 @@ SOLR_SSL_OPTS=""
|
|||
if [ -n "$SOLR_SSL_KEY_STORE" ]; then
|
||||
SOLR_JETTY_CONFIG+=("--module=https")
|
||||
SOLR_URL_SCHEME=https
|
||||
SOLR_SSL_OPTS=" -Dsolr.jetty.keystore=$SOLR_SSL_KEY_STORE \
|
||||
-Dsolr.jetty.keystore.password=$SOLR_SSL_KEY_STORE_PASSWORD \
|
||||
-Dsolr.jetty.truststore=$SOLR_SSL_TRUST_STORE \
|
||||
-Dsolr.jetty.truststore.password=$SOLR_SSL_TRUST_STORE_PASSWORD \
|
||||
-Dsolr.jetty.ssl.needClientAuth=$SOLR_SSL_NEED_CLIENT_AUTH \
|
||||
-Dsolr.jetty.ssl.wantClientAuth=$SOLR_SSL_WANT_CLIENT_AUTH"
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.keystore=$SOLR_SSL_KEY_STORE"
|
||||
if [ -n "$SOLR_SSL_KEY_STORE_PASSWORD" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.keystore.password=$SOLR_SSL_KEY_STORE_PASSWORD"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_KEY_STORE_TYPE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.keystore.type=$SOLR_SSL_KEY_STORE_TYPE"
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_TRUST_STORE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.truststore=$SOLR_SSL_TRUST_STORE"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_TRUST_STORE_PASSWORD" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.truststore.password=$SOLR_SSL_TRUST_STORE_PASSWORD"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_TRUST_STORE_TYPE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.truststore.type=$SOLR_SSL_TRUST_STORE_TYPE"
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_NEED_CLIENT_AUTH" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.ssl.needClientAuth=$SOLR_SSL_NEED_CLIENT_AUTH"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_WANT_CLIENT_AUTH" ]; then
|
||||
SOLR_SSL_OPTS+=" -Dsolr.jetty.ssl.wantClientAuth=$SOLR_SSL_WANT_CLIENT_AUTH"
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_CLIENT_KEY_STORE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_CLIENT_KEY_STORE \
|
||||
-Djavax.net.ssl.keyStorePassword=$SOLR_SSL_CLIENT_KEY_STORE_PASSWORD \
|
||||
-Djavax.net.ssl.trustStore=$SOLR_SSL_CLIENT_TRUST_STORE \
|
||||
-Djavax.net.ssl.trustStorePassword=$SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD"
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_CLIENT_KEY_STORE"
|
||||
|
||||
if [ -n "$SOLR_SSL_CLIENT_KEY_STORE_PASSWORD" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStorePassword=$SOLR_SSL_CLIENT_KEY_STORE_PASSWORD"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_CLIENT_KEY_STORE_TYPE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStoreType=$SOLR_SSL_CLIENT_KEY_STORE_TYPE"
|
||||
fi
|
||||
else
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_KEY_STORE \
|
||||
-Djavax.net.ssl.keyStorePassword=$SOLR_SSL_KEY_STORE_PASSWORD \
|
||||
-Djavax.net.ssl.trustStore=$SOLR_SSL_TRUST_STORE \
|
||||
-Djavax.net.ssl.trustStorePassword=$SOLR_SSL_TRUST_STORE_PASSWORD"
|
||||
if [ -n "$SOLR_SSL_KEY_STORE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStore=$SOLR_SSL_KEY_STORE"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_KEY_STORE_PASSWORD" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStorePassword=$SOLR_SSL_KEY_STORE_PASSWORD"
|
||||
fi
|
||||
if [ -n "$SOLR_SSL_KEY_STORE_TYPE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.keyStoreType=$SOLR_SSL_KEYSTORE_TYPE"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_CLIENT_TRUST_STORE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStore=$SOLR_SSL_CLIENT_TRUST_STORE"
|
||||
|
||||
if [ -n "$SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStorePassword=$SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD"
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_CLIENT_TRUST_STORE_TYPE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStoreType=$SOLR_SSL_CLIENT_TRUST_STORE_TYPE"
|
||||
fi
|
||||
else
|
||||
if [ -n "$SOLR_SSL_TRUST_STORE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStore=$SOLR_SSL_TRUST_STORE"
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_TRUST_STORE_PASSWORD" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStorePassword=$SOLR_SSL_TRUST_STORE_PASSWORD"
|
||||
fi
|
||||
|
||||
if [ -n "$SOLR_SSL_TRUST_STORE_TYPE" ]; then
|
||||
SOLR_SSL_OPTS+=" -Djavax.net.ssl.trustStoreType=$SOLR_SSL_TRUST_STORE_TYPE"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
SOLR_JETTY_CONFIG+=("--module=http")
|
||||
|
|
|
@@ -45,12 +45,72 @@ set "SOLR_SSL_OPTS= "
IF DEFINED SOLR_SSL_KEY_STORE (
set "SOLR_JETTY_CONFIG=--module=https"
set SOLR_URL_SCHEME=https
set "SCRIPT_ERROR=Solr server directory %SOLR_SERVER_DIR% not found!"
set "SOLR_SSL_OPTS=-Dsolr.jetty.keystore=%SOLR_SSL_KEY_STORE% -Dsolr.jetty.keystore.password=%SOLR_SSL_KEY_STORE_PASSWORD% -Dsolr.jetty.truststore=%SOLR_SSL_TRUST_STORE% -Dsolr.jetty.truststore.password=%SOLR_SSL_TRUST_STORE_PASSWORD% -Dsolr.jetty.ssl.needClientAuth=%SOLR_SSL_NEED_CLIENT_AUTH% -Dsolr.jetty.ssl.wantClientAuth=%SOLR_SSL_WANT_CLIENT_AUTH%"
IF DEFINED SOLR_SSL_CLIENT_KEY_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_CLIENT_KEY_STORE% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_CLIENT_KEY_STORE_PASSWORD% -Djavax.net.ssl.trustStore=%SOLR_SSL_CLIENT_TRUST_STORE% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD%"
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.keystore=%SOLR_SSL_KEY_STORE%"
IF DEFINED SOLR_SSL_KEY_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.keystore.password=%SOLR_SSL_KEY_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_KEY_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.keystore.type=%SOLR_SSL_KEY_STORE_TYPE%"
)

IF DEFINED SOLR_SSL_TRUST_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.truststore=%SOLR_SSL_TRUST_STORE%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.truststore.password=%SOLR_SSL_TRUST_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.truststore.type=%SOLR_SSL_TRUST_STORE_TYPE%"
)

IF DEFINED SOLR_SSL_NEED_CLIENT_AUTH (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.ssl.needClientAuth=%SOLR_SSL_NEED_CLIENT_AUTH%"
)
IF DEFINED SOLR_SSL_WANT_CLIENT_AUTH (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Dsolr.jetty.ssl.wantClientAuth=%SOLR_SSL_WANT_CLIENT_AUTH%"
)

IF DEFINED SOLR_SSL_CLIENT_KEY_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_CLIENT_KEY_STORE%"

IF DEFINED SOLR_SSL_CLIENT_KEY_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_CLIENT_KEY_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_CLIENT_KEY_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStoreType=%SOLR_SSL_CLIENT_KEY_STORE_TYPE%"
)
) ELSE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_KEY_STORE% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_KEY_STORE_PASSWORD% -Djavax.net.ssl.trustStore=%SOLR_SSL_TRUST_STORE% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_TRUST_STORE_PASSWORD%"
IF DEFINED SOLR_SSL_KEY_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStore=%SOLR_SSL_KEY_STORE%"
)
IF DEFINED SOLR_SSL_KEY_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStorePassword=%SOLR_SSL_KEY_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_KEY_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.keyStoreType=%SOLR_SSL_KEY_STORE_TYPE%"
)
)

IF DEFINED SOLR_SSL_CLIENT_TRUST_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStore=%SOLR_SSL_CLIENT_TRUST_STORE%"

IF DEFINED SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD%"
)

IF DEFINED SOLR_SSL_CLIENT_TRUST_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStoreType=%SOLR_SSL_CLIENT_TRUST_STORE_TYPE%"
)
) ELSE (
IF DEFINED SOLR_SSL_TRUST_STORE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStore=%SOLR_SSL_TRUST_STORE%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_PASSWORD (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStorePassword=%SOLR_SSL_TRUST_STORE_PASSWORD%"
)
IF DEFINED SOLR_SSL_TRUST_STORE_TYPE (
set "SOLR_SSL_OPTS=%SOLR_SSL_OPTS% -Djavax.net.ssl.trustStoreType=%SOLR_SSL_TRUST_STORE_TYPE%"
)
)
) ELSE (
set SOLR_SSL_OPTS=
@@ -1612,4 +1672,4 @@ REM Safe echo which does not mess with () in strings
set "eout=%1"
set eout=%eout:"=%
echo !eout!
GOTO :eof
GOTO :eof
@@ -86,8 +86,10 @@ REM Uncomment to set SSL-related system properties
REM Be sure to update the paths to the correct keystore for your environment
REM set SOLR_SSL_KEY_STORE=etc/solr-ssl.keystore.jks
REM set SOLR_SSL_KEY_STORE_PASSWORD=secret
REM set SOLR_SSL_KEY_STORE_TYPE=JKS
REM set SOLR_SSL_TRUST_STORE=etc/solr-ssl.keystore.jks
REM set SOLR_SSL_TRUST_STORE_PASSWORD=secret
REM set SOLR_SSL_TRUST_STORE_TYPE=JKS
REM set SOLR_SSL_NEED_CLIENT_AUTH=false
REM set SOLR_SSL_WANT_CLIENT_AUTH=false

@@ -95,8 +97,10 @@ REM Uncomment if you want to override previously defined SSL values for HTTP cli
REM otherwise keep them commented and the above values will automatically be set for HTTP clients
REM set SOLR_SSL_CLIENT_KEY_STORE=
REM set SOLR_SSL_CLIENT_KEY_STORE_PASSWORD=
REM set SOLR_SSL_CLIENT_KEY_STORE_TYPE=
REM set SOLR_SSL_CLIENT_TRUST_STORE=
REM setSOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
REM set SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
REM set SOLR_SSL_CLIENT_TRUST_STORE_TYPE=

REM Settings for authentication
REM set SOLR_AUTHENTICATION_CLIENT_BUILDER=
@@ -98,8 +98,10 @@
# Be sure to update the paths to the correct keystore for your environment
#SOLR_SSL_KEY_STORE=/home/shalin/work/oss/shalin-lusolr/solr/server/etc/solr-ssl.keystore.jks
#SOLR_SSL_KEY_STORE_PASSWORD=secret
#SOLR_SSL_KEY_STORE_TYPE=JKS
#SOLR_SSL_TRUST_STORE=/home/shalin/work/oss/shalin-lusolr/solr/server/etc/solr-ssl.keystore.jks
#SOLR_SSL_TRUST_STORE_PASSWORD=secret
#SOLR_SSL_TRUST_STORE_TYPE=JKS
#SOLR_SSL_NEED_CLIENT_AUTH=false
#SOLR_SSL_WANT_CLIENT_AUTH=false

@@ -107,8 +109,10 @@
# otherwise keep them commented and the above values will automatically be set for HTTP clients
#SOLR_SSL_CLIENT_KEY_STORE=
#SOLR_SSL_CLIENT_KEY_STORE_PASSWORD=
#SOLR_SSL_CLIENT_KEY_STORE_TYPE=
#SOLR_SSL_CLIENT_TRUST_STORE=
#SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD=
#SOLR_SSL_CLIENT_TRUST_STORE_TYPE=

# Settings for authentication
#SOLR_AUTHENTICATION_CLIENT_BUILDER=
@@ -18,11 +18,10 @@ package org.apache.solr.analytics.plugin;

import java.util.concurrent.atomic.AtomicLong;

import com.codahale.metrics.Timer;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.util.stats.Snapshot;
import org.apache.solr.util.stats.Timer;
import org.apache.solr.util.stats.TimerContext;
import org.apache.solr.util.stats.TimerUtils;

public class AnalyticsStatisticsCollector {
private final AtomicLong numRequests;
@@ -35,7 +34,7 @@ public class AnalyticsStatisticsCollector {
private final AtomicLong numQueries;
private final Timer requestTimes;

public TimerContext currentTimer;
public Timer.Context currentTimer;

public AnalyticsStatisticsCollector() {
numRequests = new AtomicLong();
@@ -88,7 +87,6 @@ public class AnalyticsStatisticsCollector {

public NamedList<Object> getStatistics() {
NamedList<Object> lst = new SimpleOrderedMap<>();
Snapshot snapshot = requestTimes.getSnapshot();
lst.add("requests", numRequests.longValue());
lst.add("analyticsRequests", numAnalyticsRequests.longValue());
lst.add("statsRequests", numStatsRequests.longValue());
@@ -97,17 +95,7 @@ public class AnalyticsStatisticsCollector {
lst.add("rangeFacets", numRangeFacets.longValue());
lst.add("queryFacets", numQueryFacets.longValue());
lst.add("queriesInQueryFacets", numQueries.longValue());
lst.add("totalTime", requestTimes.getSum());
lst.add("avgRequestsPerSecond", requestTimes.getMeanRate());
lst.add("5minRateReqsPerSecond", requestTimes.getFiveMinuteRate());
lst.add("15minRateReqsPerSecond", requestTimes.getFifteenMinuteRate());
lst.add("avgTimePerRequest", requestTimes.getMean());
lst.add("medianRequestTime", snapshot.getMedian());
lst.add("75thPcRequestTime", snapshot.get75thPercentile());
lst.add("95thPcRequestTime", snapshot.get95thPercentile());
lst.add("99thPcRequestTime", snapshot.get99thPercentile());
lst.add("999thPcRequestTime", snapshot.get999thPercentile());
TimerUtils.addMetrics(lst, requestTimes);
return lst;
}

}
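The per-metric boilerplate above is collapsed into a single TimerUtils.addMetrics(lst, requestTimes) call backed by the Dropwizard (Codahale) Timer. The helper itself is not part of this hunk; the following is only a minimal sketch of what such a utility could look like, assuming it emits roughly the same keys the removed inline code produced (the class name, method body, and nanosecond-to-millisecond conversion are assumptions, not the committed code):

import java.util.concurrent.TimeUnit;

import com.codahale.metrics.Snapshot;
import com.codahale.metrics.Timer;
import org.apache.solr.common.util.NamedList;

public class TimerUtilsSketch {
  /** Copies rate and latency metrics from a Codahale Timer into a Solr NamedList. */
  public static void addMetrics(NamedList<Object> lst, Timer timer) {
    Snapshot snapshot = timer.getSnapshot();
    lst.add("avgRequestsPerSecond", timer.getMeanRate());
    lst.add("5minRateReqsPerSecond", timer.getFiveMinuteRate());
    lst.add("15minRateReqsPerSecond", timer.getFifteenMinuteRate());
    // Snapshot values are reported in nanoseconds; convert to milliseconds.
    lst.add("avgTimePerRequest", nsToMs(snapshot.getMean()));
    lst.add("medianRequestTime", nsToMs(snapshot.getMedian()));
    lst.add("75thPcRequestTime", nsToMs(snapshot.get75thPercentile()));
    lst.add("95thPcRequestTime", nsToMs(snapshot.get95thPercentile()));
    lst.add("99thPcRequestTime", nsToMs(snapshot.get99thPercentile()));
    lst.add("999thPcRequestTime", nsToMs(snapshot.get999thPercentile()));
  }

  private static double nsToMs(double ns) {
    return ns / TimeUnit.MILLISECONDS.toNanos(1);
  }
}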
@@ -34,8 +34,8 @@

<dependency org="org.kitesdk" name="kite-morphlines-avro" rev="${/org.kitesdk/kite-morphlines-avro}" conf="compile" />

<dependency org="com.codahale.metrics" name="metrics-core" rev="${/com.codahale.metrics/metrics-core}" conf="compile" />
<dependency org="com.codahale.metrics" name="metrics-healthchecks" rev="${/com.codahale.metrics/metrics-healthchecks}" conf="compile" />
<dependency org="io.dropwizard.metrics" name="metrics-core" rev="${/io.dropwizard.metrics/metrics-core}" conf="compile" />
<dependency org="io.dropwizard.metrics" name="metrics-healthchecks" rev="${/io.dropwizard.metrics/metrics-healthchecks}" conf="compile" />
<dependency org="com.typesafe" name="config" rev="${/com.typesafe/config}" conf="compile" />

<!-- Test Dependencies -->
@ -16,22 +16,12 @@
|
|||
*/
|
||||
package org.apache.solr.uima.processor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.UpdateRequestHandler;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
|
||||
|
@ -47,6 +37,11 @@ import org.junit.Test;
|
|||
@Slow
|
||||
public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
||||
|
||||
public static final String UIMA_CHAIN = "uima";
|
||||
public static final String UIMA_MULTI_MAP_CHAIN = "uima-multi-map";
|
||||
public static final String UIMA_IGNORE_ERRORS_CHAIN = "uima-ignoreErrors";
|
||||
public static final String UIMA_NOT_IGNORE_ERRORS_CHAIN = "uima-not-ignoreErrors";
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml", "schema.xml", getFile("uima/solr").getAbsolutePath());
|
||||
|
@ -63,7 +58,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testProcessorConfiguration() {
|
||||
SolrCore core = h.getCore();
|
||||
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima");
|
||||
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(UIMA_CHAIN);
|
||||
assertNotNull(chained);
|
||||
UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory)chained.getProcessors().get(0);
|
||||
assertNotNull(factory);
|
||||
|
@ -74,7 +69,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testMultiMap() {
|
||||
SolrCore core = h.getCore();
|
||||
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima-multi-map");
|
||||
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(UIMA_MULTI_MAP_CHAIN);
|
||||
assertNotNull(chained);
|
||||
UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory)chained.getProcessors().get(0);
|
||||
assertNotNull(factory);
|
||||
|
@ -90,7 +85,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testProcessing() throws Exception {
|
||||
addDoc("uima", adoc(
|
||||
addDoc(adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
|
@ -98,7 +93,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
|
||||
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
|
||||
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
|
||||
+ " Last Lucene European Conference has been held in Prague."));
|
||||
+ " Last Lucene European Conference has been held in Prague."), UIMA_CHAIN);
|
||||
assertU(commit());
|
||||
assertQ(req("sentence:*"), "//*[@numFound='1']");
|
||||
assertQ(req("sentiment:*"), "//*[@numFound='0']");
|
||||
|
@ -108,16 +103,16 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testTwoUpdates() throws Exception {
|
||||
|
||||
addDoc("uima", adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
|
||||
addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
|
||||
+ "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern "
|
||||
+ "Hemisphere!"));
|
||||
+ "Hemisphere!"), UIMA_CHAIN);
|
||||
assertU(commit());
|
||||
assertQ(req("sentence:*"), "//*[@numFound='1']");
|
||||
|
||||
addDoc("uima", adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
|
||||
addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
|
||||
+ "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be"
|
||||
+ " attendee-driven, facilitated by members of the Apache community and will "
|
||||
+ "focus on the Apache..."));
|
||||
+ "focus on the Apache..."), UIMA_CHAIN);
|
||||
assertU(commit());
|
||||
assertQ(req("sentence:*"), "//*[@numFound='2']");
|
||||
|
||||
|
@ -129,7 +124,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
public void testErrorHandling() throws Exception {
|
||||
|
||||
try{
|
||||
addDoc("uima-not-ignoreErrors", adoc(
|
||||
addDoc(adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
|
@ -137,14 +132,14 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
|
||||
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
|
||||
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
|
||||
+ " Last Lucene European Conference has been held in Prague."));
|
||||
+ " Last Lucene European Conference has been held in Prague."), UIMA_NOT_IGNORE_ERRORS_CHAIN);
|
||||
fail("exception shouldn't be ignored");
|
||||
}
|
||||
catch(RuntimeException expected){}
|
||||
assertU(commit());
|
||||
assertQ(req("*:*"), "//*[@numFound='0']");
|
||||
|
||||
addDoc("uima-ignoreErrors", adoc(
|
||||
addDoc(adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
|
@ -152,16 +147,16 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
|
||||
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
|
||||
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
|
||||
+ " Last Lucene European Conference has been held in Prague."));
|
||||
+ " Last Lucene European Conference has been held in Prague."), UIMA_IGNORE_ERRORS_CHAIN);
|
||||
assertU(commit());
|
||||
assertQ(req("*:*"), "//*[@numFound='1']");
|
||||
|
||||
try{
|
||||
addDoc("uima-not-ignoreErrors", adoc(
|
||||
addDoc(adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
"SpellCheckComponent got improvement related to recent Lucene changes."));
|
||||
"SpellCheckComponent got improvement related to recent Lucene changes."), UIMA_NOT_IGNORE_ERRORS_CHAIN);
|
||||
fail("exception shouldn't be ignored");
|
||||
}
|
||||
catch(StringIndexOutOfBoundsException e){ // SOLR-2579
|
||||
|
@ -170,11 +165,11 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
catch(SolrException expected){}
|
||||
|
||||
try{
|
||||
addDoc("uima-ignoreErrors", adoc(
|
||||
addDoc(adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
"SpellCheckComponent got improvement related to recent Lucene changes."));
|
||||
"SpellCheckComponent got improvement related to recent Lucene changes."), UIMA_IGNORE_ERRORS_CHAIN);
|
||||
}
|
||||
catch(StringIndexOutOfBoundsException e){ // SOLR-2579
|
||||
fail("exception shouldn't be raised");
|
||||
|
@ -188,19 +183,4 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
private void addDoc(String chain, String doc) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<>();
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
|
||||
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) {
|
||||
};
|
||||
|
||||
UpdateRequestHandler handler = new UpdateRequestHandler();
|
||||
handler.init(null);
|
||||
ArrayList<ContentStream> streams = new ArrayList<>(2);
|
||||
streams.add(new ContentStreamBase.StringStream(doc));
|
||||
req.setContentStreams(streams);
|
||||
handler.handleRequestBody(req, new SolrQueryResponse());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@@ -50,6 +50,7 @@
<dependency org="log4j" name="log4j" rev="${/log4j/log4j}" conf="compile"/>
<dependency org="org.slf4j" name="slf4j-log4j12" rev="${/org.slf4j/slf4j-log4j12}" conf="compile"/>
<dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="compile"/>
<dependency org="io.dropwizard.metrics" name="metrics-core" rev="${/io.dropwizard.metrics/metrics-core}" conf="compile" />

<dependency org="org.easymock" name="easymock" rev="${/org.easymock/easymock}" conf="test"/>
<dependency org="cglib" name="cglib-nodep" rev="${/cglib/cglib-nodep}" conf="test"/>
@@ -27,6 +27,7 @@ import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Predicate;

import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.solr.common.SolrException;
@@ -34,7 +35,6 @@ import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCmdExecutor;
import org.apache.solr.common.util.Pair;
import org.apache.solr.util.stats.TimerContext;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
@@ -119,7 +119,7 @@ public class DistributedQueue {
* @return data at the first element of the queue, or null.
*/
public byte[] peek() throws KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_peek");
Timer.Context time = stats.time(dir + "_peek");
try {
return firstElement();
} finally {
@@ -147,7 +147,7 @@ public class DistributedQueue {
*/
public byte[] peek(long wait) throws KeeperException, InterruptedException {
Preconditions.checkArgument(wait > 0);
TimerContext time;
Timer.Context time;
if (wait == Long.MAX_VALUE) {
time = stats.time(dir + "_peek_wait_forever");
} else {
@@ -177,7 +177,7 @@ public class DistributedQueue {
* @return Head of the queue or null.
*/
public byte[] poll() throws KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_poll");
Timer.Context time = stats.time(dir + "_poll");
try {
return removeFirst();
} finally {
@@ -191,7 +191,7 @@ public class DistributedQueue {
* @return The former head of the queue
*/
public byte[] remove() throws NoSuchElementException, KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_remove");
Timer.Context time = stats.time(dir + "_remove");
try {
byte[] result = removeFirst();
if (result == null) {
@@ -210,7 +210,7 @@ public class DistributedQueue {
*/
public byte[] take() throws KeeperException, InterruptedException {
// Same as for element. Should refactor this.
TimerContext timer = stats.time(dir + "_take");
Timer.Context timer = stats.time(dir + "_take");
updateLock.lockInterruptibly();
try {
while (true) {
@@ -234,7 +234,7 @@ public class DistributedQueue {
* element to become visible.
*/
public void offer(byte[] data) throws KeeperException, InterruptedException {
TimerContext time = stats.time(dir + "_offer");
Timer.Context time = stats.time(dir + "_offer");
try {
while (true) {
try {
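Each of the hunks above swaps the old org.apache.solr.util.stats.TimerContext for Dropwizard's com.codahale.metrics.Timer.Context while keeping the same start/stop pattern around the queue operation. For reference, a minimal standalone sketch of that idiom (the timed operation below is a placeholder, not code from this patch):

import com.codahale.metrics.Timer;

public class TimerContextExample {
  private final Timer queueOpTimer = new Timer();

  public void timedOperation() throws InterruptedException {
    Timer.Context context = queueOpTimer.time(); // start timing
    try {
      Thread.sleep(10); // placeholder for the real queue operation
    } finally {
      context.stop(); // records the elapsed time in the timer's histogram
    }
  }
}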
@ -26,9 +26,9 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
import org.apache.solr.cloud.overseer.ClusterStateMutator;
|
||||
import org.apache.solr.cloud.overseer.CollectionMutator;
|
||||
|
@ -49,9 +49,6 @@ import org.apache.solr.core.CloudConfig;
|
|||
import org.apache.solr.handler.admin.CollectionsHandler;
|
||||
import org.apache.solr.handler.component.ShardHandler;
|
||||
import org.apache.solr.update.UpdateShardHandler;
|
||||
import org.apache.solr.util.stats.Clock;
|
||||
import org.apache.solr.util.stats.Timer;
|
||||
import org.apache.solr.util.stats.TimerContext;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -254,7 +251,7 @@ public class Overseer implements Closeable {
|
|||
private ClusterState processQueueItem(ZkNodeProps message, ClusterState clusterState, ZkStateWriter zkStateWriter, boolean enableBatching, ZkStateWriter.ZkWriteCallback callback) throws Exception {
|
||||
final String operation = message.getStr(QUEUE_OPERATION);
|
||||
List<ZkWriteCommand> zkWriteCommands = null;
|
||||
final TimerContext timerContext = stats.time(operation);
|
||||
final Timer.Context timerContext = stats.time(operation);
|
||||
try {
|
||||
zkWriteCommands = processMessage(clusterState, message, operation);
|
||||
stats.success(operation);
|
||||
|
@ -392,7 +389,7 @@ public class Overseer implements Closeable {
|
|||
}
|
||||
|
||||
private LeaderStatus amILeader() {
|
||||
TimerContext timerContext = stats.time("am_i_leader");
|
||||
Timer.Context timerContext = stats.time("am_i_leader");
|
||||
boolean success = true;
|
||||
try {
|
||||
ZkNodeProps props = ZkNodeProps.load(zkClient.getData(
|
||||
|
@ -795,7 +792,7 @@ public class Overseer implements Closeable {
|
|||
stat.errors.incrementAndGet();
|
||||
}
|
||||
|
||||
public TimerContext time(String operation) {
|
||||
public Timer.Context time(String operation) {
|
||||
String op = operation.toLowerCase(Locale.ROOT);
|
||||
Stat stat = stats.get(op);
|
||||
if (stat == null) {
|
||||
|
@ -853,7 +850,7 @@ public class Overseer implements Closeable {
|
|||
public Stat() {
|
||||
this.success = new AtomicInteger();
|
||||
this.errors = new AtomicInteger();
|
||||
this.requestTime = new Timer(TimeUnit.MILLISECONDS, TimeUnit.MINUTES, Clock.defaultClock());
|
||||
this.requestTime = new Timer();
|
||||
this.failureDetails = new LinkedList<>();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,14 +23,14 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.util.stats.Snapshot;
|
||||
import org.apache.solr.util.stats.Timer;
|
||||
import org.apache.solr.util.stats.TimerUtils;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -100,17 +100,7 @@ public class OverseerStatusCmd implements Cmd {
|
|||
lst.add("errors", errors);
|
||||
}
|
||||
Timer timer = entry.getValue().requestTime;
|
||||
Snapshot snapshot = timer.getSnapshot();
|
||||
lst.add("totalTime", timer.getSum());
|
||||
lst.add("avgRequestsPerMinute", timer.getMeanRate());
|
||||
lst.add("5minRateRequestsPerMinute", timer.getFiveMinuteRate());
|
||||
lst.add("15minRateRequestsPerMinute", timer.getFifteenMinuteRate());
|
||||
lst.add("avgTimePerRequest", timer.getMean());
|
||||
lst.add("medianRequestTime", snapshot.getMedian());
|
||||
lst.add("75thPctlRequestTime", snapshot.get75thPercentile());
|
||||
lst.add("95thPctlRequestTime", snapshot.get95thPercentile());
|
||||
lst.add("99thPctlRequestTime", snapshot.get99thPercentile());
|
||||
lst.add("999thPctlRequestTime", snapshot.get999thPercentile());
|
||||
TimerUtils.addMetrics(lst, timer);
|
||||
}
|
||||
results.add("overseer_operations", overseerStats);
|
||||
results.add("collection_operations", collectionStats);
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.util.concurrent.SynchronousQueue;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
|
@ -43,7 +44,6 @@ import org.apache.solr.common.util.ExecutorUtil;
|
|||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.solr.util.stats.TimerContext;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -380,7 +380,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
|
|||
|
||||
protected LeaderStatus amILeader() {
|
||||
String statsName = "collection_am_i_leader";
|
||||
TimerContext timerContext = stats.time(statsName);
|
||||
Timer.Context timerContext = stats.time(statsName);
|
||||
boolean success = true;
|
||||
try {
|
||||
ZkNodeProps props = ZkNodeProps.load(zkStateReader.getZkClient().getData(
|
||||
|
@ -451,7 +451,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
|
|||
|
||||
public void run() {
|
||||
String statsName = messageHandler.getTimerName(operation);
|
||||
final TimerContext timerContext = stats.time(statsName);
|
||||
final Timer.Context timerContext = stats.time(statsName);
|
||||
|
||||
boolean success = false;
|
||||
final String asyncId = message.getStr(ASYNC);
|
||||
|
|
|
@ -22,10 +22,10 @@ import java.util.List;
|
|||
import java.util.TreeSet;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.util.Pair;
|
||||
import org.apache.solr.util.stats.TimerContext;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.WatchedEvent;
|
||||
|
@ -85,7 +85,7 @@ public class OverseerTaskQueue extends DistributedQueue {
|
|||
*/
|
||||
public void remove(QueueEvent event) throws KeeperException,
|
||||
InterruptedException {
|
||||
TimerContext time = stats.time(dir + "_remove_event");
|
||||
Timer.Context time = stats.time(dir + "_remove_event");
|
||||
try {
|
||||
String path = event.getId();
|
||||
String responsePath = dir + "/" + response_prefix
|
||||
|
@ -181,7 +181,7 @@ public class OverseerTaskQueue extends DistributedQueue {
|
|||
*/
|
||||
public QueueEvent offer(byte[] data, long timeout) throws KeeperException,
|
||||
InterruptedException {
|
||||
TimerContext time = stats.time(dir + "_offer");
|
||||
Timer.Context time = stats.time(dir + "_offer");
|
||||
try {
|
||||
// Create and watch the response node before creating the request node;
|
||||
// otherwise we may miss the response.
|
||||
|
@ -227,7 +227,7 @@ public class OverseerTaskQueue extends DistributedQueue {
|
|||
ArrayList<QueueEvent> topN = new ArrayList<>();
|
||||
|
||||
LOG.debug("Peeking for top {} elements. ExcludeSet: {}", n, excludeSet);
|
||||
TimerContext time;
|
||||
Timer.Context time;
|
||||
if (waitMillis == Long.MAX_VALUE) time = stats.time(dir + "_peekTopN_wait_forever");
|
||||
else time = stats.time(dir + "_peekTopN_wait" + waitMillis);
|
||||
|
||||
|
|
|
@ -21,12 +21,12 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.util.stats.TimerContext;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
|
@ -210,7 +210,7 @@ public class ZkStateWriter {
|
|||
throw new IllegalStateException("ZkStateWriter has seen a tragic error, this instance can no longer be used");
|
||||
}
|
||||
if (!hasPendingUpdates()) return clusterState;
|
||||
TimerContext timerContext = stats.time("update_state");
|
||||
Timer.Context timerContext = stats.time("update_state");
|
||||
boolean success = false;
|
||||
try {
|
||||
if (!updates.isEmpty()) {
|
||||
|
|
|
@@ -51,10 +51,13 @@ import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.BinaryResponseWriter;
import org.apache.solr.response.JSONResponseWriter;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType;
@@ -125,8 +128,14 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
}

public void write(OutputStream os) throws IOException {
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
QueryResponseWriter rw = req.getCore().getResponseWriters().get(wt);
if (rw instanceof BinaryResponseWriter) {
//todo add support for other writers after testing
writer = new JavaBinCodec(os, null);
} else {
respWriter = new OutputStreamWriter(os, StandardCharsets.UTF_8);
writer = JSONResponseWriter.getPushWriter(respWriter, req, res);
}
Exception exception = res.getException();
if (exception != null) {
if (!(exception instanceof IgnoreException)) {
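With this change the export stream is written with JavaBinCodec whenever the requested response writer is a BinaryResponseWriter, instead of always going through the JSON push writer. As a rough illustration only, a client could decode such a stream with the same codec; the endpoint, query parameters, and HTTP plumbing below are placeholders and assumptions, not part of this patch:

import java.io.InputStream;
import java.net.URL;

import org.apache.solr.common.util.JavaBinCodec;

public class JavaBinExportClientSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical export request asking for the binary (javabin) response format.
    URL url = new URL("http://localhost:8983/solr/collection1/export?q=*:*&fl=id&sort=id+asc&wt=javabin");
    try (InputStream in = url.openStream()) {
      Object response = new JavaBinCodec().unmarshal(in); // decode the JavaBin stream
      System.out.println(response);
    }
  }
}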
@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandles;
|
|||
import java.net.URL;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -33,9 +34,7 @@ import org.apache.solr.request.SolrRequestHandler;
|
|||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.apache.solr.util.stats.Snapshot;
|
||||
import org.apache.solr.util.stats.Timer;
|
||||
import org.apache.solr.util.stats.TimerContext;
|
||||
import org.apache.solr.util.stats.TimerUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -144,7 +143,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
@Override
|
||||
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||
numRequests.increment();
|
||||
TimerContext timer = requestTimes.time();
|
||||
Timer.Context timer = requestTimes.time();
|
||||
try {
|
||||
if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM));
|
||||
SolrPluginUtils.setDefaults(this, req, defaults, appends, invariants);
|
||||
|
@ -268,26 +267,16 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
|
|||
@Override
|
||||
public NamedList<Object> getStatistics() {
|
||||
NamedList<Object> lst = new SimpleOrderedMap<>();
|
||||
Snapshot snapshot = requestTimes.getSnapshot();
|
||||
lst.add("handlerStart",handlerStart);
|
||||
lst.add("requests", numRequests.longValue());
|
||||
lst.add("errors", numServerErrors.longValue() + numClientErrors.longValue());
|
||||
lst.add("serverErrors", numServerErrors.longValue());
|
||||
lst.add("clientErrors", numClientErrors.longValue());
|
||||
lst.add("timeouts", numTimeouts.longValue());
|
||||
lst.add("totalTime", requestTimes.getSum());
|
||||
lst.add("avgRequestsPerSecond", requestTimes.getMeanRate());
|
||||
lst.add("5minRateReqsPerSecond", requestTimes.getFiveMinuteRate());
|
||||
lst.add("15minRateReqsPerSecond", requestTimes.getFifteenMinuteRate());
|
||||
lst.add("avgTimePerRequest", requestTimes.getMean());
|
||||
lst.add("medianRequestTime", snapshot.getMedian());
|
||||
lst.add("75thPcRequestTime", snapshot.get75thPercentile());
|
||||
lst.add("95thPcRequestTime", snapshot.get95thPercentile());
|
||||
lst.add("99thPcRequestTime", snapshot.get99thPercentile());
|
||||
lst.add("999thPcRequestTime", snapshot.get999thPercentile());
|
||||
TimerUtils.addMetrics(lst, requestTimes);
|
||||
return lst;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -16,6 +16,14 @@
|
|||
*/
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -29,6 +37,7 @@ import org.apache.solr.core.SolrCore;
|
|||
import org.apache.solr.highlight.DefaultSolrHighlighter;
|
||||
import org.apache.solr.highlight.PostingsSolrHighlighter;
|
||||
import org.apache.solr.highlight.SolrHighlighter;
|
||||
import org.apache.solr.highlight.UnifiedSolrHighlighter;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QParserPlugin;
|
||||
|
@ -38,9 +47,7 @@ import org.apache.solr.util.SolrPluginUtils;
|
|||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import static java.util.stream.Collectors.toMap;
|
||||
|
||||
/**
|
||||
* TODO!
|
||||
|
@ -50,13 +57,50 @@ import java.util.List;
|
|||
*/
|
||||
public class HighlightComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware
|
||||
{
|
||||
public static final String COMPONENT_NAME = "highlight";
|
||||
private PluginInfo info = PluginInfo.EMPTY_INFO;
|
||||
private SolrHighlighter highlighter;
|
||||
public enum HighlightMethod {
|
||||
UNIFIED("unified"),
|
||||
FAST_VECTOR("fastVector"),
|
||||
POSTINGS("postings"),
|
||||
ORIGINAL("original");
|
||||
|
||||
private static final Map<String, HighlightMethod> METHODS = Collections.unmodifiableMap(Stream.of(values())
|
||||
.collect(toMap(HighlightMethod::getMethodName, Function.identity())));
|
||||
|
||||
private final String methodName;
|
||||
|
||||
HighlightMethod(String method) {
|
||||
this.methodName = method;
|
||||
}
|
||||
|
||||
public String getMethodName() {
|
||||
return methodName;
|
||||
}
|
||||
|
||||
public static HighlightMethod parse(String method) {
|
||||
return METHODS.get(method);
|
||||
}
|
||||
}
|
||||
|
||||
public static final String COMPONENT_NAME = "highlight";
|
||||
|
||||
private PluginInfo info = PluginInfo.EMPTY_INFO;
|
||||
|
||||
@Deprecated // DWS: in 7.0 lets restructure the abstractions/relationships
|
||||
private SolrHighlighter solrConfigHighlighter;
|
||||
|
||||
/**
|
||||
* @deprecated instead depend on {@link #process(ResponseBuilder)} to choose the highlighter based on
|
||||
* {@link HighlightParams#METHOD}
|
||||
*/
|
||||
@Deprecated
|
||||
public static SolrHighlighter getHighlighter(SolrCore core) {
|
||||
HighlightComponent hl = (HighlightComponent) core.getSearchComponents().get(HighlightComponent.COMPONENT_NAME);
|
||||
return hl==null ? null: hl.getHighlighter();
|
||||
return hl==null ? null: hl.getHighlighter();
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public SolrHighlighter getHighlighter() {
|
||||
return solrConfigHighlighter;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -67,7 +111,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
|
|||
@Override
|
||||
public void prepare(ResponseBuilder rb) throws IOException {
|
||||
SolrParams params = rb.req.getParams();
|
||||
rb.doHighlights = highlighter.isHighlightingEnabled(params);
|
||||
rb.doHighlights = solrConfigHighlighter.isHighlightingEnabled(params);
|
||||
if(rb.doHighlights){
|
||||
rb.setNeedDocList(true);
|
||||
String hlq = params.get(HighlightParams.Q);
|
||||
|
@ -90,26 +134,28 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
|
|||
if(children.isEmpty()) {
|
||||
PluginInfo pluginInfo = core.getSolrConfig().getPluginInfo(SolrHighlighter.class.getName()); //TODO deprecated configuration remove later
|
||||
if (pluginInfo != null) {
|
||||
highlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
|
||||
solrConfigHighlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
|
||||
} else {
|
||||
DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core);
|
||||
defHighlighter.init(PluginInfo.EMPTY_INFO);
|
||||
highlighter = defHighlighter;
|
||||
solrConfigHighlighter = defHighlighter;
|
||||
}
|
||||
} else {
|
||||
highlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
|
||||
solrConfigHighlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(ResponseBuilder rb) throws IOException {
|
||||
|
||||
if (rb.doHighlights) {
|
||||
SolrQueryRequest req = rb.req;
|
||||
SolrParams params = req.getParams();
|
||||
|
||||
String[] defaultHighlightFields; //TODO: get from builder by default?
|
||||
SolrHighlighter highlighter = getHighlighter(params);
|
||||
|
||||
String[] defaultHighlightFields; //TODO: get from builder by default?
|
||||
if (rb.getQparser() != null) {
|
||||
defaultHighlightFields = rb.getQparser().getDefaultHighlightFields();
|
||||
} else {
|
||||
|
@ -130,14 +176,8 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
|
|||
rb.setHighlightQuery( highlightQuery );
|
||||
}
|
||||
}
|
||||
|
||||
if(highlightQuery != null) {
|
||||
boolean rewrite = (highlighter instanceof PostingsSolrHighlighter == false) && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
|
||||
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
|
||||
highlightQuery = rewrite ? highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
|
||||
}
|
||||
|
||||
// No highlighting if there is no query -- consider q.alt="*:*
|
||||
// No highlighting if there is no query -- consider q.alt=*:*
|
||||
if( highlightQuery != null ) {
|
||||
NamedList sumData = highlighter.doHighlighting(
|
||||
rb.getResults().docList,
|
||||
|
@ -152,6 +192,36 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
|
|||
}
|
||||
}
|
||||
|
||||
protected SolrHighlighter getHighlighter(SolrParams params) {
|
||||
HighlightMethod method = HighlightMethod.parse(params.get(HighlightParams.METHOD));
|
||||
if (method == null) {
|
||||
return solrConfigHighlighter;
|
||||
}
|
||||
|
||||
switch (method) {
|
||||
case UNIFIED:
|
||||
if (solrConfigHighlighter instanceof UnifiedSolrHighlighter) {
|
||||
return solrConfigHighlighter;
|
||||
}
|
||||
return new UnifiedSolrHighlighter(); // TODO cache one?
|
||||
case POSTINGS:
|
||||
if (solrConfigHighlighter instanceof PostingsSolrHighlighter) {
|
||||
return solrConfigHighlighter;
|
||||
}
|
||||
return new PostingsSolrHighlighter(); // TODO cache one?
|
||||
case FAST_VECTOR: // fall-through
|
||||
case ORIGINAL:
|
||||
if (solrConfigHighlighter instanceof DefaultSolrHighlighter) {
|
||||
return solrConfigHighlighter;
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"In order to use " + HighlightParams.METHOD + "=" + method.getMethodName() + " the configured" +
|
||||
" highlighter in solrconfig must be " + DefaultSolrHighlighter.class);
|
||||
}
|
||||
default: throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
|
||||
if (!rb.doHighlights) return;
|
||||
|
@ -195,10 +265,6 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public SolrHighlighter getHighlighter() {
|
||||
return highlighter;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
/// SolrInfoMBean
|
||||
////////////////////////////////////////////
|
||||
|
|
|
@@ -199,10 +199,11 @@ public class QueryComponent extends SearchComponent
if (fqs!=null && fqs.length!=0) {
List<Query> filters = rb.getFilters();
// if filters already exists, make a copy instead of modifying the original
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, req);
fqp.setIsFilter(true);
filters.add(fqp.getQuery());
}
}
@ -66,6 +66,7 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.HighlightComponent;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
@ -373,6 +374,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
if (!isHighlightingEnabled(params)) // also returns early if no unique key field
|
||||
return null;
|
||||
|
||||
boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
|
||||
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
|
||||
|
||||
if (rewrite) {
|
||||
query = query.rewrite(req.getSearcher().getIndexReader());
|
||||
}
|
||||
|
||||
SolrIndexSearcher searcher = req.getSearcher();
|
||||
IndexSchema schema = searcher.getSchema();
|
||||
|
||||
|
@ -463,8 +471,11 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
* Determines if we should use the FastVectorHighlighter for this field.
|
||||
*/
|
||||
protected boolean useFastVectorHighlighter(SolrParams params, SchemaField schemaField) {
|
||||
boolean useFvhParam = params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
|
||||
if (!useFvhParam) return false;
|
||||
boolean methodFvh =
|
||||
HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
|
||||
params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
|
||||
|| params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
|
||||
if (!methodFvh) return false;
|
||||
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
|
||||
if (!termPosOff) {
|
||||
log.warn("Solr will use the standard Highlighter instead of FastVectorHighlighter because the {} field " +
|
||||
|
|
|
@@ -50,8 +50,9 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* <p>
* Example configuration:
* <pre class="prettyprint">
* <requestHandler name="standard" class="solr.StandardRequestHandler">
* <requestHandler name="/select" class="solr.SearchHandler">
* <lst name="defaults">
* <str name="hl.method">postings</str>
* <int name="hl.snippets">1</int>
* <str name="hl.tag.pre">&lt;em&gt;</str>
* <str name="hl.tag.post">&lt;/em&gt;</str>
@@ -71,12 +72,6 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* </lst>
* </requestHandler>
* </pre>
* ...
* <pre class="prettyprint">
* <searchComponent class="solr.HighlightComponent" name="highlight">
* <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
* </searchComponent>
* </pre>
* <p>
* Notes:
* <ul>
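The updated javadoc example selects the postings highlighter through the new hl.method request parameter instead of a dedicated <highlighting> component. The same parameter can also be supplied per request; a small SolrJ sketch follows, where the client URL, collection, and field names are placeholders and the builder-style client construction is an assumption about the SolrJ version in use:

import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class HighlightMethodExample {
  public static void main(String[] args) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
      SolrQuery query = new SolrQuery("text:lucene");
      query.setHighlight(true);
      query.set("hl.fl", "text");
      query.set("hl.method", "postings"); // other values named in this patch: unified, original, fastVector
      QueryResponse rsp = client.query(query);
      // per-document, per-field snippets keyed by unique key
      Map<String, Map<String, List<String>>> highlighting = rsp.getHighlighting();
      System.out.println(highlighting);
    }
  }
}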
@ -0,0 +1,365 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.highlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
|
||||
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
|
||||
import org.apache.lucene.search.uhighlight.PassageFormatter;
|
||||
import org.apache.lucene.search.uhighlight.PassageScorer;
|
||||
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RTimerTree;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
|
||||
/**
|
||||
* Highlighter impl that uses {@link UnifiedHighlighter}
|
||||
* <p>
|
||||
* Example configuration with default values:
|
||||
* <pre class="prettyprint">
|
||||
* <requestHandler name="/select" class="solr.SearchHandler">
|
||||
* <lst name="defaults">
|
||||
* <str name="hl.method">unified</str>
|
||||
* <int name="hl.snippets">1</int>
|
||||
* <str name="hl.tag.pre">&lt;em&gt;</str>
|
||||
* <str name="hl.tag.post">&lt;/em&gt;</str>
|
||||
* <str name="hl.simple.pre">&lt;em&gt;</str>
|
||||
* <str name="hl.simple.post">&lt;/em&gt;</str>
|
||||
* <str name="hl.tag.ellipsis">... </str>
|
||||
* <bool name="hl.defaultSummary">true</bool>
|
||||
* <str name="hl.encoder">simple</str>
|
||||
* <float name="hl.score.k1">1.2</float>
|
||||
* <float name="hl.score.b">0.75</float>
|
||||
* <float name="hl.score.pivot">87</float>
|
||||
* <str name="hl.bs.language"></str>
|
||||
* <str name="hl.bs.country"></str>
|
||||
* <str name="hl.bs.variant"></str>
|
||||
* <str name="hl.bs.type">SENTENCE</str>
|
||||
* <int name="hl.maxAnalyzedChars">10000</int>
|
||||
* <bool name="hl.highlightMultiTerm">true</bool>
|
||||
* <bool name="hl.usePhraseHighlighter">true</bool>
|
||||
* <int name="hl.cacheFieldValCharsThreshold">524288</int>
|
||||
* <str name="hl.offsetSource"></str>
|
||||
* </lst>
|
||||
* </requestHandler>
|
||||
* </pre>
|
||||
* <p>
|
||||
* Notes:
|
||||
* <ul>
|
||||
* <li>hl.q (string) can specify the query
|
||||
* <li>hl.fl (string) specifies the field list.
|
||||
* <li>hl.snippets (int) specifies how many snippets to return.
|
||||
* <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
|
||||
* <li>hl.tag.post (string) specifies text which appears after a highlighted term.
|
||||
* <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer hl.tag.pre)
|
||||
* <li>hl.simple.post (string) specifies text which appears before a highlighted term. (prefer hl.tag.post)
|
||||
* <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is to retain each
|
||||
* value in a list without joining them.
|
||||
* <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading text.
|
||||
* <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
|
||||
* <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
|
||||
* <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
|
||||
* <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
|
||||
* <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHAR, WHOLE]
|
||||
* <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
|
||||
* <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
|
||||
* <li>hl.bs.variant (string) specifies country code for BreakIterator. default is empty string (root locale)
|
||||
* <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document for any one field.
|
||||
* <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries at some cost. default is true
|
||||
* <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
|
||||
* <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached. default is 524288 (1MB in 2 byte chars)
|
||||
* <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will use what's available if not specified
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
|
||||
|
||||
protected static final String SNIPPET_SEPARATOR = "\u0000";
|
||||
private static final String[] ZERO_LEN_STR_ARRAY = new String[0];
|
||||
|
||||
@Override
|
||||
public void init(PluginInfo info) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
|
||||
final SolrParams params = req.getParams();
|
||||
|
||||
// if highlighting isn't enabled, then why call doHighlighting?
|
||||
if (!isHighlightingEnabled(params))
|
||||
return null;
|
||||
|
||||
int[] docIDs = toDocIDs(docs);
|
||||
|
||||
// fetch the unique keys
|
||||
String[] keys = getUniqueKeys(req.getSearcher(), docIDs);
|
||||
|
||||
// query-time parameters
|
||||
String[] fieldNames = getHighlightFields(query, req, defaultFields);
|
||||
|
||||
int maxPassages[] = new int[fieldNames.length];
|
||||
for (int i = 0; i < fieldNames.length; i++) {
|
||||
maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
|
||||
}
|
||||
|
||||
UnifiedHighlighter highlighter = getHighlighter(req);
|
||||
Map<String, String[]> snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages);
|
||||
return encodeSnippets(keys, fieldNames, snippets);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an instance of the Lucene {@link UnifiedHighlighter}. Provided for subclass extension so that
|
||||
* a subclass can return a subclass of {@link SolrExtendedUnifiedHighlighter}.
|
||||
*/
|
||||
protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
|
||||
return new SolrExtendedUnifiedHighlighter(req);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the resulting snippets into a namedlist
|
||||
*
|
||||
* @param keys the document unique keys
|
||||
* @param fieldNames field names to highlight in the order
|
||||
* @param snippets map from field name to snippet array for the docs
|
||||
* @return encoded namedlist of summaries
|
||||
*/
|
||||
  protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String, String[]> snippets) {
    NamedList<Object> list = new SimpleOrderedMap<>();
    for (int i = 0; i < keys.length; i++) {
      NamedList<Object> summary = new SimpleOrderedMap<>();
      for (String field : fieldNames) {
        String snippet = snippets.get(field)[i];
        if (snippet == null) {
          //TODO reuse logic of DefaultSolrHighlighter.alternateField
          summary.add(field, ZERO_LEN_STR_ARRAY);
        } else {
          // we used a special snippet separator char and we can now split on it.
          summary.add(field, snippet.split(SNIPPET_SEPARATOR));
        }
      }
      list.add(keys[i], summary);
    }
    return list;
  }

  /**
   * Converts solr's DocList to the int[] docIDs
   */
  protected int[] toDocIDs(DocList docs) {
    int[] docIDs = new int[docs.size()];
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docIDs.length; i++) {
      if (!iterator.hasNext()) {
        throw new AssertionError();
      }
      docIDs[i] = iterator.nextDoc();
    }
    if (iterator.hasNext()) {
      throw new AssertionError();
    }
    return docIDs;
  }

  /**
   * Retrieves the unique keys for the topdocs to key the results
   */
  protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
    IndexSchema schema = searcher.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    if (keyField != null) {
      Set<String> selector = Collections.singleton(keyField.getName());
      String[] uniqueKeys = new String[docIDs.length];
      for (int i = 0; i < docIDs.length; i++) {
        int docid = docIDs[i];
        Document doc = searcher.doc(docid, selector);
        String id = schema.printableUniqueKey(doc);
        uniqueKeys[i] = id;
      }
      return uniqueKeys;
    } else {
      return new String[docIDs.length];
    }
  }

  /**
   * From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}.
   */
  protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter {
    protected final SolrParams params;
    protected final IndexSchema schema;

    protected final RTimerTree loadFieldValuesTimer;

    public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) {
      super(req.getSearcher(), req.getSchema().getIndexAnalyzer());
      this.params = req.getParams();
      this.schema = req.getSchema();
      this.setMaxLength(
          params.getInt(HighlightParams.MAX_CHARS, UnifiedHighlighter.DEFAULT_MAX_LENGTH));
      this.setCacheFieldValCharsThreshold(
          params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD));

      // SolrRequestInfo is a thread-local singleton providing access to the ResponseBuilder to code that
      // otherwise can't get it in a nicer way.
      SolrQueryRequest request = SolrRequestInfo.getRequestInfo().getReq();
      final RTimerTree timerTree;
      if (request.getRequestTimer() != null) { //It may be null if not used in a search context.
        timerTree = request.getRequestTimer();
      } else {
        timerTree = new RTimerTree(); // since null checks are annoying
      }
      loadFieldValuesTimer = timerTree.sub("loadFieldValues"); // we assume a new timer, state of STARTED
      loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED.
    }

    @Override
    protected OffsetSource getOffsetSource(String field) {
      String sourceStr = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE);
      if (sourceStr != null) {
        return OffsetSource.valueOf(sourceStr.toUpperCase(Locale.ROOT));
      } else {
        return super.getOffsetSource(field);
      }
    }

    @Override
    public int getMaxNoHighlightPassages(String field) {
      boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false);
      if (defaultSummary) {
        return -1;// signifies return first hl.snippets passages worth of the content
      } else {
        return 0;// will return null
      }
    }

    @Override
    protected PassageFormatter getFormatter(String fieldName) {
      String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE,
          params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>")
      );

      String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST,
          params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>")
      );
      String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR);
      String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple");
      return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
    }

    @Override
    protected PassageScorer getScorer(String fieldName) {
      float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f);
      float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f);
      float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f);
      return new PassageScorer(k1, b, pivot);
    }

    @Override
    protected BreakIterator getBreakIterator(String field) {
      String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE);
      String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY);
      String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
      Locale locale = parseLocale(language, country, variant);
      String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
      return parseBreakIterator(type, locale);
    }

    /**
     * parse a break iterator type for the specified locale
     */
    protected BreakIterator parseBreakIterator(String type, Locale locale) {
      if (type == null || "SENTENCE".equals(type)) {
        return BreakIterator.getSentenceInstance(locale);
      } else if ("LINE".equals(type)) {
        return BreakIterator.getLineInstance(locale);
      } else if ("WORD".equals(type)) {
        return BreakIterator.getWordInstance(locale);
      } else if ("CHARACTER".equals(type)) {
        return BreakIterator.getCharacterInstance(locale);
      } else if ("WHOLE".equals(type)) {
        return new WholeBreakIterator();
      } else {
        throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
      }
    }

    /**
     * parse a locale from a language+country+variant spec
     */
    protected Locale parseLocale(String language, String country, String variant) {
      if (language == null && country == null && variant == null) {
        return Locale.ROOT;
      } else if (language == null) {
        throw new IllegalArgumentException("language is required if country or variant is specified");
      } else if (country == null && variant != null) {
        throw new IllegalArgumentException("To specify variant, country is required");
      } else if (country != null && variant != null) {
        return new Locale(language, country, variant);
      } else if (country != null) {
        return new Locale(language, country);
      } else {
        return new Locale(language);
      }
    }

    @Override
    protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int
        cacheCharsThreshold) throws IOException {
      // Time loading field values. It can be an expensive part of highlighting.
      loadFieldValuesTimer.resume();
      try {
        return super.loadFieldValues(fields, docIter, cacheCharsThreshold);
      } finally {
        loadFieldValuesTimer.pause(); // note: doesn't need to be "stopped"; pause is fine.
      }
    }

    @Override
    protected boolean shouldHandleMultiTermQuery(String field) {
      return params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true);
    }

    @Override
    protected boolean shouldHighlightPhrasesStrictly(String field) {
      return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true);
    }

  }

}
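The hl.bs.* handling above maps a requested break-iterator type onto the JDK's java.text.BreakIterator factories. A minimal, self-contained sketch of that mapping, using JDK classes only: the parameter string "hl.bs.type", the sample text, and the method names here are illustrative assumptions, and the Lucene-specific "WHOLE" case is omitted because it needs WholeBreakIterator.

import java.text.BreakIterator;
import java.util.Locale;

public class BreakIteratorSketch {
  // Mirrors parseBreakIterator() above, minus the Lucene-only "WHOLE" branch.
  static BreakIterator forType(String type, Locale locale) {
    if (type == null || "SENTENCE".equals(type)) return BreakIterator.getSentenceInstance(locale);
    if ("LINE".equals(type))      return BreakIterator.getLineInstance(locale);
    if ("WORD".equals(type))      return BreakIterator.getWordInstance(locale);
    if ("CHARACTER".equals(type)) return BreakIterator.getCharacterInstance(locale);
    throw new IllegalArgumentException("Unknown hl.bs.type: " + type); // param name assumed
  }

  public static void main(String[] args) {
    BreakIterator bi = forType("SENTENCE", Locale.ROOT);
    bi.setText("First sentence. Second sentence.");
    int start = bi.first();
    for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
      System.out.println("[" + start + "," + end + ")"); // passage boundaries the highlighter would snap to
    }
  }
}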
@ -4,11 +4,12 @@ package org.apache.solr.parser;
|
|||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
|
||||
|
||||
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
|
||||
|
@ -135,9 +136,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
addClause(clauses, conj, mods, q);
|
||||
}
|
||||
if (clauses.size() == 1 && firstQuery != null)
|
||||
{if (true) return firstQuery;}
|
||||
{if (true) return rawToNormal(firstQuery);}
|
||||
else {
|
||||
{if (true) return getBooleanQuery(clauses);}
|
||||
{if (true) return getBooleanQuery(clauses);}
|
||||
}
|
||||
throw new Error("Missing return statement in function");
|
||||
}
|
||||
|
@ -146,6 +147,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
Query q;
|
||||
Token fieldToken=null, boost=null;
|
||||
Token localParams=null;
|
||||
int flags = 0;
|
||||
if (jj_2_1(2)) {
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
case TERM:
|
||||
|
@ -195,6 +197,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
break;
|
||||
case FILTER:
|
||||
jj_consume_token(FILTER);
|
||||
flags=startFilter();
|
||||
q = Query(field);
|
||||
jj_consume_token(RPAREN);
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
|
@ -206,7 +209,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
jj_la1[7] = jj_gen;
|
||||
;
|
||||
}
|
||||
q=getFilter(q);
|
||||
q=getFilter(q); restoreFlags(flags);
|
||||
break;
|
||||
case LPARAMS:
|
||||
localParams = jj_consume_token(LPARAMS);
|
||||
|
|
|
@ -190,9 +190,9 @@ Query Query(String field) throws SyntaxError :
|
|||
)*
|
||||
{
|
||||
if (clauses.size() == 1 && firstQuery != null)
|
||||
return firstQuery;
|
||||
return rawToNormal(firstQuery);
|
||||
else {
|
||||
return getBooleanQuery(clauses);
|
||||
return getBooleanQuery(clauses);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -201,6 +201,7 @@ Query Clause(String field) throws SyntaxError : {
|
|||
Query q;
|
||||
Token fieldToken=null, boost=null;
|
||||
Token localParams=null;
|
||||
int flags = 0;
|
||||
}
|
||||
{
|
||||
|
||||
|
@ -216,7 +217,7 @@ Query Clause(String field) throws SyntaxError : {
|
|||
(
|
||||
q=Term(field)
|
||||
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
|
||||
| (<FILTER> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); } )
|
||||
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } )
|
||||
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
|
||||
)
|
||||
{ return handleBoost(q, boost); }
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.solr.parser;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
@ -61,6 +62,7 @@ import org.apache.solr.search.SyntaxError;
|
|||
*/
|
||||
public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
|
||||
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
|
||||
|
||||
static final int CONJ_NONE = 0;
|
||||
static final int CONJ_AND = 1;
|
||||
|
@ -89,7 +91,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
|
||||
|
||||
boolean autoGeneratePhraseQueries = false;
|
||||
|
||||
int flags;
|
||||
|
||||
protected IndexSchema schema;
|
||||
protected QParser parser;
|
||||
|
@ -125,6 +127,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
// internal: A simple raw fielded query
|
||||
public static class RawQuery extends Query {
|
||||
final SchemaField sfield;
|
||||
final String externalVal;
|
||||
|
||||
public RawQuery(SchemaField sfield, String externalVal) {
|
||||
this.sfield = sfield;
|
||||
this.externalVal = externalVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "RAW(" + field + "," + externalVal + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// So the generated QueryParser(CharStream) won't error out
|
||||
protected SolrQueryParserBase() {
|
||||
|
@ -138,10 +165,22 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
public void init(Version matchVersion, String defaultField, QParser parser) {
|
||||
this.schema = parser.getReq().getSchema();
|
||||
this.parser = parser;
|
||||
this.flags = parser.getFlags();
|
||||
this.defaultField = defaultField;
|
||||
setAnalyzer(schema.getQueryAnalyzer());
|
||||
}
|
||||
|
||||
// Turn on the "filter" bit and return the previous flags for the caller to save
|
||||
int startFilter() {
|
||||
int oldFlags = flags;
|
||||
flags |= QParser.FLAG_FILTER;
|
||||
return oldFlags;
|
||||
}
|
||||
|
||||
void restoreFlags(int flagsToRestore) {
|
||||
flags = flagsToRestore;
|
||||
}
|
||||
|
||||
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
* @param query the query string to be parsed.
|
||||
*/
|
||||
|
@ -381,7 +420,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
*/
|
||||
protected Query getFieldQuery(String field, String queryText, int slop)
|
||||
throws SyntaxError {
|
||||
Query query = getFieldQuery(field, queryText, true);
|
||||
Query query = getFieldQuery(field, queryText, true, false);
|
||||
|
||||
// only set slop if the phrase query was a result of this parser
|
||||
// and not a sub-parser.
|
||||
|
@ -492,11 +531,77 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
if (clauses.size()==0) {
|
||||
return null; // all clause words were filtered away by the analyzer.
|
||||
}
|
||||
BooleanQuery.Builder query = newBooleanQuery();
|
||||
for(final BooleanClause clause: clauses) {
|
||||
query.add(clause);
|
||||
|
||||
SchemaField sfield = null;
|
||||
List<String> fieldValues = null;
|
||||
|
||||
|
||||
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD;
|
||||
int clausesAdded = 0;
|
||||
|
||||
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
|
||||
Map<SchemaField, List<String>> fmap = new HashMap<>();
|
||||
|
||||
for (BooleanClause clause : clauses) {
|
||||
Query subq = clause.getQuery();
|
||||
if (subq instanceof RawQuery) {
|
||||
if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
|
||||
// We only collect optional terms for set queries. Since this isn't optional,
|
||||
// convert the raw query to a normal query and handle as usual.
|
||||
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
|
||||
} else {
|
||||
// Optional raw query.
|
||||
RawQuery rawq = (RawQuery) subq;
|
||||
|
||||
// only look up fmap and type info on a field change
|
||||
if (sfield != rawq.sfield) {
|
||||
sfield = rawq.sfield;
|
||||
fieldValues = fmap.get(sfield);
|
||||
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
|
||||
// We are currently relying on things like PointField not being marked as indexed in order to bypass
|
||||
// the "useTermQuery" check.
|
||||
if (fieldValues == null && useTermsQuery || !sfield.indexed()) {
|
||||
fieldValues = new ArrayList<>(2);
|
||||
fmap.put(sfield, fieldValues);
|
||||
}
|
||||
}
|
||||
|
||||
if (fieldValues != null) {
|
||||
fieldValues.add(rawq.externalVal);
|
||||
continue;
|
||||
}
|
||||
|
||||
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
|
||||
}
|
||||
}
|
||||
|
||||
clausesAdded++;
|
||||
booleanBuilder.add(clause);
|
||||
}
|
||||
return query.build();
|
||||
|
||||
|
||||
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) {
|
||||
sfield = entry.getKey();
|
||||
fieldValues = entry.getValue();
|
||||
FieldType ft = sfield.getType();
|
||||
|
||||
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
|
||||
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) {
|
||||
// use boolean query instead
|
||||
for (String externalVal : fieldValues) {
|
||||
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal);
|
||||
clausesAdded++;
|
||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
} else {
|
||||
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues);
|
||||
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query
|
||||
clausesAdded++;
|
||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
|
||||
return booleanBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
|
@ -526,7 +631,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
q = getFuzzyQuery(qfield, termImage, fms);
|
||||
} else {
|
||||
String termImage=discardEscapeChar(term.image);
|
||||
q = getFieldQuery(qfield, termImage, false);
|
||||
q = getFieldQuery(qfield, termImage, false, true);
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
@ -540,10 +645,15 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
}
|
||||
catch (Exception ignored) { }
|
||||
}
|
||||
return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
|
||||
|
||||
String raw = discardEscapeChar(term.image.substring(1, term.image.length()-1));
|
||||
return getFieldQuery(qfield, raw, s);
|
||||
}
|
||||
|
||||
// called from parser
|
||||
|
||||
|
||||
// Called from parser
|
||||
// Raw queries are transformed to normal queries before wrapping in a BoostQuery
|
||||
Query handleBoost(Query q, Token boost) {
|
||||
// q==null check is to avoid boosting null queries, such as those caused by stop words
|
||||
if (boost == null || boost.image.length()==0 || q == null) {
|
||||
|
@ -556,14 +666,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
if (q instanceof ConstantScoreQuery || q instanceof SolrConstantScoreQuery) {
|
||||
// skip
|
||||
} else {
|
||||
newQ = new ConstantScoreQuery(q);
|
||||
newQ = new ConstantScoreQuery( rawToNormal(q) );
|
||||
}
|
||||
return new BoostQuery(newQ, val);
|
||||
}
|
||||
|
||||
float boostVal = Float.parseFloat(boost.image);
|
||||
|
||||
return new BoostQuery(q, boostVal);
|
||||
return new BoostQuery( rawToNormal(q), boostVal);
|
||||
}
|
||||
|
||||
|
||||
|
@ -577,17 +687,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
*
|
||||
*/
|
||||
String discardEscapeChar(String input) throws SyntaxError {
|
||||
int start = input.indexOf('\\');
|
||||
if (start < 0) return input;
|
||||
|
||||
// Create char array to hold unescaped char sequence
|
||||
char[] output = new char[input.length()];
|
||||
input.getChars(0, start, output, 0);
|
||||
|
||||
// The length of the output can be less than the input
|
||||
// due to discarded escape chars. This variable holds
|
||||
// the actual length of the output
|
||||
int length = 0;
|
||||
int length = start;
|
||||
|
||||
// We remember whether the last processed character was
|
||||
// an escape character
|
||||
boolean lastCharWasEscapeChar = false;
|
||||
boolean lastCharWasEscapeChar = true;
|
||||
|
||||
// The multiplier the current unicode digit must be multiplied with.
|
||||
// E. g. the first digit must be multiplied with 16^3, the second with 16^2...
|
||||
|
@ -596,7 +710,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
// Used to calculate the codepoint of the escaped unicode character
|
||||
int codePoint = 0;
|
||||
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
// start after the first escape char
|
||||
for (int i = start+1; i < input.length(); i++) {
|
||||
char curChar = input.charAt(i);
|
||||
if (codePointMultiplier > 0) {
|
||||
codePoint += hexToInt(curChar) * codePointMultiplier;
|
||||
|
@ -715,25 +830,57 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
|
||||
private QParser subQParser = null;
|
||||
|
||||
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
|
||||
Query rawToNormal(Query q) {
|
||||
if (!(q instanceof RawQuery)) return q;
|
||||
RawQuery rq = (RawQuery)q;
|
||||
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal);
|
||||
}
|
||||
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
|
||||
return getFieldQuery(field, queryText, quoted, false);
|
||||
}
|
||||
|
||||
// private use for getFieldQuery
|
||||
private String lastFieldName;
|
||||
private SchemaField lastField;
|
||||
|
||||
// if raw==true, then it's possible for this method to return a RawQuery that will need to be transformed
|
||||
// further before using.
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
// intercept magic field name of "_" to use as a hook for our
|
||||
// own functions.
|
||||
if (field.charAt(0) == '_' && parser != null) {
|
||||
MagicFieldName magic = MagicFieldName.get(field);
|
||||
if (null != magic) {
|
||||
subQParser = parser.subQuery(queryText, magic.subParser);
|
||||
return subQParser.getQuery();
|
||||
|
||||
SchemaField sf;
|
||||
if (field.equals(lastFieldName)) {
|
||||
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
|
||||
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
|
||||
sf = lastField;
|
||||
} else {
|
||||
// intercept magic field name of "_" to use as a hook for our
|
||||
// own functions.
|
||||
if (field.charAt(0) == '_' && parser != null) {
|
||||
MagicFieldName magic = MagicFieldName.get(field);
|
||||
if (null != magic) {
|
||||
subQParser = parser.subQuery(queryText, magic.subParser);
|
||||
return subQParser.getQuery();
|
||||
}
|
||||
}
|
||||
|
||||
lastFieldName = field;
|
||||
sf = lastField = schema.getFieldOrNull(field);
|
||||
}
|
||||
SchemaField sf = schema.getFieldOrNull(field);
|
||||
|
||||
if (sf != null) {
|
||||
FieldType ft = sf.getType();
|
||||
// delegate to type for everything except tokenized fields
|
||||
if (ft.isTokenized() && sf.indexed()) {
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries()));
|
||||
} else {
|
||||
return sf.getType().getFieldQuery(parser, sf, queryText);
|
||||
if (raw) {
|
||||
return new RawQuery(sf, queryText);
|
||||
} else {
|
||||
return sf.getType().getFieldQuery(parser, sf, queryText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -742,6 +889,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
}
|
||||
|
||||
|
||||
|
||||
// called from parser
|
||||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
|
|
|
@ -252,8 +252,8 @@ final class NumericFacets {
|
|||
}
|
||||
|
||||
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
|
||||
if (!sf.indexed()) {
|
||||
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
|
||||
if (!sf.indexed() && !sf.hasDocValues()) {
|
||||
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
|
||||
}
|
||||
// Add zeros until there are limit results
|
||||
final Set<String> alreadySeen = new HashSet<>();
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.schema;
|
|||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -38,7 +39,10 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.legacy.LegacyNumericType;
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.search.DocValuesRewriteMethod;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
@ -56,8 +60,8 @@ import org.apache.lucene.util.CharsRefBuilder;
|
|||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.analysis.SolrAnalyzer;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
|
@ -743,7 +747,27 @@ public abstract class FieldType extends FieldProperties {
|
|||
return new TermQuery(new Term(field.getName(), br));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** @lucene.experimental */
|
||||
public Query getSetQuery(QParser parser, SchemaField field, Collection<String> externalVals) {
|
||||
if (!field.indexed()) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
for (String externalVal : externalVals) {
|
||||
Query subq = getFieldQuery(parser, field, externalVal);
|
||||
builder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
List<BytesRef> lst = new ArrayList<>(externalVals.size());
|
||||
BytesRefBuilder br = new BytesRefBuilder();
|
||||
for (String externalVal : externalVals) {
|
||||
readableToIndexed(externalVal, br);
|
||||
lst.add( br.toBytesRef() );
|
||||
}
|
||||
return new TermsQuery(field.getName() , lst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Returns the rewrite method for multiterm queries such as wildcards.
|
||||
* @param parser The {@link org.apache.solr.search.QParser} calling the method
|
||||
|
|
|
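getSetQuery() above switches between a disjunction of SHOULD term clauses and a single TermsQuery once enough values have been collected for one field. A simplified, self-contained sketch of that decision using the same Lucene 6.x-era classes: it feeds raw string bytes straight into the terms, whereas the real method converts each value with readableToIndexed(), and the threshold constant is copied from TERMS_QUERY_THRESHOLD earlier in this change.

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

class SetQuerySketch {
  static final int TERMS_QUERY_THRESHOLD = 16; // same value as SolrQueryParserBase above

  static Query setQuery(String field, List<String> values) {
    if (values.size() < TERMS_QUERY_THRESHOLD) {
      // Few values: an ordinary boolean OR of term queries.
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      for (String v : values) {
        builder.add(new TermQuery(new Term(field, new BytesRef(v))), BooleanClause.Occur.SHOULD);
      }
      return builder.build();
    }
    // Many values: one TermsQuery over the whole set, as in getSetQuery() for indexed fields.
    List<BytesRef> terms = new ArrayList<>(values.size());
    for (String v : values) {
      terms.add(new BytesRef(v));
    }
    return new TermsQuery(field, terms);
  }
}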
@ -1032,8 +1032,8 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected Query getFieldQuery(String field, String val, boolean quoted) throws SyntaxError {
|
||||
this.type = QType.FIELD;
|
||||
protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError {
|
||||
this.type = quoted ? QType.PHRASE : QType.FIELD;
|
||||
this.field = field;
|
||||
this.val = val;
|
||||
this.slop = getPhraseSlop(); // unspecified
|
||||
|
@ -1212,7 +1212,7 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
switch (type) {
|
||||
case FIELD: // fallthrough
|
||||
case PHRASE:
|
||||
Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
|
||||
Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
|
||||
// Boolean query on a whitespace-separated string
|
||||
// If these were synonyms we would have a SynonymQuery
|
||||
if (query instanceof BooleanQuery) {
|
||||
|
|
|
@ -46,7 +46,9 @@ public class FunctionRangeQParserPlugin extends QParserPlugin {
|
|||
@Override
|
||||
public Query parse() throws SyntaxError {
|
||||
funcStr = localParams.get(QueryParsing.V, null);
|
||||
Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).getQuery();
|
||||
QParser subParser = subQuery(funcStr, FunctionQParserPlugin.NAME);
|
||||
subParser.setIsFilter(false); // the range can be based on the relevancy score of embedded queries.
|
||||
Query funcQ = subParser.getQuery();
|
||||
if (funcQ instanceof FunctionQuery) {
|
||||
vs = ((FunctionQuery)funcQ).getValueSource();
|
||||
} else {
|
||||
|
|
|
@ -110,6 +110,7 @@ public class JoinQParserPlugin extends QParserPlugin {
|
|||
} else {
|
||||
coreName = null;
|
||||
QParser fromQueryParser = subQuery(v, null);
|
||||
fromQueryParser.setIsFilter(true);
|
||||
fromQuery = fromQueryParser.getQuery();
|
||||
}
|
||||
|
||||
|
|
|
@ -32,12 +32,18 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
public abstract class QParser {
|
||||
/** @lucene.experimental */
|
||||
public static final int FLAG_FILTER = 0x01;
|
||||
|
||||
protected String qstr;
|
||||
protected SolrParams params;
|
||||
protected SolrParams localParams;
|
||||
protected SolrQueryRequest req;
|
||||
protected int recurseCount;
|
||||
|
||||
/** @lucene.experimental */
|
||||
protected int flags;
|
||||
|
||||
protected Query query;
|
||||
|
||||
protected String stringIncludingLocalParams; // the original query string including any local params
|
||||
|
@ -83,6 +89,28 @@ public abstract class QParser {
|
|||
this.req = req;
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public void setFlags(int flags) {
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public int getFlags() {
|
||||
return flags;
|
||||
}
|
||||
|
||||
/** @lucene.experimental Query is in the context of a filter, where scores don't matter */
|
||||
public boolean isFilter() {
|
||||
return (flags & FLAG_FILTER) != 0;
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public void setIsFilter(boolean isFilter) {
|
||||
if (isFilter)
|
||||
flags |= FLAG_FILTER;
|
||||
else
|
||||
flags &= ~FLAG_FILTER;
|
||||
}
|
||||
|
||||
private static void addTag(Map<Object,Collection<Object>> tagMap, Object key, Object val) {
|
||||
Collection<Object> lst = tagMap.get(key);
|
||||
|
@ -201,6 +229,7 @@ public abstract class QParser {
|
|||
defaultType = localParams.get(QueryParsing.DEFTYPE);
|
||||
}
|
||||
QParser nestedParser = getParser(q, defaultType, getReq());
|
||||
nestedParser.flags = this.flags; // TODO: this would be better passed in to the constructor... change to a ParserContext object?
|
||||
nestedParser.recurseCount = recurseCount;
|
||||
recurseCount--;
|
||||
return nestedParser;
|
||||
|
|
|
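The FLAG_FILTER plumbing above is plain bit arithmetic on the parser's flags field. A tiny sketch of the same bookkeeping, with startFilter()/restoreFlags() (from SolrQueryParserBase earlier in this change) included to show how the bit is saved and restored around a nested filter(...) clause; the class name is made up for illustration.

class FilterFlagSketch {
  static final int FLAG_FILTER = 0x01; // same bit as QParser.FLAG_FILTER above
  int flags;

  boolean isFilter() {
    return (flags & FLAG_FILTER) != 0;
  }

  void setIsFilter(boolean isFilter) {
    if (isFilter) flags |= FLAG_FILTER; else flags &= ~FLAG_FILTER;
  }

  int startFilter() {           // turn the bit on, hand back the previous flags
    int oldFlags = flags;
    flags |= FLAG_FILTER;
    return oldFlags;
  }

  void restoreFlags(int flagsToRestore) {
    flags = flagsToRestore;     // undo startFilter() once the nested clause is parsed
  }
}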
@ -98,6 +98,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
QParser parser = null;
|
||||
try {
|
||||
parser = QParser.getParser((String)rawFilter, fcontext.req);
|
||||
parser.setIsFilter(true);
|
||||
Query symbolicFilter = parser.getQuery();
|
||||
qlist.add(symbolicFilter);
|
||||
} catch (SyntaxError syntaxError) {
|
||||
|
@ -134,6 +135,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
QParser parser = null;
|
||||
try {
|
||||
parser = QParser.getParser((String) qstring, fcontext.req);
|
||||
parser.setIsFilter(true);
|
||||
Query symbolicFilter = parser.getQuery();
|
||||
qlist.add(symbolicFilter);
|
||||
} catch (SyntaxError syntaxError) {
|
||||
|
@ -237,6 +239,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
Query parentQuery;
|
||||
try {
|
||||
QParser parser = QParser.getParser(parentStr, fcontext.req);
|
||||
parser.setIsFilter(true);
|
||||
parentQuery = parser.getQuery();
|
||||
} catch (SyntaxError err) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
|
||||
|
|
|
@ -568,6 +568,7 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
|
|||
|
||||
if (qstring != null) {
|
||||
QParser parser = QParser.getParser(qstring, getSolrRequest());
|
||||
parser.setIsFilter(true);
|
||||
facet.q = parser.getQuery();
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.update.processor;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -33,6 +34,7 @@ import org.apache.solr.common.SolrInputDocument;
|
|||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.update.processor.ClassificationUpdateProcessorFactory.Algorithm;
|
||||
|
||||
/**
|
||||
* This Class is a Request Update Processor to classify the document in input and add a field
|
||||
|
@ -42,43 +44,54 @@ import org.apache.solr.update.AddUpdateCommand;
|
|||
class ClassificationUpdateProcessor
|
||||
extends UpdateRequestProcessor {
|
||||
|
||||
private String classFieldName; // the field to index the assigned class
|
||||
|
||||
private final String trainingClassField;
|
||||
private final String predictedClassField;
|
||||
private final int maxOutputClasses;
|
||||
private DocumentClassifier<BytesRef> classifier;
|
||||
|
||||
/**
|
||||
* Sole constructor
|
||||
*
|
||||
* @param inputFieldNames fields to be used as classifier's inputs
|
||||
* @param classFieldName field to be used as classifier's output
|
||||
* @param minDf setting for {@link org.apache.lucene.queries.mlt.MoreLikeThis#minDocFreq}, in case algorithm is {@code "knn"}
|
||||
* @param minTf setting for {@link org.apache.lucene.queries.mlt.MoreLikeThis#minTermFreq}, in case algorithm is {@code "knn"}
|
||||
* @param k setting for k nearest neighbors to analyze, in case algorithm is {@code "knn"}
|
||||
* @param algorithm the name of the classifier to use
|
||||
* @param classificationParams classification advanced params
|
||||
* @param next next update processor in the chain
|
||||
* @param indexReader index reader
|
||||
* @param schema schema
|
||||
*/
|
||||
public ClassificationUpdateProcessor(String[] inputFieldNames, String classFieldName, int minDf, int minTf, int k, String algorithm,
|
||||
UpdateRequestProcessor next, IndexReader indexReader, IndexSchema schema) {
|
||||
public ClassificationUpdateProcessor(ClassificationUpdateProcessorParams classificationParams, UpdateRequestProcessor next, IndexReader indexReader, IndexSchema schema) {
|
||||
super(next);
|
||||
this.classFieldName = classFieldName;
|
||||
Map<String, Analyzer> field2analyzer = new HashMap<String, Analyzer>();
|
||||
this.trainingClassField = classificationParams.getTrainingClassField();
|
||||
this.predictedClassField = classificationParams.getPredictedClassField();
|
||||
this.maxOutputClasses = classificationParams.getMaxPredictedClasses();
|
||||
String[] inputFieldNamesWithBoost = classificationParams.getInputFieldNames();
|
||||
Algorithm classificationAlgorithm = classificationParams.getAlgorithm();
|
||||
|
||||
Map<String, Analyzer> field2analyzer = new HashMap<>();
|
||||
String[] inputFieldNames = this.removeBoost(inputFieldNamesWithBoost);
|
||||
for (String fieldName : inputFieldNames) {
|
||||
SchemaField fieldFromSolrSchema = schema.getField(fieldName);
|
||||
Analyzer indexAnalyzer = fieldFromSolrSchema.getType().getQueryAnalyzer();
|
||||
field2analyzer.put(fieldName, indexAnalyzer);
|
||||
}
|
||||
switch (algorithm) {
|
||||
case "knn":
|
||||
classifier = new KNearestNeighborDocumentClassifier(indexReader, null, null, k, minDf, minTf, classFieldName, field2analyzer, inputFieldNames);
|
||||
switch (classificationAlgorithm) {
|
||||
case KNN:
|
||||
classifier = new KNearestNeighborDocumentClassifier(indexReader, null, classificationParams.getTrainingFilterQuery(), classificationParams.getK(), classificationParams.getMinDf(), classificationParams.getMinTf(), trainingClassField, field2analyzer, inputFieldNamesWithBoost);
|
||||
break;
|
||||
case "bayes":
|
||||
classifier = new SimpleNaiveBayesDocumentClassifier(indexReader, null, classFieldName, field2analyzer, inputFieldNames);
|
||||
case BAYES:
|
||||
classifier = new SimpleNaiveBayesDocumentClassifier(indexReader, null, trainingClassField, field2analyzer, inputFieldNamesWithBoost);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private String[] removeBoost(String[] inputFieldNamesWithBoost) {
|
||||
String[] inputFieldNames = new String[inputFieldNamesWithBoost.length];
|
||||
for (int i = 0; i < inputFieldNamesWithBoost.length; i++) {
|
||||
String singleFieldNameWithBoost = inputFieldNamesWithBoost[i];
|
||||
String[] fieldName2boost = singleFieldNameWithBoost.split("\\^");
|
||||
inputFieldNames[i] = fieldName2boost[0];
|
||||
}
|
||||
return inputFieldNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param cmd the update command in input containing the Document to classify
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
|
@ -89,12 +102,14 @@ class ClassificationUpdateProcessor
|
|||
SolrInputDocument doc = cmd.getSolrInputDocument();
|
||||
Document luceneDocument = cmd.getLuceneDocument();
|
||||
String assignedClass;
|
||||
Object documentClass = doc.getFieldValue(classFieldName);
|
||||
Object documentClass = doc.getFieldValue(trainingClassField);
|
||||
if (documentClass == null) {
|
||||
ClassificationResult<BytesRef> classificationResult = classifier.assignClass(luceneDocument);
|
||||
if (classificationResult != null) {
|
||||
assignedClass = classificationResult.getAssignedClass().utf8ToString();
|
||||
doc.addField(classFieldName, assignedClass);
|
||||
List<ClassificationResult<BytesRef>> assignedClassifications = classifier.getClasses(luceneDocument, maxOutputClasses);
|
||||
if (assignedClassifications != null) {
|
||||
for (ClassificationResult<BytesRef> singleClassification : assignedClassifications) {
|
||||
assignedClass = singleClassification.getAssignedClass().utf8ToString();
|
||||
doc.addField(predictedClassField, assignedClass);
|
||||
}
|
||||
}
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
|
|
|
@ -17,13 +17,20 @@
|
|||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.search.LuceneQParser;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
|
||||
import static org.apache.solr.update.processor.ClassificationUpdateProcessorFactory.Algorithm.KNN;
|
||||
|
||||
/**
|
||||
* This class implements an UpdateProcessorFactory for the Classification Update Processor.
|
||||
|
@ -33,49 +40,67 @@ public class ClassificationUpdateProcessorFactory extends UpdateRequestProcessor
|
|||
|
||||
// Update Processor Config params
|
||||
private static final String INPUT_FIELDS_PARAM = "inputFields";
|
||||
private static final String CLASS_FIELD_PARAM = "classField";
|
||||
private static final String TRAINING_CLASS_FIELD_PARAM = "classField";
|
||||
private static final String PREDICTED_CLASS_FIELD_PARAM = "predictedClassField";
|
||||
private static final String MAX_CLASSES_TO_ASSIGN_PARAM = "predictedClass.maxCount";
|
||||
private static final String ALGORITHM_PARAM = "algorithm";
|
||||
private static final String KNN_MIN_TF_PARAM = "knn.minTf";
|
||||
private static final String KNN_MIN_DF_PARAM = "knn.minDf";
|
||||
private static final String KNN_K_PARAM = "knn.k";
|
||||
private static final String KNN_FILTER_QUERY = "knn.filterQuery";
|
||||
|
||||
public enum Algorithm {KNN, BAYES}
|
||||
|
||||
//Update Processor Defaults
|
||||
private static final int DEFAULT_MAX_CLASSES_TO_ASSIGN = 1;
|
||||
private static final int DEFAULT_MIN_TF = 1;
|
||||
private static final int DEFAULT_MIN_DF = 1;
|
||||
private static final int DEFAULT_K = 10;
|
||||
private static final String DEFAULT_ALGORITHM = "knn";
|
||||
private static final Algorithm DEFAULT_ALGORITHM = KNN;
|
||||
|
||||
private String[] inputFieldNames; // the array of fields to be sent to the Classifier
|
||||
|
||||
private String classFieldName; // the field containing the class for the Document
|
||||
|
||||
private String algorithm; // the Classification Algorithm to use - currently 'knn' or 'bayes'
|
||||
|
||||
private int minTf; // knn specific - the minimum Term Frequency for considering a term
|
||||
|
||||
private int minDf; // knn specific - the minimum Document Frequency for considering a term
|
||||
|
||||
private int k; // knn specific - thw window of top results to evaluate, when assigning the class
|
||||
private SolrParams params;
|
||||
private ClassificationUpdateProcessorParams classificationParams;
|
||||
|
||||
@Override
|
||||
public void init(final NamedList args) {
|
||||
if (args != null) {
|
||||
SolrParams params = SolrParams.toSolrParams(args);
|
||||
params = SolrParams.toSolrParams(args);
|
||||
classificationParams = new ClassificationUpdateProcessorParams();
|
||||
|
||||
String fieldNames = params.get(INPUT_FIELDS_PARAM);// must be a comma separated list of fields
|
||||
checkNotNull(INPUT_FIELDS_PARAM, fieldNames);
|
||||
inputFieldNames = fieldNames.split("\\,");
|
||||
classificationParams.setInputFieldNames(fieldNames.split("\\,"));
|
||||
|
||||
classFieldName = params.get(CLASS_FIELD_PARAM);
|
||||
checkNotNull(CLASS_FIELD_PARAM, classFieldName);
|
||||
String trainingClassField = (params.get(TRAINING_CLASS_FIELD_PARAM));
|
||||
checkNotNull(TRAINING_CLASS_FIELD_PARAM, trainingClassField);
|
||||
classificationParams.setTrainingClassField(trainingClassField);
|
||||
|
||||
algorithm = params.get(ALGORITHM_PARAM);
|
||||
if (algorithm == null)
|
||||
algorithm = DEFAULT_ALGORITHM;
|
||||
String predictedClassField = (params.get(PREDICTED_CLASS_FIELD_PARAM));
|
||||
if (predictedClassField == null || predictedClassField.isEmpty()) {
|
||||
predictedClassField = trainingClassField;
|
||||
}
|
||||
classificationParams.setPredictedClassField(predictedClassField);
|
||||
|
||||
minTf = getIntParam(params, KNN_MIN_TF_PARAM, DEFAULT_MIN_TF);
|
||||
minDf = getIntParam(params, KNN_MIN_DF_PARAM, DEFAULT_MIN_DF);
|
||||
k = getIntParam(params, KNN_K_PARAM, DEFAULT_K);
|
||||
classificationParams.setMaxPredictedClasses(getIntParam(params, MAX_CLASSES_TO_ASSIGN_PARAM, DEFAULT_MAX_CLASSES_TO_ASSIGN));
|
||||
|
||||
String algorithmString = params.get(ALGORITHM_PARAM);
|
||||
Algorithm classificationAlgorithm;
|
||||
try {
|
||||
if (algorithmString == null || Algorithm.valueOf(algorithmString.toUpperCase(Locale.ROOT)) == null) {
|
||||
classificationAlgorithm = DEFAULT_ALGORITHM;
|
||||
} else {
|
||||
classificationAlgorithm = Algorithm.valueOf(algorithmString.toUpperCase(Locale.ROOT));
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new SolrException
|
||||
(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Classification UpdateProcessor Algorithm: '" + algorithmString + "' not supported");
|
||||
}
|
||||
classificationParams.setAlgorithm(classificationAlgorithm);
|
||||
|
||||
classificationParams.setMinTf(getIntParam(params, KNN_MIN_TF_PARAM, DEFAULT_MIN_TF));
|
||||
classificationParams.setMinDf(getIntParam(params, KNN_MIN_DF_PARAM, DEFAULT_MIN_DF));
|
||||
classificationParams.setK(getIntParam(params, KNN_K_PARAM, DEFAULT_K));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -108,116 +133,34 @@ public class ClassificationUpdateProcessorFactory extends UpdateRequestProcessor
|
|||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
|
||||
String trainingFilterQueryString = (params.get(KNN_FILTER_QUERY));
|
||||
try {
|
||||
if (trainingFilterQueryString != null && !trainingFilterQueryString.isEmpty()) {
|
||||
Query trainingFilterQuery = this.parseFilterQuery(trainingFilterQueryString, params, req);
|
||||
classificationParams.setTrainingFilterQuery(trainingFilterQuery);
|
||||
}
|
||||
} catch (SyntaxError | RuntimeException syntaxError) {
|
||||
throw new SolrException
|
||||
(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Classification UpdateProcessor Training Filter Query: '" + trainingFilterQueryString + "' is not supported", syntaxError);
|
||||
}
|
||||
|
||||
IndexSchema schema = req.getSchema();
|
||||
IndexReader indexReader = req.getSearcher().getIndexReader();
|
||||
return new ClassificationUpdateProcessor(inputFieldNames, classFieldName, minDf, minTf, k, algorithm, next, indexReader, schema);
|
||||
|
||||
return new ClassificationUpdateProcessor(classificationParams, next, indexReader, schema);
|
||||
}
|
||||
|
||||
/**
|
||||
* get field names used as classifier's inputs
|
||||
*
|
||||
* @return the input field names
|
||||
*/
|
||||
public String[] getInputFieldNames() {
|
||||
return inputFieldNames;
|
||||
private Query parseFilterQuery(String trainingFilterQueryString, SolrParams params, SolrQueryRequest req) throws SyntaxError {
|
||||
LuceneQParser parser = new LuceneQParser(trainingFilterQueryString, null, params, req);
|
||||
return parser.parse();
|
||||
}
|
||||
|
||||
/**
|
||||
* set field names used as classifier's inputs
|
||||
*
|
||||
* @param inputFieldNames the input field names
|
||||
*/
|
||||
public void setInputFieldNames(String[] inputFieldNames) {
|
||||
this.inputFieldNames = inputFieldNames;
|
||||
public ClassificationUpdateProcessorParams getClassificationParams() {
|
||||
return classificationParams;
|
||||
}
|
||||
|
||||
/**
|
||||
* get field names used as classifier's output
|
||||
*
|
||||
* @return the output field name
|
||||
*/
|
||||
public String getClassFieldName() {
|
||||
return classFieldName;
|
||||
}
|
||||
|
||||
/**
|
||||
* set field names used as classifier's output
|
||||
*
|
||||
* @param classFieldName the output field name
|
||||
*/
|
||||
public void setClassFieldName(String classFieldName) {
|
||||
this.classFieldName = classFieldName;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the name of the classifier algorithm used
|
||||
*
|
||||
* @return the classifier algorithm used
|
||||
*/
|
||||
public String getAlgorithm() {
|
||||
return algorithm;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the name of the classifier algorithm used
|
||||
*
|
||||
* @param algorithm the classifier algorithm used
|
||||
*/
|
||||
public void setAlgorithm(String algorithm) {
|
||||
this.algorithm = algorithm;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the min term frequency value to be used in case algorithm is {@code "knn"}
|
||||
*
|
||||
* @return the min term frequency
|
||||
*/
|
||||
public int getMinTf() {
|
||||
return minTf;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the min term frequency value to be used in case algorithm is {@code "knn"}
|
||||
*
|
||||
* @param minTf the min term frequency
|
||||
*/
|
||||
public void setMinTf(int minTf) {
|
||||
this.minTf = minTf;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the min document frequency value to be used in case algorithm is {@code "knn"}
|
||||
*
|
||||
* @return the min document frequency
|
||||
*/
|
||||
public int getMinDf() {
|
||||
return minDf;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the min document frequency value to be used in case algorithm is {@code "knn"}
|
||||
*
|
||||
* @param minDf the min document frequency
|
||||
*/
|
||||
public void setMinDf(int minDf) {
|
||||
this.minDf = minDf;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the no. of nearest neighbors to analyze, to be used in case algorithm is {@code "knn"}
|
||||
*
|
||||
* @return the no. of neighbors to analyze
|
||||
*/
|
||||
public int getK() {
|
||||
return k;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the no. of nearest neighbors to analyze, to be used in case algorithm is {@code "knn"}
|
||||
*
|
||||
* @param k the no. of neighbors to analyze
|
||||
*/
|
||||
public void setK(int k) {
|
||||
this.k = k;
|
||||
public void setClassificationParams(ClassificationUpdateProcessorParams classificationParams) {
|
||||
this.classificationParams = classificationParams;
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,112 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.update.processor;

import org.apache.lucene.search.Query;

public class ClassificationUpdateProcessorParams {

  private String[] inputFieldNames; // the array of fields to be sent to the Classifier

  private Query trainingFilterQuery; // a filter query to reduce the training set to a subset

  private String trainingClassField; // the field containing the class for the Document

  private String predictedClassField; // the field that will contain the predicted class

  private int maxPredictedClasses; // the max number of classes to assign

  private ClassificationUpdateProcessorFactory.Algorithm algorithm; // the Classification Algorithm to use - currently 'knn' or 'bayes'

  private int minTf; // knn specific - the minimum Term Frequency for considering a term

  private int minDf; // knn specific - the minimum Document Frequency for considering a term

  private int k; // knn specific - the window of top results to evaluate, when assigning the class

  public String[] getInputFieldNames() {
    return inputFieldNames;
  }

  public void setInputFieldNames(String[] inputFieldNames) {
    this.inputFieldNames = inputFieldNames;
  }

  public Query getTrainingFilterQuery() {
    return trainingFilterQuery;
  }

  public void setTrainingFilterQuery(Query trainingFilterQuery) {
    this.trainingFilterQuery = trainingFilterQuery;
  }

  public String getTrainingClassField() {
    return trainingClassField;
  }

  public void setTrainingClassField(String trainingClassField) {
    this.trainingClassField = trainingClassField;
  }

  public String getPredictedClassField() {
    return predictedClassField;
  }

  public void setPredictedClassField(String predictedClassField) {
    this.predictedClassField = predictedClassField;
  }

  public int getMaxPredictedClasses() {
    return maxPredictedClasses;
  }

  public void setMaxPredictedClasses(int maxPredictedClasses) {
    this.maxPredictedClasses = maxPredictedClasses;
  }

  public ClassificationUpdateProcessorFactory.Algorithm getAlgorithm() {
    return algorithm;
  }

  public void setAlgorithm(ClassificationUpdateProcessorFactory.Algorithm algorithm) {
    this.algorithm = algorithm;
  }

  public int getMinTf() {
    return minTf;
  }

  public void setMinTf(int minTf) {
    this.minTf = minTf;
  }

  public int getMinDf() {
    return minDf;
  }

  public void setMinDf(int minDf) {
    this.minDf = minDf;
  }

  public int getK() {
    return k;
  }

  public void setK(int k) {
    this.k = k;
  }
}
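The parameter object above replaces the long argument list of the old ClassificationUpdateProcessor constructor. A hypothetical configuration sketch that uses only the setters defined in this class: the field names "title", "content" and "category" are made up, and the numeric values simply mirror the factory defaults shown earlier in this change.

package org.apache.solr.update.processor;

import org.apache.solr.update.processor.ClassificationUpdateProcessorFactory.Algorithm;

class ClassificationParamsExample {
  // Builds a KNN configuration equivalent to the factory defaults.
  static ClassificationUpdateProcessorParams knnDefaults() {
    ClassificationUpdateProcessorParams p = new ClassificationUpdateProcessorParams();
    p.setInputFieldNames(new String[] {"title", "content"}); // hypothetical input fields
    p.setTrainingClassField("category");                     // hypothetical class field
    p.setPredictedClassField("category");                    // the factory falls back to the training field anyway
    p.setMaxPredictedClasses(1);                             // DEFAULT_MAX_CLASSES_TO_ASSIGN
    p.setAlgorithm(Algorithm.KNN);                           // DEFAULT_ALGORITHM
    p.setMinTf(1);                                           // DEFAULT_MIN_TF
    p.setMinDf(1);                                           // DEFAULT_MIN_DF
    p.setK(10);                                              // DEFAULT_K
    return p;
  }
}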
@ -91,6 +91,7 @@ import org.apache.solr.client.solrj.impl.HttpClientUtil;
|
|||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
|
||||
import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
|
||||
import org.apache.solr.client.solrj.impl.ZkClientClusterStateProvider;
|
||||
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -1514,7 +1515,7 @@ public class SolrCLI {
|
|||
|
||||
echo("Uploading " + confPath.toAbsolutePath().toString() +
|
||||
" for config " + confname + " to ZooKeeper at " + cloudSolrClient.getZkHost());
|
||||
cloudSolrClient.uploadConfig(confPath, confname);
|
||||
((ZkClientClusterStateProvider) cloudSolrClient.getClusterStateProvider()).uploadConfig(confPath, confname);
|
||||
}
|
||||
|
||||
// since creating a collection is a heavy-weight operation, check for existence first
|
||||
|
|
|
@ -907,7 +907,7 @@ public class SolrPluginUtils {
|
|||
* aliases should work)
|
||||
*/
|
||||
@Override
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted)
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw)
|
||||
throws SyntaxError {
|
||||
|
||||
if (aliases.containsKey(field)) {
|
||||
|
@ -917,7 +917,7 @@ public class SolrPluginUtils {
|
|||
List<Query> disjuncts = new ArrayList<>();
|
||||
for (String f : a.fields.keySet()) {
|
||||
|
||||
Query sub = getFieldQuery(f,queryText,quoted);
|
||||
Query sub = getFieldQuery(f,queryText,quoted, false);
|
||||
if (null != sub) {
|
||||
if (null != a.fields.get(f)) {
|
||||
sub = new BoostQuery(sub, a.fields.get(f));
|
||||
|
@ -931,7 +931,7 @@ public class SolrPluginUtils {
|
|||
|
||||
} else {
|
||||
try {
|
||||
return super.getFieldQuery(field, queryText, quoted);
|
||||
return super.getFieldQuery(field, queryText, quoted, raw);
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -1,84 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
|
||||
/**
|
||||
* An abstraction for how time passes. It is passed to {@link Timer} to track timing.
|
||||
*/
|
||||
public abstract class Clock {
|
||||
/**
|
||||
* Returns the current time tick.
|
||||
*
|
||||
* @return time tick in nanoseconds
|
||||
*/
|
||||
public abstract long getTick();
|
||||
|
||||
/**
|
||||
* Returns the current time in milliseconds.
|
||||
*
|
||||
* @return time in milliseconds
|
||||
*/
|
||||
@SuppressForbidden(reason = "Need currentTimeMillis, API used by ExponentiallyDecayingSample for suspect reasons")
|
||||
public long getTime() {
|
||||
return System.currentTimeMillis();
|
||||
}
|
||||
|
||||
private static final Clock DEFAULT = new UserTimeClock();
|
||||
|
||||
/**
|
||||
* The default clock to use.
|
||||
*
|
||||
* @return the default {@link Clock} instance
|
||||
*
|
||||
* @see UserTimeClock
|
||||
*/
|
||||
public static Clock defaultClock() {
|
||||
return DEFAULT;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A clock implementation which returns the current time in epoch nanoseconds.
|
||||
*/
|
||||
public static class UserTimeClock extends Clock {
|
||||
@Override
|
||||
public long getTick() {
|
||||
return System.nanoTime();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A clock implementation which returns the current thread's CPU time.
|
||||
*/
|
||||
public static class CpuTimeClock extends Clock {
|
||||
private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean();
|
||||
|
||||
@Override
|
||||
public long getTick() {
|
||||
return THREAD_MX_BEAN.getCurrentThreadCpuTime();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,126 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static java.lang.Math.exp;
|
||||
|
||||
/**
|
||||
* An exponentially-weighted moving average.
|
||||
*
|
||||
* @see <a href="http://www.teamquest.com/pdfs/whitepaper/ldavg1.pdf">UNIX Load Average Part 1: How
|
||||
* It Works</a>
|
||||
* @see <a href="http://www.teamquest.com/pdfs/whitepaper/ldavg2.pdf">UNIX Load Average Part 2: Not
|
||||
* Your Average Average</a>
|
||||
*/
|
||||
public class EWMA {
|
||||
private static final int INTERVAL = 5;
|
||||
private static final double SECONDS_PER_MINUTE = 60.0;
|
||||
private static final int ONE_MINUTE = 1;
|
||||
private static final int FIVE_MINUTES = 5;
|
||||
private static final int FIFTEEN_MINUTES = 15;
|
||||
private static final double M1_ALPHA = 1 - exp(-INTERVAL / SECONDS_PER_MINUTE / ONE_MINUTE);
|
||||
private static final double M5_ALPHA = 1 - exp(-INTERVAL / SECONDS_PER_MINUTE / FIVE_MINUTES);
|
||||
private static final double M15_ALPHA = 1 - exp(-INTERVAL / SECONDS_PER_MINUTE / FIFTEEN_MINUTES);
|
||||
|
||||
private volatile boolean initialized = false;
|
||||
private volatile double rate = 0.0;
|
||||
|
||||
private final AtomicLong uncounted = new AtomicLong();
|
||||
private final double alpha, interval;
|
||||
|
||||
/**
|
||||
* Creates a new EWMA which is equivalent to the UNIX one minute load average and which expects
|
||||
* to be ticked every 5 seconds.
|
||||
*
|
||||
* @return a one-minute EWMA
|
||||
*/
|
||||
public static EWMA oneMinuteEWMA() {
|
||||
return new EWMA(M1_ALPHA, INTERVAL, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new EWMA which is equivalent to the UNIX five minute load average and which expects
|
||||
* to be ticked every 5 seconds.
|
||||
*
|
||||
* @return a five-minute EWMA
|
||||
*/
|
||||
public static EWMA fiveMinuteEWMA() {
|
||||
return new EWMA(M5_ALPHA, INTERVAL, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new EWMA which is equivalent to the UNIX fifteen minute load average and which
|
||||
* expects to be ticked every 5 seconds.
|
||||
*
|
||||
* @return a fifteen-minute EWMA
|
||||
*/
|
||||
public static EWMA fifteenMinuteEWMA() {
|
||||
return new EWMA(M15_ALPHA, INTERVAL, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new EWMA with a specific smoothing constant.
|
||||
*
|
||||
* @param alpha the smoothing constant
|
||||
* @param interval the expected tick interval
|
||||
* @param intervalUnit the time unit of the tick interval
|
||||
*/
|
||||
public EWMA(double alpha, long interval, TimeUnit intervalUnit) {
|
||||
this.interval = intervalUnit.toNanos(interval);
|
||||
this.alpha = alpha;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the moving average with a new value.
|
||||
*
|
||||
* @param n the new value
|
||||
*/
|
||||
public void update(long n) {
|
||||
uncounted.addAndGet(n);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark the passage of time and decay the current rate accordingly.
|
||||
*/
|
||||
public void tick() {
|
||||
final long count = uncounted.getAndSet(0);
|
||||
final double instantRate = count / interval;
|
||||
if (initialized) {
|
||||
rate += (alpha * (instantRate - rate));
|
||||
} else {
|
||||
rate = instantRate;
|
||||
initialized = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the rate in the given units of time.
|
||||
*
|
||||
* @param rateUnit the unit of time
|
||||
* @return the rate
|
||||
*/
|
||||
public double getRate(TimeUnit rateUnit) {
|
||||
return rate * (double) rateUnit.toNanos(1);
|
||||
}
|
||||
}
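
A small usage sketch (not part of this patch) to make the constants above concrete. For the one-minute average, alpha = 1 - exp(-5/60) ≈ 0.08, i.e. each 5-second tick folds in about 8% of the difference between the instantaneous rate and the running average. The EwmaExample wrapper is scaffolding and assumes org.apache.solr.util.stats.EWMA is visible.

import java.util.concurrent.TimeUnit;

public class EwmaExample {
  public static void main(String[] args) {
    EWMA ewma = EWMA.oneMinuteEWMA();  // alpha = 1 - exp(-5/60.0)
    ewma.update(100);                  // 100 events observed during the current 5-second interval
    ewma.tick();                       // fold them into the moving average
    // The rate is kept per nanosecond internally; convert it to events per second:
    System.out.println(ewma.getRate(TimeUnit.SECONDS));  // 20.0 after this first tick (100 / 5s)
  }
}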
|
|
@ -1,218 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ConcurrentSkipListMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import static java.lang.Math.exp;
|
||||
import static java.lang.Math.min;
|
||||
|
||||
/**
|
||||
* An exponentially-decaying random sample of {@code long}s. Uses Cormode et al's forward-decaying
|
||||
* priority reservoir sampling method to produce a statistically representative sample,
|
||||
* exponentially biased towards newer entries.
|
||||
*
|
||||
* See <a href="http://www.research.att.com/people/Cormode_Graham/library/publications/CormodeShkapenyukSrivastavaXu09.pdf">
|
||||
* Cormode et al. Forward Decay: A Practical Time Decay Model for Streaming Systems. ICDE '09: Proceedings of the 2009 IEEE International Conference on Data Engineering (2009)</a>
|
||||
*/
|
||||
public class ExponentiallyDecayingSample implements Sample {
|
||||
|
||||
private static final long RESCALE_THRESHOLD = TimeUnit.HOURS.toNanos(1);
|
||||
private final ConcurrentSkipListMap<Double, Long> values;
|
||||
private final ReentrantReadWriteLock lock;
|
||||
private final double alpha;
|
||||
private final int reservoirSize;
|
||||
private final AtomicLong count = new AtomicLong(0);
|
||||
private volatile long startTime;
|
||||
private final AtomicLong nextScaleTime = new AtomicLong(0);
|
||||
private final Clock clock;
|
||||
// TODO: Maybe replace this with a Mersenne Twister?
|
||||
private final Random random = new Random();
|
||||
|
||||
/**
|
||||
* Creates a new {@link ExponentiallyDecayingSample}.
|
||||
*
|
||||
* @param reservoirSize the number of samples to keep in the sampling reservoir
|
||||
* @param alpha the exponential decay factor; the higher this is, the more biased the
|
||||
* sample will be towards newer values
|
||||
*/
|
||||
public ExponentiallyDecayingSample(int reservoirSize, double alpha) {
|
||||
this(reservoirSize, alpha, Clock.defaultClock());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link ExponentiallyDecayingSample}.
|
||||
*
|
||||
* @param reservoirSize the number of samples to keep in the sampling reservoir
|
||||
* @param alpha the exponential decay factor; the higher this is, the more biased the
|
||||
* sample will be towards newer values
|
||||
*/
|
||||
public ExponentiallyDecayingSample(int reservoirSize, double alpha, Clock clock) {
|
||||
this.values = new ConcurrentSkipListMap<>();
|
||||
this.lock = new ReentrantReadWriteLock();
|
||||
this.alpha = alpha;
|
||||
this.reservoirSize = reservoirSize;
|
||||
this.clock = clock;
|
||||
clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
lockForRescale();
|
||||
try {
|
||||
values.clear();
|
||||
count.set(0);
|
||||
this.startTime = currentTimeInSeconds();
|
||||
nextScaleTime.set(clock.getTick() + RESCALE_THRESHOLD);
|
||||
} finally {
|
||||
unlockForRescale();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return (int) min(reservoirSize, count.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(long value) {
|
||||
update(value, currentTimeInSeconds());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an old value with a fixed timestamp to the sample.
|
||||
*
|
||||
* @param value the value to be added
|
||||
* @param timestamp the epoch timestamp of {@code value} in seconds
|
||||
*/
|
||||
public void update(long value, long timestamp) {
|
||||
|
||||
rescaleIfNeeded();
|
||||
|
||||
lockForRegularUsage();
|
||||
try {
|
||||
final double priority = weight(timestamp - startTime) / random.nextDouble();
|
||||
final long newCount = count.incrementAndGet();
|
||||
if (newCount <= reservoirSize) {
|
||||
values.put(priority, value);
|
||||
} else {
|
||||
Double first = values.firstKey();
|
||||
if (first < priority) {
|
||||
if (values.putIfAbsent(priority, value) == null) {
|
||||
// ensure we always remove an item
|
||||
while (values.remove(first) == null) {
|
||||
first = values.firstKey();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
unlockForRegularUsage();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void rescaleIfNeeded() {
|
||||
final long now = clock.getTick();
|
||||
final long next = nextScaleTime.get();
|
||||
if (now >= next) {
|
||||
rescale(now, next);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Snapshot getSnapshot() {
|
||||
lockForRegularUsage();
|
||||
try {
|
||||
return new Snapshot(values.values());
|
||||
} finally {
|
||||
unlockForRegularUsage();
|
||||
}
|
||||
}
|
||||
|
||||
private long currentTimeInSeconds() {
|
||||
return TimeUnit.MILLISECONDS.toSeconds(clock.getTime());
|
||||
}
|
||||
|
||||
private double weight(long t) {
|
||||
return exp(alpha * t);
|
||||
}
|
||||
|
||||
/* "A common feature of the above techniques—indeed, the key technique that
|
||||
* allows us to track the decayed weights efficiently—is that they maintain
|
||||
* counts and other quantities based on g(ti − L), and only scale by g(t − L)
|
||||
* at query time. But while g(ti −L)/g(t−L) is guaranteed to lie between zero
|
||||
* and one, the intermediate values of g(ti − L) could become very large. For
|
||||
* polynomial functions, these values should not grow too large, and should be
|
||||
* effectively represented in practice by floating point values without loss of
|
||||
* precision. For exponential functions, these values could grow quite large as
|
||||
* new values of (ti − L) become large, and potentially exceed the capacity of
|
||||
* common floating point types. However, since the values stored by the
|
||||
* algorithms are linear combinations of g values (scaled sums), they can be
|
||||
* rescaled relative to a new landmark. That is, by the analysis of exponential
|
||||
* decay in Section III-A, the choice of L does not affect the final result. We
|
||||
* can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
|
||||
* and obtain the correct value as if we had instead computed relative to a new
|
||||
* landmark L′ (and then use this new L′ at query time). This can be done with
|
||||
* a linear pass over whatever data structure is being used."
|
||||
*/
|
||||
private void rescale(long now, long next) {
|
||||
if (nextScaleTime.compareAndSet(next, now + RESCALE_THRESHOLD)) {
|
||||
lockForRescale();
|
||||
try {
|
||||
final long oldStartTime = startTime;
|
||||
this.startTime = currentTimeInSeconds();
|
||||
final ArrayList<Double> keys = new ArrayList<>(values.keySet());
|
||||
for (Double key : keys) {
|
||||
final Long value = values.remove(key);
|
||||
values.put(key * exp(-alpha * (startTime - oldStartTime)), value);
|
||||
}
|
||||
|
||||
// make sure the counter is in sync with the number of stored samples.
|
||||
count.set(values.size());
|
||||
} finally {
|
||||
unlockForRescale();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void unlockForRescale() {
|
||||
lock.writeLock().unlock();
|
||||
}
|
||||
|
||||
private void lockForRescale() {
|
||||
lock.writeLock().lock();
|
||||
}
|
||||
|
||||
private void lockForRegularUsage() {
|
||||
lock.readLock().lock();
|
||||
}
|
||||
|
||||
private void unlockForRegularUsage() {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
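
A usage sketch (not part of this patch) illustrating the forward-decay weighting described above. Each update is stored under the key weight(t - L) / u with u drawn uniformly from (0, 1); with the alpha of 0.015 used elsewhere in this patch, a value arriving 60 seconds after the landmark gets weight exp(0.015 * 60) ≈ 2.46 and so tends to displace older, lower-priority entries once the reservoir is full. The DecayingSampleExample wrapper is scaffolding.

public class DecayingSampleExample {
  public static void main(String[] args) {
    ExponentiallyDecayingSample sample = new ExponentiallyDecayingSample(1028, 0.015);
    for (long i = 0; i < 10_000; i++) {
      sample.update(i);   // each value competes for a slot via its time-decayed priority
    }
    Snapshot snap = sample.getSnapshot();
    // The reservoir never holds more than 1028 entries, biased towards the most recent updates.
    System.out.println(snap.size() + " values, median " + snap.getMedian());
  }
}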
|
|
@ -1,238 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import static java.lang.Math.sqrt;
|
||||
|
||||
/**
|
||||
* A metric which calculates the distribution of a value.
|
||||
*
|
||||
* @see <a href="http://www.johndcook.com/standard_deviation.html">Accurately computing running
|
||||
* variance</a>
|
||||
*/
|
||||
public class Histogram {
|
||||
|
||||
private static final int DEFAULT_SAMPLE_SIZE = 1028;
|
||||
private static final double DEFAULT_ALPHA = 0.015;
|
||||
|
||||
/**
|
||||
* The type of sampling the histogram should be performing.
|
||||
*/
|
||||
enum SampleType {
|
||||
/**
|
||||
* Uses a uniform sample of 1028 elements, which offers a 99.9% confidence level with a 5%
|
||||
* margin of error assuming a normal distribution.
|
||||
*/
|
||||
UNIFORM {
|
||||
@Override
|
||||
public Sample newSample() {
|
||||
return new UniformSample(DEFAULT_SAMPLE_SIZE);
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Uses an exponentially decaying sample of 1028 elements, which offers a 99.9% confidence
|
||||
* level with a 5% margin of error assuming a normal distribution, and an alpha factor of
|
||||
* 0.015, which heavily biases the sample to the past 5 minutes of measurements.
|
||||
*/
|
||||
BIASED {
|
||||
@Override
|
||||
public Sample newSample() {
|
||||
return new ExponentiallyDecayingSample(DEFAULT_SAMPLE_SIZE, DEFAULT_ALPHA);
|
||||
}
|
||||
};
|
||||
|
||||
public abstract Sample newSample();
|
||||
}
|
||||
|
||||
private final Sample sample;
|
||||
private final AtomicLong min = new AtomicLong();
|
||||
private final AtomicLong max = new AtomicLong();
|
||||
private final AtomicLong sum = new AtomicLong();
|
||||
// These are for the Welford algorithm for calculating running variance
|
||||
// without floating-point doom.
|
||||
private final AtomicReference<double[]> variance =
|
||||
new AtomicReference<>(new double[]{-1, 0}); // M, S
|
||||
private final AtomicLong count = new AtomicLong();
|
||||
|
||||
/**
|
||||
* Creates a new {@link Histogram} with the given sample type.
|
||||
*
|
||||
* @param type the type of sample to use
|
||||
*/
|
||||
Histogram(SampleType type) {
|
||||
this(type.newSample());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link Histogram} with the given sample.
|
||||
*
|
||||
* @param sample the sample to create a histogram from
|
||||
*/
|
||||
Histogram(Sample sample) {
|
||||
this.sample = sample;
|
||||
clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears all recorded values.
|
||||
*/
|
||||
public void clear() {
|
||||
sample.clear();
|
||||
count.set(0);
|
||||
max.set(Long.MIN_VALUE);
|
||||
min.set(Long.MAX_VALUE);
|
||||
sum.set(0);
|
||||
variance.set(new double[]{ -1, 0 });
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a recorded value.
|
||||
*
|
||||
* @param value the value to record
|
||||
*/
|
||||
public void update(int value) {
|
||||
update((long) value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a recorded value.
|
||||
*
|
||||
* @param value the value to record
|
||||
*/
|
||||
public void update(long value) {
|
||||
count.incrementAndGet();
|
||||
sample.update(value);
|
||||
setMax(value);
|
||||
setMin(value);
|
||||
sum.getAndAdd(value);
|
||||
updateVariance(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of values recorded.
|
||||
*
|
||||
* @return the number of values recorded
|
||||
*/
|
||||
public long getCount() {
|
||||
return count.get();
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.yammer.metrics.core.Summarizable#max()
|
||||
*/
|
||||
public double getMax() {
|
||||
if (getCount() > 0) {
|
||||
return max.get();
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.yammer.metrics.core.Summarizable#min()
|
||||
*/
|
||||
public double getMin() {
|
||||
if (getCount() > 0) {
|
||||
return min.get();
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.yammer.metrics.core.Summarizable#mean()
|
||||
*/
|
||||
public double getMean() {
|
||||
if (getCount() > 0) {
|
||||
return sum.get() / (double) getCount();
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.yammer.metrics.core.Summarizable#stdDev()
|
||||
*/
|
||||
public double getStdDev() {
|
||||
if (getCount() > 0) {
|
||||
return sqrt(getVariance());
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.yammer.metrics.core.Summarizable#sum()
|
||||
*/
|
||||
public double getSum() {
|
||||
return (double) sum.get();
|
||||
}
|
||||
|
||||
public Snapshot getSnapshot() {
|
||||
return sample.getSnapshot();
|
||||
}
|
||||
|
||||
private double getVariance() {
|
||||
if (getCount() <= 1) {
|
||||
return 0.0;
|
||||
}
|
||||
return variance.get()[1] / (getCount() - 1);
|
||||
}
|
||||
|
||||
private void setMax(long potentialMax) {
|
||||
boolean done = false;
|
||||
while (!done) {
|
||||
final long currentMax = max.get();
|
||||
done = currentMax >= potentialMax || max.compareAndSet(currentMax, potentialMax);
|
||||
}
|
||||
}
|
||||
|
||||
private void setMin(long potentialMin) {
|
||||
boolean done = false;
|
||||
while (!done) {
|
||||
final long currentMin = min.get();
|
||||
done = currentMin <= potentialMin || min.compareAndSet(currentMin, potentialMin);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateVariance(long value) {
|
||||
while (true) {
|
||||
final double[] oldValues = variance.get();
|
||||
final double[] newValues = new double[2];
|
||||
if (oldValues[0] == -1) {
|
||||
newValues[0] = value;
|
||||
newValues[1] = 0;
|
||||
} else {
|
||||
final double oldM = oldValues[0];
|
||||
final double oldS = oldValues[1];
|
||||
|
||||
final double newM = oldM + ((value - oldM) / getCount());
|
||||
final double newS = oldS + ((value - oldM) * (value - newM));
|
||||
|
||||
newValues[0] = newM;
|
||||
newValues[1] = newS;
|
||||
}
|
||||
if (variance.compareAndSet(oldValues, newValues)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
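
A worked example (not part of this patch) of the Welford bookkeeping in updateVariance() above: the pair {M, S} tracks the running mean and the running sum of squared deviations, and getVariance() divides S by (n - 1). The Histogram constructors and the SampleType enum are package-private, so this sketch assumes it lives in org.apache.solr.util.stats; the HistogramExample wrapper is scaffolding.

public class HistogramExample {
  public static void main(String[] args) {
    Histogram h = new Histogram(Histogram.SampleType.UNIFORM);
    h.update(2);   // M = 2, S = 0
    h.update(4);   // M = 3, S = 2
    h.update(6);   // M = 4, S = 8
    System.out.println(h.getMean());    // 4.0  (sum 12 / count 3)
    System.out.println(h.getStdDev());  // 2.0  (sqrt(S / (n - 1)) = sqrt(8 / 2))
  }
}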
|
|
@ -1,143 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
|
||||
* A meter metric which measures mean throughput and one-, five-, and fifteen-minute
|
||||
* exponentially-weighted moving average throughputs.
|
||||
*
|
||||
* @see <a href="http://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average">EMA</a>
|
||||
*/
|
||||
public class Meter {
|
||||
|
||||
private static final long TICK_INTERVAL = TimeUnit.SECONDS.toNanos(5);
|
||||
|
||||
private final EWMA m1Rate = EWMA.oneMinuteEWMA();
|
||||
private final EWMA m5Rate = EWMA.fiveMinuteEWMA();
|
||||
private final EWMA m15Rate = EWMA.fifteenMinuteEWMA();
|
||||
|
||||
private final AtomicLong count = new AtomicLong();
|
||||
private final long startTime;
|
||||
private final AtomicLong lastTick;
|
||||
private final TimeUnit rateUnit;
|
||||
private final String eventType;
|
||||
private final Clock clock;
|
||||
|
||||
/**
|
||||
* Creates a new {@link Meter}.
|
||||
*
|
||||
* @param eventType the plural name of the event the meter is measuring (e.g., {@code
|
||||
* "requests"})
|
||||
* @param rateUnit the rate unit of the new meter
|
||||
* @param clock the clock to use for the meter ticks
|
||||
*/
|
||||
Meter(String eventType, TimeUnit rateUnit, Clock clock) {
|
||||
this.rateUnit = rateUnit;
|
||||
this.eventType = eventType;
|
||||
this.clock = clock;
|
||||
this.startTime = this.clock.getTick();
|
||||
this.lastTick = new AtomicLong(startTime);
|
||||
}
|
||||
|
||||
public TimeUnit getRateUnit() {
|
||||
return rateUnit;
|
||||
}
|
||||
|
||||
public String getEventType() {
|
||||
return eventType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the moving averages.
|
||||
*/
|
||||
void tick() {
|
||||
m1Rate.tick();
|
||||
m5Rate.tick();
|
||||
m15Rate.tick();
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark the occurrence of an event.
|
||||
*/
|
||||
public void mark() {
|
||||
mark(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark the occurrence of a given number of events.
|
||||
*
|
||||
* @param n the number of events
|
||||
*/
|
||||
public void mark(long n) {
|
||||
tickIfNecessary();
|
||||
count.addAndGet(n);
|
||||
m1Rate.update(n);
|
||||
m5Rate.update(n);
|
||||
m15Rate.update(n);
|
||||
}
|
||||
|
||||
private void tickIfNecessary() {
|
||||
final long oldTick = lastTick.get();
|
||||
final long newTick = clock.getTick();
|
||||
final long age = newTick - oldTick;
|
||||
if (age > TICK_INTERVAL && lastTick.compareAndSet(oldTick, newTick)) {
|
||||
final long requiredTicks = age / TICK_INTERVAL;
|
||||
for (long i = 0; i < requiredTicks; i++) {
|
||||
tick();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public long getCount() {
|
||||
return count.get();
|
||||
}
|
||||
|
||||
public double getFifteenMinuteRate() {
|
||||
tickIfNecessary();
|
||||
return m15Rate.getRate(rateUnit);
|
||||
}
|
||||
|
||||
public double getFiveMinuteRate() {
|
||||
tickIfNecessary();
|
||||
return m5Rate.getRate(rateUnit);
|
||||
}
|
||||
|
||||
public double getMeanRate() {
|
||||
if (getCount() == 0) {
|
||||
return 0.0;
|
||||
} else {
|
||||
final long elapsed = (clock.getTick() - startTime);
|
||||
return convertNsRate(getCount() / (double) elapsed);
|
||||
}
|
||||
}
|
||||
|
||||
public double getOneMinuteRate() {
|
||||
tickIfNecessary();
|
||||
return m1Rate.getRate(rateUnit);
|
||||
}
|
||||
|
||||
private double convertNsRate(double ratePerNs) {
|
||||
return ratePerNs * (double) rateUnit.toNanos(1);
|
||||
}
|
||||
}
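
A usage sketch (not part of this patch). mark() only accumulates counts; tickIfNecessary() folds them into the three EWMAs once at least 5 seconds have passed, and getMeanRate() is simply count divided by the time elapsed since construction. The Meter constructor is package-visible (within this patch it is created by Timer as "calls"), so the sketch assumes it runs from org.apache.solr.util.stats.

import java.util.concurrent.TimeUnit;

public class MeterExample {
  public static void main(String[] args) {
    Meter requests = new Meter("requests", TimeUnit.SECONDS, Clock.defaultClock());
    requests.mark();     // one event
    requests.mark(9);    // nine more
    System.out.println(requests.getCount());     // 10
    // count / seconds elapsed since construction (very large here, since almost no time has passed)
    System.out.println(requests.getMeanRate());
  }
}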
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
/**
|
||||
* A statistically representative sample of a data stream.
|
||||
*/
|
||||
public interface Sample {
|
||||
/**
|
||||
* Clears all recorded values.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* Returns the number of values recorded.
|
||||
*
|
||||
* @return the number of values recorded
|
||||
*/
|
||||
int size();
|
||||
|
||||
/**
|
||||
* Adds a new recorded value to the sample.
|
||||
*
|
||||
* @param value a new recorded value
|
||||
*/
|
||||
void update(long value);
|
||||
|
||||
/**
|
||||
* Returns a snapshot of the sample's values.
|
||||
*
|
||||
* @return a snapshot of the sample's values
|
||||
*/
|
||||
Snapshot getSnapshot();
|
||||
}
|
|
@ -1,168 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import static java.lang.Math.floor;
|
||||
|
||||
/**
|
||||
* A statistical snapshot of a {@link Sample}.
|
||||
*/
|
||||
public class Snapshot {
|
||||
private static final double MEDIAN_Q = 0.5;
|
||||
private static final double P75_Q = 0.75;
|
||||
private static final double P95_Q = 0.95;
|
||||
private static final double P98_Q = 0.98;
|
||||
private static final double P99_Q = 0.99;
|
||||
private static final double P999_Q = 0.999;
|
||||
|
||||
private final double[] values;
|
||||
|
||||
/**
|
||||
* Create a new {@link Snapshot} with the given values.
|
||||
*
|
||||
* @param values an unordered set of values in the sample
|
||||
*/
|
||||
public Snapshot(Collection<Long> values) {
|
||||
final Object[] copy = values.toArray();
|
||||
this.values = new double[copy.length];
|
||||
for (int i = 0; i < copy.length; i++) {
|
||||
this.values[i] = (Long) copy[i];
|
||||
}
|
||||
Arrays.sort(this.values);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new {@link Snapshot} with the given values.
|
||||
*
|
||||
* @param values an unordered set of values in the sample
|
||||
*/
|
||||
public Snapshot(double[] values) {
|
||||
this.values = new double[values.length];
|
||||
System.arraycopy(values, 0, this.values, 0, values.length);
|
||||
Arrays.sort(this.values);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value at the given quantile.
|
||||
*
|
||||
* @param quantile a given quantile, in {@code [0..1]}
|
||||
* @return the value in the distribution at {@code quantile}
|
||||
*/
|
||||
public double getValue(double quantile) {
|
||||
if (quantile < 0.0 || quantile > 1.0) {
|
||||
throw new IllegalArgumentException(quantile + " is not in [0..1]");
|
||||
}
|
||||
|
||||
if (values.length == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
final double pos = quantile * (values.length + 1);
|
||||
|
||||
if (pos < 1) {
|
||||
return values[0];
|
||||
}
|
||||
|
||||
if (pos >= values.length) {
|
||||
return values[values.length - 1];
|
||||
}
|
||||
|
||||
final double lower = values[(int) pos - 1];
|
||||
final double upper = values[(int) pos];
|
||||
return lower + (pos - floor(pos)) * (upper - lower);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of values in the snapshot.
|
||||
*
|
||||
* @return the number of values in the snapshot
|
||||
*/
|
||||
public int size() {
|
||||
return values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the median value in the distribution.
|
||||
*
|
||||
* @return the median value in the distribution
|
||||
*/
|
||||
public double getMedian() {
|
||||
return getValue(MEDIAN_Q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value at the 75th percentile in the distribution.
|
||||
*
|
||||
* @return the value at the 75th percentile in the distribution
|
||||
*/
|
||||
public double get75thPercentile() {
|
||||
return getValue(P75_Q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value at the 95th percentile in the distribution.
|
||||
*
|
||||
* @return the value at the 95th percentile in the distribution
|
||||
*/
|
||||
public double get95thPercentile() {
|
||||
return getValue(P95_Q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value at the 98th percentile in the distribution.
|
||||
*
|
||||
* @return the value at the 98th percentile in the distribution
|
||||
*/
|
||||
public double get98thPercentile() {
|
||||
return getValue(P98_Q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value at the 99th percentile in the distribution.
|
||||
*
|
||||
* @return the value at the 99th percentile in the distribution
|
||||
*/
|
||||
public double get99thPercentile() {
|
||||
return getValue(P99_Q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value at the 99.9th percentile in the distribution.
|
||||
*
|
||||
* @return the value at the 99.9th percentile in the distribution
|
||||
*/
|
||||
public double get999thPercentile() {
|
||||
return getValue(P999_Q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the entire set of values in the snapshot.
|
||||
*
|
||||
* @return the entire set of values in the snapshot
|
||||
*/
|
||||
public double[] getValues() {
|
||||
return Arrays.copyOf(values, values.length);
|
||||
}
|
||||
|
||||
}
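
A worked example (not part of this patch) of the interpolation in getValue() above: the position is quantile * (n + 1) over the sorted values, with linear interpolation between the two neighbouring entries. The SnapshotExample wrapper is scaffolding.

public class SnapshotExample {
  public static void main(String[] args) {
    Snapshot snap = new Snapshot(new double[]{1, 2, 3, 4, 5});
    // median: pos = 0.5 * 6 = 3.0 -> exactly values[2]
    System.out.println(snap.getMedian());          // 3.0
    // 75th percentile: pos = 0.75 * 6 = 4.5 -> halfway between values[3] and values[4]
    System.out.println(snap.get75thPercentile());  // 4.5
  }
}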
|
|
@ -1,203 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import org.apache.solr.util.stats.Histogram.SampleType;
|
||||
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* A timer metric which aggregates timing durations and provides duration statistics, plus
|
||||
* throughput statistics via {@link Meter}.
|
||||
*/
|
||||
public class Timer {
|
||||
|
||||
private final TimeUnit durationUnit, rateUnit;
|
||||
private final Meter meter;
|
||||
private final Histogram histogram = new Histogram(SampleType.BIASED);
|
||||
private final Clock clock;
|
||||
|
||||
public Timer() {
|
||||
this(TimeUnit.MILLISECONDS, TimeUnit.SECONDS, Clock.defaultClock());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link Timer}.
|
||||
*
|
||||
* @param durationUnit the scale unit for this timer's duration metrics
|
||||
* @param rateUnit the scale unit for this timer's rate metrics
|
||||
* @param clock the clock used to calculate duration
|
||||
*/
|
||||
public Timer(TimeUnit durationUnit, TimeUnit rateUnit, Clock clock) {
|
||||
this.durationUnit = durationUnit;
|
||||
this.rateUnit = rateUnit;
|
||||
this.meter = new Meter("calls", rateUnit, clock);
|
||||
this.clock = clock;
|
||||
clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the timer's duration scale unit.
|
||||
*
|
||||
* @return the timer's duration scale unit
|
||||
*/
|
||||
public TimeUnit getDurationUnit() {
|
||||
return durationUnit;
|
||||
}
|
||||
|
||||
public TimeUnit getRateUnit() {
|
||||
return rateUnit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears all recorded durations.
|
||||
*/
|
||||
public void clear() {
|
||||
histogram.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a recorded duration.
|
||||
*
|
||||
* @param duration the length of the duration
|
||||
* @param unit the scale unit of {@code duration}
|
||||
*/
|
||||
public void update(long duration, TimeUnit unit) {
|
||||
update(unit.toNanos(duration));
|
||||
}
|
||||
|
||||
/**
|
||||
* Times and records the duration of an event.
|
||||
*
|
||||
* @param event a {@link Callable} whose {@link Callable#call()} method implements a process
|
||||
* whose duration should be timed
|
||||
* @param <T> the type of the value returned by {@code event}
|
||||
* @return the value returned by {@code event}
|
||||
* @throws Exception if {@code event} throws an {@link Exception}
|
||||
*/
|
||||
public <T> T time(Callable<T> event) throws Exception {
|
||||
final long startTime = clock.getTick();
|
||||
try {
|
||||
return event.call();
|
||||
} finally {
|
||||
update(clock.getTick() - startTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a timing {@link TimerContext}, which measures an elapsed time in nanoseconds.
|
||||
*
|
||||
* @return a new {@link TimerContext}
|
||||
*/
|
||||
public TimerContext time() {
|
||||
return new TimerContext(this, clock);
|
||||
}
|
||||
|
||||
public long getCount() {
|
||||
return histogram.getCount();
|
||||
}
|
||||
|
||||
public double getFifteenMinuteRate() {
|
||||
return meter.getFifteenMinuteRate();
|
||||
}
|
||||
|
||||
public double getFiveMinuteRate() {
|
||||
return meter.getFiveMinuteRate();
|
||||
}
|
||||
|
||||
public double getMeanRate() {
|
||||
return meter.getMeanRate();
|
||||
}
|
||||
|
||||
public double getOneMinuteRate() {
|
||||
return meter.getOneMinuteRate();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the longest recorded duration.
|
||||
*
|
||||
* @return the longest recorded duration
|
||||
*/
|
||||
public double getMax() {
|
||||
return convertFromNS(histogram.getMax());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the shortest recorded duration.
|
||||
*
|
||||
* @return the shortest recorded duration
|
||||
*/
|
||||
public double getMin() {
|
||||
return convertFromNS(histogram.getMin());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the arithmetic mean of all recorded durations.
|
||||
*
|
||||
* @return the arithmetic mean of all recorded durations
|
||||
*/
|
||||
public double getMean() {
|
||||
return convertFromNS(histogram.getMean());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the standard deviation of all recorded durations.
|
||||
*
|
||||
* @return the standard deviation of all recorded durations
|
||||
*/
|
||||
public double getStdDev() {
|
||||
return convertFromNS(histogram.getStdDev());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the sum of all recorded durations.
|
||||
*
|
||||
* @return the sum of all recorded durations
|
||||
*/
|
||||
public double getSum() {
|
||||
return convertFromNS(histogram.getSum());
|
||||
}
|
||||
|
||||
public Snapshot getSnapshot() {
|
||||
final double[] values = histogram.getSnapshot().getValues();
|
||||
final double[] converted = new double[values.length];
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
converted[i] = convertFromNS(values[i]);
|
||||
}
|
||||
return new Snapshot(converted);
|
||||
}
|
||||
|
||||
public String getEventType() {
|
||||
return meter.getEventType();
|
||||
}
|
||||
|
||||
private void update(long duration) {
|
||||
if (duration >= 0) {
|
||||
histogram.update(duration);
|
||||
meter.mark();
|
||||
}
|
||||
}
|
||||
|
||||
private double convertFromNS(double ns) {
|
||||
return ns / TimeUnit.NANOSECONDS.convert(1, durationUnit);
|
||||
}
|
||||
}
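
A usage sketch (not part of this patch). The no-arg constructor records durations in milliseconds and rates per second; durations can be recorded either by wrapping a Callable or by stopping a TimerContext. The TimerExample wrapper is scaffolding.

public class TimerExample {
  public static void main(String[] args) throws Exception {
    Timer timer = new Timer();
    String result = timer.time(() -> "done");   // times the Callable and records its duration

    TimerContext ctx = timer.time();
    Thread.sleep(10);                           // ... work being measured ...
    long elapsedNanos = ctx.stop();

    System.out.println(timer.getCount() + " calls, mean " + timer.getMean() + " ms");
  }
}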
|
|
@ -1,55 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* A timing context.
|
||||
*
|
||||
* @see Timer#time()
|
||||
*/
|
||||
public class TimerContext {
|
||||
private final Timer timer;
|
||||
private final Clock clock;
|
||||
private final long startTime;
|
||||
|
||||
/**
|
||||
* Creates a new {@link TimerContext} with the current time as its starting value and with the
|
||||
* given {@link Timer}.
|
||||
*
|
||||
* @param timer the {@link Timer} to report the elapsed time to
|
||||
*/
|
||||
TimerContext(Timer timer, Clock clock) {
|
||||
this.timer = timer;
|
||||
this.clock = clock;
|
||||
this.startTime = clock.getTick();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops recording the elapsed time, updates the timer, and returns the elapsed time in nanoseconds.
|
||||
*/
|
||||
public long stop() {
|
||||
final long elapsedNanos = clock.getTick() - startTime;
|
||||
timer.update(elapsedNanos, TimeUnit.NANOSECONDS);
|
||||
return elapsedNanos;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.codahale.metrics.Snapshot;
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
/**
|
||||
* Solr specific {@link Timer} utility functions.
|
||||
*/
|
||||
public class TimerUtils {
|
||||
|
||||
/**
|
||||
* Adds metrics from a Timer to a NamedList, using well-known names.
|
||||
* @param lst The NamedList to add the metrics data to
|
||||
* @param timer The Timer to extract the metrics from
|
||||
*/
|
||||
public static void addMetrics(NamedList<Object> lst, Timer timer) {
|
||||
Snapshot snapshot = timer.getSnapshot();
|
||||
lst.add("avgRequestsPerSecond", timer.getMeanRate());
|
||||
lst.add("5minRateRequestsPerSecond", timer.getFiveMinuteRate());
|
||||
lst.add("15minRateRequestsPerSecond", timer.getFifteenMinuteRate());
|
||||
lst.add("avgTimePerRequest", nsToMs(snapshot.getMean()));
|
||||
lst.add("medianRequestTime", nsToMs(snapshot.getMedian()));
|
||||
lst.add("75thPcRequestTime", nsToMs(snapshot.get75thPercentile()));
|
||||
lst.add("95thPcRequestTime", nsToMs(snapshot.get95thPercentile()));
|
||||
lst.add("99thPcRequestTime", nsToMs(snapshot.get99thPercentile()));
|
||||
lst.add("999thPcRequestTime", nsToMs(snapshot.get999thPercentile()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a double representing nanoseconds to a double representing milliseconds.
|
||||
*
|
||||
* @param ns the amount of time in nanoseconds
|
||||
* @return the amount of time in milliseconds
|
||||
*/
|
||||
static double nsToMs(double ns) {
|
||||
return ns / TimeUnit.MILLISECONDS.toNanos(1);
|
||||
}
|
||||
|
||||
}
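
A usage sketch (not part of this patch) of the intended call pattern: a Dropwizard com.codahale.metrics.Timer collects raw durations in nanoseconds, and addMetrics() flattens its snapshot into the well-known request-handler statistic names, converting times to milliseconds. The TimerUtilsExample wrapper is scaffolding and assumes TimerUtils is on the classpath.

import com.codahale.metrics.Timer;
import org.apache.solr.common.util.NamedList;

public class TimerUtilsExample {
  public static void main(String[] args) {
    Timer requestTimer = new Timer();
    Timer.Context ctx = requestTimer.time();
    // ... handle a request ...
    ctx.stop();

    NamedList<Object> stats = new NamedList<>();
    TimerUtils.addMetrics(stats, requestTimer);
    // stats now contains avgRequestsPerSecond, avgTimePerRequest, medianRequestTime, percentiles, ...
    System.out.println(stats);
  }
}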
|
|
@ -1,108 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Forked from https://github.com/codahale/metrics
|
||||
*/
|
||||
|
||||
package org.apache.solr.util.stats;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.atomic.AtomicLongArray;
|
||||
|
||||
/**
|
||||
* A random sample of a stream of {@code long}s. Uses Vitter's Algorithm R to produce a
|
||||
* statistically representative sample.
|
||||
*
|
||||
* @see <a href="http://www.cs.umd.edu/~samir/498/vitter.pdf">Random Sampling with a Reservoir</a>
|
||||
*/
|
||||
public class UniformSample implements Sample {
|
||||
|
||||
private static final int BITS_PER_LONG = 63;
|
||||
private final AtomicLong count = new AtomicLong();
|
||||
private final AtomicLongArray values;
|
||||
//TODO: Maybe replace with a Mersenne twister for better distribution
|
||||
private static final Random random = new Random();
|
||||
|
||||
/**
|
||||
* Creates a new {@link UniformSample}.
|
||||
*
|
||||
* @param reservoirSize the number of samples to keep in the sampling reservoir
|
||||
*/
|
||||
public UniformSample(int reservoirSize) {
|
||||
this.values = new AtomicLongArray(reservoirSize);
|
||||
clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
for (int i = 0; i < values.length(); i++) {
|
||||
values.set(i, 0);
|
||||
}
|
||||
count.set(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
final long c = count.get();
|
||||
if (c > values.length()) {
|
||||
return values.length();
|
||||
}
|
||||
return (int) c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(long value) {
|
||||
final long c = count.incrementAndGet();
|
||||
if (c <= values.length()) {
|
||||
values.set((int) c - 1, value);
|
||||
} else {
|
||||
final long r = nextLong(c);
|
||||
if (r < values.length()) {
|
||||
values.set((int) r, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a pseudo-random long uniformly between 0 and n-1. Stolen from
|
||||
* {@link java.util.Random#nextInt()}.
|
||||
*
|
||||
* @param n the bound
|
||||
* @return a value selected uniformly at random from the range {@code [0..n)}.
|
||||
*/
|
||||
private static long nextLong(long n) {
|
||||
long bits, val;
|
||||
do {
|
||||
bits = random.nextLong() & (~(1L << BITS_PER_LONG));
|
||||
val = bits % n;
|
||||
} while (bits - val + (n - 1) < 0L);
|
||||
return val;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Snapshot getSnapshot() {
|
||||
final int s = size();
|
||||
final List<Long> copy = new ArrayList<>(s);
|
||||
for (int i = 0; i < s; i++) {
|
||||
copy.add(values.get(i));
|
||||
}
|
||||
return new Snapshot(copy);
|
||||
}
|
||||
}
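
A sketch (not part of this patch) of Vitter's Algorithm R as implemented above: the first reservoirSize values fill the array, and the c-th value thereafter replaces a random slot with probability reservoirSize / c, which leaves every value seen so far with an equal chance of being in the sample. The UniformSampleExample wrapper is scaffolding.

public class UniformSampleExample {
  public static void main(String[] args) {
    UniformSample sample = new UniformSample(1028);
    for (long i = 0; i < 100_000; i++) {
      sample.update(i);   // after the first 1028 updates, value i survives with probability 1028 / (i + 1)
    }
    Snapshot snap = sample.getSnapshot();
    System.out.println(snap.size());  // 1028
  }
}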
|
|
@ -0,0 +1,46 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- Test schema file for UnifiedHighlighter -->
|
||||
|
||||
<schema name="unifiedhighlight" version="1.0">
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<!-- basic text field: no offsets! -->
|
||||
<fieldType name="text" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- text field with offsets -->
|
||||
<fieldType name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="text" type="text_offsets" indexed="true" stored="true"/>
|
||||
<field name="text2" type="text" indexed="true" stored="true"/>
|
||||
<field name="text3" type="text_offsets" indexed="true" stored="true"/>
|
||||
|
||||
<defaultSearchField>text</defaultSearchField>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
</schema>
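
For context (not part of this test schema): the "text" and "text3" fields use the text_offsets type, which stores offsets with positions in the postings so the UnifiedHighlighter can highlight without re-analyzing the stored value, while "text2" uses the plain "text" type and forces the highlighter to fall back to analysis at query time. Below is a hedged SolrJ sketch of a highlighting request against this schema; "hl" and "hl.fl" are standard Solr highlighting parameters, and the HighlightQueryExample wrapper is scaffolding.

import org.apache.solr.client.solrj.SolrQuery;

public class HighlightQueryExample {
  public static SolrQuery buildQuery() {
    SolrQuery q = new SolrQuery("text:foo");
    q.set("hl", "true");
    q.set("hl.fl", "text");   // offsets are stored for "text", so no re-analysis is needed
    return q;
  }
}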
|
|
@ -47,6 +47,21 @@
|
|||
<str name="knn.minTf">1</str>
|
||||
<str name="knn.minDf">1</str>
|
||||
<str name="knn.k">5</str>
|
||||
<str name="knn.filterQuery">cat:(class1 OR class2)</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory"/>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="classification-unsupported-filterQuery">
|
||||
<processor class="solr.ClassificationUpdateProcessorFactory">
|
||||
<str name="inputFields">title,content,author</str>
|
||||
<str name="classField">cat</str>
|
||||
<!-- Knn algorithm specific -->
|
||||
<str name="algorithm">knn</str>
|
||||
<str name="knn.minTf">1</str>
|
||||
<str name="knn.minDf">1</str>
|
||||
<str name="knn.k">5</str>
|
||||
<str name="knn.filterQuery">not valid ( lucene query</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory"/>
|
||||
</updateRequestProcessorChain>
|
||||
|
|
|
@ -0,0 +1,527 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
|
||||
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
|
||||
import org.apache.solr.client.solrj.response.FacetField;
|
||||
import org.apache.solr.client.solrj.response.Group;
|
||||
import org.apache.solr.client.solrj.response.GroupCommand;
|
||||
import org.apache.solr.client.solrj.response.GroupResponse;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.RuleChain;
|
||||
import org.junit.rules.TestRule;
|
||||
|
||||
import static org.apache.solr.client.solrj.request.schema.SchemaRequest.*;
|
||||
|
||||
public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
||||
|
||||
@Rule
|
||||
public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule());
|
||||
|
||||
static final String COLLECTION = "dv_coll";
|
||||
|
||||
|
||||
static List<FieldProps> fieldsToTestSingle = null;
|
||||
static List<FieldProps> fieldsToTestMulti = null;
|
||||
static List<FieldProps> fieldsToTestGroupSortFirst = null;
|
||||
static List<FieldProps> fieldsToTestGroupSortLast = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void createCluster() throws Exception {
|
||||
System.setProperty("managed.schema.mutable", "true");
|
||||
configureCluster(2)
|
||||
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-managed").resolve("conf"))
|
||||
.configure();
|
||||
|
||||
// Need enough shards that we have some shards that don't have any docs on them.
|
||||
CollectionAdminRequest.createCollection(COLLECTION, "conf1", 4, 1)
|
||||
.setMaxShardsPerNode(2)
|
||||
.process(cluster.getSolrClient());
|
||||
|
||||
fieldsToTestSingle =
|
||||
Collections.unmodifiableList(Stream.of(
|
||||
new FieldProps("intField", "int", 1),
|
||||
new FieldProps("longField", "long", 1),
|
||||
new FieldProps("doubleField", "double", 1),
|
||||
new FieldProps("floatField", "float", 1),
|
||||
new FieldProps("dateField", "date", 1),
|
||||
new FieldProps("stringField", "string", 1),
|
||||
new FieldProps("boolField", "boolean", 1)
|
||||
).collect(Collectors.toList()));
|
||||
|
||||
fieldsToTestMulti =
|
||||
Collections.unmodifiableList(Stream.of(
|
||||
new FieldProps("intFieldMulti", "int", 5),
|
||||
new FieldProps("longFieldMulti", "long", 5),
|
||||
new FieldProps("doubleFieldMulti", "double", 5),
|
||||
new FieldProps("floatFieldMulti", "float", 5),
|
||||
new FieldProps("dateFieldMulti", "date", 5),
|
||||
new FieldProps("stringFieldMulti", "string", 5),
|
||||
new FieldProps("boolFieldMulti", "boolean", 2)
|
||||
).collect(Collectors.toList()));
|
||||
|
||||
// Fields to test for grouping and sorting with sortMissingFirst/sortMissingLast.
|
||||
fieldsToTestGroupSortFirst =
|
||||
Collections.unmodifiableList(Stream.of(
|
||||
new FieldProps("intGSF", "int"),
|
||||
new FieldProps("longGSF", "long"),
|
||||
new FieldProps("doubleGSF", "double"),
|
||||
new FieldProps("floatGSF", "float"),
|
||||
new FieldProps("dateGSF", "date"),
|
||||
new FieldProps("stringGSF", "string"),
|
||||
new FieldProps("boolGSF", "boolean")
|
||||
).collect(Collectors.toList()));
|
||||
|
||||
fieldsToTestGroupSortLast =
|
||||
Collections.unmodifiableList(Stream.of(
|
||||
new FieldProps("intGSL", "int"),
|
||||
new FieldProps("longGSL", "long"),
|
||||
new FieldProps("doubleGSL", "double"),
|
||||
new FieldProps("floatGSL", "float"),
|
||||
new FieldProps("dateGSL", "date"),
|
||||
new FieldProps("stringGSL", "string"),
|
||||
new FieldProps("boolGSL", "boolean")
|
||||
).collect(Collectors.toList()));
|
||||
|
||||
List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
|
||||
fieldsToTestMulti.size() + fieldsToTestGroupSortFirst.size() + fieldsToTestGroupSortLast.size() +
|
||||
4);
|
||||
|
||||
updateList.add(getType("name", "float", "class", "solr.TrieFloatField", "precisionStep", "0"));
|
||||
|
||||
updateList.add(getType("name", "double", "class", "solr.TrieDoubleField", "precisionStep", "0"));
|
||||
|
||||
updateList.add(getType("name", "date", "class", "solr.TrieDateField", "precisionStep", "0"));
|
||||
|
||||
updateList.add(getType("name", "boolean", "class", "solr.BoolField"));
|
||||
|
||||
|
||||
// Add a field for each of the types we want to the schema.
|
||||
|
||||
defineFields(updateList, fieldsToTestSingle, false);
|
||||
defineFields(updateList, fieldsToTestMulti, true);
|
||||
defineFields(updateList, fieldsToTestGroupSortFirst, false, "sortMissingFirst", "true");
|
||||
defineFields(updateList, fieldsToTestGroupSortLast, false, "sortMissingLast", "true");
|
||||
|
||||
|
||||
MultiUpdate multiUpdateRequest = new MultiUpdate(updateList);
|
||||
SchemaResponse.UpdateResponse multipleUpdatesResponse = multiUpdateRequest.process(cluster.getSolrClient(), COLLECTION);
|
||||
assertNull("Error adding fields", multipleUpdatesResponse.getResponse().get("errors"));
|
||||
|
||||
cluster.getSolrClient().setDefaultCollection(COLLECTION);
|
||||
}
|
||||
|
||||
|
||||
@Before
|
||||
public void before() throws IOException, SolrServerException {
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
client.deleteByQuery("*:*");
|
||||
client.commit();
|
||||
resetFieldBases(fieldsToTestSingle);
|
||||
resetFieldBases(fieldsToTestMulti);
|
||||
resetFieldBases(fieldsToTestGroupSortFirst);
|
||||
resetFieldBases(fieldsToTestGroupSortLast);
|
||||
}
|
||||
|
||||
private void resetFieldBases(List<FieldProps> props) {
|
||||
// The int and string fields are less of an issue, but whenever a test relies on docs being
|
||||
// indexed so that they sort in a particular order, the boolean and string fields in particular need to be
|
||||
// reset to a known state.
|
||||
for (FieldProps prop : props) {
|
||||
prop.resetBase();
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testDistribFaceting() throws IOException, SolrServerException {
|
||||
// For this test, I want to ensure that there are shards that do _not_ have a doc with any of the DV-only
|
||||
// fields, see SOLR-5260. So I'll add exactly 1 document to a 4-shard collection.
|
||||
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", "1");
|
||||
for (FieldProps prop : fieldsToTestSingle) {
|
||||
doc.addField(prop.getName(), prop.getValue(true));
|
||||
}
|
||||
|
||||
for (FieldProps prop : fieldsToTestMulti) {
|
||||
for (int idx = 0; idx < 5; ++idx) {
|
||||
doc.addField(prop.getName(), prop.getValue(true));
|
||||
}
|
||||
}
|
||||
|
||||
new UpdateRequest()
|
||||
.add(doc)
|
||||
.commit(client, COLLECTION);
|
||||
|
||||
final SolrQuery solrQuery = new SolrQuery("q", "*:*", "rows", "0");
|
||||
solrQuery.setFacet(true);
|
||||
for (FieldProps prop : fieldsToTestSingle) {
|
||||
solrQuery.addFacetField(prop.getName());
|
||||
}
|
||||
|
||||
for (FieldProps prop : fieldsToTestMulti) {
|
||||
solrQuery.addFacetField(prop.getName());
|
||||
}
|
||||
|
||||
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
|
||||
|
||||
for (FieldProps props : fieldsToTestSingle) {
|
||||
testFacet(props, rsp);
|
||||
}
|
||||
|
||||
for (FieldProps props : fieldsToTestMulti) {
|
||||
testFacet(props, rsp);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// We should be able to sort things with missing first/last that are _NOT_ present at all on one server.
|
||||
@Test
|
||||
public void testGroupingSorting() throws IOException, SolrServerException {
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
|
||||
// The point of these is to have at least one shard w/o the value.
|
||||
// While getting values for each of these fields starts _out_ random, each successive
|
||||
// _value_ increases.
|
||||
List<SolrInputDocument> docs = new ArrayList<>(3);
|
||||
docs.add(makeGSDoc(2, fieldsToTestGroupSortFirst, fieldsToTestGroupSortLast));
|
||||
docs.add(makeGSDoc(1, fieldsToTestGroupSortFirst, fieldsToTestGroupSortLast));
|
||||
docs.add(makeGSDoc(3, fieldsToTestGroupSortFirst, fieldsToTestGroupSortLast));
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", 4);
|
||||
docs.add(doc);
|
||||
|
||||
new UpdateRequest()
|
||||
.add(docs)
|
||||
.commit(client, COLLECTION);
|
||||
|
||||
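// Note: boolean fields can take only two distinct values, so their expected ordering (orderBool) differs
// from the other field types; the secondary sort on id in checkSortOrder breaks the resulting ties.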
checkSortOrder(client, fieldsToTestGroupSortFirst, "asc", new String[]{"4", "2", "1", "3"}, new String[]{"4", "1", "2", "3"});
|
||||
checkSortOrder(client, fieldsToTestGroupSortFirst, "desc", new String[]{"3", "1", "2", "4"}, new String[]{"2", "3", "1", "4"});
|
||||
|
||||
checkSortOrder(client, fieldsToTestGroupSortLast, "asc", new String[]{"4", "2", "1", "3"}, new String[]{"4", "1", "2", "3"});
|
||||
checkSortOrder(client, fieldsToTestGroupSortLast, "desc", new String[]{"3", "1", "2", "4"}, new String[]{"2", "3", "1", "4"});
|
||||
|
||||
}
|
||||
|
||||
private void checkSortOrder(CloudSolrClient client, List<FieldProps> props, String sortDir, String[] order, String[] orderBool) throws IOException, SolrServerException {
|
||||
for (FieldProps prop : props) {
|
||||
final SolrQuery solrQuery = new SolrQuery("q", "*:*", "rows", "100");
|
||||
solrQuery.setSort(prop.getName(), "asc".equals(sortDir) ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);
|
||||
solrQuery.addSort("id", SolrQuery.ORDER.asc);
|
||||
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
|
||||
SolrDocumentList res = rsp.getResults();
|
||||
assertEquals("Should have exactly " + order.length + " documents returned", order.length, res.getNumFound());
|
||||
String expected;
|
||||
for (int idx = 0; idx < res.size(); ++idx) {
|
||||
if (prop.getName().startsWith("bool")) expected = orderBool[idx];
|
||||
else expected = order[idx];
|
||||
assertEquals("Documents in wrong order for field: " + prop.getName(),
|
||||
expected, res.get(idx).get("id"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupingDocAbsent() throws IOException, SolrServerException {
|
||||
List<SolrInputDocument> docs = new ArrayList<>(3);
|
||||
docs.add(makeGSDoc(2, fieldsToTestGroupSortFirst, null));
|
||||
docs.add(makeGSDoc(1, fieldsToTestGroupSortFirst, null));
|
||||
docs.add(makeGSDoc(3, fieldsToTestGroupSortFirst, null));
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", 4);
|
||||
docs.add(doc);
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
|
||||
new UpdateRequest()
|
||||
.add(docs)
|
||||
.commit(client, COLLECTION);
|
||||
|
||||
// when grouping on any of these DV-only (not indexed) fields we expect exactly 4 groups except for Boolean.
|
||||
for (FieldProps prop : fieldsToTestGroupSortFirst) {
|
||||
// Special handling until SOLR-9802 is fixed
|
||||
if (prop.getName().startsWith("date")) continue;
|
||||
// SOLR-9802 to here
|
||||
|
||||
final SolrQuery solrQuery = new SolrQuery("q", "*:*",
|
||||
"group", "true",
|
||||
"group.field", prop.getName());
|
||||
|
||||
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
|
||||
|
||||
GroupResponse groupResponse = rsp.getGroupResponse();
|
||||
List<GroupCommand> commands = groupResponse.getValues();
|
||||
GroupCommand fieldCommand = commands.get(0);
|
||||
int expected = 4;
|
||||
if (prop.getName().startsWith("bool")) expected = 3; //true, false and null
|
||||
|
||||
List<Group> fieldCommandGroups = fieldCommand.getValues();
|
||||
assertEquals("Did not find the expected number of groups!", expected, fieldCommandGroups.size());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
// Verify that we actually form groups that are "expected". Most of the processing takes some care to
|
||||
// make sure all the values for each field are unique. We need to have docs that have values that are _not_
|
||||
// unique.
|
||||
public void testGroupingDVOnly() throws IOException, SolrServerException {
|
||||
List<SolrInputDocument> docs = new ArrayList<>(50);
|
||||
for (int idx = 0; idx < 49; ++idx) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", idx);
|
||||
boolean doInc = ((idx % 7) == 0);
|
||||
for (FieldProps prop : fieldsToTestGroupSortFirst) {
|
||||
doc.addField(prop.getName(), prop.getValue(doInc));
|
||||
}
|
||||
docs.add(doc);
|
||||
if ((idx % 5) == 0) {
|
||||
doc = new SolrInputDocument();
|
||||
doc.addField("id", idx + 100);
|
||||
docs.add(doc);
|
||||
}
|
||||
}
|
||||
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
|
||||
new UpdateRequest()
|
||||
.add(docs)
|
||||
.commit(client, COLLECTION);
|
||||
|
||||
// OK, we should have one group with 10 entries for null and 7 groups with 7 entries each (the boolean fields form 2 groups instead)
|
||||
for (FieldProps prop : fieldsToTestGroupSortFirst) {
|
||||
// Special handling until SOLR-9802 is fixed
|
||||
if (prop.getName().startsWith("date")) continue;
|
||||
// SOLR-9802 to here
|
||||
|
||||
final SolrQuery solrQuery = new SolrQuery(
|
||||
"q", "*:*",
|
||||
"rows", "100",
|
||||
"group", "true",
|
||||
"group.field", prop.getName(),
|
||||
"group.limit", "100");
|
||||
|
||||
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
|
||||
|
||||
GroupResponse groupResponse = rsp.getGroupResponse();
|
||||
List<GroupCommand> commands = groupResponse.getValues();
|
||||
|
||||
|
||||
int nullCount = 0;
|
||||
int sevenCount = 0;
|
||||
int boolCount = 0;
|
||||
for (int idx = 0; idx < commands.size(); ++idx) {
|
||||
GroupCommand fieldCommand = commands.get(idx);
|
||||
for (Group grp : fieldCommand.getValues()) {
|
||||
switch (grp.getResult().size()) {
|
||||
case 7:
|
||||
++sevenCount;
|
||||
assertNotNull("Every group with 7 entries should have a group value.", grp.getGroupValue());
|
||||
break;
|
||||
case 10:
|
||||
++nullCount;
|
||||
assertNull("This should be the null group", grp.getGroupValue());
|
||||
break;
|
||||
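// Boolean fields collapse the 49 valued docs into just two groups (roughly 25 true / 24 false),
// which is why group sizes of 24 and 25 are only expected for the "boolGSF" field.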
case 25:
|
||||
case 24:
|
||||
++boolCount;
|
||||
assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName());
|
||||
break;
|
||||
|
||||
default:
|
||||
fail("Unexpected number of elements in the group for " + prop.getName() + ": " + grp.getResult().size());
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals("Should be exactly one group with 1 entry of 10 for null for field " + prop.getName(), 1, nullCount);
|
||||
if (prop.getName().startsWith("bool")) {
|
||||
assertEquals("Should be exactly 2 groups with non-null Boolean types " + prop.getName(), 2, boolCount);
|
||||
assertEquals("Should be no seven count groups for Boolean types " + prop.getName(), 0, sevenCount);
|
||||
} else {
|
||||
assertEquals("Should be exactly 7 groups with seven entries for field " + prop.getName(), 7, sevenCount);
|
||||
assertEquals("Should be no gropus with 24 or 25 entries for field " + prop.getName(), 0, boolCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SolrInputDocument makeGSDoc(int id, List<FieldProps> p1, List<FieldProps> p2, String... args) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", id);
|
||||
for (FieldProps prop : p1) {
|
||||
doc.addField(prop.getName(), prop.getValue(true));
|
||||
}
|
||||
|
||||
if (p2 != null) {
|
||||
for (FieldProps prop : p2) {
|
||||
doc.addField(prop.getName(), prop.getValue(true));
|
||||
}
|
||||
}
|
||||
|
||||
for (int idx = 0; idx < args.length; idx += 2) {
|
||||
doc.addField(args[idx], args[idx + 1]);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
private static void defineFields(List<Update> updateList, List<FieldProps> props, boolean multi, String... extras) {
|
||||
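// Every field is defined with indexed=false and docValues=true; DV-only fields are the whole point of this test class.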
for (FieldProps prop : props) {
|
||||
Map<String, Object> fieldAttributes = new LinkedHashMap<>();
|
||||
fieldAttributes.put("name", prop.getName());
|
||||
fieldAttributes.put("type", prop.getType());
|
||||
fieldAttributes.put("indexed", "false");
|
||||
fieldAttributes.put("multiValued", multi ? "true" : "false");
|
||||
fieldAttributes.put("docValues", "true");
|
||||
updateList.add(new AddField(fieldAttributes));
|
||||
}
|
||||
}
|
||||
|
||||
private static AddFieldType getType(String... args) {
|
||||
|
||||
FieldTypeDefinition ftd = new FieldTypeDefinition();
|
||||
Map<String, Object> ftas = new LinkedHashMap<>();
|
||||
for (int idx = 0; idx < args.length; idx += 2) {
|
||||
ftas.put(args[idx], args[idx + 1]);
|
||||
}
|
||||
ftd.setAttributes(ftas);
|
||||
|
||||
return new SchemaRequest.AddFieldType(ftd);
|
||||
}
|
||||
|
||||
|
||||
private void testFacet(FieldProps props, QueryResponse rsp) {
|
||||
String name = props.getName();
|
||||
final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
|
||||
long expectedCount = props.getExpectedCount();
|
||||
long foundCount = getCount(counts);
|
||||
assertEquals("Field " + name + " should have a count of " + expectedCount, expectedCount, foundCount);
|
||||
|
||||
}
|
||||
|
||||
private long getCount(final List<FacetField.Count> counts) {
|
||||
return counts.stream().mapToLong(FacetField.Count::getCount).sum();
|
||||
}
|
||||
}
|
||||
|
||||
class FieldProps {
|
||||
|
||||
private final String name;
|
||||
private final String type;
|
||||
private final int expectedCount;
|
||||
private Object base;
|
||||
private int counter = 0;
|
||||
|
||||
static SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
|
||||
|
||||
FieldProps(String name, String type, int expectedCount) {
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
this.expectedCount = expectedCount;
|
||||
resetBase();
|
||||
}
|
||||
void resetBase() {
|
||||
if (name.startsWith("int")) {
|
||||
base = Math.abs(DocValuesNotIndexedTest.random().nextInt());
|
||||
} else if (name.startsWith("long")) {
|
||||
base = Math.abs(DocValuesNotIndexedTest.random().nextLong());
|
||||
} else if (name.startsWith("float")) {
|
||||
base = Math.abs(DocValuesNotIndexedTest.random().nextFloat());
|
||||
} else if (name.startsWith("double")) {
|
||||
base = Math.abs(DocValuesNotIndexedTest.random().nextDouble());
|
||||
} else if (name.startsWith("date")) {
|
||||
base = Math.abs(DocValuesNotIndexedTest.random().nextLong());
|
||||
} else if (name.startsWith("bool")) {
|
||||
base = true; // Must start with a known value since bools only have two values.
|
||||
} else if (name.startsWith("string")) {
|
||||
base = "base_string_" + DocValuesNotIndexedTest.random().nextInt(1_000_000) + "_";
|
||||
} else {
|
||||
throw new RuntimeException("Should have found a prefix for the field before now!");
|
||||
}
|
||||
}
|
||||
|
||||
FieldProps(String name, String type) {
|
||||
this(name, type, -1);
|
||||
}
|
||||
|
||||
String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
int getExpectedCount() {
|
||||
return expectedCount;
|
||||
}
|
||||
|
||||
public String getValue(boolean incrementCounter) {
|
||||
if (incrementCounter) {
|
||||
counter += DocValuesNotIndexedTest.random().nextInt(10) + 100;
|
||||
}
|
||||
if (name.startsWith("int")) {
|
||||
return Integer.toString((int) base + counter);
|
||||
}
|
||||
if (name.startsWith("long")) {
|
||||
return Long.toString((long) base + counter);
|
||||
}
|
||||
if (name.startsWith("float")) {
|
||||
return Float.toString((float) base + counter);
|
||||
}
|
||||
if (name.startsWith("double")) {
|
||||
return Double.toString((double) base + counter);
|
||||
}
|
||||
if (name.startsWith("date")) {
|
||||
return format.format(985_847_645 + (long) base + counter);
|
||||
}
|
||||
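// Booleans ignore the counter and simply alternate on every call, so successive docs get true, false, true, ...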
if (name.startsWith("bool")) {
|
||||
String ret = Boolean.toString((boolean) base);
|
||||
base = !((boolean) base);
|
||||
return ret;
|
||||
}
|
||||
if (name.startsWith("string")) {
|
||||
return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
|
||||
}
|
||||
throw new RuntimeException("Should have found a prefix for the field before now!");
|
||||
}
|
||||
}
|
||||
|
|
@@ -34,6 +34,8 @@ import java.util.concurrent.TimeUnit;
|
|||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import com.codahale.metrics.Snapshot;
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||
|
@@ -52,9 +54,6 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory;
|
|||
import org.apache.solr.update.UpdateShardHandler;
|
||||
import org.apache.solr.update.UpdateShardHandlerConfig;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.solr.util.stats.Snapshot;
|
||||
import org.apache.solr.util.stats.Timer;
|
||||
import org.apache.solr.util.stats.TimerContext;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.KeeperException.NoNodeException;
|
||||
|
@@ -1027,7 +1026,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
|
|||
q.offer(Utils.toJSON(m));
|
||||
|
||||
Timer t = new Timer();
|
||||
TimerContext context = t.time();
|
||||
Timer.Context context = t.time();
|
||||
try {
|
||||
overseerClient = electNewOverseer(server.getZkAddress());
|
||||
assertTrue(overseers.size() > 0);
|
||||
|
@@ -1072,16 +1071,19 @@
|
|||
|
||||
private void printTimingStats(Timer timer) {
|
||||
Snapshot snapshot = timer.getSnapshot();
|
||||
log.info("\t totalTime: {}", timer.getSum());
|
||||
log.info("\t avgRequestsPerMinute: {}", timer.getMeanRate());
|
||||
log.info("\t 5minRateRequestsPerMinute: {}", timer.getFiveMinuteRate());
|
||||
log.info("\t 15minRateRequestsPerMinute: {}", timer.getFifteenMinuteRate());
|
||||
log.info("\t avgTimePerRequest: {}", timer.getMean());
|
||||
log.info("\t medianRequestTime: {}", snapshot.getMedian());
|
||||
log.info("\t 75thPctlRequestTime: {}", snapshot.get75thPercentile());
|
||||
log.info("\t 95thPctlRequestTime: {}", snapshot.get95thPercentile());
|
||||
log.info("\t 99thPctlRequestTime: {}", snapshot.get99thPercentile());
|
||||
log.info("\t 999thPctlRequestTime: {}", snapshot.get999thPercentile());
|
||||
log.info("\t avgRequestsPerSecond: {}", timer.getMeanRate());
|
||||
log.info("\t 5minRateRequestsPerSecond: {}", timer.getFiveMinuteRate());
|
||||
log.info("\t 15minRateRequestsPerSecond: {}", timer.getFifteenMinuteRate());
|
||||
log.info("\t avgTimePerRequest: {}", nsToMs(snapshot.getMean()));
|
||||
log.info("\t medianRequestTime: {}", nsToMs(snapshot.getMedian()));
|
||||
log.info("\t 75thPcRequestTime: {}", nsToMs(snapshot.get75thPercentile()));
|
||||
log.info("\t 95thPcRequestTime: {}", nsToMs(snapshot.get95thPercentile()));
|
||||
log.info("\t 99thPcRequestTime: {}", nsToMs(snapshot.get99thPercentile()));
|
||||
log.info("\t 999thPcRequestTime: {}", nsToMs(snapshot.get999thPercentile()));
|
||||
}
|
||||
|
||||
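// The metrics Snapshot reports times in nanoseconds; convert to milliseconds for readable log output.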
private static long nsToMs(double ns) {
|
||||
return TimeUnit.MILLISECONDS.convert((long) ns, TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
private void close(MockZKController mockController) {
|
||||
|
|
|
@@ -108,8 +108,8 @@ public class RequestHandlersTest extends SolrTestCaseJ4 {
|
|||
NamedList updateStats = updateHandler.getStatistics();
|
||||
NamedList termStats = termHandler.getStatistics();
|
||||
|
||||
Double updateTime = (Double) updateStats.get("totalTime");
|
||||
Double termTime = (Double) termStats.get("totalTime");
|
||||
Double updateTime = (Double) updateStats.get("avgTimePerRequest");
|
||||
Double termTime = (Double) termStats.get("avgTimePerRequest");
|
||||
|
||||
assertFalse("RequestHandlers should not share statistics!", updateTime.equals(termTime));
|
||||
}
|
||||
|
|
|
@@ -25,6 +25,7 @@ import java.util.Map;
|
|||
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
|
@@ -283,6 +284,7 @@ public class CoreAdminHandlerTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testDeleteInstanceDirAfterCreateFailure() throws Exception {
|
||||
assumeFalse("Ignore test on windows because it does not delete data directory immediately after unload", Constants.WINDOWS);
|
||||
File solrHomeDirectory = new File(initCoreDataDir, getClass().getName() + "-corex-"
|
||||
+ System.nanoTime());
|
||||
solrHomeDirectory.mkdirs();
|
||||
|
|
|
@@ -70,7 +70,12 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
|
|||
args.put("hl", "true");
|
||||
args.put("hl.fl", "tv_text");
|
||||
args.put("hl.snippets", "2");
|
||||
args.put("hl.useFastVectorHighlighter", "true");
|
||||
args.put("hl.tag.pre", "<fvpre>"); //... and let post default to </em>. This is just a test.
|
||||
if (random().nextBoolean()) {
|
||||
args.put("hl.useFastVectorHighlighter", "true"); // old way
|
||||
} else {
|
||||
args.put("hl.method", "fastVector"); // the new way
|
||||
}
|
||||
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
|
||||
"standard",0,200,args);
|
||||
|
||||
|
@ -81,7 +86,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
|
|||
assertQ("Basic summarization",
|
||||
sumLRF.makeRequest("tv_text:vector"),
|
||||
"//lst[@name='highlighting']/lst[@name='1']",
|
||||
"//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <em>vector</em> highlighter test']"
|
||||
"//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <fvpre>vector</em> highlighter test']"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -43,10 +43,6 @@ import org.junit.After;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests some basic functionality of Solr while demonstrating good
|
||||
* Best Practices for using AbstractSolrTestCase
|
||||
*/
|
||||
public class HighlighterTest extends SolrTestCaseJ4 {
|
||||
|
||||
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
|
||||
|
@ -90,6 +86,25 @@ public class HighlighterTest extends SolrTestCaseJ4 {
|
|||
assertTrue(regex instanceof RegexFragmenter);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMethodPostings() {
|
||||
String field = "t_text";
|
||||
assertU(adoc(field, LONG_TEXT,
|
||||
"id", "1"));
|
||||
assertU(commit());
|
||||
|
||||
try {
|
||||
assertQ("Tried PostingsSolrHighlighter but failed due to offsets not in postings",
|
||||
req("q", "long", "hl.method", "postings", "df", field, "hl", "true"));
|
||||
fail("Did not encounter exception for no offsets");
|
||||
} catch (Exception e) {
|
||||
assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
|
||||
assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
|
||||
}
|
||||
// note: the default schema.xml has no offsets in postings to test the PostingsHighlighter. Leave that for another
|
||||
// test class.
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeContiguous() throws Exception {
|
||||
HashMap<String,String> args = new HashMap<>();
|
||||
|
@ -99,6 +114,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
|
|||
args.put(HighlightParams.SNIPPETS, String.valueOf(4));
|
||||
args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
|
||||
args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
|
||||
args.put(HighlightParams.METHOD, "original"); // test works; no complaints
|
||||
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
|
||||
"standard", 0, 200, args);
|
||||
String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
|
||||
|
@ -763,7 +779,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
|
|||
);
|
||||
|
||||
// Prove fallback highlighting works also with FVH
|
||||
args.put("hl.useFastVectorHighlighter", "true");
|
||||
args.put("hl.method", "fastVector");
|
||||
args.put("hl.tag.pre", "<fvhpre>");
|
||||
args.put("hl.tag.post", "</fvhpost>");
|
||||
args.put("f.t_text.hl.maxAlternateFieldLength", "18");
|
||||
|
|
|
@@ -52,7 +52,7 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
|
|||
|
||||
public void testSimple() {
|
||||
assertQ("simplest test",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true"),
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.method", "postings"), // test hl.method is happy too
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
|
||||
|
|
|
@@ -0,0 +1,229 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.highlight;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/** Tests for the UnifiedHighlighter Solr plugin **/
|
||||
public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml", "schema-unifiedhighlight.xml");
|
||||
|
||||
// test our config is sane, just to be sure:
|
||||
|
||||
// 'text' and 'text3' should have offsets, 'text2' should not
|
||||
IndexSchema schema = h.getCore().getLatestSchema();
|
||||
assertTrue(schema.getField("text").storeOffsetsWithPositions());
|
||||
assertTrue(schema.getField("text3").storeOffsetsWithPositions());
|
||||
assertFalse(schema.getField("text2").storeOffsetsWithPositions());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
clearIndex();
|
||||
assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
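// Hides SolrTestCaseJ4.req(String...) so every request built in this class implicitly selects hl.method=unified.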
public static SolrQueryRequest req(String... params) {
|
||||
return SolrTestCaseJ4.req(params, "hl.method", "unified");
|
||||
}
|
||||
|
||||
public void testSimple() {
|
||||
assertQ("simplest test",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
|
||||
}
|
||||
|
||||
public void testImpossibleOffsetSource() {
|
||||
try {
|
||||
assertQ("impossible offset source",
|
||||
req("q", "text2:document", "hl.offsetSource", "postings", "hl.fl", "text2", "sort", "id asc", "hl", "true"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
|
||||
fail("Did not encounter exception for no offsets");
|
||||
} catch (Exception e) {
|
||||
assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
|
||||
assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultipleSnippetsReturned() {
|
||||
clearIndex();
|
||||
assertU(adoc("text", "Document snippet one. Intermediate sentence. Document snippet two.",
|
||||
"text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(commit());
|
||||
assertQ("multiple snippets test",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
|
||||
}
|
||||
|
||||
public void testStrictPhrasesEnabledByDefault() {
|
||||
clearIndex();
|
||||
assertU(adoc("text", "Strict phrases should be enabled for phrases",
|
||||
"text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(commit());
|
||||
assertQ("strict phrase handling",
|
||||
req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be enabled for phrases'");
|
||||
}
|
||||
|
||||
public void testStrictPhrasesCanBeDisabled() {
|
||||
clearIndex();
|
||||
assertU(adoc("text", "Strict phrases should be disabled for phrases",
|
||||
"text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(commit());
|
||||
assertQ("strict phrase handling",
|
||||
req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true", "hl.usePhraseHighlighter", "false"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be disabled for <em>phrases</em>'");
|
||||
}
|
||||
|
||||
public void testMultiTermQueryEnabledByDefault() {
|
||||
clearIndex();
|
||||
assertU(adoc("text", "Aviary Avenue document",
|
||||
"text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(commit());
|
||||
assertQ("multi term query handling",
|
||||
req("q", "text:av*", "sort", "id asc", "hl", "true"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Aviary</em> <em>Avenue</em> document'");
|
||||
}
|
||||
|
||||
public void testMultiTermQueryCanBeDisabled() {
|
||||
clearIndex();
|
||||
assertU(adoc("text", "Aviary Avenue document",
|
||||
"text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(commit());
|
||||
assertQ("multi term query handling",
|
||||
req("q", "text:av*", "sort", "id asc", "hl", "true", "hl.highlightMultiTerm", "false"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=0");
|
||||
}
|
||||
|
||||
public void testPagination() {
|
||||
assertQ("pagination test",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "rows", "1", "start", "1"),
|
||||
"count(//lst[@name='highlighting']/*)=1",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
|
||||
}
|
||||
|
||||
public void testEmptySnippet() {
|
||||
assertQ("null snippet test",
|
||||
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
|
||||
"count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
|
||||
}
|
||||
|
||||
public void testDefaultSummary() {
|
||||
assertQ("null snippet test",
|
||||
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true", "hl.defaultSummary", "true"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'");
|
||||
}
|
||||
|
||||
public void testDifferentField() {
|
||||
assertQ("highlighting text3",
|
||||
req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
|
||||
}
|
||||
|
||||
public void testTwoFields() {
|
||||
assertQ("highlighting text and text3",
|
||||
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
|
||||
}
|
||||
|
||||
public void testTags() {
|
||||
assertQ("different pre/post tags",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.tag.pre", "[", "hl.tag.post", "]"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
|
||||
}
|
||||
|
||||
public void testUsingSimplePrePostTags() {
|
||||
assertQ("different pre/post tags",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.simple.pre", "[", "hl.simple.post", "]"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
|
||||
}
|
||||
|
||||
public void testUsingSimplePrePostTagsPerField() {
|
||||
assertQ("different pre/post tags",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "f.text.hl.simple.pre", "[", "f.text.hl.simple.post", "]"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
|
||||
}
|
||||
|
||||
public void testTagsPerField() {
|
||||
assertQ("highlighting text and text3",
|
||||
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3", "f.text3.hl.tag.pre", "[", "f.text3.hl.tag.post", "]"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy [document]'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
|
||||
}
|
||||
|
||||
public void testBreakIterator() {
|
||||
assertQ("different breakiterator",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'");
|
||||
}
|
||||
|
||||
public void testBreakIterator2() {
|
||||
assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
|
||||
assertU(commit());
|
||||
assertQ("different breakiterator",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
|
||||
}
|
||||
|
||||
public void testEncoder() {
|
||||
assertU(adoc("text", "Document one has a first <i>sentence</i>.", "id", "103"));
|
||||
assertU(commit());
|
||||
assertQ("html escaped",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first <i>sentence</i>.'");
|
||||
}
|
||||
|
||||
}
|
|
@@ -147,13 +147,8 @@ public class SmileWriterTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void test10Docs() throws IOException {
|
||||
SolrDocumentList l = new SolrDocumentList();
|
||||
for(int i=0;i<10; i++){
|
||||
l.add(sampleDoc(random(), i));
|
||||
}
|
||||
|
||||
SolrQueryResponse response = new SolrQueryResponse();
|
||||
response.getValues().add("results", l);
|
||||
SolrDocumentList l = constructSolrDocList(response);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
new SmileResponseWriter().write(baos, new LocalSolrQueryRequest(null, new ModifiableSolrParams()), response);
|
||||
|
||||
|
@ -171,6 +166,16 @@ public class SmileWriterTest extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
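// Builds a 10-document SolrDocumentList, registers it on the response under "results", and returns it
// so tests can compare against what comes back from serialization.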
public static SolrDocumentList constructSolrDocList(SolrQueryResponse response) {
|
||||
SolrDocumentList l = new SolrDocumentList();
|
||||
for(int i=0;i<10; i++){
|
||||
l.add(sampleDoc(random(), i));
|
||||
}
|
||||
|
||||
response.getValues().add("results", l);
|
||||
return l;
|
||||
}
|
||||
|
||||
public static SolrDocument sampleDoc(Random r, int bufnum) {
|
||||
SolrDocument sdoc = new SolrDocument();
|
||||
sdoc.put("id", "my_id_" + bufnum);
|
||||
|
|
|
@@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.response;
|
||||
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.io.Tuple;
|
||||
import org.apache.solr.client.solrj.io.comp.StreamComparator;
|
||||
import org.apache.solr.client.solrj.io.stream.JavabinTupleStreamParser;
|
||||
import org.apache.solr.client.solrj.io.stream.StreamContext;
|
||||
import org.apache.solr.client.solrj.io.stream.TupleStream;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.util.JavaBinCodec;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
|
||||
import static org.apache.solr.response.SmileWriterTest.constructSolrDocList;
|
||||
|
||||
public class TestJavabinTupleStreamParser extends SolrTestCaseJ4 {
|
||||
|
||||
public void testKnown() throws IOException {
|
||||
String payload = "{\n" +
|
||||
" \"responseHeader\":{\n" +
|
||||
" \"zkConnected\":true,\n" +
|
||||
" \"status\":0,\n" +
|
||||
" \"QTime\":46},\n" +
|
||||
" \"response\":{\n" +
|
||||
" \"numFound\":2,\n" +
|
||||
" \"start\":0,\n" +
|
||||
" \"docs\":[\n" +
|
||||
" {\n" +
|
||||
" \"id\":\"2\",\n" +
|
||||
" \"a_s\":\"hello2\",\n" +
|
||||
" \"a_i\":2,\n" +
|
||||
" \"a_f\":0.0},\n" +
|
||||
" {\n" +
|
||||
" \"id\":\"3\",\n" +
|
||||
" \"a_s\":\"hello3\",\n" +
|
||||
" \"a_i\":3,\n" +
|
||||
" \"a_f\":3.0}]}}";
|
||||
SimpleOrderedMap nl = convert2OrderedMap((Map) Utils.fromJSONString(payload));
|
||||
|
||||
byte[] bytes = serialize(nl);
|
||||
|
||||
JavabinTupleStreamParser parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), true);
|
||||
Map<String, Object> map = parser.next();
|
||||
assertEquals("2", map.get("id"));
|
||||
map = parser.next();
|
||||
assertEquals("3", map.get("id"));
|
||||
System.out.println();
|
||||
map = parser.next();
|
||||
assertNull(map);
|
||||
|
||||
}
|
||||
|
||||
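// Recursively converts nested Maps to SimpleOrderedMaps and Lists to Iterators so that JavaBinCodec
// writes the documents in a form JavabinTupleStreamParser can read back as a stream of tuples.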
public SimpleOrderedMap convert2OrderedMap(Map m) {
|
||||
SimpleOrderedMap result = new SimpleOrderedMap<>();
|
||||
m.forEach((k, v) -> {
|
||||
if (v instanceof List) v = ((List) v).iterator();
|
||||
if (v instanceof Map) v = convert2OrderedMap((Map) v);
|
||||
result.add((String) k, v);
|
||||
});
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
public void testSimple() throws IOException {
|
||||
List<Map<String, Object>> l = new ArrayList<>();
|
||||
l.add(Utils.makeMap("id", 1, "f", 1.0f, "s", "Some str 1"));
|
||||
l.add(Utils.makeMap("id", 2, "f", 2.0f, "s", "Some str 2"));
|
||||
l.add(Utils.makeMap("id", 3, "f", 1.0f, "s", "Some str 3"));
|
||||
l.add(Utils.makeMap("EOF", true, "RESPONSE_TIME", 206, "sleepMillis", 1000));
|
||||
Iterator<Map<String, Object>> iterator = l.iterator();
|
||||
TupleStream tupleStream = new TupleStream() {
|
||||
@Override
|
||||
public void setStreamContext(StreamContext context) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<TupleStream> children() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tuple read() throws IOException {
|
||||
if (iterator.hasNext()) return new Tuple(iterator.next());
|
||||
else return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StreamComparator getStreamSort() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation toExplanation(StreamFactory factory) throws IOException {
|
||||
return new StreamExplanation(getStreamNodeId().toString())
|
||||
.withFunctionName("Dummy")
|
||||
.withImplementingClass(this.getClass().getName())
|
||||
.withExpressionType(Explanation.ExpressionType.STREAM_SOURCE)
|
||||
.withExpression("--non-expressible--");
|
||||
}
|
||||
};
|
||||
|
||||
byte[] bytes = serialize(tupleStream);
|
||||
JavabinTupleStreamParser parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), true);
|
||||
Map m = parser.next();
|
||||
assertEquals(1L, m.get("id"));
|
||||
assertEquals(1.0, (Double) m.get("f"), 0.01);
|
||||
m = parser.next();
|
||||
assertEquals(2L, m.get("id"));
|
||||
assertEquals(2.0, (Double) m.get("f"), 0.01);
|
||||
m = parser.next();
|
||||
assertEquals(3L, m.get("id"));
|
||||
assertEquals(1.0, (Double) m.get("f"), 0.01);
|
||||
m = parser.next();
|
||||
assertEquals(Boolean.TRUE, m.get("EOF"));
|
||||
|
||||
parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), false);
|
||||
m = parser.next();
|
||||
assertEquals(1, m.get("id"));
|
||||
assertEquals(1.0, (Float) m.get("f"), 0.01);
|
||||
m = parser.next();
|
||||
assertEquals(2, m.get("id"));
|
||||
assertEquals(2.0, (Float) m.get("f"), 0.01);
|
||||
m = parser.next();
|
||||
assertEquals(3, m.get("id"));
|
||||
assertEquals(1.0, (Float) m.get("f"), 0.01);
|
||||
m = parser.next();
|
||||
assertEquals(Boolean.TRUE, m.get("EOF"));
|
||||
}
|
||||
|
||||
public void testSolrDocumentList() throws IOException {
|
||||
SolrQueryResponse response = new SolrQueryResponse();
|
||||
SolrDocumentList l = constructSolrDocList(response);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
new JavaBinCodec().marshal(response.getValues(), baos);
|
||||
byte[] bytes = serialize(response.getValues());
|
||||
Object o = new JavaBinCodec().unmarshal(new ByteArrayInputStream(bytes));
|
||||
List list = new ArrayList<>();
|
||||
Map m = null;
|
||||
JavabinTupleStreamParser parser = new JavabinTupleStreamParser(new ByteArrayInputStream(bytes), false);
|
||||
while ((m = parser.next()) != null) {
|
||||
list.add(m);
|
||||
}
|
||||
assertEquals(l.size(), list.size());
|
||||
for(int i =0;i<list.size();i++){
|
||||
compareSolrDocument(l.get(i),new SolrDocument((Map<String, Object>) list.get(i)));
|
||||
}
|
||||
|
||||
}
|
||||
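// Wraps the object under a "results" key in a fresh SolrQueryResponse and marshals the whole response with JavaBinCodec.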
public static byte[] serialize(Object o) throws IOException {
|
||||
SolrQueryResponse response = new SolrQueryResponse();
|
||||
response.getValues().add("results", o);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
new JavaBinCodec().marshal(response.getValues(), baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
|
@@ -1453,11 +1453,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
|
|||
|
||||
@Override
|
||||
protected Query getFieldQuery(String field,
|
||||
String val, boolean quoted) throws SyntaxError {
|
||||
String val, boolean quoted, boolean raw) throws SyntaxError {
|
||||
if(frequentlyMisspelledWords.contains(val)) {
|
||||
return getFuzzyQuery(field, val, 0.75F);
|
||||
}
|
||||
return super.getFieldQuery(field, val, quoted);
|
||||
return super.getFieldQuery(field, val, quoted, raw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -16,11 +16,20 @@
|
|||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.parser.QueryParser;
|
||||
import org.apache.solr.query.FilterQuery;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@@ -37,9 +46,9 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
public static void createIndex() {
|
||||
String v;
|
||||
v = "how now brown cow";
|
||||
assertU(adoc("id", "1", "text", v, "text_np", v));
|
||||
assertU(adoc("id", "1", "text", v, "text_np", v, "foo_i","11"));
|
||||
v = "now cow";
|
||||
assertU(adoc("id", "2", "text", v, "text_np", v));
|
||||
assertU(adoc("id", "2", "text", v, "text_np", v, "foo_i","12"));
|
||||
assertU(adoc("id", "3", "foo_s", "a ' \" \\ {! ) } ( { z")); // A value filled with special chars
|
||||
|
||||
assertU(adoc("id", "10", "qqq_s", "X"));
|
||||
|
@ -184,6 +193,92 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
req.close();
|
||||
}
|
||||
|
||||
|
||||
// automatically use TermsQuery when appropriate
|
||||
@Test
|
||||
public void testAutoTerms() throws Exception {
|
||||
SolrQueryRequest req = req();
|
||||
QParser qParser;
|
||||
Query q,qq;
|
||||
|
||||
// relevance query should not be a filter
|
||||
qParser = QParser.getParser("foo_s:(a b c)", req);
|
||||
q = qParser.getQuery();
|
||||
assertEquals(3, ((BooleanQuery)q).clauses().size());
|
||||
|
||||
// small filter query should still use BooleanQuery
|
||||
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
|
||||
qParser = QParser.getParser("foo_s:(a b c)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(3, ((BooleanQuery) q).clauses().size());
|
||||
}
|
||||
|
||||
// large relevancy query should use BooleanQuery
|
||||
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
|
||||
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
||||
q = qParser.getQuery();
|
||||
assertEquals(26, ((BooleanQuery)q).clauses().size());
|
||||
|
||||
// large filter query should use TermsQuery
|
||||
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(26, ((TermsQuery)q).getTermData().size());
|
||||
|
||||
// large numeric filter query should use TermsQuery (for trie fields)
|
||||
qParser = QParser.getParser("foo_i:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(20, ((TermsQuery)q).getTermData().size());
|
||||
|
||||
// a filter() clause inside a relevancy query should be able to use a TermsQuery
|
||||
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
|
||||
q = qParser.getQuery();
|
||||
assertEquals(2, ((BooleanQuery)q).clauses().size());
|
||||
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
|
||||
if (qq instanceof TermQuery) {
|
||||
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
|
||||
}
|
||||
|
||||
if (qq instanceof FilterQuery) {
|
||||
qq = ((FilterQuery)qq).getQuery();
|
||||
}
|
||||
|
||||
assertEquals(26, ((TermsQuery)qq).getTermData().size());
|
||||
|
||||
// test mixed boolean query, including quotes (which shouldn't matter)
|
||||
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(4, ((BooleanQuery)q).clauses().size());
|
||||
qq = null;
|
||||
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
|
||||
qq = clause.getQuery();
|
||||
if (qq instanceof TermsQuery) break;
|
||||
}
|
||||
assertEquals(26, ((TermsQuery)qq).getTermData().size());
|
||||
|
||||
req.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testManyClauses() throws Exception {
|
||||
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
|
||||
StringBuilder sb = new StringBuilder("id:(");
|
||||
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
|
||||
sb.append('z').append(i).append(' ');
|
||||
}
|
||||
sb.append(a);
|
||||
sb.append(")");
|
||||
|
||||
String q = sb.toString();
|
||||
|
||||
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
|
||||
assertJQ(req("q","*:*", "fq", q)
|
||||
,"/response/numFound==6");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComments() throws Exception {
|
||||
assertJQ(req("q", "id:1 id:2 /* *:* */ id:3")
|
||||
|
@ -317,4 +412,103 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
// parsing performance test
|
||||
// Run from command line with ant test -Dtestcase=TestSolrQueryParser -Dtestmethod=testParsingPerformance -Dtests.asserts=false 2>/dev/null | grep QPS
|
||||
@Test
|
||||
public void testParsingPerformance() throws Exception {
|
||||
String[] args = {"-queries","100" ,"-iter","1000", "-clauses","100", "-format","term%d", "-seed","0"};
|
||||
args = new String[] {"-queries","1000" ,"-iter","2000", "-clauses","10", "-format","term%d", "-seed","0"};
|
||||
// args = new String[] {"-queries","1000" ,"-iter","1000000000", "-clauses","10", "-format","term%d", "-seed","0"};
|
||||
|
||||
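// Detect whether JVM assertions are enabled: the assignment inside the assert statement only executes under -ea.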
boolean assertOn = false;
|
||||
assert assertOn = true;
|
||||
if (assertOn) {
|
||||
// System.out.println("WARNING! Assertions are enabled!!!! Will only execute small run. Change with -Dtests.asserts=false");
|
||||
args = new String[]{"-queries","10" ,"-iter","2", "-clauses","20", "-format","term%d", "-seed","0"};
|
||||
}
|
||||
|
||||
|
||||
int iter = 1000;
|
||||
int numQueries = 100;
|
||||
int maxClauses = 5;
|
||||
int maxTerm = 10000000;
|
||||
String format = "term%d";
|
||||
String field = "foo_s";
|
||||
long seed = 0;
|
||||
boolean isFilter = true;
|
||||
boolean rewrite = false;
|
||||
|
||||
String otherStuff = "";
|
||||
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String a = args[i];
|
||||
if ("-queries".equals(a)) {
|
||||
numQueries = Integer.parseInt(args[++i]);
|
||||
} else if ("-iter".equals(a)) {
|
||||
iter = Integer.parseInt(args[++i]);
|
||||
} else if ("-clauses".equals(a)) {
|
||||
maxClauses = Integer.parseInt(args[++i]);
|
||||
} else if ("-format".equals(a)) {
|
||||
format = args[++i];
|
||||
} else if ("-seed".equals(a)) {
|
||||
seed = Long.parseLong(args[++i]);
|
||||
} else {
|
||||
otherStuff = otherStuff + " " + a;
|
||||
}
|
||||
}
|
||||
|
||||
Random r = new Random(seed);
|
||||
|
||||
String[] queries = new String[numQueries];
|
||||
for (int i = 0; i < queries.length; i++) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
boolean explicitField = r.nextInt(5) == 0;
|
||||
if (!explicitField) {
|
||||
sb.append(field + ":(");
|
||||
}
|
||||
|
||||
sb.append(otherStuff).append(" ");
|
||||
|
||||
int nClauses = r.nextInt(maxClauses) + 1; // TODO: query parser can't parse () for some reason???
|
||||
|
||||
for (int c = 0; c<nClauses; c++) {
|
||||
String termString = String.format(Locale.US, format, r.nextInt(maxTerm));
|
||||
if (explicitField) {
|
||||
sb.append(field).append(':');
|
||||
}
|
||||
sb.append(termString);
|
||||
sb.append(' ');
|
||||
}
|
||||
|
||||
if (!explicitField) {
|
||||
sb.append(")");
|
||||
}
|
||||
queries[i] = sb.toString();
|
||||
// System.out.println(queries[i]);
|
||||
}
|
||||
|
||||
SolrQueryRequest req = req();
|
||||
|
||||
long start = System.nanoTime();
|
||||
|
||||
int ret = 0;
|
||||
for (int i=0; i<iter; i++) {
|
||||
for (String qStr : queries) {
|
||||
QParser parser = QParser.getParser(qStr,req);
|
||||
parser.setIsFilter(isFilter);
|
||||
Query q = parser.getQuery();
|
||||
if (rewrite) {
|
||||
// TODO: do rewrite
|
||||
}
|
||||
ret += q.getClass().hashCode(); // use the query somehow
|
||||
}
|
||||
}
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
||||
System.out.println((assertOn ? "WARNING, assertions enabled. " : "") + "ret=" + ret + " Parser QPS:" + ((long)numQueries * iter)*1000000000/(end-start));
|
||||
|
||||
req.close();
|
||||
}
|
||||
|
||||
}
|
|
@@ -14,71 +14,31 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.UpdateRequestHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.hamcrest.core.Is.is;
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
/**
|
||||
* Tests for {@link ClassificationUpdateProcessor} and {@link ClassificationUpdateProcessorFactory}
|
||||
* Tests for {@link ClassificationUpdateProcessorFactory}
|
||||
*/
|
||||
public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
||||
// field names are used in accordance with the solrconfig and schema supplied
|
||||
private static final String ID = "id";
|
||||
private static final String TITLE = "title";
|
||||
private static final String CONTENT = "content";
|
||||
private static final String AUTHOR = "author";
|
||||
private static final String CLASS = "cat";
|
||||
|
||||
private static final String CHAIN = "classification";
|
||||
|
||||
|
||||
private ClassificationUpdateProcessorFactory cFactoryToTest = new ClassificationUpdateProcessorFactory();
|
||||
private NamedList args = new NamedList<String>();
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
System.setProperty("enable.update.log", "false");
|
||||
initCore("solrconfig-classification.xml", "schema-classification.xml");
|
||||
}
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
clearIndex();
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Before
|
||||
public void initArgs() {
|
||||
args.add("inputFields", "inputField1,inputField2");
|
||||
args.add("classField", "classField1");
|
||||
args.add("predictedClassField", "classFieldX");
|
||||
args.add("algorithm", "bayes");
|
||||
args.add("knn.k", "9");
|
||||
args.add("knn.minDf", "8");
|
||||
|
@@ -86,22 +46,23 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testFullInit() {
|
||||
public void init_fullArgs_shouldInitFullClassificationParams() {
|
||||
cFactoryToTest.init(args);
|
||||
ClassificationUpdateProcessorParams classificationParams = cFactoryToTest.getClassificationParams();
|
||||
|
||||
String[] inputFieldNames = cFactoryToTest.getInputFieldNames();
|
||||
String[] inputFieldNames = classificationParams.getInputFieldNames();
|
||||
assertEquals("inputField1", inputFieldNames[0]);
|
||||
assertEquals("inputField2", inputFieldNames[1]);
|
||||
assertEquals("classField1", cFactoryToTest.getClassFieldName());
|
||||
assertEquals("bayes", cFactoryToTest.getAlgorithm());
|
||||
assertEquals(8, cFactoryToTest.getMinDf());
|
||||
assertEquals(10, cFactoryToTest.getMinTf());
|
||||
assertEquals(9, cFactoryToTest.getK());
|
||||
|
||||
assertEquals("classField1", classificationParams.getTrainingClassField());
|
||||
assertEquals("classFieldX", classificationParams.getPredictedClassField());
|
||||
assertEquals(ClassificationUpdateProcessorFactory.Algorithm.BAYES, classificationParams.getAlgorithm());
|
||||
assertEquals(8, classificationParams.getMinDf());
|
||||
assertEquals(10, classificationParams.getMinTf());
|
||||
assertEquals(9, classificationParams.getK());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInitEmptyInputField() {
|
||||
public void init_emptyInputFields_shouldThrowExceptionWithDetailedMessage() {
|
||||
args.removeAll("inputFields");
|
||||
try {
|
||||
cFactoryToTest.init(args);
|
||||
|
@ -111,7 +72,7 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testInitEmptyClassField() {
|
||||
public void init_emptyClassField_shouldThrowExceptionWithDetailedMessage() {
|
||||
args.removeAll("classField");
|
||||
try {
|
||||
cFactoryToTest.init(args);
|
||||
|
@ -121,114 +82,53 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
  }

  @Test
  public void testDefaults() {
  public void init_emptyPredictedClassField_shouldDefaultToTrainingClassField() {
    args.removeAll("predictedClassField");

    cFactoryToTest.init(args);

    ClassificationUpdateProcessorParams classificationParams = cFactoryToTest.getClassificationParams();
    assertThat(classificationParams.getPredictedClassField(), is("classField1"));
  }

  @Test
  public void init_unsupportedAlgorithm_shouldThrowExceptionWithDetailedMessage() {
    args.removeAll("algorithm");
    args.add("algorithm", "unsupported");
    try {
      cFactoryToTest.init(args);
    } catch (SolrException e) {
      assertEquals("Classification UpdateProcessor Algorithm: 'unsupported' not supported", e.getMessage());
    }
  }

  @Test
  public void init_unsupportedFilterQuery_shouldThrowExceptionWithDetailedMessage() {
    UpdateRequestProcessor mockProcessor = mock(UpdateRequestProcessor.class);
    SolrQueryRequest mockRequest = mock(SolrQueryRequest.class);
    SolrQueryResponse mockResponse = mock(SolrQueryResponse.class);
    args.add("knn.filterQuery", "not supported query");
    try {
      cFactoryToTest.init(args);
      /* parsing fails because of the mocks; that is enough to verify that the exception propagates properly */
      cFactoryToTest.getInstance(mockRequest, mockResponse, mockProcessor);
    } catch (SolrException e) {
      assertEquals("Classification UpdateProcessor Training Filter Query: 'not supported query' is not supported", e.getMessage());
    }
  }

  @Test
  public void init_emptyArgs_shouldDefaultClassificationParams() {
    args.removeAll("algorithm");
    args.removeAll("knn.k");
    args.removeAll("knn.minDf");
    args.removeAll("knn.minTf");
    cFactoryToTest.init(args);
    assertEquals("knn", cFactoryToTest.getAlgorithm());
    assertEquals(1, cFactoryToTest.getMinDf());
    assertEquals(1, cFactoryToTest.getMinTf());
    assertEquals(10, cFactoryToTest.getK());
  }
    ClassificationUpdateProcessorParams classificationParams = cFactoryToTest.getClassificationParams();

  @Test
  public void testBasicClassification() throws Exception {
    prepareTrainedIndex();
    // To be classified, we index documents without a class and verify the expected one is returned
addDoc(adoc(ID, "10",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5 ",
|
||||
AUTHOR, "Name1 Surname1"));
|
||||
addDoc(adoc(ID, "11",
|
||||
TITLE, "word1 word1",
|
||||
CONTENT, "word2 word2",
|
||||
AUTHOR, "Name Surname"));
|
||||
addDoc(commit());
|
||||
|
||||
Document doc10 = getDoc("10");
|
||||
assertEquals("class2", doc10.get(CLASS));
|
||||
Document doc11 = getDoc("11");
|
||||
assertEquals("class1", doc11.get(CLASS));
|
||||
}
|
||||
|
||||
/**
|
||||
* Index some example documents with a class manually assigned.
|
||||
* This will be our trained model.
|
||||
*
|
||||
* @throws Exception If there is a low-level I/O error
|
||||
*/
|
||||
private void prepareTrainedIndex() throws Exception {
|
||||
//class1
|
||||
addDoc(adoc(ID, "1",
|
||||
TITLE, "word1 word1 word1",
|
||||
CONTENT, "word2 word2 word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"));
|
||||
addDoc(adoc(ID, "2",
|
||||
TITLE, "word1 word1",
|
||||
CONTENT, "word2 word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"));
|
||||
addDoc(adoc(ID, "3",
|
||||
TITLE, "word1 word1 word1",
|
||||
CONTENT, "word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"));
|
||||
addDoc(adoc(ID, "4",
|
||||
TITLE, "word1 word1 word1",
|
||||
CONTENT, "word2 word2 word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"));
|
||||
//class2
|
||||
addDoc(adoc(ID, "5",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class2"));
|
||||
addDoc(adoc(ID, "6",
|
||||
TITLE, "word4 word4",
|
||||
CONTENT, "word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class2"));
|
||||
addDoc(adoc(ID, "7",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5 word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class2"));
|
||||
addDoc(adoc(ID, "8",
|
||||
TITLE, "word4",
|
||||
CONTENT, "word5 word5 word5 word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class2"));
|
||||
addDoc(commit());
|
||||
}
|
||||
|
||||
private Document getDoc(String id) throws IOException {
|
||||
try (SolrQueryRequest req = req()) {
|
||||
SolrIndexSearcher searcher = req.getSearcher();
|
||||
TermQuery query = new TermQuery(new Term(ID, id));
|
||||
TopDocs doc1 = searcher.search(query, 1);
|
||||
ScoreDoc scoreDoc = doc1.scoreDocs[0];
|
||||
return searcher.doc(scoreDoc.doc);
|
||||
}
|
||||
}
|
||||
|
||||
static void addDoc(String doc) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<>();
|
||||
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[]{CHAIN});
|
||||
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
|
||||
(SolrParams) mmparams) {
|
||||
};
|
||||
|
||||
UpdateRequestHandler handler = new UpdateRequestHandler();
|
||||
handler.init(null);
|
||||
ArrayList<ContentStream> streams = new ArrayList<>(2);
|
||||
streams.add(new ContentStreamBase.StringStream(doc));
|
||||
req.setContentStreams(streams);
|
||||
handler.handleRequestBody(req, new SolrQueryResponse());
|
||||
req.close();
|
||||
assertEquals(ClassificationUpdateProcessorFactory.Algorithm.KNN, classificationParams.getAlgorithm());
|
||||
assertEquals(1, classificationParams.getMinDf());
|
||||
assertEquals(1, classificationParams.getMinTf());
|
||||
assertEquals(10, classificationParams.getK());
|
||||
}
|
||||
}
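Note on the refactoring visible above: the per-option getters on the factory (getAlgorithm(), getMinDf(), getK(), ...) give way to a single ClassificationUpdateProcessorParams object. A minimal sketch of the resulting call pattern, using only the factory API exercised by this test; the argument values below are illustrative, not taken from this commit:

    NamedList<String> args = new NamedList<>();
    args.add("inputFields", "title,content");   // comma-separated list of input fields
    args.add("classField", "cat");              // field carrying the human-assigned class
    args.add("algorithm", "knn");               // or "bayes"

    ClassificationUpdateProcessorFactory factory = new ClassificationUpdateProcessorFactory();
    factory.init(args);                         // parses the args into the params object
    ClassificationUpdateProcessorParams params = factory.getClassificationParams();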
@@ -0,0 +1,192 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.update.processor;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.hamcrest.core.Is.is;

/**
 * Tests for {@link ClassificationUpdateProcessor} and {@link ClassificationUpdateProcessorFactory}
 */
public class ClassificationUpdateProcessorIntegrationTest extends SolrTestCaseJ4 {
  /* field names are used in accordance with the solrconfig and schema supplied */
  private static final String ID = "id";
  private static final String TITLE = "title";
  private static final String CONTENT = "content";
  private static final String AUTHOR = "author";
  private static final String CLASS = "cat";

  private static final String CHAIN = "classification";
  private static final String BROKEN_CHAIN_FILTER_QUERY = "classification-unsupported-filterQuery";

  private ClassificationUpdateProcessorFactory cFactoryToTest = new ClassificationUpdateProcessorFactory();
  private NamedList args = new NamedList<String>();

  @BeforeClass
  public static void beforeClass() throws Exception {
    System.setProperty("enable.update.log", "false");
    initCore("solrconfig-classification.xml", "schema-classification.xml");
  }

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    clearIndex();
    assertU(commit());
  }

  @Test
  public void classify_fullConfiguration_shouldAutoClassify() throws Exception {
    indexTrainingSet();
    // To be classified, we index documents without a class and verify the expected one is returned
addDoc(adoc(ID, "22",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5 ",
|
||||
AUTHOR, "Name1 Surname1"), CHAIN);
|
||||
addDoc(adoc(ID, "21",
|
||||
TITLE, "word1 word1",
|
||||
CONTENT, "word2 word2",
|
||||
AUTHOR, "Name Surname"), CHAIN);
|
||||
addDoc(commit());
|
||||
|
||||
Document doc22 = getDoc("22");
|
||||
assertThat(doc22.get(CLASS),is("class2"));
|
||||
Document doc21 = getDoc("21");
|
||||
assertThat(doc21.get(CLASS),is("class1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void classify_unsupportedFilterQueryConfiguration_shouldThrowExceptionWithDetailedMessage() throws Exception {
|
||||
indexTrainingSet();
|
||||
try {
|
||||
addDoc(adoc(ID, "21",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5 ",
|
||||
AUTHOR, "Name1 Surname1"), BROKEN_CHAIN_FILTER_QUERY);
|
||||
addDoc(adoc(ID, "22",
|
||||
TITLE, "word1 word1",
|
||||
CONTENT, "word2 word2",
|
||||
AUTHOR, "Name Surname"), BROKEN_CHAIN_FILTER_QUERY);
|
||||
addDoc(commit());
|
||||
} catch (SolrException e) {
|
||||
assertEquals("Classification UpdateProcessor Training Filter Query: 'not valid ( lucene query' is not supported", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Index some example documents with a class manually assigned.
|
||||
* This will be our trained model.
|
||||
*
|
||||
* @throws Exception If there is a low-level I/O error
|
||||
*/
|
||||
private void indexTrainingSet() throws Exception {
|
||||
//class1
|
||||
addDoc(adoc(ID, "1",
|
||||
TITLE, "word1 word1 word1",
|
||||
CONTENT, "word2 word2 word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"), CHAIN);
|
||||
addDoc(adoc(ID, "2",
|
||||
TITLE, "word1 word1",
|
||||
CONTENT, "word2 word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"), CHAIN);
|
||||
addDoc(adoc(ID, "3",
|
||||
TITLE, "word1 word1 word1",
|
||||
CONTENT, "word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"), CHAIN);
|
||||
addDoc(adoc(ID, "4",
|
||||
TITLE, "word1 word1 word1",
|
||||
CONTENT, "word2 word2 word2",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class1"), CHAIN);
|
||||
//class2
|
||||
addDoc(adoc(ID, "5",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class2"), CHAIN);
|
||||
addDoc(adoc(ID, "6",
|
||||
TITLE, "word4 word4",
|
||||
CONTENT, "word5",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class2"), CHAIN);
|
||||
addDoc(adoc(ID, "7",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5 word5",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class2"), CHAIN);
|
||||
addDoc(adoc(ID, "8",
|
||||
TITLE, "word4",
|
||||
CONTENT, "word5 word5 word5 word5",
|
||||
AUTHOR, "Name Surname",
|
||||
CLASS, "class2"), CHAIN);
|
||||
//class3
|
||||
addDoc(adoc(ID, "9",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class3"), CHAIN);
|
||||
addDoc(adoc(ID, "10",
|
||||
TITLE, "word4 word4",
|
||||
CONTENT, "word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class3"), CHAIN);
|
||||
addDoc(adoc(ID, "11",
|
||||
TITLE, "word4 word4 word4",
|
||||
CONTENT, "word5 word5 word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class3"), CHAIN);
|
||||
addDoc(adoc(ID, "12",
|
||||
TITLE, "word4",
|
||||
CONTENT, "word5 word5 word5 word5",
|
||||
AUTHOR, "Name1 Surname1",
|
||||
CLASS, "class3"), CHAIN);
|
||||
addDoc(commit());
|
||||
}
|
||||
|
||||
private Document getDoc(String id) throws IOException {
|
||||
try (SolrQueryRequest req = req()) {
|
||||
SolrIndexSearcher searcher = req.getSearcher();
|
||||
TermQuery query = new TermQuery(new Term(ID, id));
|
||||
TopDocs doc1 = searcher.search(query, 1);
|
||||
ScoreDoc scoreDoc = doc1.scoreDocs[0];
|
||||
return searcher.doc(scoreDoc.doc);
|
||||
}
|
||||
}
|
||||
|
||||
private void addDoc(String doc) throws Exception {
|
||||
addDoc(doc, CHAIN);
|
||||
}
|
||||
}
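The two-argument addDoc(doc, chain) used throughout this test is assumed to come from the shared test infrastructure (SolrTestCaseJ4) rather than being defined here; the one-argument overload above simply defaults the chain. A rough sketch of what such a helper does, modelled on the static helper removed from ClassificationUpdateProcessorFactoryTest earlier in this diff (the actual base-class implementation may differ):

    static void addDoc(String doc, String chain) throws Exception {
      // route the update through the named update request processor chain
      Map<String, String[]> params = new HashMap<>();
      params.put(UpdateParams.UPDATE_CHAIN, new String[]{chain});
      SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), new MultiMapSolrParams(params)) {};
      UpdateRequestHandler handler = new UpdateRequestHandler();
      handler.init(null);
      ArrayList<ContentStream> streams = new ArrayList<>(1);
      streams.add(new ContentStreamBase.StringStream(doc));
      req.setContentStreams(streams);
      handler.handleRequestBody(req, new SolrQueryResponse());
      req.close();
    }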
@@ -0,0 +1,506 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.update.processor;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.update.AddUpdateCommand;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.hamcrest.core.Is.is;
import static org.mockito.Mockito.mock;

/**
 * Tests for {@link ClassificationUpdateProcessor}
 */
public class ClassificationUpdateProcessorTest extends SolrTestCaseJ4 {
  /* field names are used in accordance with the solrconfig and schema supplied */
  private static final String ID = "id";
  private static final String TITLE = "title";
  private static final String CONTENT = "content";
  private static final String AUTHOR = "author";
  private static final String TRAINING_CLASS = "cat";
  private static final String PREDICTED_CLASS = "predicted";
  public static final String KNN = "knn";

  protected Directory directory;
  protected IndexReader reader;
  protected IndexSearcher searcher;
  protected Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  private ClassificationUpdateProcessor updateProcessorToTest;

  @BeforeClass
  public static void beforeClass() throws Exception {
    System.setProperty("enable.update.log", "false");
    initCore("solrconfig-classification.xml", "schema-classification.xml");
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    reader.close();
    directory.close();
    analyzer.close();
    super.tearDown();
  }

  @Test
  public void classificationMonoClass_predictedClassFieldSet_shouldAssignClassInPredictedClassField() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMonoClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params = initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    params.setPredictedClassField(PREDICTED_CLASS);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    assertThat(unseenDocument1.getFieldValue(PREDICTED_CLASS),is("class1"));
  }

  @Test
  public void knnMonoClass_sampleParams_shouldAssignCorrectClass() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMonoClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params = initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class1"));
  }

  @Test
  public void knnMonoClass_boostFields_shouldAssignCorrectClass() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMonoClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params = initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    params.setInputFieldNames(new String[]{TITLE + "^1.5", CONTENT + "^0.5", AUTHOR + "^2.5"});

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());

    updateProcessorToTest.processAdd(update);

    assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class2"));
  }

  @Test
  public void bayesMonoClass_sampleParams_shouldAssignCorrectClass() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMonoClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class1"));
  }

  @Test
  public void knnMonoClass_contextQueryFiltered_shouldAssignCorrectClass() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMonoClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "a");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    Query class3DocsChunk=new TermQuery(new Term(TITLE,"word6"));
    params.setTrainingFilterQuery(class3DocsChunk);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class3"));
  }

  @Test
  public void bayesMonoClass_boostFields_shouldAssignCorrectClass() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMonoClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
    params.setInputFieldNames(new String[]{TITLE+"^1.5",CONTENT+"^0.5",AUTHOR+"^2.5"});

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());

    updateProcessorToTest.processAdd(update);

    assertThat(unseenDocument1.getFieldValue(TRAINING_CLASS),is("class2"));
  }

  @Test
  public void knnClassification_maxOutputClassesGreaterThanAvailable_shouldAssignCorrectClass() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMultiClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    params.setMaxPredictedClasses(100);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
    assertThat(assignedClasses.get(0),is("class2"));
    assertThat(assignedClasses.get(1),is("class1"));
  }

  @Test
  public void knnMultiClass_maxOutputClasses2_shouldAssignMax2Classes() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMultiClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    params.setMaxPredictedClasses(2);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
    assertThat(assignedClasses.size(),is(2));
    assertThat(assignedClasses.get(0),is("class2"));
    assertThat(assignedClasses.get(1),is("class1"));
  }

  @Test
  public void bayesMultiClass_maxOutputClasses2_shouldAssignMax2Classes() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMultiClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
    params.setMaxPredictedClasses(2);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());
    updateProcessorToTest.processAdd(update);

    ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
    assertThat(assignedClasses.size(),is(2));
    assertThat(assignedClasses.get(0),is("class2"));
    assertThat(assignedClasses.get(1),is("class1"));
  }

  @Test
  public void knnMultiClass_boostFieldsMaxOutputClasses2_shouldAssignMax2Classes() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMultiClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.KNN);
    params.setInputFieldNames(new String[]{TITLE+"^1.5",CONTENT+"^0.5",AUTHOR+"^2.5"});
    params.setMaxPredictedClasses(2);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());

    updateProcessorToTest.processAdd(update);

    ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
    assertThat(assignedClasses.size(),is(2));
    assertThat(assignedClasses.get(0),is("class4"));
    assertThat(assignedClasses.get(1),is("class6"));
  }

  @Test
  public void bayesMultiClass_boostFieldsMaxOutputClasses2_shouldAssignMax2Classes() throws Exception {
    UpdateRequestProcessor mockProcessor=mock(UpdateRequestProcessor.class);
    prepareTrainedIndexMultiClass();

    AddUpdateCommand update=new AddUpdateCommand(req());
    SolrInputDocument unseenDocument1 = sdoc(ID, "10",
        TITLE, "word4 word4 word4",
        CONTENT, "word2 word2 ",
        AUTHOR, "unseenAuthor");
    update.solrDoc=unseenDocument1;

    ClassificationUpdateProcessorParams params= initParams(ClassificationUpdateProcessorFactory.Algorithm.BAYES);
    params.setInputFieldNames(new String[]{TITLE+"^1.5",CONTENT+"^0.5",AUTHOR+"^2.5"});
    params.setMaxPredictedClasses(2);

    updateProcessorToTest=new ClassificationUpdateProcessor(params,mockProcessor,reader,req().getSchema());

    updateProcessorToTest.processAdd(update);

    ArrayList<Object> assignedClasses = (ArrayList)unseenDocument1.getFieldValues(TRAINING_CLASS);
    assertThat(assignedClasses.size(),is(2));
    assertThat(assignedClasses.get(0),is("class4"));
    assertThat(assignedClasses.get(1),is("class6"));
  }

  private ClassificationUpdateProcessorParams initParams(ClassificationUpdateProcessorFactory.Algorithm classificationAlgorithm) {
    ClassificationUpdateProcessorParams params= new ClassificationUpdateProcessorParams();
    params.setInputFieldNames(new String[]{TITLE,CONTENT,AUTHOR});
    params.setTrainingClassField(TRAINING_CLASS);
    params.setPredictedClassField(TRAINING_CLASS);
    params.setMinTf(1);
    params.setMinDf(1);
    params.setK(5);
    params.setAlgorithm(classificationAlgorithm);
    params.setMaxPredictedClasses(1);
    return params;
  }

  /**
   * Index some example documents with a class manually assigned.
   * This will be our trained model.
   *
   * @throws Exception If there is a low-level I/O error
   */
  private void prepareTrainedIndexMonoClass() throws Exception {
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    //class1
    addDoc(writer, buildLuceneDocument(ID, "1",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 word2",
        AUTHOR, "a",
        TRAINING_CLASS, "class1"));
    addDoc(writer, buildLuceneDocument(ID, "2",
        TITLE, "word1 word1",
        CONTENT, "word2 word2",
        AUTHOR, "a",
        TRAINING_CLASS, "class1"));
    addDoc(writer, buildLuceneDocument(ID, "3",
        TITLE, "word1 word1 word1",
        CONTENT, "word2",
        AUTHOR, "a",
        TRAINING_CLASS, "class1"));
    addDoc(writer, buildLuceneDocument(ID, "4",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 word2",
        AUTHOR, "a",
        TRAINING_CLASS, "class1"));
    //class2
    addDoc(writer, buildLuceneDocument(ID, "5",
        TITLE, "word4 word4 word4",
        CONTENT, "word5 word5",
        AUTHOR, "c",
        TRAINING_CLASS, "class2"));
    addDoc(writer, buildLuceneDocument(ID, "6",
        TITLE, "word4 word4",
        CONTENT, "word5",
        AUTHOR, "c",
        TRAINING_CLASS, "class2"));
    addDoc(writer, buildLuceneDocument(ID, "7",
        TITLE, "word4 word4 word4",
        CONTENT, "word5 word5 word5",
        AUTHOR, "c",
        TRAINING_CLASS, "class2"));
    addDoc(writer, buildLuceneDocument(ID, "8",
        TITLE, "word4",
        CONTENT, "word5 word5 word5 word5",
        AUTHOR, "c",
        TRAINING_CLASS, "class2"));
    //class3
    addDoc(writer, buildLuceneDocument(ID, "9",
        TITLE, "word6",
        CONTENT, "word7",
        AUTHOR, "a",
        TRAINING_CLASS, "class3"));
    addDoc(writer, buildLuceneDocument(ID, "10",
        TITLE, "word6",
        CONTENT, "word7",
        AUTHOR, "a",
        TRAINING_CLASS, "class3"));
    addDoc(writer, buildLuceneDocument(ID, "11",
        TITLE, "word6",
        CONTENT, "word7",
        AUTHOR, "a",
        TRAINING_CLASS, "class3"));
    addDoc(writer, buildLuceneDocument(ID, "12",
        TITLE, "word6",
        CONTENT, "word7",
        AUTHOR, "a",
        TRAINING_CLASS, "class3"));

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }

  private void prepareTrainedIndexMultiClass() throws Exception {
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    //class1
    addDoc(writer, buildLuceneDocument(ID, "1",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 word2",
        AUTHOR, "Name Surname",
        TRAINING_CLASS, "class1",
        TRAINING_CLASS, "class2"
    ));
    addDoc(writer, buildLuceneDocument(ID, "2",
        TITLE, "word1 word1",
        CONTENT, "word2 word2",
        AUTHOR, "Name Surname",
        TRAINING_CLASS, "class3",
        TRAINING_CLASS, "class2"
    ));
    addDoc(writer, buildLuceneDocument(ID, "3",
        TITLE, "word1 word1 word1",
        CONTENT, "word2",
        AUTHOR, "Name Surname",
        TRAINING_CLASS, "class1",
        TRAINING_CLASS, "class2"
    ));
    addDoc(writer, buildLuceneDocument(ID, "4",
        TITLE, "word1 word1 word1",
        CONTENT, "word2 word2 word2",
        AUTHOR, "Name Surname",
        TRAINING_CLASS, "class1",
        TRAINING_CLASS, "class2"
    ));
    //class2
    addDoc(writer, buildLuceneDocument(ID, "5",
        TITLE, "word4 word4 word4",
        CONTENT, "word5 word5",
        AUTHOR, "Name1 Surname1",
        TRAINING_CLASS, "class6",
        TRAINING_CLASS, "class4"
    ));
    addDoc(writer, buildLuceneDocument(ID, "6",
        TITLE, "word4 word4",
        CONTENT, "word5",
        AUTHOR, "Name1 Surname1",
        TRAINING_CLASS, "class5",
        TRAINING_CLASS, "class4"
    ));
    addDoc(writer, buildLuceneDocument(ID, "7",
        TITLE, "word4 word4 word4",
        CONTENT, "word5 word5 word5",
        AUTHOR, "Name1 Surname1",
        TRAINING_CLASS, "class6",
        TRAINING_CLASS, "class4"
    ));
    addDoc(writer, buildLuceneDocument(ID, "8",
        TITLE, "word4",
        CONTENT, "word5 word5 word5 word5",
        AUTHOR, "Name1 Surname1",
        TRAINING_CLASS, "class6",
        TRAINING_CLASS, "class4"
    ));

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }

  public static Document buildLuceneDocument(Object... fieldsAndValues) {
    Document luceneDoc = new Document();
    for (int i=0; i<fieldsAndValues.length; i+=2) {
      luceneDoc.add(newTextField((String)fieldsAndValues[i], (String)fieldsAndValues[i+1], Field.Store.YES));
    }
    return luceneDoc;
  }

  private int addDoc(RandomIndexWriter writer, Document doc) throws IOException {
    writer.addDocument(doc);
    return writer.numDocs() - 1;
  }
}
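A note for readers unfamiliar with the "field^boost" convention exercised by the *_boostFields_* tests above: each input field name may carry an optional per-field boost after a caret. A hypothetical helper (not part of this commit) illustrating how such a token splits into a field name and a boost factor:

    // "title^1.5" -> field "title", boost 1.5f; a bare name implies boost 1.0f
    static String fieldNameOf(String token) {
      int caret = token.indexOf('^');
      return caret < 0 ? token : token.substring(0, caret);
    }

    static float boostOf(String token) {
      int caret = token.indexOf('^');
      return caret < 0 ? 1.0f : Float.parseFloat(token.substring(caret + 1));
    }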