mirror of https://github.com/apache/lucene.git
improve DV faceting tests, support missing count for single valued string fields, remove required/default restriction
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515520 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
98522b2262
commit
d208878c11
|
@ -45,12 +45,13 @@ public class BytesRefFieldSource extends FieldCacheSource {
|
||||||
// To be sorted or not to be sorted, that is the question
|
// To be sorted or not to be sorted, that is the question
|
||||||
// TODO: do it cleaner?
|
// TODO: do it cleaner?
|
||||||
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
|
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
|
||||||
|
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
|
||||||
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
|
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
|
||||||
return new FunctionValues() {
|
return new FunctionValues() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean exists(int doc) {
|
public boolean exists(int doc) {
|
||||||
return true; // doc values are dense
|
return docsWithField.get(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -650,6 +650,43 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
|
||||||
ireader.close();
|
ireader.close();
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSortedMergeAwayAllValues() throws IOException {
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||||
|
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new StringField("id", "1", Field.Store.NO));
|
||||||
|
doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
|
||||||
|
iwriter.addDocument(doc);
|
||||||
|
iwriter.commit();
|
||||||
|
iwriter.deleteDocuments(new Term("id", "1"));
|
||||||
|
iwriter.forceMerge(1);
|
||||||
|
|
||||||
|
DirectoryReader ireader = iwriter.getReader();
|
||||||
|
iwriter.close();
|
||||||
|
|
||||||
|
SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
|
||||||
|
if (codecSupportsDocsWithField("field")) {
|
||||||
|
assertEquals(-1, dv.getOrd(0));
|
||||||
|
assertEquals(0, dv.getValueCount());
|
||||||
|
} else {
|
||||||
|
assertEquals(0, dv.getOrd(0));
|
||||||
|
assertEquals(1, dv.getValueCount());
|
||||||
|
BytesRef ref = new BytesRef();
|
||||||
|
dv.lookupOrd(0, ref);
|
||||||
|
assertEquals(new BytesRef(), ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
ireader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testBytesWithNewline() throws IOException {
|
public void testBytesWithNewline() throws IOException {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
|
@ -218,12 +218,7 @@ public class DocValuesFacets {
|
||||||
static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException {
|
static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException {
|
||||||
if (missing) {
|
if (missing) {
|
||||||
if (missingCount < 0) {
|
if (missingCount < 0) {
|
||||||
if (schemaField.multiValued()) {
|
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
|
||||||
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
|
|
||||||
} else {
|
|
||||||
// nocommit: support missing count (ord = -1) for single-valued here.
|
|
||||||
missingCount = 0; // single-valued dv is implicitly 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
res.add(null, missingCount);
|
res.add(null, missingCount);
|
||||||
}
|
}
|
||||||
|
@ -232,12 +227,12 @@ public class DocValuesFacets {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
|
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
|
||||||
// specialized since the single-valued case is simpler: you don't have to deal with missing count, etc
|
// specialized since the single-valued case is different
|
||||||
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||||
int doc;
|
int doc;
|
||||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
int term = si.getOrd(doc);
|
int term = si.getOrd(doc);
|
||||||
if (map != null) {
|
if (map != null && term >= 0) {
|
||||||
term = (int) map.getGlobalOrd(subIndex, term);
|
term = (int) map.getGlobalOrd(subIndex, term);
|
||||||
}
|
}
|
||||||
int arrIdx = term-startTermIndex;
|
int arrIdx = term-startTermIndex;
|
||||||
|
|
|
@ -255,7 +255,7 @@ final class NumericFacets {
|
||||||
|
|
||||||
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
|
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
|
||||||
if (!sf.indexed()) {
|
if (!sf.indexed()) {
|
||||||
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on a field which is not indexed");
|
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
|
||||||
}
|
}
|
||||||
// Add zeros until there are limit results
|
// Add zeros until there are limit results
|
||||||
final Set<String> alreadySeen = new HashSet<String>();
|
final Set<String> alreadySeen = new HashSet<String>();
|
||||||
|
|
|
@ -80,9 +80,6 @@ public class StrField extends PrimitiveFieldType {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkSchemaField(SchemaField field) {
|
public void checkSchemaField(SchemaField field) {
|
||||||
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
|
|
||||||
throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -696,9 +696,6 @@ public class TrieField extends PrimitiveFieldType {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkSchemaField(final SchemaField field) {
|
public void checkSchemaField(final SchemaField field) {
|
||||||
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
|
|
||||||
throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,33 +0,0 @@
|
||||||
<?xml version="1.0" ?>
|
|
||||||
<!--
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
|
||||||
this work for additional information regarding copyright ownership.
|
|
||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
(the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
-->
|
|
||||||
|
|
||||||
<schema name="bad-schema-docValues-not-required-no-default" version="1.0">
|
|
||||||
<types>
|
|
||||||
<fieldType name="string" class="solr.StrField" />
|
|
||||||
</types>
|
|
||||||
|
|
||||||
|
|
||||||
<fields>
|
|
||||||
<!-- docValues must be required or have a default value -->
|
|
||||||
<field name="id" type="string" docValues="true" multiValued="false"/>
|
|
||||||
</fields>
|
|
||||||
|
|
||||||
<defaultSearchField>id</defaultSearchField>
|
|
||||||
<uniqueKey>id</uniqueKey>
|
|
||||||
|
|
||||||
</schema>
|
|
|
@ -26,17 +26,16 @@
|
||||||
<fields>
|
<fields>
|
||||||
<field name="id" type="string" indexed="true" stored="true" docValues="false" multiValued="false" required="true"/>
|
<field name="id" type="string" indexed="true" stored="true" docValues="false" multiValued="false" required="true"/>
|
||||||
<field name="id_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="false" required="true"/>
|
<field name="id_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="false" required="true"/>
|
||||||
<!-- TODO: improve this test so we don't have to make all these DV types multivalued (for missing values) -->
|
|
||||||
<dynamicField name="*_i" type="int" indexed="true" stored="false" docValues="false"/>
|
<dynamicField name="*_i" type="int" indexed="true" stored="false" docValues="false"/>
|
||||||
<dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
<dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true"/>
|
||||||
<dynamicField name="*_is" type="int" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
<dynamicField name="*_is" type="int" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
||||||
<dynamicField name="*_is_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
<dynamicField name="*_is_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||||
<dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
<dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false"/>
|
||||||
<dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
<dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true"/>
|
||||||
<dynamicField name="*_ss" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
<dynamicField name="*_ss" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
||||||
<dynamicField name="*_ss_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
<dynamicField name="*_ss_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||||
<dynamicField name="*_f" type="float" indexed="true" stored="false" docValues="false"/>
|
<dynamicField name="*_f" type="float" indexed="true" stored="false" docValues="false"/>
|
||||||
<dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
<dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true"/>
|
||||||
</fields>
|
</fields>
|
||||||
|
|
||||||
<defaultSearchField>id</defaultSearchField>
|
<defaultSearchField>id</defaultSearchField>
|
||||||
|
|
|
@ -39,7 +39,7 @@ import org.junit.Test;
|
||||||
* to the indexed facet results as if it were just another faceting method.
|
* to the indexed facet results as if it were just another faceting method.
|
||||||
*/
|
*/
|
||||||
@Slow
|
@Slow
|
||||||
@SuppressCodecs({"Lucene40", "Lucene41"})
|
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"})
|
||||||
public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
|
@ -162,6 +162,8 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
SchemaField sf = req.getSchema().getField(ftype.fname);
|
SchemaField sf = req.getSchema().getField(ftype.fname);
|
||||||
boolean multiValued = sf.getType().multiValuedFieldCache();
|
boolean multiValued = sf.getType().multiValuedFieldCache();
|
||||||
|
boolean indexed = sf.indexed();
|
||||||
|
boolean numeric = sf.getType().getNumericType() != null;
|
||||||
|
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
if (rand.nextInt(100) < 20) {
|
if (rand.nextInt(100) < 20) {
|
||||||
|
@ -179,8 +181,21 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
||||||
params.add("facet.limit", Integer.toString(limit));
|
params.add("facet.limit", Integer.toString(limit));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rand.nextBoolean()) {
|
// the following two situations cannot work for unindexed single-valued numerics:
|
||||||
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
|
// (currently none of the dv fields in this test config)
|
||||||
|
// facet.sort = index
|
||||||
|
// facet.minCount = 0
|
||||||
|
if (!numeric || sf.multiValued()) {
|
||||||
|
if (rand.nextBoolean()) {
|
||||||
|
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rand.nextInt(100) < 10) {
|
||||||
|
params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
params.add("facet.sort", "count");
|
||||||
|
params.add("facet.mincount", Integer.toString(1+rand.nextInt(5)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) {
|
if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) {
|
||||||
|
@ -192,10 +207,6 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
||||||
params.add("facet.prefix", prefix);
|
params.add("facet.prefix", prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rand.nextInt(100) < 10) {
|
|
||||||
params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rand.nextInt(100) < 20) {
|
if (rand.nextInt(100) < 20) {
|
||||||
params.add("facet.missing", "true");
|
params.add("facet.missing", "true");
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,10 +93,6 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase {
|
||||||
doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support");
|
doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDocValuesNotRequiredNoDefault() throws Exception {
|
|
||||||
doTest("bad-schema-docValues-not-required-no-default.xml", "has no default value and is not required");
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDocValuesUnsupported() throws Exception {
|
public void testDocValuesUnsupported() throws Exception {
|
||||||
doTest("bad-schema-unsupported-docValues.xml", "does not support doc values");
|
doTest("bad-schema-unsupported-docValues.xml", "does not support doc values");
|
||||||
}
|
}
|
||||||
|
|
|
@ -168,8 +168,10 @@
|
||||||
<!--
|
<!--
|
||||||
Some fields such as popularity and manu_exact could be modified to
|
Some fields such as popularity and manu_exact could be modified to
|
||||||
leverage doc values:
|
leverage doc values:
|
||||||
<field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
|
<field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
|
||||||
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
|
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
|
||||||
|
<field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
|
||||||
|
|
||||||
|
|
||||||
Although it would make indexing slightly slower and the index bigger, it
|
Although it would make indexing slightly slower and the index bigger, it
|
||||||
would also make the index faster to load, more memory-efficient and more
|
would also make the index faster to load, more memory-efficient and more
|
||||||
|
|
Loading…
Reference in New Issue