mirror of https://github.com/apache/lucene.git
Improve DV faceting tests, support missing count for single-valued string fields, and remove the required/default restriction
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515520 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
98522b2262
commit
d208878c11
|
@ -45,12 +45,13 @@ public class BytesRefFieldSource extends FieldCacheSource {
|
|||
// To be sorted or not to be sorted, that is the question
|
||||
// TODO: do it cleaner?
|
||||
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
|
||||
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
|
||||
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
|
||||
return new FunctionValues() {
|
||||
|
||||
@Override
|
||||
public boolean exists(int doc) {
|
||||
return true; // doc values are dense
|
||||
return docsWithField.get(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -651,6 +651,43 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
/**
 * Indexes one document without the sorted doc-values field "field" and one
 * document with it, deletes the document that has the value, force-merges to
 * a single segment, and then checks what the merged segment reports for the
 * surviving document.
 *
 * @throws IOException declared by the method; propagated from the index operations used here
 */
public void testSortedMergeAwayAllValues() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
|
||||
// document "0": only an id, no value for "field"
Document doc = new Document();
|
||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||
iwriter.addDocument(doc);
|
||||
// document "1": the only document that carries a sorted doc value ("hello")
doc = new Document();
|
||||
doc.add(new StringField("id", "1", Field.Store.NO));
|
||||
doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.commit();
|
||||
// delete the only document that has a value, then merge down to one segment
iwriter.deleteDocuments(new Term("id", "1"));
|
||||
iwriter.forceMerge(1);
|
||||
|
||||
DirectoryReader ireader = iwriter.getReader();
|
||||
iwriter.close();
|
||||
|
||||
SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
|
||||
// NOTE(review): codecSupportsDocsWithField is defined outside this hunk; presumably it
// reports whether the codec under test can record "no value" for a document -- confirm.
if (codecSupportsDocsWithField("field")) {
|
||||
// expected here: doc 0 has ordinal -1 and the field has no values at all
assertEquals(-1, dv.getOrd(0));
|
||||
assertEquals(0, dv.getValueCount());
|
||||
} else {
|
||||
// expected here: doc 0 maps to ordinal 0, the single value, which is an empty BytesRef
assertEquals(0, dv.getOrd(0));
|
||||
assertEquals(1, dv.getValueCount());
|
||||
BytesRef ref = new BytesRef();
|
||||
dv.lookupOrd(0, ref);
|
||||
assertEquals(new BytesRef(), ref);
|
||||
}
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testBytesWithNewline() throws IOException {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
||||
|
|
|
@ -218,12 +218,7 @@ public class DocValuesFacets {
|
|||
static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException {
|
||||
if (missing) {
|
||||
if (missingCount < 0) {
|
||||
if (schemaField.multiValued()) {
|
||||
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
|
||||
} else {
|
||||
// nocommit: support missing count (ord = -1) for single-valued here.
|
||||
missingCount = 0; // single-valued dv is implicitly 0
|
||||
}
|
||||
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
|
||||
}
|
||||
res.add(null, missingCount);
|
||||
}
|
||||
|
@ -232,12 +227,12 @@ public class DocValuesFacets {
|
|||
}
|
||||
|
||||
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
|
||||
// specialized since the single-valued case is simpler: you don't have to deal with missing count, etc
|
||||
// specialized since the single-valued case is different
|
||||
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int term = si.getOrd(doc);
|
||||
if (map != null) {
|
||||
if (map != null && term >= 0) {
|
||||
term = (int) map.getGlobalOrd(subIndex, term);
|
||||
}
|
||||
int arrIdx = term-startTermIndex;
|
||||
|
|
|
@ -255,7 +255,7 @@ final class NumericFacets {
|
|||
|
||||
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
|
||||
if (!sf.indexed()) {
|
||||
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on a field which is not indexed");
|
||||
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
|
||||
}
|
||||
// Add zeros until there are limit results
|
||||
final Set<String> alreadySeen = new HashSet<String>();
|
||||
|
|
|
@ -80,9 +80,6 @@ public class StrField extends PrimitiveFieldType {
|
|||
|
||||
@Override
|
||||
public void checkSchemaField(SchemaField field) {
|
||||
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
|
||||
throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -696,9 +696,6 @@ public class TrieField extends PrimitiveFieldType {
|
|||
|
||||
@Override
|
||||
public void checkSchemaField(final SchemaField field) {
|
||||
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
|
||||
throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<schema name="bad-schema-docValues-not-required-no-default" version="1.0">
|
||||
<types>
|
||||
<fieldType name="string" class="solr.StrField" />
|
||||
</types>
|
||||
|
||||
|
||||
<fields>
|
||||
<!-- docValues must be required or have a default value -->
|
||||
<field name="id" type="string" docValues="true" multiValued="false"/>
|
||||
</fields>
|
||||
|
||||
<defaultSearchField>id</defaultSearchField>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
</schema>
|
|
@ -26,17 +26,16 @@
|
|||
<fields>
|
||||
<field name="id" type="string" indexed="true" stored="true" docValues="false" multiValued="false" required="true"/>
|
||||
<field name="id_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="false" required="true"/>
|
||||
<!-- TODO: improve this test so we don't have to make all these DV types multivalued (for missing values) -->
|
||||
<dynamicField name="*_i" type="int" indexed="true" stored="false" docValues="false"/>
|
||||
<dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true"/>
|
||||
<dynamicField name="*_is" type="int" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
||||
<dynamicField name="*_is_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
||||
<dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false"/>
|
||||
<dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true"/>
|
||||
<dynamicField name="*_ss" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
|
||||
<dynamicField name="*_ss_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<dynamicField name="*_f" type="float" indexed="true" stored="false" docValues="false"/>
|
||||
<dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true"/>
|
||||
</fields>
|
||||
|
||||
<defaultSearchField>id</defaultSearchField>
|
||||
|
|
|
@ -39,7 +39,7 @@ import org.junit.Test;
|
|||
* to the indexed facet results as if it were just another faceting method.
|
||||
*/
|
||||
@Slow
|
||||
@SuppressCodecs({"Lucene40", "Lucene41"})
|
||||
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"})
|
||||
public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
||||
|
||||
@BeforeClass
|
||||
|
@ -162,6 +162,8 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
|||
|
||||
SchemaField sf = req.getSchema().getField(ftype.fname);
|
||||
boolean multiValued = sf.getType().multiValuedFieldCache();
|
||||
boolean indexed = sf.indexed();
|
||||
boolean numeric = sf.getType().getNumericType() != null;
|
||||
|
||||
int offset = 0;
|
||||
if (rand.nextInt(100) < 20) {
|
||||
|
@ -179,8 +181,21 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
|||
params.add("facet.limit", Integer.toString(limit));
|
||||
}
|
||||
|
||||
if (rand.nextBoolean()) {
|
||||
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
|
||||
// the following two situations cannot work for unindexed single-valued numerics:
|
||||
// (currently none of the dv fields in this test config)
|
||||
// facet.sort = index
|
||||
// facet.minCount = 0
|
||||
if (!numeric || sf.multiValued()) {
|
||||
if (rand.nextBoolean()) {
|
||||
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
|
||||
}
|
||||
|
||||
if (rand.nextInt(100) < 10) {
|
||||
params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
|
||||
}
|
||||
} else {
|
||||
params.add("facet.sort", "count");
|
||||
params.add("facet.mincount", Integer.toString(1+rand.nextInt(5)));
|
||||
}
|
||||
|
||||
if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) {
|
||||
|
@ -192,10 +207,6 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
|
|||
params.add("facet.prefix", prefix);
|
||||
}
|
||||
|
||||
if (rand.nextInt(100) < 10) {
|
||||
params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
|
||||
}
|
||||
|
||||
if (rand.nextInt(100) < 20) {
|
||||
params.add("facet.missing", "true");
|
||||
}
|
||||
|
|
|
@ -93,10 +93,6 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase {
|
|||
doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support");
|
||||
}
|
||||
|
||||
public void testDocValuesNotRequiredNoDefault() throws Exception {
|
||||
doTest("bad-schema-docValues-not-required-no-default.xml", "has no default value and is not required");
|
||||
}
|
||||
|
||||
public void testDocValuesUnsupported() throws Exception {
|
||||
doTest("bad-schema-unsupported-docValues.xml", "does not support doc values");
|
||||
}
|
||||
|
|
|
@ -168,8 +168,10 @@
|
|||
<!--
|
||||
Some fields such as popularity and manu_exact could be modified to
|
||||
leverage doc values:
|
||||
<field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
|
||||
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
|
||||
<field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
|
||||
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
|
||||
<field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
|
||||
|
||||
|
||||
Although it would make indexing slightly slower and the index bigger, it
|
||||
would also make the index faster to load, more memory-efficient and more
|
||||
|
|
Loading…
Reference in New Issue