improve DV faceting tests, support missing count for single valued string fields, remove required/default restriction

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515520 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-08-19 17:23:52 +00:00
parent 98522b2262
commit d208878c11
11 changed files with 69 additions and 67 deletions

View File

@ -45,12 +45,13 @@ public class BytesRefFieldSource extends FieldCacheSource {
// To be sorted or not to be sorted, that is the question
// TODO: do it cleaner?
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
return new FunctionValues() {
@Override
public boolean exists(int doc) {
return true; // doc values are dense
return docsWithField.get(doc);
}
@Override

View File

@ -650,6 +650,43 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
ireader.close();
directory.close();
}
/**
 * Indexes one document without a value for the sorted doc-values field "field" and one
 * document with a value, deletes the document that has the value, force-merges, and then
 * checks what the merged segment reports for document 0.
 *
 * <p>When {@code codecSupportsDocsWithField("field")} is true the test expects ord -1 and a
 * value count of 0; otherwise it expects ord 0, a value count of 1, and an empty
 * {@link BytesRef} for that ord.
 */
public void testSortedMergeAwayAllValues() throws IOException {
  Directory dir = newDirectory();
  Analyzer mockAnalyzer = new MockAnalyzer(random());
  IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  // Document "0": only an id, no value for "field".
  Document withoutValue = new Document();
  withoutValue.add(new StringField("id", "0", Field.Store.NO));
  writer.addDocument(withoutValue);

  // Document "1": the only document carrying a value for "field".
  Document withValue = new Document();
  withValue.add(new StringField("id", "1", Field.Store.NO));
  withValue.add(new SortedDocValuesField("field", new BytesRef("hello")));
  writer.addDocument(withValue);
  writer.commit();

  // Delete the only document that has a value, then merge.
  writer.deleteDocuments(new Term("id", "1"));
  writer.forceMerge(1);

  DirectoryReader reader = writer.getReader();
  writer.close();

  SortedDocValues sorted = getOnlySegmentReader(reader).getSortedDocValues("field");
  if (!codecSupportsDocsWithField("field")) {
    // Expect document 0 to map to ord 0, whose value is an empty BytesRef.
    assertEquals(0, sorted.getOrd(0));
    assertEquals(1, sorted.getValueCount());
    BytesRef lookedUp = new BytesRef();
    sorted.lookupOrd(0, lookedUp);
    assertEquals(new BytesRef(), lookedUp);
  } else {
    // Expect document 0 to be reported as missing and no values to remain.
    assertEquals(-1, sorted.getOrd(0));
    assertEquals(0, sorted.getValueCount());
  }

  reader.close();
  dir.close();
}
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());

View File

@ -218,12 +218,7 @@ public class DocValuesFacets {
static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException {
if (missing) {
if (missingCount < 0) {
if (schemaField.multiValued()) {
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
} else {
// nocommit: support missing count (ord = -1) for single-valued here.
missingCount = 0; // single-valued dv is implicitly 0
}
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
}
res.add(null, missingCount);
}
@ -232,12 +227,12 @@ public class DocValuesFacets {
}
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
// specialized since the single-valued case is simpler: you don't have to deal with missing count, etc
// specialized since the single-valued case is different
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int term = si.getOrd(doc);
if (map != null) {
if (map != null && term >= 0) {
term = (int) map.getGlobalOrd(subIndex, term);
}
int arrIdx = term-startTermIndex;

View File

@ -255,7 +255,7 @@ final class NumericFacets {
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
if (!sf.indexed()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on a field which is not indexed");
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
}
// Add zeros until there are limit results
final Set<String> alreadySeen = new HashSet<String>();

View File

@ -80,9 +80,6 @@ public class StrField extends PrimitiveFieldType {
@Override
public void checkSchemaField(SchemaField field) {
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
}
}
}

View File

@ -696,9 +696,6 @@ public class TrieField extends PrimitiveFieldType {
@Override
public void checkSchemaField(final SchemaField field) {
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
}
}
}

View File

@ -1,33 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="bad-schema-docValues-not-required-no-default" version="1.0">
<types>
<fieldType name="string" class="solr.StrField" />
</types>
<fields>
<!-- a single-valued docValues field must be required or have a default value; this field is neither -->
<field name="id" type="string" docValues="true" multiValued="false"/>
</fields>
<defaultSearchField>id</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -26,17 +26,16 @@
<fields>
<field name="id" type="string" indexed="true" stored="true" docValues="false" multiValued="false" required="true"/>
<field name="id_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="false" required="true"/>
<!-- TODO: improve this test so we don't have to make all these DV types multivalued (for missing values) -->
<dynamicField name="*_i" type="int" indexed="true" stored="false" docValues="false"/>
<dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
<dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true"/>
<dynamicField name="*_is" type="int" indexed="true" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_is_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false"/>
<dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_ss_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="false" docValues="false"/>
<dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
<dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true"/>
</fields>
<defaultSearchField>id</defaultSearchField>

View File

@ -39,7 +39,7 @@ import org.junit.Test;
* to the indexed facet results as if it were just another faceting method.
*/
@Slow
@SuppressCodecs({"Lucene40", "Lucene41"})
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"})
public class TestRandomDVFaceting extends SolrTestCaseJ4 {
@BeforeClass
@ -162,6 +162,8 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
SchemaField sf = req.getSchema().getField(ftype.fname);
boolean multiValued = sf.getType().multiValuedFieldCache();
boolean indexed = sf.indexed();
boolean numeric = sf.getType().getNumericType() != null;
int offset = 0;
if (rand.nextInt(100) < 20) {
@ -179,8 +181,21 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
params.add("facet.limit", Integer.toString(limit));
}
if (rand.nextBoolean()) {
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
// the following two situations cannot work for unindexed single-valued numerics:
// (currently none of the dv fields in this test config)
// facet.sort = index
// facet.minCount = 0
if (!numeric || sf.multiValued()) {
if (rand.nextBoolean()) {
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
}
if (rand.nextInt(100) < 10) {
params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
}
} else {
params.add("facet.sort", "count");
params.add("facet.mincount", Integer.toString(1+rand.nextInt(5)));
}
if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) {
@ -192,10 +207,6 @@ public class TestRandomDVFaceting extends SolrTestCaseJ4 {
params.add("facet.prefix", prefix);
}
if (rand.nextInt(100) < 10) {
params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
}
if (rand.nextInt(100) < 20) {
params.add("facet.missing", "true");
}

View File

@ -93,10 +93,6 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase {
doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support");
}
public void testDocValuesNotRequiredNoDefault() throws Exception {
doTest("bad-schema-docValues-not-required-no-default.xml", "has no default value and is not required");
}
public void testDocValuesUnsupported() throws Exception {
doTest("bad-schema-unsupported-docValues.xml", "does not support doc values");
}

View File

@ -168,8 +168,10 @@
<!--
Some fields such as popularity and manu_exact could be modified to
leverage doc values:
<field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
<field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
<field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
Although it would make indexing slightly slower and the index bigger, it
would also make the index faster to load, more memory-efficient and more