SOLR-181 -- add "required" fields to IndexSchema.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@533571 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2007-04-29 23:03:03 +00:00
parent aa93003946
commit 2787609f91
13 changed files with 815 additions and 27 deletions

View File

@ -162,6 +162,12 @@ New Features
Using the ';' syntax is still supported, but it is recommended to
transition to the new syntax. (ryan)
27. SOLR-181: The index schema now supports "required" fields. Attempts
to add a document without a required field will fail, returning a
descriptive error message. By default, the uniqueKey field is
a required field. This can be disabled by setting required=false
in schema.xml. (Greg Ludington via ryan)
Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main
user query, not boost or filter queries (klaas).

View File

@ -234,7 +234,7 @@
fields or fields that need an index-time boost need norms.
-->
<field name="id" type="string" indexed="true" stored="true"/>
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
<field name="name" type="text" indexed="true" stored="true"/>
<field name="nameSort" type="string" indexed="true" stored="false"/>
@ -291,7 +291,9 @@
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
</fields>
<!-- field to use to determine and enforce document uniqueness. -->
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->

View File

@ -44,12 +44,14 @@ abstract class FieldProperties {
final static int SORT_MISSING_FIRST = 0x00000400;
final static int SORT_MISSING_LAST = 0x00000800;
final static int REQUIRED = 0x00001000;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "compressed", "omitNorms",
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast"
"sortMissingFirst","sortMissingLast","required"
};
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();

View File

@ -19,7 +19,6 @@ package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
@ -92,6 +91,7 @@ public final class IndexSchema {
private final HashMap<String, SchemaField> fields = new HashMap<String,SchemaField>();
private final HashMap<String, FieldType> fieldTypes = new HashMap<String,FieldType>();
private final List<SchemaField> fieldsWithDefaultValue = new ArrayList<SchemaField>();
private final Collection<SchemaField> requiredFields = new HashSet<SchemaField>();
/**
* Provides direct access to the Map containing all explicit
@ -118,6 +118,12 @@ public final class IndexSchema {
*/
public List<SchemaField> getFieldsWithDefaultValue() { return fieldsWithDefaultValue; }
/**
* Provides direct access to the Collection containing all required fields.
* Fields with default values are also added to this collection while the
* schema is loaded, so it is a superset of the fields explicitly marked
* as required.
*/
public Collection<SchemaField> getRequiredFields() { return requiredFields; }
private Similarity similarity;
/**
@ -338,6 +344,8 @@ public final class IndexSchema {
}
// Hang on to the fields that say if they are required -- this lets us set a reasonable default for the unique key
Map<String,Boolean> explicitRequiredProp = new HashMap<String, Boolean>();
ArrayList<DynamicField> dFields = new ArrayList<DynamicField>();
expression = "/schema/fields/field | /schema/fields/dynamicField";
nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET);
@ -358,6 +366,9 @@ public final class IndexSchema {
}
Map<String,String> args = DOMUtil.toMapExcept(attrs, "name", "type");
if( args.get( "required" ) != null ) {
explicitRequiredProp.put( name, Boolean.valueOf( args.get( "required" ) ) );
}
SchemaField f = SchemaField.create(name,ft,args);
@ -368,6 +379,10 @@ public final class IndexSchema {
log.fine(name+" contains default value: " + f.getDefaultValue());
fieldsWithDefaultValue.add( f );
}
if (f.isRequired()) {
log.fine(name+" is required in this schema");
requiredFields.add(f);
}
} else if (node.getNodeName().equals("dynamicField")) {
dFields.add(new DynamicField(f));
log.fine("dynamic field defined: " + f);
@ -377,6 +392,11 @@ public final class IndexSchema {
}
}
//fields with default values are by definition required
//add them to required fields, and we only have to loop once
// in DocumentBuilder.getDoc()
requiredFields.addAll(getFieldsWithDefaultValue());
// OK, now sort the dynamic fields largest to smallest size so we don't get
// any false matches. We want to act like a compiler tool and try and match
// the largest string possible.
@ -423,6 +443,12 @@ public final class IndexSchema {
uniqueKeyFieldName=uniqueKeyField.getName();
uniqueKeyFieldType=uniqueKeyField.getType();
log.info("unique key field: "+uniqueKeyFieldName);
// Unless the uniqueKeyField is marked 'required=false' then make sure it exists
if( Boolean.FALSE != explicitRequiredProp.get( uniqueKeyFieldName ) ) {
uniqueKeyField.required = true;
requiredFields.add(uniqueKeyField);
}
}
/////////////// parse out copyField commands ///////////////
@ -824,3 +850,4 @@ public final class IndexSchema {
}

View File

@ -37,6 +37,7 @@ public final class SchemaField extends FieldProperties {
final FieldType type;
final int properties;
final String defaultValue;
boolean required = false; // this can't be final since it may be changed dynamically
/** Create a new SchemaField with the given name and type,
@ -64,6 +65,9 @@ public final class SchemaField extends FieldProperties {
this.type = type;
this.properties = properties;
this.defaultValue = defaultValue;
// initialize from the REQUIRED property flag
required = (properties & REQUIRED) !=0;
}
public String getName() { return name; }
@ -80,6 +84,7 @@ public final class SchemaField extends FieldProperties {
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
public boolean isCompressed() { return (properties & COMPRESSED)!=0; }
public boolean isRequired() { return required; }
// things that should be determined by field type, not set as options
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
@ -89,10 +94,12 @@ public final class SchemaField extends FieldProperties {
return type.createField(this,val,boost);
}
/**
 * Renders this field as {@code name{type=...,properties=...}}; the default
 * value and the required marker are included only when they are set.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder();
  text.append(name).append("{type=").append(type.getTypeName());
  if (defaultValue != null) {
    text.append(",default=").append(defaultValue);
  }
  text.append(",properties=").append(propertiesToString(properties));
  if (required) {
    text.append(", required=true");
  }
  return text.append("}").toString();
}
@ -111,7 +118,7 @@ public final class SchemaField extends FieldProperties {
}
static SchemaField create(String name, FieldType ft, Map props) {
static SchemaField create(String name, FieldType ft, Map<String,String> props) {
int trueProps = parseProperties(props,true);
int falseProps = parseProperties(props,false);
@ -175,3 +182,5 @@ public final class SchemaField extends FieldProperties {

View File

@ -17,13 +17,15 @@
package org.apache.solr.update;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.solr.core.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.core.SolrException;
import java.util.HashMap;
/**
* @author yonik
@ -104,14 +106,36 @@ public class DocumentBuilder {
}
// specific to this type of document builder
public Document getDoc() {
public Document getDoc() throws IllegalArgumentException {
// Check for default fields in our schema...
for( SchemaField field : schema.getFieldsWithDefaultValue() ) {
// Check for all required fields -- note that all fields with a
// default value are de facto 'required' fields.
List<String> missingFields = new ArrayList<String>( schema.getRequiredFields().size() );
for (SchemaField field : schema.getRequiredFields()) {
if (doc.getField(field.getName() ) == null) {
if (field.getDefaultValue() != null) {
doc.add( field.createField( field.getDefaultValue(), 1.0f ) );
} else {
missingFields.add(field.getName());
}
}
}
if (missingFields.size() > 0) {
StringBuilder builder = new StringBuilder();
// add the uniqueKey if possible
if( schema.getUniqueKeyField() != null ) {
String n = schema.getUniqueKeyField().getName();
String v = doc.get( n );
builder.append( "Document ["+n+"="+v+"] " );
}
builder.append("missing required fields: " );
for (String field : missingFields) {
builder.append(field);
builder.append(" ");
}
throw new SolrException(400, builder.toString());
}
Document ret = doc; doc=null;
return ret;

View File

@ -130,12 +130,32 @@ public abstract class AbstractSolrTestCase extends TestCase {
/**
* Validates that an update XML String is successful.
*
* @param message optional text prefixed to the failure message; may be null
* @param update the update XML to submit
*/
public void assertU(String message, String update) {
checkUpdateU(message, update, true);
}
/**
* Validates that an update XML String failed, using no extra failure message.
*
* @param update the update XML to submit
*/
public void assertFailedU(String update) {
assertFailedU(null, update);
}
/**
* Validates that an update XML String failed.
*
* @param message optional text prefixed to the failure message; may be null
* @param update the update XML to submit
*/
public void assertFailedU(String message, String update) {
checkUpdateU(message, update, false);
}
/** Checks the success or failure of an update message
*/
private void checkUpdateU(String message, String update, boolean shouldSucceed) {
try {
String m = (null == message) ? "" : message + " ";
if (shouldSucceed) {
String res = h.validateUpdate(update);
if (null != res) {
fail(m + "update was not successful: " + res);
if (res != null) fail(m + "update was not successful: " + res);
} else {
String res = h.validateErrorUpdate(update);
if (res != null) fail(m + "update succeeded, but should have failed: " + res);
}
} catch (SAXException e) {
throw new RuntimeException("Invalid XML", e);
@ -284,6 +304,4 @@ public abstract class AbstractSolrTestCase extends TestCase {
}
return f.delete();
}
}

View File

@ -138,9 +138,33 @@ public class TestHarness {
* @return null if successful, otherwise the XML response to the update
*/
public String validateUpdate(String xml) throws SAXException {
return checkUpdateStatus(xml, "0");
}
/**
* Validates that an "update" (add, commit or optimize) results in an error,
* i.e. that the response carries status "1".
*
* :TODO: currently only deals with one add/doc at a time, this will need changed if/when SOLR-2 is resolved
*
* @param xml The XML of the update
* @return null if the update failed as expected, otherwise the XML response to the update
*/
public String validateErrorUpdate(String xml) throws SAXException {
return checkUpdateStatus(xml, "1");
}
/**
* Validates that an "update" (add, commit or optimize) results in the given status code.
*
* :TODO: currently only deals with one add/doc at a time, this will need changed if/when SOLR-2 is resolved
*
* @param xml The XML of the update
* @param code The status code the response is expected to carry
* @return null if the response has the expected status, otherwise the XML response to the update
*/
public String checkUpdateStatus(String xml, String code) throws SAXException {
try {
String res = update(xml);
String valid = validateXPath(res, "//result[@status=0]" );
String valid = validateXPath(res, "//result[@status="+code+"]" );
return (null == valid) ? null : res;
} catch (XPathExpressionException e) {
throw new RuntimeException
@ -148,7 +172,6 @@ public class TestHarness {
}
}
/**
* Validates that an add of a single document results in success.
*

View File

@ -0,0 +1,58 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.Collection;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.AbstractSolrTestCase;
/**
 * Verifies that the uniqueKey field is left optional when the schema
 * explicitly declares it with required="false".
 *
 * The test lives in its own class because it has to load a dedicated
 * schema file.
 */
public class NotRequiredUniqueKeyTest extends AbstractSolrTestCase {

  @Override
  public String getSchemaFile() {
    return "schema-not-required-unique-key.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  /** The uniqueKey must be neither flagged as required nor listed among the required fields. */
  public void testSchemaLoading() {
    IndexSchema loadedSchema = SolrCore.getSolrCore().getSchema();
    SchemaField keyField = loadedSchema.getUniqueKeyField();

    assertFalse(keyField.isRequired());
    assertFalse(loadedSchema.getRequiredFields().contains(keyField));
  }
}

View File

@ -0,0 +1,140 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.Collection;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.AbstractSolrTestCase;
/**
 * Tests the handling of "required" fields: schema configuration, defaults
 * being filled in for missing fields, and rejection of documents that lack a
 * required field without a default.
 *
 * @author Greg Ludington
 */
public class RequiredFieldsTest extends AbstractSolrTestCase {

  @Override
  public String getSchemaFile() { return "schema-required-fields.xml"; }

  @Override
  public String getSolrConfigFile() { return "solrconfig.xml"; }

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  /** Checks which fields the loaded schema reports as required. */
  public void testRequiredFieldsConfig() {
    SolrCore core = SolrCore.getSolrCore();
    IndexSchema schema = core.getSchema();
    SchemaField uniqueKey = schema.getUniqueKeyField();

    // Make sure the uniqueKey is required
    assertTrue( uniqueKey.isRequired() );
    assertTrue( schema.getRequiredFields().contains( uniqueKey ) );

    // we specified one required field, but all default valued fields are also required
    Collection<SchemaField> requiredFields = schema.getRequiredFields();
    int numDefaultFields = schema.getFieldsWithDefaultValue().size();
    assertEquals( numDefaultFields+1+1, requiredFields.size()); // also the uniqueKey
  }

  /** Adds documents one at a time, with and without their required fields. */
  public void testRequiredFieldsSingleAdd() {
    // Add a single document
    assertU("adding document",
      adoc("id", "529", "name", "document with id, name, and subject", "field_t", "what's inside?", "subject", "info"));
    assertU(commit());

    // Check that it is in the index
    assertQ("should find one", req("id:529") ,"//result[@numFound=1]" );

    // Add another document without the required subject field, which
    // has a configured defaultValue of "Stuff"
    assertU("adding a doc without field w/ configured default",
      adoc("id", "530", "name", "document with id and name", "field_t", "what's inside?"));
    assertU(commit());

    // The document added without a subject must have received the schema default
    String subjectDefault = SolrCore.getSolrCore().getSchema().getField("subject").getDefaultValue();
    assertNotNull("subject has no default value", subjectDefault);
    assertQ("should find one with subject="+subjectDefault, req("id:530 subject:"+subjectDefault) ,"//result[@numFound=1]" );

    // Add another document without a required name, which has no default
    assertNull(SolrCore.getSolrCore().getSchema().getField("name").getDefaultValue());
    assertFailedU("adding doc without required field",
      adoc("id", "531", "subject", "no name document", "field_t", "what's inside?") );
    assertU(commit());

    // Check to make sure this submission did not succeed
    assertQ("should not find any", req("id:531") ,"//result[@numFound=0]" );
  }

  /** Adds documents in batches to check how errors in the middle of a batch are handled. */
  public void testAddMultipleDocumentsWithErrors() {
    // Add three documents at once to make sure the baseline succeeds
    assertU("adding 3 documents",
      "<add>" +doc("id", "601", "name", "multiad one", "field_t", "what's inside?", "subject", "info") +
        doc("id", "602", "name", "multiad two", "field_t", "what's inside?", "subject", "info") +
        doc("id", "603", "name", "multiad three", "field_t", "what's inside?", "subject", "info") +
        "</add>");
    assertU(commit());

    // Check that they are in the index
    assertQ("should find three", req("name:multiad") ,"//result[@numFound=3]" );

    // Add three documents at once, with the middle one missing a field that has a default
    assertU("adding 3 docs, with 2nd one missing a field that has a default value",
      "<add>" +doc("id", "601", "name", "nosubject batch one", "field_t", "what's inside?", "subject", "info") +
        doc("id", "602", "name", "nosubject batch two", "field_t", "what's inside?") +
        doc("id", "603", "name", "nosubject batch three", "field_t", "what's inside?", "subject", "info") +
        "</add>");
    assertU(commit());

    // Since the missing field had a default value,
    // all three should have made it into the index
    assertQ("should find three", req("name:nosubject") ,"//result[@numFound=3]" );

    // Add three documents at once, the middle one with a bad field definition,
    // to establish the baseline behavior for errors in a multi-add submission.
    // The second document is otherwise complete, so the undefined field is the
    // only reason it can be rejected.
    assertFailedU("adding 3 documents, with 2nd one with undefined field",
      "<add>" +doc("id", "801", "name", "baddef batch one", "field_t", "what's inside?", "subject", "info") +
        doc("id", "802", "name", "baddef batch two", "field_t", "what's inside?", "subject", "info", "GaRbAgeFiElD", "garbage") +
        doc("id", "803", "name", "baddef batch three", "field_t", "what's inside?", "subject", "info") +
        "</add>");
    assertU(commit());

    // Check that only docs before the error should be in the index
    assertQ("should find one", req("name:baddef") ,"//result[@numFound=1]" );

    // Add three documents at once, with the middle one missing a required field that has no default
    assertFailedU("adding 3 docs, with 2nd one missing required field",
      "<add>" +doc("id", "701", "name", "noname batch one", "field_t", "what's inside?", "subject", "info") +
        doc("id", "702", "field_t", "what's inside?", "subject", "info") +
        doc("id", "703", "name", "noname batch batch three", "field_t", "what's inside?", "subject", "info") +
        "</add>");
    assertU(commit());

    // Check that only docs before the error should be in the index
    assertQ("should find one", req("name:noname") ,"//result[@numFound=1]" );
  }
}

View File

@ -0,0 +1,45 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Stripped-down schema used to make sure an explicit required=false is
observed for the uniqueKey field
$Id$
$Source: /cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
-->
<schema name="test" version="1.0">
<types>
<fieldtype name="sint" class="solr.SortableIntField" />
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
</types>
<fields>
<field name="id" type="sint" indexed="true" stored="true" required="false"/>
<field name="subject" type="text" indexed="true" stored="true"/>
</fields>
<defaultSearchField>subject</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,434 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- The Solr schema file. This file should be named "schema.xml" and
should be located where the classloader for the Solr webapp can find it.
This schema is used for testing, and as such has everything and the
kitchen sink thrown in. See example/solr/conf/schema.xml for a
more concise example.
$Id$
$Source: /cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
$Name: $
-->
<schema name="test" version="1.0">
<types>
<!-- field type definitions... note that the "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real type and
behavior of the fieldtype.
-->
<!-- numeric field types that store and index the text
value verbatim (and hence don't sort correctly or support range queries.)
These are provided more for backward compatibility, allowing one
to create a schema that matches an existing lucene index.
-->
<fieldtype name="integer" class="solr.IntField"/>
<fieldtype name="long" class="solr.LongField"/>
<fieldtype name="float" class="solr.FloatField"/>
<fieldtype name="double" class="solr.DoubleField"/>
<!-- numeric field types that manipulate the value into
a string value that isn't human readable in its internal form,
but sorts correctly and supports range queries.
If sortMissingLast="true" then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order.
If sortMissingFirst="true" then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the field
first in an ascending sort and last in a descending sort.
-->
<fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true"/>
<fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true"/>
<fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true"/>
<fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true"/>
<!-- bcd versions of sortable numeric type may provide smaller
storage space and support very large numbers.
-->
<fieldtype name="bcdint" class="solr.BCDIntField" sortMissingLast="true"/>
<fieldtype name="bcdlong" class="solr.BCDLongField" sortMissingLast="true"/>
<fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- HighlitText optimizes storage for (long) columns which will be highlit -->
<fieldtype name="highlittext" class="solr.TextField" compressThreshold="345" />
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
seconds part (.999) is optional.
-->
<fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
<!-- solr.TextField allows the specification of custom
text analyzers specified as a tokenizer and a list
of token filters.
-->
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<!-- lucene PorterStemFilterFactory deprecated
<filter class="solr.PorterStemFilterFactory"/>
-->
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="nametext" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
</fieldtype>
<fieldtype name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
<fieldtype name="lowertok" class="solr.TextField">
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="lettertok" class="solr.TextField">
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
<analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.HTMLStripStandardTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer><tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all"
/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
<!-- Demonstrates how RemoveDuplicatesTokenFilter makes stemmed
synonyms "better"
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
<filter class="solr.EnglishPorterFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
</fieldtype>
<fieldtype name="unstored" class="solr.StrField" indexed="true" stored="false"/>
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
</types>
<fields>
<field name="id" type="integer" indexed="true" stored="true"/> <!-- the uniqueKey is required by default -->
<field name="name" type="nametext" indexed="true" stored="true" required="true"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="subject" type="text" indexed="true" stored="true" required="true" default="Stuff"/>
<field name="title" type="nametext" indexed="true" stored="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
<field name="bday" type="date" indexed="true" stored="true"/>
<field name="title_stemmed" type="text" indexed="true" stored="false"/>
<field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
<field name="syn" type="syn" indexed="true" stored="true"/>
<!-- to test property inheritance and overriding -->
<field name="shouldbeunstored" type="unstored" />
<field name="shouldbestored" type="unstored" stored="true"/>
<field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
<!-- test different combinations of indexed and stored -->
<field name="bind" type="boolean" indexed="true" stored="false"/>
<field name="bsto" type="boolean" indexed="false" stored="true"/>
<field name="bindsto" type="boolean" indexed="true" stored="true"/>
<field name="isto" type="integer" indexed="false" stored="true"/>
<field name="iind" type="integer" indexed="true" stored="false"/>
<field name="ssto" type="string" indexed="false" stored="true"/>
<field name="sind" type="string" indexed="true" stored="false"/>
<field name="sindsto" type="string" indexed="true" stored="true"/>
<!-- test combinations of term vector settings -->
<field name="test_basictv" type="text" termVectors="true"/>
<field name="test_notv" type="text" termVectors="false"/>
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
<field name="test_posofftv" type="text" termVectors="true"
termPositions="true" termOffsets="true"/>
<!-- test highlit field settings -->
<field name="test_hlt" type="highlittext" indexed="true" compressed="true"/>
<field name="test_hlt_off" type="highlittext" indexed="true" compressed="false"/>
<!-- fields to test individual tokenizers and tokenfilters -->
<field name="teststop" type="teststop" indexed="true" stored="true"/>
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
<field name="lettertok" type="lettertok" indexed="true" stored="true"/>
<field name="whitetok" type="whitetok" indexed="true" stored="true"/>
<field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" stored="true"/>
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
<field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
<field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
<field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
<field name="dedup" type="dedup" indexed="true" stored="true"/>
<field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
<field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>
<field name="subword" type="subword" indexed="true" stored="true"/>
<field name="sku1" type="skutype1" indexed="true" stored="true"/>
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
<field name="textgap" type="textgap" indexed="true" stored="true"/>
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="sint" indexed="true" stored="true" default="42" multiValued="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal size patterns
both match, the first appearing in the schema will be used.
-->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_bcd" type="bcdstr" indexed="true" stored="true"/>
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<!-- for testing to ensure that longer patterns are matched first -->
<dynamicField name="*aa" type="string" indexed="true" stored="true"/>
<dynamicField name="*aaa" type="integer" indexed="false" stored="true"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field in different
ways, or to add multiple fields to the same field for easier/faster searching.
-->
<copyField source="title" dest="title_stemmed"/>
<copyField source="title" dest="title_lettertok"/>
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="*_t" dest="text"/>
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine
for most applications.
-->
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
</schema>

View File

@ -298,7 +298,7 @@
<fields>
<field name="id" type="integer" indexed="true" stored="true" multiValued="false"/>
<field name="id" type="integer" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="name" type="nametext" indexed="true" stored="true"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="subject" type="text" indexed="true" stored="true"/>