SOLR-538: added maxChars attribute for copyField

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@721758 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2008-11-30 04:46:20 +00:00
parent 2ded3e2345
commit cea5444e91
6 changed files with 827 additions and 42 deletions

View File

@ -94,6 +94,10 @@ New Features
18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities. 18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities.
Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers) Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers)
19. SOLR-538: Add maxChars attribute for copyField function so that the length limit for destination
can be specified.
(Georgios Stamatis, Lars Kotthoff, Chris Harris via koji)
Optimizations Optimizations
---------------------- ----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

View File

@ -0,0 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * <code>CopyField</code> contains all the information of one valid copy field
 * rule in an index: the source field, the destination field and an optional
 * limit on the number of characters that are copied.
 *
 * @since solr 1.4
 */
public class CopyField {

  /** Value of <code>maxChars</code> meaning that the whole source value is copied. */
  public static final int UNLIMITED = 0;

  private final SchemaField source;
  private final SchemaField destination;
  private final int maxChars;

  /**
   * Creates a copy field rule without any length limit.
   *
   * @param source The SchemaField of the source field.
   * @param destination The SchemaField of the destination field.
   * @throws IllegalArgumentException if source or destination is null.
   */
  public CopyField(final SchemaField source, final SchemaField destination) {
    this(source, destination, UNLIMITED);
  }

  /**
   * @param source The SchemaField of the source field.
   * @param destination The SchemaField of the destination field.
   * @param maxChars Maximum number of chars in source field to copy to destination field.
   * If equal to 0, there is no limit.
   * @throws IllegalArgumentException if source or destination is null, or if
   * maxChars is negative.
   */
  public CopyField(final SchemaField source, final SchemaField destination,
      final int maxChars) {
    if (source == null || destination == null) {
      throw new IllegalArgumentException(
          "Source or Destination SchemaField can't be NULL.");
    }
    if (maxChars < 0) {
      throw new IllegalArgumentException(
          "Attribute maxChars can't have a negative value.");
    }
    this.source = source;
    this.destination = destination;
    this.maxChars = maxChars;
  }

  /**
   * Applies the <code>maxChars</code> limit of this copy field to a value.
   *
   * @param val the value about to be copied; may be null.
   * @return <code>val</code> unchanged when it is null, when there is no limit,
   * or when it already fits within the limit; otherwise its first
   * <code>maxChars</code> chars.
   */
  public String getLimitedValue(final String val) {
    // A null value is passed through regardless of the limit, so limited and
    // unlimited copy fields treat it the same way instead of the limited one
    // failing with a NullPointerException.
    if (val == null || maxChars == UNLIMITED || val.length() <= maxChars) {
      return val;
    }
    return val.substring(0, maxChars);
  }

  /**
   * @return source SchemaField
   */
  public SchemaField getSource() {
    return source;
  }

  /**
   * @return destination SchemaField
   */
  public SchemaField getDestination() {
    return destination;
  }

  /**
   * @return the maximum number of chars in source field to copy to destination field.
   */
  public int getMaxChars() {
    return maxChars;
  }
}

View File

@ -621,8 +621,18 @@ public final class IndexSchema {
String source = DOMUtil.getAttr(attrs,"source","copyField definition"); String source = DOMUtil.getAttr(attrs,"source","copyField definition");
String dest = DOMUtil.getAttr(attrs,"dest", "copyField definition"); String dest = DOMUtil.getAttr(attrs,"dest", "copyField definition");
String maxChars = DOMUtil.getAttr(attrs, "maxChars");
int maxCharsInt = CopyField.UNLIMITED;
if (maxChars != null) {
try {
maxCharsInt = Integer.parseInt(maxChars);
} catch (NumberFormatException e) {
log.warn("Couldn't parse maxChars attribute for copyField from "
+ source + " to " + dest + " as integer. The whole field will be copied.");
}
}
registerCopyField(source, dest); registerCopyField(source, dest, maxCharsInt);
} }
for (Map.Entry<SchemaField, Integer> entry : copyFieldTargetCounts.entrySet()) { for (Map.Entry<SchemaField, Integer> entry : copyFieldTargetCounts.entrySet()) {
@ -646,6 +656,11 @@ public final class IndexSchema {
refreshAnalyzers(); refreshAnalyzers();
} }
public void registerCopyField( String source, String dest )
{
registerCopyField(source, dest, CopyField.UNLIMITED);
}
/** /**
* <p> * <p>
* NOTE: this function is not thread safe. However, it is safe to use within the standard * NOTE: this function is not thread safe. However, it is safe to use within the standard
@ -655,12 +670,12 @@ public final class IndexSchema {
* *
* @see SolrCoreAware * @see SolrCoreAware
*/ */
public void registerCopyField( String source, String dest ) public void registerCopyField( String source, String dest, int maxChars )
{ {
boolean sourceIsPattern = isWildCard(source); boolean sourceIsPattern = isWildCard(source);
boolean destIsPattern = isWildCard(dest); boolean destIsPattern = isWildCard(dest);
log.debug("copyField source='"+source+"' dest='"+dest+"'"); log.debug("copyField source='"+source+"' dest='"+dest+"' maxChars='"+maxChars);
SchemaField d = getFieldOrNull(dest); SchemaField d = getFieldOrNull(dest);
if(d == null){ if(d == null){
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField destination :'"+dest+"' does not exist" ); throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField destination :'"+dest+"' does not exist" );
@ -678,10 +693,10 @@ public final class IndexSchema {
if( df == null ) { if( df == null ) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField dynamic destination must match a dynamicField." ); throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField dynamic destination must match a dynamicField." );
} }
registerDynamicCopyField(new DynamicDestCopy(source, df )); registerDynamicCopyField(new DynamicDestCopy(source, df, maxChars ));
} }
else { else {
registerDynamicCopyField(new DynamicCopy(source, d)); registerDynamicCopyField(new DynamicCopy(source, d, maxChars));
} }
} }
else if( destIsPattern ) { else if( destIsPattern ) {
@ -692,13 +707,12 @@ public final class IndexSchema {
// retrieve the field to force an exception if it doesn't exist // retrieve the field to force an exception if it doesn't exist
SchemaField f = getField(source); SchemaField f = getField(source);
SchemaField[] destArr = copyFields.get(source); List<CopyField> copyFieldList = copyFieldsMap.get(source);
if (destArr==null) { if (copyFieldList == null) {
destArr=new SchemaField[]{d}; copyFieldList = new ArrayList<CopyField>();
} else { copyFieldsMap.put(source, copyFieldList);
destArr = (SchemaField[])append(destArr,d);
} }
copyFields.put(source,destArr); copyFieldList.add(new CopyField(f, d, maxChars));
copyFieldTargetCounts.put(d, (copyFieldTargetCounts.containsKey(d) ? copyFieldTargetCounts.get(d) + 1 : 1)); copyFieldTargetCounts.put(d, (copyFieldTargetCounts.containsKey(d) ? copyFieldTargetCounts.get(d) + 1 : 1));
} }
@ -894,9 +908,16 @@ public final class IndexSchema {
static class DynamicCopy extends DynamicReplacement { static class DynamicCopy extends DynamicReplacement {
final SchemaField targetField; final SchemaField targetField;
final int maxChars;
DynamicCopy(String regex, SchemaField targetField) { DynamicCopy(String regex, SchemaField targetField) {
this(regex, targetField, CopyField.UNLIMITED);
}
DynamicCopy(String regex, SchemaField targetField, int maxChars) {
super(regex); super(regex);
this.targetField = targetField; this.targetField = targetField;
this.maxChars = maxChars;
} }
public SchemaField getTargetField( String sourceField ) public SchemaField getTargetField( String sourceField )
@ -918,7 +939,11 @@ public final class IndexSchema {
final String dstr; final String dstr;
DynamicDestCopy(String source, DynamicField dynamic) { DynamicDestCopy(String source, DynamicField dynamic) {
super(source, dynamic.prototype ); this(source, dynamic, CopyField.UNLIMITED);
}
DynamicDestCopy(String source, DynamicField dynamic, int maxChars) {
super(source, dynamic.prototype, maxChars);
this.dynamic = dynamic; this.dynamic = dynamic;
String dest = dynamic.regex; String dest = dynamic.regex;
@ -1098,7 +1123,7 @@ public final class IndexSchema {
}; };
private final Map<String, SchemaField[]> copyFields = new HashMap<String,SchemaField[]>(); private final Map<String, List<CopyField>> copyFieldsMap = new HashMap<String, List<CopyField>>();
private DynamicCopy[] dynamicCopyFields; private DynamicCopy[] dynamicCopyFields;
/** /**
* keys are all fields copied to, count is num of copyField * keys are all fields copied to, count is num of copyField
@ -1119,46 +1144,69 @@ public final class IndexSchema {
return new SchemaField[0]; return new SchemaField[0];
} }
List<SchemaField> sf = new ArrayList<SchemaField>(); List<SchemaField> sf = new ArrayList<SchemaField>();
for (Map.Entry<String, SchemaField[]> cfs : copyFields.entrySet()) { for (Map.Entry<String, List<CopyField>> cfs : copyFieldsMap.entrySet()) {
for (SchemaField cf : cfs.getValue()) { for (CopyField copyField : cfs.getValue()) {
if (cf.getName().equals(destField)) { if (copyField.getDestination().getName().equals(destField)) {
sf.add(getField(cfs.getKey())); sf.add(copyField.getSource());
} }
} }
} }
return sf.toArray(new SchemaField[1]); return sf.toArray(new SchemaField[sf.size()]);
} }
/** /**
* Get all copy fields, both the static and the dynamic ones. * Get all copy fields, both the static and the dynamic ones.
*
* @param sourceField * @param sourceField
* @return Array of fields to copy to. * @return Array of fields to copy to.
* @deprecated Use {@link #getCopyFieldsList(String)} instead.
*/ */
@Deprecated
public SchemaField[] getCopyFields(String sourceField) { public SchemaField[] getCopyFields(String sourceField) {
// Get the dynamic ones into a list. // This is the List that holds all the results, dynamic or not.
List<SchemaField> matchCopyFields = new ArrayList<SchemaField>(); List<SchemaField> matchCopyFields = new ArrayList<SchemaField>();
// Get the dynamic results into the list.
for(DynamicCopy dynamicCopy : dynamicCopyFields) { for(DynamicCopy dynamicCopy : dynamicCopyFields) {
if(dynamicCopy.matches(sourceField)) { if(dynamicCopy.matches(sourceField)) {
matchCopyFields.add(dynamicCopy.getTargetField(sourceField)); matchCopyFields.add(dynamicCopy.getTargetField(sourceField));
} }
} }
// Get the fixed ones, if there are any. // Get the fixed ones, if there are any and add them.
SchemaField[] fixedCopyFields = copyFields.get(sourceField); final List<CopyField> copyFields = copyFieldsMap.get(sourceField);
if (copyFields!=null) {
boolean appendFixed = copyFields.containsKey(sourceField); final Iterator<CopyField> it = copyFields.iterator();
while (it.hasNext()) {
// Construct the results by concatenating dynamic and fixed into a results array. matchCopyFields.add(it.next().getDestination());
}
SchemaField[] results = new SchemaField[matchCopyFields.size() + (appendFixed ? fixedCopyFields.length : 0)];
matchCopyFields.toArray(results);
if(appendFixed) {
System.arraycopy(fixedCopyFields, 0, results, matchCopyFields.size(), fixedCopyFields.length);
} }
return results; // Construct the results by transforming the list into an array.
return matchCopyFields.toArray(new SchemaField[matchCopyFields.size()]);
}
/**
* Get all copy fields for a specified source field, both static
* and dynamic ones. Unlike {@link #getCopyFields(String)}, each returned
* entry is a CopyField and therefore also carries the maxChars limit.
* <p>
* Entries built from matching dynamic copy rules come first, followed by
* the fixed copy fields registered for exactly this source name.
*
* @param sourceField name of the field whose copy targets are wanted
* @return a newly created List of CopyFields to copy to; empty (never null) when nothing matches.
* @since solr 1.4
*/
// This is useful when we need the maxChars param of each CopyField
public List<CopyField> getCopyFieldsList(final String sourceField){
final List<CopyField> result = new ArrayList<CopyField>();
// Dynamic rules: create a CopyField on the fly for every rule matching the source name.
for (DynamicCopy dynamicCopy : dynamicCopyFields) {
if (dynamicCopy.matches(sourceField)) {
// NOTE(review): getField(sourceField) appears to throw when the name has no schema
// field (registerCopyField relies on that) - confirm this is intended for source
// names that only match a dynamic copy pattern.
result.add(new CopyField(getField(sourceField), dynamicCopy.getTargetField(sourceField), dynamicCopy.maxChars));
}
}
// Fixed rules: the map holds no entry at all for sources without a static copyField.
List<CopyField> fixedCopyFields = copyFieldsMap.get(sourceField);
if (fixedCopyFields != null)
{
result.addAll(fixedCopyFields);
}
return result;
} }
/** /**

View File

@ -29,6 +29,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField; import org.apache.solr.common.SolrInputField;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.DateField; import org.apache.solr.schema.DateField;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
@ -120,15 +121,15 @@ public class DocumentBuilder {
// Check if we should copy this field to any other fields. // Check if we should copy this field to any other fields.
// This could happen whether it is explicit or not. // This could happen whether it is explicit or not.
SchemaField[] destArr = schema.getCopyFields(name); final List<CopyField> copyFields = schema.getCopyFieldsList(name);
if (destArr != null) { if (copyFields != null) {
for (SchemaField destField : destArr) { for(CopyField cf : copyFields) {
addSingleField(destField,val,boost); addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost);
} }
} }
// error if this field name doesn't match anything // error if this field name doesn't match anything
if (sfield==null && (destArr==null || destArr.length==0)) { if (sfield==null && (copyFields==null || copyFields.size()==0)) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'"); throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'");
} }
} }
@ -218,7 +219,7 @@ public class DocumentBuilder {
sfield.getName() + ": " +field.getValue() ); sfield.getName() + ": " +field.getValue() );
} }
SchemaField[] destArr = schema.getCopyFields(name); final List<CopyField> copyFields = schema.getCopyFieldsList(name);
// load each field value // load each field value
boolean hasField = false; boolean hasField = false;
@ -246,8 +247,10 @@ public class DocumentBuilder {
} }
} }
// Add the copy fields // Check if we should copy this field to any other fields.
for( SchemaField sf : destArr ) { // This could happen whether it is explicit or not.
for( CopyField cf : copyFields ) {
SchemaField sf = cf.getDestination();
// check if the copy field is a multivalued or not // check if the copy field is a multivalued or not
if( !sf.multiValued() && out.get( sf.getName() ) != null ) { if( !sf.multiValued() && out.get( sf.getName() ) != null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
@ -256,7 +259,7 @@ public class DocumentBuilder {
} }
used = true; used = true;
Field f = sf.createField( val, boost ); Field f = sf.createField( cf.getLimitedValue( val ), boost );
if( f != null ) { // null fields are not added if( f != null ) { // null fields are not added
out.add( f ); out.add( f );
} }

View File

@ -0,0 +1,180 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.Test;
/**
 * Exercises <code>CopyField</code>: constructor argument validation, the
 * accessors, and copying through a core loaded with a dedicated schema file.
 *
 * @since solr 1.4
 */
public class CopyFieldTest extends AbstractSolrTestCase {

  @Override
  public String getSchemaFile() {
    return "schema-copyfield-test.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  /** Builds a text-typed schema field with the given name. */
  private SchemaField textField(final String fieldName) {
    return new SchemaField(fieldName, new TextField());
  }

  /** Expects the two-argument constructor to reject the given (partly null) pair. */
  private void expectNullRejected(final SchemaField src, final SchemaField dst) {
    try {
      new CopyField(src, dst);
      fail("CopyField failed with null SchemaField argument.");
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getLocalizedMessage().contains("can't be NULL"));
    }
  }

  /** Expects the three-argument constructor to reject the given (partly null) pair. */
  private void expectNullRejected(final SchemaField src, final SchemaField dst,
      final int limit) {
    try {
      new CopyField(src, dst, limit);
      fail("CopyField failed with null SchemaField argument.");
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getLocalizedMessage().contains("can't be NULL"));
    }
  }

  /** Creates an indented local request running the given query against the core. */
  private SolrQueryRequest newQueryRequest(final SolrCore solrCore, final String query) {
    final Map<String,String> params = new HashMap<String, String>();
    params.put( CommonParams.Q, query );
    params.put( "indent", "true" );
    return new LocalSolrQueryRequest( solrCore, new MapSolrParams( params ) );
  }

  @Test
  public void testCopyFieldSchemaFieldSchemaField() {
    expectNullRejected(textField("source"), null);
    expectNullRejected(null, textField("destination"));
    expectNullRejected(null, null);
  }

  @Test
  public void testCopyFieldSchemaFieldSchemaFieldInt() {
    expectNullRejected(null, textField("destination"), 1000);
    expectNullRejected(textField("source"), null, 1000);
    expectNullRejected(null, null, 1000);

    // A negative limit must be refused as well.
    try {
      new CopyField(textField("source"), textField("destination"), -1000);
      fail("CopyField failed with negative length argument.");
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getLocalizedMessage().contains(
          "can't have a negative value"));
    }

    // The "no limit" marker is a legal value and must not throw.
    new CopyField(textField("source"), textField("destination"), CopyField.UNLIMITED);
  }

  @Test
  public void testGetSource() {
    final CopyField rule =
        new CopyField(textField("source"), textField("destination"), 1000);
    assertEquals("source", rule.getSource().name);
  }

  @Test
  public void testGetDestination() {
    final CopyField rule =
        new CopyField(textField("source"), textField("destination"), 1000);
    assertEquals("destination", rule.getDestination().name);
  }

  @Test
  public void testGetMaxChars() {
    final CopyField rule =
        new CopyField(textField("source"), textField("destination"), 1000);
    assertEquals(1000, rule.getMaxChars());
  }

  @Test
  public void testCopyFieldFunctionality()
  {
    final SolrCore core = h.getCore();
    assertU(adoc("id", "10", "title", "test copy field", "text_en", "this is a simple test of the copy field functionality"));
    assertU(commit());

    // The source field itself is searchable on an early word.
    SolrQueryRequest req = newQueryRequest( core, "text_en:simple" );
    assertQ("Make sure they got in", req
        ,"//*[@numFound='1']"
        ,"//result/doc[1]/int[@name='id'][.='10']"
        );

    // The highlight field is expected to hold only the leading part of text_en.
    req = newQueryRequest( core, "highlight:simple" );
    assertQ("dynamic source", req
        ,"//*[@numFound='1']"
        ,"//result/doc[1]/int[@name='id'][.='10']"
        ,"//result/doc[1]/arr[@name='highlight']/str[.='this is a simple test of ']"
        );

    // A late word is still found in the source field ...
    req = newQueryRequest( core, "text_en:functionality" );
    assertQ("Make sure they got in", req
        ,"//*[@numFound='1']");

    // ... but is expected to be absent from the highlight field.
    req = newQueryRequest( core, "highlight:functionality" );
    assertQ("dynamic source", req
        ,"//*[@numFound='0']");
  }
}

View File

@ -0,0 +1,468 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- The Solr schema file. This file should be named "schema.xml" and
should be located where the classloader for the Solr webapp can find it.
This schema is used for testing, and as such has everything and the
kitchen sink thrown in. See example/solr/conf/schema.xml for a
more concise example.
$Id$
$Source: /cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
$Name: $
-->
<schema name="test" version="1.0">
<types>
<!-- field type definitions... note that the "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real type and
behavior of the fieldtype.
-->
<!-- numeric field types that store and index the text
value verbatim (and hence don't sort correctly or support range queries.)
These are provided more for backward compatibility, allowing one
to create a schema that matches an existing lucene index.
-->
<fieldType name="integer" class="solr.IntField"/>
<fieldType name="long" class="solr.LongField"/>
<fieldtype name="float" class="solr.FloatField"/>
<fieldType name="double" class="solr.DoubleField"/>
<!-- numeric field types that manipulate the value into
a string value that isn't human readable in its internal form,
but sorts correctly and supports range queries.
If sortMissingLast="true" then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order.
If sortMissingFirst="true" then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the field
first in an ascending sort and last in a descending sort.
-->
<fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true"/>
<fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true"/>
<fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true"/>
<fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true"/>
<!-- bcd versions of sortable numeric type may provide smaller
storage space and support very large numbers.
-->
<fieldtype name="bcdint" class="solr.BCDIntField" sortMissingLast="true"/>
<fieldtype name="bcdlong" class="solr.BCDLongField" sortMissingLast="true"/>
<fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- Demonstrating ignoreCaseChange -->
<fieldtype name="wdf_nocase" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- HighlitText optimizes storage for (long) columns which will be highlit -->
<fieldtype name="highlittext" class="solr.TextField" compressThreshold="345" />
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
seconds part (.999) is optional.
-->
<fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
<!-- solr.TextField allows the specification of custom
text analyzers specified as a tokenizer and a list
of token filters.
-->
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<!-- lucene PorterStemFilterFactory deprecated
<filter class="solr.PorterStemFilterFactory"/>
-->
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="nametext" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
</fieldtype>
<fieldtype name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
<fieldtype name="lowertok" class="solr.TextField">
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="keywordtok" class="solr.TextField">
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="standardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="lettertok" class="solr.TextField">
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
<analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer><tokenizer class="solr.HTMLStripStandardTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer><tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="patternreplacefilt" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all"
/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- whitespace-tokenized field type that applies SynonymFilterFactory with synonyms.txt -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
<!-- Demonstrates how RemoveDuplicatesTokenFilter makes stemmed
synonyms "better"
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
<filter class="solr.EnglishPorterFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
</fieldtype>
<fieldtype name="unstored" class="solr.StrField" indexed="true" stored="false"/>
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
</types>
<fields>
<field name="id" type="integer" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="name" type="nametext" indexed="true" stored="true"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="title" type="nametext" indexed="true" stored="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
<field name="bday" type="date" indexed="true" stored="true"/>
<field name="title_stemmed" type="text" indexed="true" stored="false"/>
<field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
<field name="syn" type="syn" indexed="true" stored="true"/>
<!-- to test property inheritance and overriding -->
<field name="shouldbeunstored" type="unstored" />
<field name="shouldbestored" type="unstored" stored="true"/>
<field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
<!-- test different combinations of indexed and stored -->
<field name="bind" type="boolean" indexed="true" stored="false"/>
<field name="bsto" type="boolean" indexed="false" stored="true"/>
<field name="bindsto" type="boolean" indexed="true" stored="true"/>
<field name="isto" type="integer" indexed="false" stored="true"/>
<field name="iind" type="integer" indexed="true" stored="false"/>
<field name="ssto" type="string" indexed="false" stored="true"/>
<field name="sind" type="string" indexed="true" stored="false"/>
<field name="sindsto" type="string" indexed="true" stored="true"/>
<!-- test combinations of term vector settings -->
<field name="test_basictv" type="text" termVectors="true"/>
<field name="test_notv" type="text" termVectors="false"/>
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
<field name="test_posofftv" type="text" termVectors="true"
termPositions="true" termOffsets="true"/>
<!-- test highlit field settings -->
<field name="test_hlt" type="highlittext" indexed="true" compressed="true"/>
<field name="test_hlt_off" type="highlittext" indexed="true" compressed="false"/>
<!-- fields to test individual tokenizers and tokenfilters -->
<field name="teststop" type="teststop" indexed="true" stored="true"/>
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
<field name="lettertok" type="lettertok" indexed="true" stored="true"/>
<field name="whitetok" type="whitetok" indexed="true" stored="true"/>
<field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" stored="true"/>
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
<field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
<field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
<field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
<field name="dedup" type="dedup" indexed="true" stored="true"/>
<field name="wdf_nocase" type="wdf_nocase" indexed="true" stored="true"/>
<field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
<field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>
<field name="subword" type="subword" indexed="true" stored="true"/>
<field name="sku1" type="skutype1" indexed="true" stored="true"/>
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
<field name="textgap" type="textgap" indexed="true" stored="true"/>
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="sint" indexed="true" stored="true" default="42" multiValued="false"/>
<!-- test maxChars copyField attribute -->
<field name="text_fr" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<field name="text_en" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<field name="highlight" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If patterns of equal length
both match, the one appearing first in the schema will be used.
-->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_bcd" type="bcdstr" indexed="true" stored="true"/>
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<!-- special fields for dynamic copyField test -->
<dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
<dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
<!-- for testing to ensure that longer patterns are matched first -->
<dynamicField name="*aa" type="string" indexed="true" stored="true"/>
<dynamicField name="*aaa" type="integer" indexed="false" stored="true"/>
<!-- ignored because not stored or indexed -->
<dynamicField name="*_ignored" type="text" indexed="false" stored="false"/>
<!-- test maxChars copyField attribute -->
<dynamicField name="text_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field in different
ways, or to copy multiple fields into the same field for easier/faster searching.
-->
<copyField source="title" dest="title_stemmed"/>
<copyField source="title" dest="title_lettertok"/>
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="*_t" dest="text"/>
<!-- dynamic destination -->
<copyField source="*_dynamic" dest="dynamic_*"/>
<!-- test maxChars copyField attribute -->
<copyField source="text_fr" dest="highlight" maxChars="25" />
<copyField source="text_en" dest="highlight" maxChars="25" />
<copyField source="text_*" dest="highlight" maxChars="25" />
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine
for most applications.
-->
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
</schema>