mirror of https://github.com/apache/lucene.git
SOLR-4648 Add PreAnalyzedUpdateProcessorFactory.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1464889 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
03c9371830
commit
0b999ebb46
|
@ -80,6 +80,10 @@ Detailed Change List
|
|||
New Features
|
||||
----------------------
|
||||
|
||||
* SOLR-4648 PreAnalyzedUpdateProcessorFactory allows using the functionality
|
||||
of PreAnalyzedField with other field types. See javadoc for details and
|
||||
examples. (Andrzej Bialecki)
|
||||
|
||||
* SOLR-4196 (and others). Solr.xml is being deprecated in favor of a simple
|
||||
properties file. In the absence of a <solr_home>/solr.xml but the presence of
|
||||
<solr_home>/solr.properties, two things will happen
|
||||
|
|
|
@ -28,9 +28,8 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.GeneralField;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.AttributeSource.State;
|
||||
|
@ -57,23 +56,27 @@ public class PreAnalyzedField extends FieldType {
|
|||
private PreAnalyzedParser parser;
|
||||
|
||||
@Override
|
||||
protected void init(IndexSchema schema, Map<String, String> args) {
|
||||
public void init(IndexSchema schema, Map<String, String> args) {
|
||||
super.init(schema, args);
|
||||
String implName = args.get(PARSER_IMPL);
|
||||
if (implName == null) {
|
||||
parser = new JsonPreAnalyzedParser();
|
||||
} else {
|
||||
try {
|
||||
Class<?> implClazz = Class.forName(implName);
|
||||
if (!PreAnalyzedParser.class.isAssignableFrom(implClazz)) {
|
||||
throw new Exception("must implement " + PreAnalyzedParser.class.getName());
|
||||
}
|
||||
Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
|
||||
parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName + "' (" +
|
||||
e.getMessage() + "), using default " + DEFAULT_IMPL);
|
||||
// short name
|
||||
if ("json".equalsIgnoreCase(implName)) {
|
||||
parser = new JsonPreAnalyzedParser();
|
||||
} else if ("simple".equalsIgnoreCase(implName)) {
|
||||
parser = new SimplePreAnalyzedParser();
|
||||
} else {
|
||||
try {
|
||||
Class<? extends PreAnalyzedParser> implClazz = schema.getResourceLoader().findClass(implName, PreAnalyzedParser.class);
|
||||
Constructor<?> c = implClazz.getConstructor(new Class<?>[0]);
|
||||
parser = (PreAnalyzedParser) c.newInstance(new Object[0]);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Can't use the configured PreAnalyzedParser class '" + implName +
|
||||
"', using default " + DEFAULT_IMPL, e);
|
||||
parser = new JsonPreAnalyzedParser();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -102,7 +105,7 @@ public class PreAnalyzedField extends FieldType {
|
|||
try {
|
||||
f = fromString(field, String.valueOf(value), boost);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
LOG.warn("Error parsing pre-analyzed field '" + field.getName() + "'", e);
|
||||
return null;
|
||||
}
|
||||
return f;
|
||||
|
@ -129,6 +132,36 @@ public class PreAnalyzedField extends FieldType {
|
|||
return parser.toFormattedString(f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to create a {@link org.apache.lucene.document.FieldType}
|
||||
* based on the {@link SchemaField}
|
||||
*/
|
||||
public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
|
||||
if (!field.indexed() && !field.stored()) {
|
||||
if (log.isTraceEnabled())
|
||||
log.trace("Ignoring unindexed/unstored field: " + field);
|
||||
return null;
|
||||
}
|
||||
org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
|
||||
newType.setIndexed(field.indexed());
|
||||
newType.setTokenized(field.isTokenized());
|
||||
newType.setStored(field.stored());
|
||||
newType.setOmitNorms(field.omitNorms());
|
||||
IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
if (field.omitTermFreqAndPositions()) {
|
||||
options = IndexOptions.DOCS_ONLY;
|
||||
} else if (field.omitPositions()) {
|
||||
options = IndexOptions.DOCS_AND_FREQS;
|
||||
} else if (field.storeOffsetsWithPositions()) {
|
||||
options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
}
|
||||
newType.setIndexOptions(options);
|
||||
newType.setStoreTermVectors(field.storeTermVector());
|
||||
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
|
||||
newType.setStoreTermVectorPositions(field.storeTermPositions());
|
||||
return newType;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a simple holder of a stored part and the collected states (tokens with attributes).
|
||||
*/
|
||||
|
@ -167,19 +200,44 @@ public class PreAnalyzedField extends FieldType {
|
|||
}
|
||||
PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(new StringReader(val), parser);
|
||||
parse.reset(); // consume
|
||||
Field f = (Field)super.createField(field, val, boost);
|
||||
org.apache.lucene.document.FieldType type = createFieldType(field);
|
||||
if (type == null) {
|
||||
parse.close();
|
||||
return null;
|
||||
}
|
||||
Field f = null;
|
||||
if (parse.getStringValue() != null) {
|
||||
f.setStringValue(parse.getStringValue());
|
||||
if (field.stored()) {
|
||||
f = new Field(field.getName(), parse.getStringValue(), type);
|
||||
} else {
|
||||
type.setStored(false);
|
||||
}
|
||||
} else if (parse.getBinaryValue() != null) {
|
||||
f.setBytesValue(parse.getBinaryValue());
|
||||
if (field.isBinary()) {
|
||||
f = new Field(field.getName(), parse.getBinaryValue(), type);
|
||||
}
|
||||
} else {
|
||||
f.fieldType().setStored(false);
|
||||
type.setStored(false);
|
||||
}
|
||||
|
||||
if (parse.hasTokenStream()) {
|
||||
f.fieldType().setIndexed(true);
|
||||
f.fieldType().setTokenized(true);
|
||||
f.setTokenStream(parse);
|
||||
if (field.indexed()) {
|
||||
type.setIndexed(true);
|
||||
type.setTokenized(true);
|
||||
if (f != null) {
|
||||
f.setTokenStream(parse);
|
||||
} else {
|
||||
f = new Field(field.getName(), parse, type);
|
||||
}
|
||||
} else {
|
||||
if (f != null) {
|
||||
f.fieldType().setIndexed(false);
|
||||
f.fieldType().setTokenized(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (f != null) {
|
||||
f.setBoost(boost);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
|
|
@ -175,6 +175,12 @@ public class DocumentBuilder {
|
|||
|
||||
|
||||
private static void addField(Document doc, SchemaField field, Object val, float boost) {
|
||||
if (val instanceof StorableField) {
|
||||
// set boost to the calculated compound boost
|
||||
((Field)val).setBoost(boost);
|
||||
doc.add((Field)val);
|
||||
return;
|
||||
}
|
||||
for (StorableField f : field.getType().createFields(field, val, boost)) {
|
||||
if (f != null) doc.add((Field) f); // null fields are not added
|
||||
}
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.JsonPreAnalyzedParser;
|
||||
import org.apache.solr.schema.PreAnalyzedField;
|
||||
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.SimplePreAnalyzedParser;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>An update processor that parses configured fields of any document being added
|
||||
* using {@link PreAnalyzedField} with the configured format parser.</p>
|
||||
*
|
||||
* <p>Fields are specified using the same patterns as in {@link FieldMutatingUpdateProcessorFactory}.
|
||||
* They are then checked whether they follow a pre-analyzed format defined by <code>parser</code>.
|
||||
* Valid fields are then parsed. The original {@link SchemaField} is used for the initial
|
||||
* creation of {@link StorableField}, which is then modified to add the results from
|
||||
* parsing (token stream value and/or string value) and then it will be directly added to
|
||||
* the final Lucene {@link Document} to be indexed.</p>
|
||||
* <p>Fields that are declared in the patterns list but are not present
|
||||
* in the current schema will be removed from the input document.</p>
|
||||
* <h3>Implementation details</h3>
|
||||
* <p>This update processor uses {@link PreAnalyzedParser}
|
||||
* to parse the original field content (interpreted as a string value), and thus
|
||||
* obtain the stored part and the token stream part. Then it creates the "template"
|
||||
* {@link Field}-s using the original {@link SchemaField#createFields(Object, float)}
|
||||
* as declared in the current schema. Finally it sets the pre-analyzed parts if
|
||||
* available (string value and the token
|
||||
* stream value) on the first field of these "template" fields. If the declared
|
||||
* field type does not support stored or indexed parts then such parts are silently
|
||||
* discarded. Finally the updated "template" {@link Field}-s are added to the resulting
|
||||
* {@link SolrInputField}, and the original value of that field is removed.</p>
|
||||
* <h3>Example configuration</h3>
|
||||
* <p>In the example configuration below there are two update chains, one that
|
||||
* uses the "simple" parser ({@link SimplePreAnalyzedParser}) and one that uses
|
||||
* the "json" parser ({@link JsonPreAnalyzedParser}). Field "nonexistent" will be
|
||||
* removed from input documents if not present in the schema. Other fields will be
|
||||
* analyzed and if valid they will be converted to {@link StorableField}-s or if
|
||||
* they are not in a valid format that can be parsed with the selected parser they
|
||||
* will be passed as-is. Assuming that <code>ssto</code> field is stored but not
|
||||
* indexed, and <code>sind</code> field is indexed but not stored: if
|
||||
* <code>ssto</code> input value contains the indexed part then this part will
|
||||
* be discarded and only the stored value part will be retained. Similarly,
|
||||
* if <code>sind</code> input value contains the stored part then it
|
||||
* will be discarded and only the token stream part will be retained.</p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <updateRequestProcessorChain name="pre-analyzed-simple">
|
||||
* <processor class="solr.PreAnalyzedUpdateProcessorFactory">
|
||||
* <str name="fieldName">title</str>
|
||||
* <str name="fieldName">nonexistent</str>
|
||||
* <str name="fieldName">ssto</str>
|
||||
* <str name="fieldName">sind</str>
|
||||
* <str name="parser">simple</str>
|
||||
* </processor>
|
||||
* <processor class="solr.RunUpdateProcessorFactory" />
|
||||
* </updateRequestProcessorChain>
|
||||
*
|
||||
* <updateRequestProcessorChain name="pre-analyzed-json">
|
||||
* <processor class="solr.PreAnalyzedUpdateProcessorFactory">
|
||||
* <str name="fieldName">title</str>
|
||||
* <str name="fieldName">nonexistent</str>
|
||||
* <str name="fieldName">ssto</str>
|
||||
* <str name="fieldName">sind</str>
|
||||
* <str name="parser">json</str>
|
||||
* </processor>
|
||||
* <processor class="solr.RunUpdateProcessorFactory" />
|
||||
* </updateRequestProcessorChain>
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class PreAnalyzedUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
private PreAnalyzedField parser;
|
||||
private String parserImpl;
|
||||
|
||||
@Override
|
||||
public void init(final NamedList args) {
|
||||
parserImpl = (String)args.get("parser");
|
||||
args.remove("parser");
|
||||
// initialize inclusion / exclusion patterns
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp, UpdateRequestProcessor next) {
|
||||
return new PreAnalyzedUpdateProcessor(getSelector(), next, req.getSchema(), parser);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(SolrCore core) {
|
||||
super.inform(core);
|
||||
parser = new PreAnalyzedField();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
if (parserImpl != null) {
|
||||
args.put(PreAnalyzedField.PARSER_IMPL, parserImpl);
|
||||
}
|
||||
parser.init(core.getSchema(), args);
|
||||
}
|
||||
}
|
||||
|
||||
class PreAnalyzedUpdateProcessor extends FieldMutatingUpdateProcessor {
|
||||
|
||||
private PreAnalyzedField parser;
|
||||
private IndexSchema schema;
|
||||
|
||||
public PreAnalyzedUpdateProcessor(FieldNameSelector sel, UpdateRequestProcessor next, IndexSchema schema, PreAnalyzedField parser) {
|
||||
super(sel, next);
|
||||
this.schema = schema;
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SolrInputField mutate(SolrInputField src) {
|
||||
SchemaField sf = schema.getFieldOrNull(src.getName());
|
||||
if (sf == null) { // remove this field
|
||||
return null;
|
||||
}
|
||||
FieldType type = PreAnalyzedField.createFieldType(sf);
|
||||
if (type == null) { // neither indexed nor stored - skip
|
||||
return null;
|
||||
}
|
||||
SolrInputField res = new SolrInputField(src.getName());
|
||||
res.setBoost(src.getBoost());
|
||||
for (Object o : src) {
|
||||
if (o == null) {
|
||||
continue;
|
||||
}
|
||||
Field pre = (Field)parser.createField(sf, o, 1.0f);
|
||||
if (pre != null) {
|
||||
res.addValue(pre, 1.0f);
|
||||
} else { // restore the original value
|
||||
log.warn("Could not parse field {} - using original value as is: {}", src.getName(), o);
|
||||
res.addValue(o, 1.0f);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
|
@ -390,4 +390,30 @@
|
|||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="pre-analyzed-simple">
|
||||
<processor class="solr.PreAnalyzedUpdateProcessorFactory">
|
||||
<str name="fieldName">subject</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="fieldName">teststop</str>
|
||||
<str name="fieldName">nonexistent</str>
|
||||
<str name="fieldName">ssto</str>
|
||||
<str name="fieldName">sind</str>
|
||||
<str name="parser">simple</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="pre-analyzed-json">
|
||||
<processor class="solr.PreAnalyzedUpdateProcessorFactory">
|
||||
<str name="fieldName">subject</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="fieldName">teststop</str>
|
||||
<str name="fieldName">nonexistent</str>
|
||||
<str name="fieldName">ssto</str>
|
||||
<str name="fieldName">sind</str>
|
||||
<str name="parser">json</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
</config>
|
||||
|
|
|
@ -22,11 +22,13 @@ import java.util.HashMap;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class PreAnalyzedFieldTest extends LuceneTestCase {
|
||||
public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {
|
||||
|
||||
private static final String[] valid = {
|
||||
"1 one two three", // simple parsing
|
||||
|
@ -70,6 +72,11 @@ public class PreAnalyzedFieldTest extends LuceneTestCase {
|
|||
int props =
|
||||
FieldProperties.INDEXED | FieldProperties.STORED;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml","schema.xml");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
@ -82,7 +89,7 @@ public class PreAnalyzedFieldTest extends LuceneTestCase {
|
|||
// use Simple format
|
||||
HashMap<String,String> args = new HashMap<String,String>();
|
||||
args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
|
||||
paf.init((IndexSchema)null, args);
|
||||
paf.init(h.getCore().getSchema(), args);
|
||||
PreAnalyzedParser parser = new SimplePreAnalyzedParser();
|
||||
for (int i = 0; i < valid.length; i++) {
|
||||
String s = valid[i];
|
||||
|
@ -100,7 +107,7 @@ public class PreAnalyzedFieldTest extends LuceneTestCase {
|
|||
@Test
|
||||
public void testInvalidSimple() {
|
||||
PreAnalyzedField paf = new PreAnalyzedField();
|
||||
paf.init((IndexSchema)null, Collections.<String,String>emptyMap());
|
||||
paf.init(h.getCore().getSchema(), Collections.<String,String>emptyMap());
|
||||
for (String s : invalid) {
|
||||
try {
|
||||
paf.fromString(field, s, 1.0f);
|
||||
|
@ -125,7 +132,7 @@ public class PreAnalyzedFieldTest extends LuceneTestCase {
|
|||
// use Simple format
|
||||
HashMap<String,String> args = new HashMap<String,String>();
|
||||
args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
|
||||
paf.init((IndexSchema)null, args);
|
||||
paf.init(h.getCore().getSchema(), args);
|
||||
try {
|
||||
Field f = (Field)paf.fromString(field, valid[0], 1.0f);
|
||||
} catch (Exception e) {
|
||||
|
@ -133,7 +140,7 @@ public class PreAnalyzedFieldTest extends LuceneTestCase {
|
|||
}
|
||||
// use JSON format
|
||||
args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
|
||||
paf.init((IndexSchema)null, args);
|
||||
paf.init(h.getCore().getSchema(), args);
|
||||
try {
|
||||
Field f = (Field)paf.fromString(field, valid[0], 1.0f);
|
||||
fail("Should fail JSON parsing: '" + valid[0]);
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class PreAnalyzedUpdateProcessorTest extends UpdateProcessorTestBase {
|
||||
String[] simpleTitle = new String[] {
|
||||
"not pre-analyzed",
|
||||
"1 =string value=foo bar"
|
||||
};
|
||||
String[] jsonTitle = new String[] {
|
||||
"not pre-analyzed",
|
||||
"{\"v\":\"1\",\"str\":\"string value\",\"tokens\":[{\"t\":\"foo\"},{\"t\":\"bar\"}]}",
|
||||
};
|
||||
String[] simpleTeststop = new String[] {
|
||||
"1 =this is a test.=one two three",
|
||||
"1 =this is a test.=three four five"
|
||||
};
|
||||
String[] jsonTeststop = new String[] {
|
||||
"{\"v\":\"1\",\"str\":\"this is a test.\",\"tokens\":[{\"t\":\"one\"},{\"t\":\"two\"},{\"t\":\"three\"}]}",
|
||||
"{\"v\":\"1\",\"str\":\"this is a test.\",\"tokens\":[{\"t\":\"three\"},{\"t\":\"four\"},{\"t\":\"five\"}]}",
|
||||
};
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-update-processor-chains.xml", "schema12.xml");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimple() throws Exception {
|
||||
test("pre-analyzed-simple", simpleTitle, simpleTeststop);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJson() throws Exception {
|
||||
test("pre-analyzed-json", jsonTitle, jsonTeststop);
|
||||
}
|
||||
|
||||
private void test(String chain, String[] title, String[] teststop) throws Exception {
|
||||
SolrInputDocument doc = processAdd(chain,
|
||||
doc(f("id", "1"),
|
||||
f("title", title[0]),
|
||||
f("teststop", teststop[0]),
|
||||
f("nonexistent", "foobar"),
|
||||
f("ssto", teststop[0]),
|
||||
f("sind", teststop[0])));
|
||||
assertEquals("title should be unchanged", title[0], doc.getFieldValue("title"));
|
||||
assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
|
||||
Field f = (Field)doc.getFieldValue("teststop");
|
||||
assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
|
||||
assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
|
||||
assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
|
||||
// check how SchemaField type affects stored/indexed part processing
|
||||
f = (Field)doc.getFieldValue("ssto");
|
||||
assertNotNull("should have ssto", f);
|
||||
assertNotNull("should have stringValue", f.stringValue());
|
||||
assertNull("should not have tokenStreamValue", f.tokenStreamValue());
|
||||
f = (Field)doc.getFieldValue("sind");
|
||||
assertNotNull("should have sind", f);
|
||||
assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
|
||||
assertNotNull("should have tokenStreamValue", f.tokenStreamValue());
|
||||
|
||||
doc = processAdd(chain,
|
||||
doc(f("id", "2"),
|
||||
f("title", title[1]),
|
||||
f("teststop", teststop[1]),
|
||||
f("nonexistent", "foobar"),
|
||||
f("ssto", teststop[1]),
|
||||
f("sind", teststop[1])));
|
||||
assertTrue("title should be a Field", doc.getFieldValue("title") instanceof Field);
|
||||
assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
|
||||
f = (Field)doc.getFieldValue("teststop");
|
||||
assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
|
||||
assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
|
||||
assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
|
||||
// check how SchemaField type affects stored/indexed part processing
|
||||
f = (Field)doc.getFieldValue("ssto");
|
||||
assertNotNull("should have ssto", f);
|
||||
assertNotNull("should have stringValue", f.stringValue());
|
||||
assertNull("should not have tokenStreamValue", f.tokenStreamValue());
|
||||
f = (Field)doc.getFieldValue("sind");
|
||||
assertNotNull("should have sind", f);
|
||||
assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
|
||||
assertNotNull("should have tokenStreamValue", f.tokenStreamValue());
|
||||
|
||||
assertU(commit());
|
||||
assertQ(req("teststop:\"one two three\"")
|
||||
,"//str[@name='id'][.='1']"
|
||||
,"//str[@name='teststop'][.='this is a test.']"
|
||||
);
|
||||
assertQ(req("teststop:three")
|
||||
,"//*[@numFound='2']"
|
||||
,"//result/doc[1]/str[@name='id'][.='1']"
|
||||
,"//result/doc[1]/str[@name='title'][.='not pre-analyzed']"
|
||||
,"//result/doc[2]/str[@name='id'][.='2']"
|
||||
,"//result/doc[2]/arr[@name='title']/str[.='string value']"
|
||||
);
|
||||
assertQ(req("ssto:three"), "//*[@numFound='0']");
|
||||
assertQ(req("sind:three"), "//*[@numFound='2']");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue