mirror of https://github.com/apache/lucene.git
SOLR-477: Added in AnalysisRequestHandler and tests/sample documents
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@629342 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e1f97aff7e
commit
d5ef760bbc
|
@ -0,0 +1,55 @@
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<docs>
|
||||||
|
<doc>
|
||||||
|
<field name="id">TWINX2048-3200PRO</field>
|
||||||
|
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
|
||||||
|
<field name="manu">Corsair Microsystems Inc.</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">memory</field>
|
||||||
|
<field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
|
||||||
|
<field name="price">185</field>
|
||||||
|
<field name="popularity">5</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">VS1GB400C3</field>
|
||||||
|
<field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
|
||||||
|
<field name="manu">Corsair Microsystems Inc.</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">memory</field>
|
||||||
|
<field name="price">74.99</field>
|
||||||
|
<field name="popularity">7</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">VDBDB1A16</field>
|
||||||
|
<field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
|
||||||
|
<field name="manu">A-DATA Technology Inc.</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">memory</field>
|
||||||
|
<field name="features">CAS latency 3, 2.7v</field>
|
||||||
|
<!-- note: price is missing on this one -->
|
||||||
|
<field name="popularity">5</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
|
||||||
|
</doc>
|
||||||
|
</docs>
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
#!/bin/sh
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
FILES=$*
|
||||||
|
URL=http://localhost:8983/solr/analysis
|
||||||
|
|
||||||
|
for f in $FILES; do
|
||||||
|
echo Posting file $f to $URL
|
||||||
|
curl $URL --data-binary @$f -H 'Content-type:text/xml; charset=utf-8'
|
||||||
|
echo
|
||||||
|
done
|
|
@ -0,0 +1,24 @@
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<docs>
|
||||||
|
<doc>
|
||||||
|
<field name="id">TWINX2048-3200PRO</field>
|
||||||
|
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
|
||||||
|
</doc>
|
||||||
|
</docs>
|
||||||
|
|
|
@ -505,6 +505,16 @@
|
||||||
-->
|
-->
|
||||||
</requestHandler>
|
</requestHandler>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Analysis request handler. Since Solr 1.3. Use to returnhow a document is analyzed. Useful
|
||||||
|
for debugging and as a token server for other types of applications
|
||||||
|
-->
|
||||||
|
<requestHandler name="/analysis" class="solr.AnalysisRequestHandler" >
|
||||||
|
<!--
|
||||||
|
<str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
|
||||||
|
-->
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
<!-- CSV update handler, loaded on demand -->
|
<!-- CSV update handler, loaded on demand -->
|
||||||
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
|
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,10 @@ public class XML {
|
||||||
escape(str, out, attribute_escapes);
|
escape(str, out, attribute_escapes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void escapeAttributeValue(char [] chars, int start, int length, Writer out) throws IOException {
|
||||||
|
escape(chars, start, length, out, attribute_escapes);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public final static void writeXML(Writer out, String tag, String val) throws IOException {
|
public final static void writeXML(Writer out, String tag, String val) throws IOException {
|
||||||
out.write('<');
|
out.write('<');
|
||||||
|
@ -149,6 +153,19 @@ public class XML {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{
|
||||||
|
for (int i=offset; i<length; i++) {
|
||||||
|
char ch = chars[i];
|
||||||
|
if (ch<escapes.length) {
|
||||||
|
String replacement = escapes[ch];
|
||||||
|
if (replacement != null) {
|
||||||
|
out.write(replacement);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.write(ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void escape(String str, Writer out, String[] escapes) throws IOException {
|
private static void escape(String str, Writer out, String[] escapes) throws IOException {
|
||||||
for (int i=0; i<str.length(); i++) {
|
for (int i=0; i<str.length(); i++) {
|
||||||
|
|
|
@ -0,0 +1,223 @@
|
||||||
|
package org.apache.solr.handler;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import javanet.staxutils.BaseXMLInputFactory;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.util.ContentStream;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryResponse;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
|
||||||
|
import javax.xml.stream.XMLInputFactory;
|
||||||
|
import javax.xml.stream.XMLStreamConstants;
|
||||||
|
import javax.xml.stream.XMLStreamException;
|
||||||
|
import javax.xml.stream.XMLStreamReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
public class AnalysisRequestHandler extends RequestHandlerBase {
|
||||||
|
|
||||||
|
public static Logger log = Logger.getLogger(AnalysisRequestHandler.class.getName());
|
||||||
|
|
||||||
|
private XMLInputFactory inputFactory;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(NamedList args) {
|
||||||
|
super.init(args);
|
||||||
|
|
||||||
|
inputFactory = BaseXMLInputFactory.newInstance();
|
||||||
|
try {
|
||||||
|
// The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
|
||||||
|
// XMLInputFactory, as that implementation tries to cache and reuse the
|
||||||
|
// XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
|
||||||
|
// prevents this.
|
||||||
|
// All other known open-source stax parsers (and the bea ref impl)
|
||||||
|
// have thread-safe factories.
|
||||||
|
inputFactory.setProperty("reuse-instance", Boolean.FALSE);
|
||||||
|
}
|
||||||
|
catch (IllegalArgumentException ex) {
|
||||||
|
// Other implementations will likely throw this exception since "reuse-instance"
|
||||||
|
// isimplementation specific.
|
||||||
|
log.fine("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
|
||||||
|
RequestHandlerUtils.addExperimentalFormatWarning(rsp);
|
||||||
|
SolrParams params = req.getParams();
|
||||||
|
Iterable<ContentStream> streams = req.getContentStreams();
|
||||||
|
if (streams != null) {
|
||||||
|
for (ContentStream stream : req.getContentStreams()) {
|
||||||
|
Reader reader = stream.getReader();
|
||||||
|
try {
|
||||||
|
XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
|
||||||
|
NamedList<Object> result = processContent(parser, req.getSchema());
|
||||||
|
rsp.add("response", result);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
IOUtils.closeQuietly(reader);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NamedList<Object> processContent(XMLStreamReader parser,
|
||||||
|
IndexSchema schema) throws XMLStreamException, IOException {
|
||||||
|
NamedList<Object> result = new NamedList<Object>();
|
||||||
|
while (true) {
|
||||||
|
int event = parser.next();
|
||||||
|
switch (event) {
|
||||||
|
case XMLStreamConstants.END_DOCUMENT: {
|
||||||
|
parser.close();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
case XMLStreamConstants.START_ELEMENT: {
|
||||||
|
String currTag = parser.getLocalName();
|
||||||
|
if ("doc".equals(currTag)) {
|
||||||
|
log.finest("Tokenizing doc...");
|
||||||
|
|
||||||
|
SolrInputDocument doc = readDoc(parser);
|
||||||
|
SchemaField uniq = schema.getUniqueKeyField();
|
||||||
|
NamedList<NamedList<NamedList<Object>>> theTokens = new NamedList<NamedList<NamedList<Object>>>();
|
||||||
|
result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens);
|
||||||
|
for (String name : doc.getFieldNames()) {
|
||||||
|
FieldType ft = schema.getFieldType(name);
|
||||||
|
Analyzer analyzer = ft.getAnalyzer();
|
||||||
|
Collection<Object> vals = doc.getFieldValues(name);
|
||||||
|
for (Object val : vals) {
|
||||||
|
Reader reader = new StringReader(val.toString());
|
||||||
|
TokenStream tstream = analyzer.tokenStream(name, reader);
|
||||||
|
NamedList<NamedList<Object>> tokens = getTokens(tstream);
|
||||||
|
theTokens.add(name, tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static NamedList<NamedList<Object>> getTokens(TokenStream tstream) throws IOException {
|
||||||
|
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
|
||||||
|
Token t = null;
|
||||||
|
while (((t = tstream.next()) != null)) {
|
||||||
|
NamedList<Object> token = new NamedList<Object>();
|
||||||
|
tokens.add("token", token);
|
||||||
|
token.add("value", new String(t.termBuffer(), 0, t.termLength()));
|
||||||
|
token.add("start", t.startOffset());
|
||||||
|
token.add("end", t.endOffset());
|
||||||
|
token.add("posInc", t.getPositionIncrement());
|
||||||
|
token.add("type", t.type());
|
||||||
|
//TODO: handle payloads
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
|
||||||
|
SolrInputDocument doc = new SolrInputDocument();
|
||||||
|
|
||||||
|
StringBuilder text = new StringBuilder();
|
||||||
|
String name = null;
|
||||||
|
String attrName = "";
|
||||||
|
float boost = 1.0f;
|
||||||
|
boolean isNull = false;
|
||||||
|
while (true) {
|
||||||
|
int event = parser.next();
|
||||||
|
switch (event) {
|
||||||
|
// Add everything to the text
|
||||||
|
case XMLStreamConstants.SPACE:
|
||||||
|
case XMLStreamConstants.CDATA:
|
||||||
|
case XMLStreamConstants.CHARACTERS:
|
||||||
|
text.append(parser.getText());
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XMLStreamConstants.END_ELEMENT:
|
||||||
|
if ("doc".equals(parser.getLocalName())) {
|
||||||
|
return doc;
|
||||||
|
} else if ("field".equals(parser.getLocalName())) {
|
||||||
|
if (!isNull) {
|
||||||
|
doc.addField(name, text.toString(), boost);
|
||||||
|
boost = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XMLStreamConstants.START_ELEMENT:
|
||||||
|
text.setLength(0);
|
||||||
|
String localName = parser.getLocalName();
|
||||||
|
if (!"field".equals(localName)) {
|
||||||
|
log.warning("unexpected XML tag doc/" + localName);
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
"unexpected XML tag doc/" + localName);
|
||||||
|
}
|
||||||
|
|
||||||
|
String attrVal = "";
|
||||||
|
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
||||||
|
attrName = parser.getAttributeLocalName(i);
|
||||||
|
attrVal = parser.getAttributeValue(i);
|
||||||
|
if ("name".equals(attrName)) {
|
||||||
|
name = attrVal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////// SolrInfoMBeans methods //////////////////////
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
return "Provide Analysis of text";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getVersion() {
|
||||||
|
return "$Revision:$";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSourceId() {
|
||||||
|
return "$Id:$";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSource() {
|
||||||
|
return "$URL:$";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,105 @@
|
||||||
|
package org.apache.solr.handler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import javanet.staxutils.BaseXMLInputFactory;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.util.AbstractSolrTestCase;
|
||||||
|
|
||||||
|
import javax.xml.stream.XMLInputFactory;
|
||||||
|
import javax.xml.stream.XMLStreamReader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class AnalysisRequestHandlerTest extends AbstractSolrTestCase {
|
||||||
|
private XMLInputFactory inputFactory = BaseXMLInputFactory.newInstance();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSchemaFile() {
|
||||||
|
return "schema.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSolrConfigFile() {
|
||||||
|
return "solrconfig.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void testReadDoc() throws Exception {
|
||||||
|
String xml =
|
||||||
|
"<docs><doc >" +
|
||||||
|
" <field name=\"id\" >12345</field>" +
|
||||||
|
" <field name=\"name\">cute little kitten</field>" +
|
||||||
|
" <field name=\"text\">the quick red fox jumped over the lazy brown dogs</field>" +
|
||||||
|
"</doc>" +
|
||||||
|
"<doc >" +
|
||||||
|
" <field name=\"id\" >12346</field>" +
|
||||||
|
" <field name=\"name\">big mean dog</field>" +
|
||||||
|
" <field name=\"text\">cats like to purr</field>" +
|
||||||
|
"</doc>" +
|
||||||
|
"</docs>";
|
||||||
|
|
||||||
|
XMLStreamReader parser =
|
||||||
|
inputFactory.createXMLStreamReader(new StringReader(xml));
|
||||||
|
AnalysisRequestHandler handler = new AnalysisRequestHandler();
|
||||||
|
NamedList<Object> result = handler.processContent(parser, h.getCore().getSchema());
|
||||||
|
assertTrue("result is null and it shouldn't be", result != null);
|
||||||
|
NamedList<NamedList<NamedList<Object>>> theTokens = (NamedList<NamedList<NamedList<Object>>>) result.get("12345");
|
||||||
|
assertTrue("theTokens is null and it shouldn't be", theTokens != null);
|
||||||
|
NamedList<NamedList<Object>> tokens = theTokens.get("name");
|
||||||
|
assertTrue("tokens is null and it shouldn't be", tokens != null);
|
||||||
|
assertTrue("tokens Size: " + tokens.size() + " is not : " + 3, tokens.size() == 3);
|
||||||
|
NamedList<Object> token;
|
||||||
|
String value;
|
||||||
|
token = tokens.get("token", 0);
|
||||||
|
value = (String) token.get("value");
|
||||||
|
assertTrue(value + " is not equal to " + "cute", value.equals("cute") == true);
|
||||||
|
token = tokens.get("token", 1);
|
||||||
|
value = (String) token.get("value");
|
||||||
|
assertTrue(value + " is not equal to " + "little", value.equals("little") == true);
|
||||||
|
|
||||||
|
token = tokens.get("token", 2);
|
||||||
|
value = (String) token.get("value");
|
||||||
|
assertTrue(value + " is not equal to " + "kitten", value.equals("kitten") == true);
|
||||||
|
|
||||||
|
tokens = theTokens.get("text");
|
||||||
|
assertTrue("tokens is null and it shouldn't be", tokens != null);
|
||||||
|
assertTrue("tokens Size: " + tokens.size() + " is not : " + 8, tokens.size() == 8);//stopwords are removed
|
||||||
|
|
||||||
|
String[] gold = new String[]{"quick", "red", "fox", "jump", "over", "lazi", "brown", "dog"};
|
||||||
|
for (int j = 0; j < gold.length; j++) {
|
||||||
|
NamedList<Object> tok = tokens.get("token", j);
|
||||||
|
value = (String) tok.get("value");
|
||||||
|
assertTrue(value + " is not equal to " + gold[j], value.equals(gold[j]) == true);
|
||||||
|
}
|
||||||
|
theTokens = (NamedList<NamedList<NamedList<Object>>>) result.get("12346");
|
||||||
|
assertTrue("theTokens is null and it shouldn't be", theTokens != null);
|
||||||
|
tokens = theTokens.get("name");
|
||||||
|
assertTrue("tokens is null and it shouldn't be", tokens != null);
|
||||||
|
assertTrue("tokens Size: " + tokens.size() + " is not : " + 3, tokens.size() == 3);
|
||||||
|
gold = new String[]{"cat", "like", "purr"};
|
||||||
|
tokens = theTokens.get("text");
|
||||||
|
assertTrue("tokens is null and it shouldn't be", tokens != null);
|
||||||
|
assertTrue("tokens Size: " + tokens.size() + " is not : " + 3, tokens.size() == 3);//stopwords are removed
|
||||||
|
for (int j = 0; j < gold.length; j++) {
|
||||||
|
NamedList<Object> tok = tokens.get("token", j);
|
||||||
|
value = (String) tok.get("value");
|
||||||
|
assertTrue(value + " is not equal to " + gold[j], value.equals(gold[j]) == true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue