2006-12-15 14:53:39 -05:00
|
|
|
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
|
2006-12-14 21:37:49 -05:00
|
|
|
<%--
|
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
the License. You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
--%>
|
2006-01-26 00:37:29 -05:00
|
|
|
<%@ page import="org.apache.lucene.analysis.Analyzer,
|
|
|
|
org.apache.lucene.analysis.Token,
|
|
|
|
org.apache.lucene.analysis.TokenStream,
|
2008-07-12 10:12:29 -04:00
|
|
|
org.apache.lucene.index.Payload,
|
2009-07-15 13:21:04 -04:00
|
|
|
org.apache.lucene.analysis.CharReader,
|
|
|
|
org.apache.lucene.analysis.CharStream,
|
2010-03-14 20:43:05 -04:00
|
|
|
org.apache.lucene.analysis.tokenattributes.*,
|
2009-07-08 21:35:30 -04:00
|
|
|
org.apache.solr.analysis.CharFilterFactory,
|
2006-01-26 00:37:29 -05:00
|
|
|
org.apache.solr.analysis.TokenFilterFactory,
|
|
|
|
org.apache.solr.analysis.TokenizerChain,
|
|
|
|
org.apache.solr.analysis.TokenizerFactory,
|
|
|
|
org.apache.solr.schema.FieldType,
|
2006-02-22 00:53:23 -05:00
|
|
|
org.apache.solr.schema.SchemaField,
|
2008-07-15 17:30:37 -04:00
|
|
|
org.apache.solr.common.util.XML,
|
2006-02-22 00:53:23 -05:00
|
|
|
javax.servlet.jsp.JspWriter,java.io.IOException
|
2006-01-26 00:37:29 -05:00
|
|
|
"%>
|
|
|
|
<%@ page import="java.io.Reader"%>
|
|
|
|
<%@ page import="java.io.StringReader"%>
|
|
|
|
<%@ page import="java.util.*"%>
|
2008-07-12 10:12:29 -04:00
|
|
|
<%@ page import="java.math.BigInteger" %>
|
2006-02-22 00:53:23 -05:00
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
<%-- $Id$ --%>
|
|
|
|
<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%>
|
|
|
|
<%-- $Name: $ --%>
|
2006-01-26 00:37:29 -05:00
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
<%@include file="header.jsp" %>
|
2006-01-26 00:37:29 -05:00
|
|
|
|
|
|
|
<%
  // Read the form parameters. Every string parameter is normalized so that
  // it is never null; flags are true only when the checkbox value is "on".

  // is name a field name or a type name?
  String nt = request.getParameter("nt");
  nt = (nt == null || nt.length() == 0) ? "name" : nt; // assume field name
  nt = nt.toLowerCase().trim();

  String name = request.getParameter("name");
  if (name == null || name.length() == 0) {
    name = "";
  }

  String val = request.getParameter("val");
  if (val == null || val.length() == 0) {
    val = "";
  }

  String qval = request.getParameter("qval");
  if (qval == null || qval.length() == 0) {
    qval = "";
  }

  // equalsIgnoreCase on the literal is false for a null argument, so a
  // missing parameter leaves the flag off.
  String verboseS = request.getParameter("verbose");
  boolean verbose = "on".equalsIgnoreCase(verboseS);

  String qverboseS = request.getParameter("qverbose");
  boolean qverbose = "on".equalsIgnoreCase(qverboseS);

  String highlightS = request.getParameter("highlight");
  boolean highlight = "on".equalsIgnoreCase(highlightS);
%>
|
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
<br clear="all">
|
|
|
|
|
|
|
|
<h2>Field Analysis</h2>
|
|
|
|
|
2008-07-12 10:59:12 -04:00
|
|
|
<form method="POST" action="analysis.jsp" accept-charset="UTF-8">
|
2006-12-15 14:53:39 -05:00
|
|
|
<table>
|
|
|
|
<tr>
|
|
|
|
<td>
|
2007-07-20 01:28:42 -04:00
|
|
|
<strong>Field
|
|
|
|
<select name="nt">
|
|
|
|
<option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
|
2008-07-12 10:12:29 -04:00
|
|
|
<option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
|
|
|
|
</select></strong>
|
2006-12-15 14:53:39 -05:00
|
|
|
</td>
|
|
|
|
<td>
|
2006-12-16 03:38:09 -05:00
|
|
|
<input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
|
2006-12-15 14:53:39 -05:00
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr>
|
|
|
|
<td>
|
|
|
|
<strong>Field value (Index)</strong>
|
|
|
|
<br/>
|
|
|
|
verbose output
|
|
|
|
<input name="verbose" type="checkbox"
|
|
|
|
<%= verbose ? "checked=\"true\"" : "" %> >
|
|
|
|
<br/>
|
|
|
|
highlight matches
|
|
|
|
<input name="highlight" type="checkbox"
|
|
|
|
<%= highlight ? "checked=\"true\"" : "" %> >
|
|
|
|
</td>
|
|
|
|
<td>
|
2008-02-23 21:44:32 -05:00
|
|
|
<textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
|
2006-12-15 14:53:39 -05:00
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr>
|
|
|
|
<td>
|
|
|
|
<strong>Field value (Query)</strong>
|
|
|
|
<br/>
|
|
|
|
verbose output
|
|
|
|
<input name="qverbose" type="checkbox"
|
|
|
|
<%= qverbose ? "checked=\"true\"" : "" %> >
|
|
|
|
</td>
|
|
|
|
<td>
|
2006-12-16 03:38:09 -05:00
|
|
|
<textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
|
2006-12-15 14:53:39 -05:00
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr>
|
|
|
|
|
|
|
|
<td>
|
|
|
|
</td>
|
|
|
|
|
|
|
|
<td>
|
|
|
|
<input class="stdbutton" type="submit" value="analyze">
|
|
|
|
</td>
|
|
|
|
|
|
|
|
</tr>
|
|
|
|
</table>
|
|
|
|
</form>
|
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
|
|
|
|
<%
  // Resolve the requested field (or a synthetic field of the requested type)
  // and, when one is found, print the index and/or query analysis for the
  // submitted values.
  SchemaField field = null;

  // Compare by content (length), not by reference: '!= ""' only worked as
  // long as the empty value happened to be the interned literal.
  if (name.length() != 0) {
    if (nt.equals("name")) {
      try {
        field = schema.getField(name);
      } catch (Exception e) {
        // Lookup failure is reported on the page instead of failing the request.
        out.print("<strong>Unknown Field: ");
        XML.escapeCharData(name, out);
        out.println("</strong>");
      }
    } else {
      FieldType t = schema.getFieldTypes().get(name);
      if (null == t) {
        out.print("<strong>Unknown Field Type: ");
        XML.escapeCharData(name, out);
        out.println("</strong>");
      } else {
        // No real field exists for a bare type; wrap it in a placeholder field.
        field = new SchemaField("fakefieldoftype:" + name, t);
      }
    }
  }

  if (field != null) {
    // Terms produced by the query analyzer; used to highlight matching
    // terms in the index analysis output.
    HashSet<Tok> matches = null;
    if (qval.length() != 0 && highlight) {
      Reader reader = new StringReader(qval);
      Analyzer analyzer = field.getType().getQueryAnalyzer();
      TokenStream tstream = analyzer.reusableTokenStream(field.getName(), reader);
      tstream.reset();
      List<Token> tokens = getTokens(tstream);
      matches = new HashSet<Tok>();
      for (Token t : tokens) {
        matches.add(new Tok(t, 0));
      }
    }

    if (val.length() != 0) {
      out.println("<h3>Index Analyzer</h3>");
      doAnalyzer(out, field, val, false, verbose, matches);
    }
    if (qval.length() != 0) {
      out.println("<h3>Query Analyzer</h3>");
      doAnalyzer(out, field, qval, true, qverbose, null);
    }
  }

%>
|
2006-12-15 14:53:39 -05:00
|
|
|
|
|
|
|
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
|
|
|
|
<%!
|
|
|
|
private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
|
|
|
|
|
|
|
|
FieldType ft = field.getType();
|
2006-12-15 14:53:39 -05:00
|
|
|
Analyzer analyzer = queryAnalyser ?
|
|
|
|
ft.getQueryAnalyzer() : ft.getAnalyzer();
|
2006-01-26 00:37:29 -05:00
|
|
|
if (analyzer instanceof TokenizerChain) {
|
|
|
|
TokenizerChain tchain = (TokenizerChain)analyzer;
|
2009-07-08 21:35:30 -04:00
|
|
|
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
|
2006-01-26 00:37:29 -05:00
|
|
|
TokenizerFactory tfac = tchain.getTokenizerFactory();
|
|
|
|
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
|
|
|
|
|
2009-07-08 21:35:30 -04:00
|
|
|
if( cfiltfacs != null ){
|
2009-08-07 19:05:05 -04:00
|
|
|
String source = val;
|
2009-07-08 21:35:30 -04:00
|
|
|
for(CharFilterFactory cfiltfac : cfiltfacs ){
|
2009-08-07 19:05:05 -04:00
|
|
|
CharStream reader = CharReader.get(new StringReader(source));
|
2009-07-08 21:35:30 -04:00
|
|
|
reader = cfiltfac.create(reader);
|
|
|
|
if(verbose){
|
|
|
|
writeHeader(out, cfiltfac.getClass(), cfiltfac.getArgs());
|
2009-08-07 19:05:05 -04:00
|
|
|
source = writeCharStream(out, reader);
|
2009-07-08 21:35:30 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-08-07 19:05:05 -04:00
|
|
|
TokenStream tstream = tfac.create(tchain.charStream(new StringReader(val)));
|
2006-01-26 00:37:29 -05:00
|
|
|
List<Token> tokens = getTokens(tstream);
|
|
|
|
if (verbose) {
|
|
|
|
writeHeader(out, tfac.getClass(), tfac.getArgs());
|
|
|
|
}
|
|
|
|
|
|
|
|
writeTokens(out, tokens, ft, verbose, match);
|
|
|
|
|
|
|
|
for (TokenFilterFactory filtfac : filtfacs) {
|
|
|
|
if (verbose) {
|
|
|
|
writeHeader(out, filtfac.getClass(), filtfac.getArgs());
|
|
|
|
}
|
|
|
|
|
|
|
|
final Iterator<Token> iter = tokens.iterator();
|
|
|
|
tstream = filtfac.create( new TokenStream() {
|
2010-04-11 06:31:34 -04:00
|
|
|
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
|
|
OffsetAttribute offsetAtt = addAttribute (OffsetAttribute.class);
|
|
|
|
TypeAttribute typeAtt = addAttribute (TypeAttribute.class);
|
|
|
|
FlagsAttribute flagsAtt = addAttribute (FlagsAttribute.class);
|
|
|
|
PayloadAttribute payloadAtt = addAttribute (PayloadAttribute.class);
|
|
|
|
PositionIncrementAttribute posIncAtt = addAttribute (PositionIncrementAttribute.class);
|
2010-03-14 20:43:05 -04:00
|
|
|
|
|
|
|
public boolean incrementToken() throws IOException {
|
|
|
|
if (iter.hasNext()) {
|
|
|
|
Token token = iter.next();
|
2010-04-11 06:31:34 -04:00
|
|
|
termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
|
2010-03-14 20:43:05 -04:00
|
|
|
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
|
|
|
typeAtt.setType(token.type());
|
|
|
|
flagsAtt.setFlags(token.getFlags());
|
|
|
|
posIncAtt.setPositionIncrement(token.getPositionIncrement());
|
|
|
|
payloadAtt.setPayload(token.getPayload());
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
);
|
|
|
|
tokens = getTokens(tstream);
|
|
|
|
|
|
|
|
writeTokens(out, tokens, ft, verbose, match);
|
|
|
|
}
|
2006-12-15 14:53:39 -05:00
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
} else {
|
2009-08-30 12:18:21 -04:00
|
|
|
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),new StringReader(val));
|
|
|
|
tstream.reset();
|
2006-01-26 00:37:29 -05:00
|
|
|
List<Token> tokens = getTokens(tstream);
|
|
|
|
if (verbose) {
|
|
|
|
writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
|
|
|
|
}
|
|
|
|
writeTokens(out, tokens, ft, verbose, match);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static List<Token> getTokens(TokenStream tstream) throws IOException {
|
|
|
|
List<Token> tokens = new ArrayList<Token>();
|
2010-04-11 06:31:34 -04:00
|
|
|
CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class);
|
|
|
|
OffsetAttribute offsetAtt = tstream.addAttribute (OffsetAttribute.class);
|
|
|
|
TypeAttribute typeAtt = tstream.addAttribute (TypeAttribute.class);
|
|
|
|
FlagsAttribute flagsAtt = tstream.addAttribute (FlagsAttribute.class);
|
|
|
|
PayloadAttribute payloadAtt = tstream.addAttribute (PayloadAttribute.class);
|
|
|
|
PositionIncrementAttribute posIncAtt = tstream.addAttribute (PositionIncrementAttribute.class);
|
2010-03-14 20:43:05 -04:00
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
while (true) {
|
2010-03-14 20:43:05 -04:00
|
|
|
if (!tstream.incrementToken())
|
|
|
|
break;
|
|
|
|
else {
|
|
|
|
Token token = new Token();
|
2010-04-11 06:31:34 -04:00
|
|
|
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
2010-03-14 20:43:05 -04:00
|
|
|
token.setType(typeAtt.type());
|
|
|
|
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
|
|
|
token.setPayload(payloadAtt.getPayload());
|
|
|
|
token.setFlags(flagsAtt.getFlags());
|
|
|
|
token.setPositionIncrement(posIncAtt.getPositionIncrement());
|
|
|
|
tokens.add(token);
|
|
|
|
}
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
return tokens;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
private static class Tok {
|
|
|
|
Token token;
|
|
|
|
int pos;
|
|
|
|
Tok(Token token, int pos) {
|
2006-12-15 14:53:39 -05:00
|
|
|
this.token=token;
|
|
|
|
this.pos=pos;
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
public boolean equals(Object o) {
|
2010-03-14 20:43:05 -04:00
|
|
|
return ((Tok)o).token.term().equals(token.term());
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
public int hashCode() {
|
2010-03-14 20:43:05 -04:00
|
|
|
return token.term().hashCode();
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
public String toString() {
|
2010-03-14 20:43:05 -04:00
|
|
|
return token.term();
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private static interface ToStr {
|
|
|
|
public String toStr(Object o);
|
|
|
|
}
|
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException {
|
|
|
|
// find the maximum number of terms for any position
|
|
|
|
int maxSz=1;
|
|
|
|
if (multival) {
|
|
|
|
for (List lst : arrLst) {
|
|
|
|
maxSz = Math.max(lst.size(), maxSz);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (int idx=0; idx<maxSz; idx++) {
|
|
|
|
out.println("<tr>");
|
|
|
|
if (idx==0 && verbose) {
|
|
|
|
if (header != null) {
|
|
|
|
out.print("<th NOWRAP rowspan=\""+maxSz+"\">");
|
|
|
|
XML.escapeCharData(header,out);
|
|
|
|
out.println("</th>");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-01 00:46:34 -05:00
|
|
|
for (int posIndex=0; posIndex<arrLst.length; posIndex++) {
|
|
|
|
List<Tok> lst = arrLst[posIndex];
|
2006-12-15 14:53:39 -05:00
|
|
|
if (lst.size() <= idx) continue;
|
|
|
|
if (match!=null && match.contains(lst.get(idx))) {
|
|
|
|
out.print("<td class=\"highlight\"");
|
|
|
|
} else {
|
|
|
|
out.print("<td class=\"debugdata\"");
|
|
|
|
}
|
|
|
|
|
2007-03-01 00:46:34 -05:00
|
|
|
// if the last value in the column, use up
|
|
|
|
// the rest of the space via rowspan.
|
|
|
|
if (lst.size() == idx+1 && lst.size() < maxSz) {
|
|
|
|
out.print("rowspan=\""+(maxSz-lst.size()+1)+'"');
|
2006-12-15 14:53:39 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
out.print('>');
|
|
|
|
|
|
|
|
XML.escapeCharData(converter.toStr(lst.get(idx)), out);
|
|
|
|
out.print("</td>");
|
|
|
|
}
|
|
|
|
|
|
|
|
out.println("</tr>");
|
2006-12-12 17:24:47 -05:00
|
|
|
}
|
2006-12-15 14:53:39 -05:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2008-07-12 10:12:29 -04:00
|
|
|
static String isPayloadString( Payload p ) {
|
|
|
|
String sp = new String( p.getData() );
|
|
|
|
for( int i=0; i < sp.length(); i++ ) {
|
|
|
|
if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
return "(" + sp + ")";
|
|
|
|
}
|
2006-12-15 14:53:39 -05:00
|
|
|
|
|
|
|
static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
|
|
|
|
out.print("<h4>");
|
|
|
|
out.print(clazz.getName());
|
|
|
|
XML.escapeCharData(" "+args,out);
|
|
|
|
out.println("</h4>");
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
|
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
// readable, raw, pos, type, start/end
|
|
|
|
static void writeTokens(JspWriter out, List<Token> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
|
|
|
|
|
|
|
|
// Use a map to tell what tokens are in what positions
|
|
|
|
// because some tokenizers/filters may do funky stuff with
|
|
|
|
// very large increments, or negative increments.
|
2006-12-15 14:53:39 -05:00
|
|
|
HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
|
|
|
|
boolean needRaw=false;
|
|
|
|
int pos=0;
|
2006-01-26 00:37:29 -05:00
|
|
|
for (Token t : tokens) {
|
2010-03-14 20:43:05 -04:00
|
|
|
if (!t.term().equals(ft.indexedToReadable(t.term()))) {
|
2006-12-15 14:53:39 -05:00
|
|
|
needRaw=true;
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
pos += t.getPositionIncrement();
|
|
|
|
List lst = map.get(pos);
|
2006-12-15 14:53:39 -05:00
|
|
|
if (lst==null) {
|
2006-01-26 00:37:29 -05:00
|
|
|
lst = new ArrayList(1);
|
2006-12-15 14:53:39 -05:00
|
|
|
map.put(pos,lst);
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
2006-12-15 14:53:39 -05:00
|
|
|
Tok tok = new Tok(t,pos);
|
2006-01-26 00:37:29 -05:00
|
|
|
lst.add(tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]);
|
|
|
|
|
2007-06-01 19:32:56 -04:00
|
|
|
/* Jetty 6.1.3 miscompiles this generics version...
|
2006-01-26 00:37:29 -05:00
|
|
|
Arrays.sort(arr, new Comparator<List<Tok>>() {
|
|
|
|
public int compare(List<Tok> toks, List<Tok> toks1) {
|
|
|
|
return toks.get(0).pos - toks1.get(0).pos;
|
|
|
|
}
|
|
|
|
}
|
2007-06-01 19:32:56 -04:00
|
|
|
*/
|
|
|
|
|
|
|
|
Arrays.sort(arr, new Comparator() {
|
|
|
|
public int compare(Object toks, Object toks1) {
|
|
|
|
return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
);
|
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
|
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
printRow(out,"term position", arr, new ToStr() {
|
|
|
|
public String toStr(Object o) {
|
|
|
|
return Integer.toString(((Tok)o).pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
,false
|
|
|
|
,verbose
|
|
|
|
,null);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
printRow(out,"term text", arr, new ToStr() {
|
|
|
|
public String toStr(Object o) {
|
2010-03-14 20:43:05 -04:00
|
|
|
return ft.indexedToReadable( ((Tok)o).token.term() );
|
2006-12-15 14:53:39 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
,true
|
|
|
|
,verbose
|
|
|
|
,match
|
|
|
|
);
|
|
|
|
|
|
|
|
if (needRaw) {
|
|
|
|
printRow(out,"raw text", arr, new ToStr() {
|
|
|
|
public String toStr(Object o) {
|
2008-03-07 08:42:45 -05:00
|
|
|
// page is UTF-8, so anything goes.
|
2010-03-14 20:43:05 -04:00
|
|
|
return ((Tok)o).token.term();
|
2006-12-15 14:53:39 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
,true
|
|
|
|
,verbose
|
|
|
|
,match
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
printRow(out,"term type", arr, new ToStr() {
|
|
|
|
public String toStr(Object o) {
|
2008-03-07 08:42:45 -05:00
|
|
|
String tt = ((Tok)o).token.type();
|
|
|
|
if (tt == null) {
|
|
|
|
return "null";
|
|
|
|
} else {
|
|
|
|
return tt;
|
|
|
|
}
|
2006-12-15 14:53:39 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
,true
|
|
|
|
,verbose,
|
|
|
|
null
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
printRow(out,"source start,end", arr, new ToStr() {
|
|
|
|
public String toStr(Object o) {
|
|
|
|
Token t = ((Tok)o).token;
|
|
|
|
return Integer.toString(t.startOffset()) + ',' + t.endOffset() ;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
,true
|
|
|
|
,verbose
|
|
|
|
,null
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2008-07-12 10:12:29 -04:00
|
|
|
if (verbose) {
|
|
|
|
printRow(out,"payload", arr, new ToStr() {
|
|
|
|
public String toStr(Object o) {
|
|
|
|
Token t = ((Tok)o).token;
|
|
|
|
Payload p = t.getPayload();
|
|
|
|
if( null != p ) {
|
|
|
|
BigInteger bi = new BigInteger( p.getData() );
|
|
|
|
String ret = bi.toString( 16 );
|
|
|
|
if (ret.length() % 2 != 0) {
|
|
|
|
// Pad with 0
|
|
|
|
ret = "0"+ret;
|
|
|
|
}
|
|
|
|
ret += isPayloadString( p );
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
,true
|
|
|
|
,verbose
|
|
|
|
,null
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2006-12-15 14:53:39 -05:00
|
|
|
out.println("</table>");
|
2006-01-26 00:37:29 -05:00
|
|
|
}
|
|
|
|
|
2009-08-07 19:05:05 -04:00
|
|
|
static String writeCharStream(JspWriter out, CharStream input) throws IOException {
|
2009-07-08 21:35:30 -04:00
|
|
|
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
|
|
|
|
out.println("<tr>");
|
|
|
|
|
|
|
|
out.print("<th NOWRAP>");
|
|
|
|
XML.escapeCharData("text",out);
|
|
|
|
out.println("</th>");
|
|
|
|
|
|
|
|
final int BUFFER_SIZE = 1024;
|
|
|
|
char[] buf = new char[BUFFER_SIZE];
|
|
|
|
int len = 0;
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
do {
|
|
|
|
len = input.read( buf, 0, BUFFER_SIZE );
|
2009-10-11 06:04:01 -04:00
|
|
|
if( len > 0 )
|
|
|
|
sb.append(buf, 0, len);
|
2009-07-08 21:35:30 -04:00
|
|
|
} while( len == BUFFER_SIZE );
|
|
|
|
out.print("<td class=\"debugdata\">");
|
|
|
|
XML.escapeCharData(sb.toString(),out);
|
|
|
|
out.println("</td>");
|
|
|
|
|
|
|
|
out.println("</tr>");
|
|
|
|
out.println("</table>");
|
2009-08-07 19:05:05 -04:00
|
|
|
return sb.toString();
|
2009-07-08 21:35:30 -04:00
|
|
|
}
|
|
|
|
|
2006-01-26 00:37:29 -05:00
|
|
|
%>
|