mirror of https://github.com/apache/lucene.git
Fix for SOLR-3132
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1243774 13f79535-47bb-0310-9956-ffa450edef68
parent 26de35a65c
commit 6bb4ea097f
@@ -738,7 +738,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
     info.add("uptime", System.currentTimeMillis() - core.getStartTime());
     RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
     try {
-      SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader(), false);
+      SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
       long size = getIndexSize(core);
       indexInfo.add("sizeInBytes", size);
       indexInfo.add("size", NumberUtils.readableSize(size));
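The hunk above drops the boolean countTerms/detail flag from the call CoreAdminHandler makes. A minimal sketch, not part of the commit, of how the surrounding status code now reads (the searcher.decref() in the finally block is an assumption based on Solr's usual RefCounted pattern, not shown in this hunk):

    RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    try {
      // Top-level index stats only; per-field detail now lives behind /admin/luke.
      SimpleOrderedMap<Object> indexInfo =
          LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
      indexInfo.add("sizeInBytes", getIndexSize(core));
    } finally {
      searcher.decref(); // assumed cleanup
    }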
@@ -82,25 +82,25 @@ public class LukeRequestHandler extends RequestHandlerBase
   public static final String DOC_ID = "docId";
   public static final String ID = "id";
   public static final int DEFAULT_COUNT = 10;

   static final int HIST_ARRAY_SIZE = 33;

   private static enum ShowStyle {
     ALL,
     DOC,
     SCHEMA,
     INDEX;

     public static ShowStyle get(String v) {
       if(v==null) return null;
-      if("schema".equals(v)) return SCHEMA;
-      if("index".equals(v)) return INDEX;
-      if("doc".equals(v)) return DOC;
-      if("all".equals(v)) return ALL;
+      if("schema".equalsIgnoreCase(v)) return SCHEMA;
+      if("index".equalsIgnoreCase(v)) return INDEX;
+      if("doc".equalsIgnoreCase(v)) return DOC;
+      if("all".equalsIgnoreCase(v)) return ALL;
       throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: "+v);
     }
   };

   @Override
   public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
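The only behavioral change in this hunk is the switch from equals to equalsIgnoreCase, so values of the show parameter are now matched case-insensitively. A hypothetical illustration of the lookup after this change:

    ShowStyle.get("index");  // returns INDEX, as before
    ShowStyle.get("INDEX");  // now also returns INDEX; previously this threw BAD_REQUEST
    ShowStyle.get(null);     // returns null
    ShowStyle.get("blah");   // throws SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: blah")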
@@ -109,29 +109,17 @@ public class LukeRequestHandler extends RequestHandlerBase
     SolrIndexSearcher searcher = req.getSearcher();
     DirectoryReader reader = searcher.getIndexReader();
     SolrParams params = req.getParams();
-    int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
     ShowStyle style = ShowStyle.get(params.get("show"));

-    // Always show the core lucene info
-    Map<String, TopTermQueue> topTerms = new TreeMap<String, TopTermQueue>();
-
     // If no doc is given, show all fields and top terms
-    Set<String> fields = null;
-    String fl = params.get(CommonParams.FL);
-    if (fl != null) {
-      fields = new TreeSet<String>(Arrays.asList(fl.split( "[,\\s]+" )));
-    }
-    if( ShowStyle.SCHEMA == style ) {
-      numTerms = 0; // Abort any statistics gathering.
-    }
-    rsp.add("index", getIndexInfo(reader, numTerms, topTerms, fields ));
-
-    if(ShowStyle.INDEX==style) {
-      return; // thats all we need
-    }
+    rsp.add("index", getIndexInfo(reader));
+
+    if(ShowStyle.INDEX==style) {
+      return; // that's all we need
+    }

     Integer docId = params.getInt( DOC_ID );
     if( docId == null && params.get( ID ) != null ) {
       // Look for something with a given solr ID
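handleRequestBody now adds the index section unconditionally and returns early for show=index; the SCHEMA case no longer needs to zero out numTerms because term statistics moved into getDetailedFieldInfo. A test-style sketch, borrowing the req()/h.query() helpers that appear in LukeRequestHandlerTest further down (the assertions are invented for illustration, not taken from the test suite):

    SolrQueryRequest req = req("qt", "/admin/luke", "show", "index");
    try {
      String response = h.query(req); // response now carries only the "index" section
    } finally {
      req.close(); // assumed cleanup
    }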
@@ -170,7 +158,7 @@ public class LukeRequestHandler extends RequestHandlerBase
       rsp.add( "schema", getSchemaInfo( req.getSchema() ) );
     }
     else {
-      rsp.add( "fields", getIndexedFieldsInfo( searcher, fields, numTerms, topTerms) ) ;
+      rsp.add( "fields", getIndexedFieldsInfo(req) ) ;
     }

     // Add some generally helpful information
@@ -255,7 +243,8 @@ public class LukeRequestHandler extends RequestHandlerBase
     return key;
   }

-  private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader, IndexSchema schema ) throws IOException
+  private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader,
+                                                                 IndexSchema schema ) throws IOException
   {
     final CharsRef spare = new CharsRef();
     SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
@@ -311,13 +300,22 @@ public class LukeRequestHandler extends RequestHandlerBase
   }

   @SuppressWarnings("unchecked")
-  private static SimpleOrderedMap<Object> getIndexedFieldsInfo(
-      final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms, Map<String,TopTermQueue> ttinfo)
+  private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
       throws Exception {

+    SolrIndexSearcher searcher = req.getSearcher();
+    SolrParams params = req.getParams();
+
+    Set<String> fields = null;
+    String fl = params.get(CommonParams.FL);
+    if (fl != null) {
+      fields = new TreeSet<String>(Arrays.asList(fl.split( "[,\\s]+" )));
+    }
+
     AtomicReader reader = searcher.getAtomicReader();
     IndexSchema schema = searcher.getSchema();

     // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
     Set<String> fieldNames = new TreeSet<String>();
     for(FieldInfo fieldInfo : reader.getFieldInfos()) {
       fieldNames.add(fieldInfo.name);
@@ -325,82 +323,90 @@ public class LukeRequestHandler extends RequestHandlerBase

     // Walk the term enum and keep a priority queue for each map in our set
     SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
-    Fields theFields = reader.fields();

     for (String fieldName : fieldNames) {
       if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) {
-        continue; // we're not interested in this term
+        continue; //we're not interested in this field Still an issue here
       }

-      SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
+      SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<Object>();

       SchemaField sfield = schema.getFieldOrNull( fieldName );
       FieldType ftype = (sfield==null)?null:sfield.getType();

-      f.add( "type", (ftype==null)?null:ftype.getTypeName() );
-      f.add( "schema", getFieldFlags( sfield ) );
+      fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() );
+      fieldMap.add("schema", getFieldFlags(sfield));
       if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
-        f.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
+        fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
       }

-      Terms terms = theFields.terms(fieldName);
+      Terms terms = reader.fields().terms(fieldName);
       if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified)
-        finfo.add( fieldName, f );
+        finfo.add( fieldName, fieldMap );
         continue;
       }

-      TopTermQueue topTerms = ttinfo.get( fieldName );
-      // If numTerms==0, the call is just asking for a quick field list
-      if( ttinfo != null && sfield != null && sfield.indexed() ) {
-        if (numTerms > 0) { // Read the actual field from the index and report that too.
-          Document doc = null;
-          if (topTerms != null && topTerms.getTopTermInfo() != null) {
-            Term term = topTerms.getTopTermInfo().term;
-            DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
-                                                    term.field(),
-                                                    new BytesRef(term.text()),
-                                                    false);
-            if (docsEnum != null) {
-              int docId;
-              if ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
-                doc = reader.document(docId);
-              }
-            }
-          }
-
-          if( doc != null ) {
-            // Found a document with this field
-            try {
-              IndexableField fld = doc.getField( fieldName );
-              if( fld != null ) {
-                f.add( "index", getFieldFlags( fld ) );
-              }
-              else {
-                // it is a non-stored field...
-                f.add( "index", "(unstored field)" );
-              }
-            }
-            catch( Exception ex ) {
-              log.warn( "error reading field: "+fieldName );
-            }
-          }
-          f.add("docs", terms.getDocCount());
-        }
-        if( topTerms != null ) {
-          f.add( "distinct", topTerms.distinctTerms );
-
-          // Include top terms
-          f.add( "topTerms", topTerms.toNamedList( searcher.getSchema() ) );
-
-          // Add a histogram
-          f.add( "histogram", topTerms.histogram.toNamedList() );
-        }
-      }
+      if(sfield != null && sfield.indexed() ) {
+        // In the pre-4.0 days, this did a veeeery expensive range query. But we can be much faster now,
+        // so just do this all the time.
+        Document doc = getFirstLiveDoc(reader, fieldName, terms);
+
+        if( doc != null ) {
+          // Found a document with this field
+          try {
+            IndexableField fld = doc.getField( fieldName );
+            if( fld != null ) {
+              fieldMap.add("index", getFieldFlags(fld));
+            }
+            else {
+              // it is a non-stored field...
+              fieldMap.add("index", "(unstored field)");
+            }
+          }
+          catch( Exception ex ) {
+            log.warn( "error reading field: "+fieldName );
+          }
+        }
+        fieldMap.add("docs", terms.getDocCount());
+      }
+      if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
+        getDetailedFieldInfo(req, fieldName, fieldMap);
+      }
       // Add the field
-      finfo.add( fieldName, f );
+      finfo.add( fieldName, fieldMap );
     }
     return finfo;
   }

+  // Just get a document with the term in it, the first one will do!
+  // Is there a better way to do this? Shouldn't actually be very costly
+  // to do it this way.
+  private static Document getFirstLiveDoc(AtomicReader reader, String fieldName, Terms terms) throws IOException {
+    DocsEnum docsEnum = null;
+    TermsEnum termsEnum = terms.iterator(null);
+    BytesRef text;
+    // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
+    for (int idx = 0; idx < 1000 && docsEnum == null; ++idx) {
+      text = termsEnum.next();
+      if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
+        return null;
+      }
+      Term term = new Term(fieldName, text);
+      docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
+                                     term.field(),
+                                     new BytesRef(term.text()),
+                                     false);
+      if (docsEnum != null) {
+        int docId;
+        if ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+          return reader.document(docId);
+        }
+      }
+    }
+    return null;
+  }

   /**
    * Return info from the index
    */
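getFirstLiveDoc replaces the old inline lookup: it walks up to 1000 terms, asking each time for a postings enum filtered by the reader's live docs, so it can still find an example document when the first terms occur only in deleted documents. A condensed sketch of the same Lucene-trunk-era pattern, with the field name "title" invented for illustration:

    Terms terms = reader.fields().terms("title");
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef text = termsEnum.next();
    if (text != null) {
      // live-docs filter skips deleted documents
      DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), "title", text, false);
      if (docsEnum != null && docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
        Document firstLive = reader.document(docsEnum.docID()); // a live doc containing the term
      }
    }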
@@ -525,67 +531,24 @@ public class LukeRequestHandler extends RequestHandlerBase
       v.add( f.getName() );
       typeusemap.put( ft.getTypeName(), v );
     }

-  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean countTerms) throws IOException {
-    return getIndexInfo(reader, countTerms ? 1 : 0, null, null);
+  /**
+   * @deprecated use {@link #getIndexInfo(DirectoryReader)} since you now have to explicitly pass the "fl" parameter
+   * and this was always called with "false" anyway from CoreAdminHandler
+   */
+  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean detail) throws IOException {
+    return getIndexInfo(reader);
   }

-  public static SimpleOrderedMap<Object> getIndexInfo( DirectoryReader reader, int numTerms,
-                                                       Map<String, TopTermQueue> topTerms,
-                                                       Set<String> fieldList) throws IOException {
+  // This method just gets the top-most level of information. This was conflated with getting detailed info
+  // for *all* the fields, called from CoreAdminHandler etc.
+
+  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader) throws IOException {
     Directory dir = reader.directory();
     SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<Object>();

     indexInfo.add("numDocs", reader.numDocs());
     indexInfo.add("maxDoc", reader.maxDoc());
-    final CharsRef spare = new CharsRef();
-    if( numTerms > 0 ) {
-      Fields fields = MultiFields.getFields(reader);
-      long totalTerms = 0;
-      if (fields != null) {
-        FieldsEnum fieldsEnum = fields.iterator();
-        String field;
-        while ((field = fieldsEnum.next()) != null) {
-          Terms terms = fieldsEnum.terms();
-          if (terms == null) {
-            continue;
-          }
-          totalTerms += terms.getUniqueTermCount();
-
-          if (fieldList != null && ! fieldList.contains(field) && ! fieldList.contains("*")) {
-            continue;
-          }
-
-          TermsEnum termsEnum = terms.iterator(null);
-          BytesRef text;
-          int[] buckets = new int[HIST_ARRAY_SIZE];
-          TopTermQueue tiq = topTerms.get(field);
-          if (tiq == null) {
-            tiq = new TopTermQueue(numTerms + 1); // Allocating slots for the top N terms to collect freqs.
-            topTerms.put(field, tiq);
-          }
-          while ((text = termsEnum.next()) != null) {
-            int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
-            int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
-            buckets[slot] = buckets[slot] + 1;
-            if (freq > tiq.minFreq) {
-              UnicodeUtil.UTF8toUTF16(text, spare);
-              String t = spare.toString();
-              tiq.distinctTerms = new Long(fieldsEnum.terms().getUniqueTermCount()).intValue();
-
-              tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
-              if (tiq.size() > numTerms) { // if tiq full
-                tiq.pop(); // remove lowest in tiq
-                tiq.minFreq = tiq.getTopTermInfo().docFreq;
-              }
-            }
-          }
-          tiq.histogram.add(buckets);
-        }
-      }
-      //Clumsy, but I'm tired.
-      indexInfo.add("numTerms", (new Long(totalTerms)).intValue());
-    }

     indexInfo.add("version", reader.getVersion()); // TODO? Is this different than: IndexReader.getCurrentVersion( dir )?
     indexInfo.add("segmentCount", reader.getSequentialSubReaders().length);
     indexInfo.add("current", reader.isCurrent() );
@@ -598,6 +561,57 @@ public class LukeRequestHandler extends RequestHandlerBase
     }
     return indexInfo;
   }

+  // Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
+  // especially on large indexes!
+  private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
+      throws IOException {
+
+    SolrParams params = req.getParams();
+    int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
+
+    TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.
+
+    final CharsRef spare = new CharsRef();
+
+    Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());
+
+    if (fields == null) { // No indexed fields
+      return;
+    }
+
+    Terms terms = fields.terms(field);
+    if (terms == null) { // No terms in the field.
+      return;
+    }
+    TermsEnum termsEnum = terms.iterator(null);
+    BytesRef text;
+    int[] buckets = new int[HIST_ARRAY_SIZE];
+    while ((text = termsEnum.next()) != null) {
+      int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
+      int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
+      buckets[slot] = buckets[slot] + 1;
+      if (freq > tiq.minFreq) {
+        UnicodeUtil.UTF8toUTF16(text, spare);
+        String t = spare.toString();
+        tiq.distinctTerms = new Long(terms.getUniqueTermCount()).intValue();
+
+        tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
+        if (tiq.size() > numTerms) { // if tiq full
+          tiq.pop(); // remove lowest in tiq
+          tiq.minFreq = tiq.getTopTermInfo().docFreq;
+        }
+      }
+    }
+    tiq.histogram.add(buckets);
+    fieldMap.add("distinct", tiq.distinctTerms);
+
+    // Include top terms
+    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));
+
+    // Add a histogram
+    fieldMap.add("histogram", tiq.histogram.toNamedList());
+  }

   //////////////////////// SolrInfoMBeans methods //////////////////////

   @Override
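The bucket index in getDetailedFieldInfo is a log2 histogram: slot = 32 - numberOfLeadingZeros(max(0, freq - 1)) equals ceil(log2(freq)) for freq >= 1, so slot s counts terms whose docFreq falls in (2^(s-1), 2^s], and HIST_ARRAY_SIZE = 33 comfortably covers any int docFreq. A small worked example, standalone and not from the commit:

    int[] freqs = {1, 2, 3, 4, 5, 8, 9, 1024};
    for (int freq : freqs) {
      int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
      System.out.println(freq + " -> bucket " + slot);
    }
    // prints: 1 -> bucket 0, 2 -> bucket 1, 3 -> bucket 2, 4 -> bucket 2,
    //         5 -> bucket 3, 8 -> bucket 3, 9 -> bucket 4, 1024 -> bucket 10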
@@ -82,7 +82,6 @@ public class MinimalSchemaTest extends SolrTestCaseJ4 {
     assertQ("basic luke request failed",
             req("qt", "/admin/luke")
             ,"//int[@name='numDocs'][.='2']"
-            ,"//int[@name='numTerms'][.='5']"
             );

     assertQ("luke show schema failed",
@@ -108,35 +108,35 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase {
     final int numFlags = EnumSet.allOf(FieldFlag.class).size();

     assertQ("Not all flags ("+numFlags+") mentioned in info->key",
-            req("qt","/admin/luke"),
-            numFlags+"=count(//lst[@name='info']/lst[@name='key']/str)");
+        req("qt","/admin/luke"),
+        numFlags+"=count(//lst[@name='info']/lst[@name='key']/str)");

     // code should be the same for all fields, but just in case do several
     for (String f : Arrays.asList("solr_t","solr_s","solr_ti",
-                                   "solr_td","solr_pl","solr_dt","solr_b",
-                                   "solr_sS","solr_sI")) {
+        "solr_td","solr_pl","solr_dt","solr_b",
+        "solr_sS","solr_sI")) {

       final String xp = getFieldXPathPrefix(f);
       assertQ("Not as many schema flags as expected ("+numFlags+") for " + f,
-              req("qt","/admin/luke", "fl", f),
-              numFlags+"=string-length("+xp+"[@name='schema'])");
+          req("qt","/admin/luke", "fl", f),
+          numFlags+"=string-length("+xp+"[@name='schema'])");

     }

     // diff loop for checking 'index' flags,
     // only valid for fields that are indexed & stored
     for (String f : Arrays.asList("solr_t","solr_s","solr_ti",
-                                   "solr_td","solr_pl","solr_dt","solr_b")) {
+        "solr_td","solr_pl","solr_dt","solr_b")) {

       final String xp = getFieldXPathPrefix(f);
       assertQ("Not as many index flags as expected ("+numFlags+") for " + f,
-              req("qt","/admin/luke", "fl", f),
-              numFlags+"=string-length("+xp+"[@name='index'])");
+          req("qt","/admin/luke", "fl", f),
+          numFlags+"=string-length("+xp+"[@name='index'])");

-      final String hxp = getFieldXPathHistogram(f);
-      assertQ("Histogram field should be present for field "+f,
-              req("qt", "/admin/luke", "fl", f),
-              hxp+"[@name='histogram']");
+      final String hxp = getFieldXPathHistogram(f);
+      assertQ("Histogram field should be present for field "+f,
+          req("qt", "/admin/luke", "fl", f),
+          hxp+"[@name='histogram']");
     }
   }

@@ -149,7 +149,7 @@ public class LukeRequestHandlerTest extends AbstractSolrTestCase {

   @Test
   public void testFlParam() {
-    SolrQueryRequest req = req("qt", "/admin/luke", "fl", "solr_t solr_s");
+    SolrQueryRequest req = req("qt", "/admin/luke", "fl", "solr_t solr_s", "show", "all");
     try {
       // First, determine that the two fields ARE there
       String response = h.query(req);
@@ -114,7 +114,7 @@
     //further populates the loaded schema with information gathered
     // from the no argument LukeRequestHandler
     loadFromLukeHandler: function(func) {
-      $.getJSON(solr.pathToLukeHandler+'?wt=json', function(data) {
+      $.getJSON(solr.pathToLukeHandler+'?wt=json&fl=*', function(data) {
         $.each(data.fields, function(i, item) {
           var field = solr.schemaFields[i];
