Fixed QueryScorer: if a query has multiple WeightedTerms with different weights for the same term, the highest weight is now used for scoring that term (previously the last weight in the list was used).

SimpleHTMLEncoder now encodes characters outside the ASCII range as numeric character entities, as suggested here: http://issues.apache.org/bugzilla/show_bug.cgi?id=36333


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@279088 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2005-09-06 20:19:50 +00:00
parent f6b07dabe8
commit c00b260ecf
2 changed files with 15 additions and 3 deletions

View File

@ -67,8 +67,13 @@ public class QueryScorer implements Scorer
termsToFind = new HashMap();
for (int i = 0; i < weightedTerms.length; i++)
{
termsToFind.put(weightedTerms[i].term,weightedTerms[i]);
maxTermWeight=Math.max(maxTermWeight,weightedTerms[i].getWeight());
WeightedTerm existingTerm=(WeightedTerm) termsToFind.get(weightedTerms[i].term);
if( (existingTerm==null) ||(existingTerm.weight<weightedTerms[i].weight) )
{
//if a term is defined more than once, always use the highest scoring weight
termsToFind.put(weightedTerms[i].term,weightedTerms[i]);
maxTermWeight=Math.max(maxTermWeight,weightedTerms[i].getWeight());
}
}
}

View File

@ -66,7 +66,14 @@ public class SimpleHTMLEncoder implements Encoder
break;
default:
result.append(ch);
if (ch < 128)
{
result.append(ch);
}
else
{
result.append("&#").append((int)ch).append(";");
}
}
}