mirror of https://github.com/apache/lucene.git
Upgrade spotless to 6.9.1, google java format to 1.23.0. (#13661)
parent af22fe922d
commit f99deb58b7
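Nearly every hunk in this diff is mechanical churn from the formatter bump. google-java-format 1.23.0 changes two things that recur below: a comment sitting before a `case` label is now indented to the case-body level instead of the label level, and a switch-arrow case whose body does not fit on one line now breaks immediately after the `->`. A minimal sketch of both rules, using hypothetical names rather than code from this commit:

```java
// Hypothetical sketch (not Lucene code): the two switch layouts that
// google-java-format 1.23.0 changes throughout this commit.
class FormatterSketch {
  enum Kind {
    SHORT,
    LONG
  }

  static String describe(Kind kind, String payload) {
    switch (kind) {
        // comments before a case label now sit at the case-body level
      case SHORT:
        return "short";
      default:
        break;
    }
    return switch (kind) {
      case SHORT -> "short";
      // a long arrow-case body now wraps right after the "->"
      case LONG ->
          String.join("-", "a", "deliberately", "long", "body", payload);
    };
  }
}
```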
@@ -231,8 +231,8 @@ public class MissingDoclet extends StandardDoclet {
       case PACKAGE:
         checkComment(element);
         break;
-      // class-like elements, check them, then recursively check their children (fields and
-      // methods)
+        // class-like elements, check them, then recursively check their children (fields and
+        // methods)
       case CLASS:
       case INTERFACE:
       case ENUM:
@@ -257,7 +257,7 @@ public class MissingDoclet extends StandardDoclet {
           }
         }
         break;
-      // method-like elements, check them if we are configured to do so
+        // method-like elements, check them if we are configured to do so
       case METHOD:
       case CONSTRUCTOR:
       case FIELD:
@@ -1,5 +1,5 @@
 {
   "gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
-  "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "1f7a446f3483326385eef257cea8366c27da0850",
+  "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "e62dcd8c25219d8f5d783823b228ffe38d2bacde",
   "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex": "f52109bb7d5701979fde90aeeeda726246a8d5fd"
 }
@@ -1,5 +1,5 @@
 {
   "gradle/generation/jflex/skeleton.default.txt": "58944f66c9113a940dfaf6a17210ec8219024390",
-  "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "ac298e08bc5b96202efca0c01f9f0376fda976bd",
+  "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "2b5df5ff35543a6380c82f298225eb5fa06e4453",
   "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex": "0b8c7774b98e8237702013e82c352d4711509bd0"
 }
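The two JSON files above are regeneration checksums: the build records a digest of each JFlex-generated source so it can detect when a generated file is stale or hand-edited. Reformatting the generated tokenizers therefore changes the recorded `.java` digests, while the `.jflex` inputs and skeleton stay put. The 40-hex-digit values match the shape of SHA-1 output; a small sketch of computing such a digest (hypothetical helper, assuming SHA-1, not Lucene's actual build code):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

// Hypothetical helper: produce a 40-hex-char digest like the values recorded
// in the checksum files above (assuming they are SHA-1).
final class ChecksumSketch {
  static String sha1Hex(Path file) throws IOException, NoSuchAlgorithmException {
    byte[] digest = MessageDigest.getInstance("SHA-1").digest(Files.readAllBytes(file));
    StringBuilder hex = new StringBuilder(digest.length * 2);
    for (byte b : digest) {
      hex.append(Character.forDigit((b >> 4) & 0xF, 16));
      hex.append(Character.forDigit(b & 0xF, 16));
    }
    return hex.toString();
  }
}
```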
@@ -37,23 +37,23 @@ class BengaliNormalizer {

     for (int i = 0; i < len; i++) {
       switch (s[i]) {
-        // delete Chandrabindu
+          // delete Chandrabindu
         case '\u0981':
           len = delete(s, i, len);
           i--;
           break;

-        // DirghoI kar -> RosshoI kar
+          // DirghoI kar -> RosshoI kar
         case '\u09C0':
           s[i] = '\u09BF';
           break;

-        // DirghoU kar -> RosshoU kar
+          // DirghoU kar -> RosshoU kar
         case '\u09C2':
           s[i] = '\u09C1';
           break;

-        // Khio (Ka + Hoshonto + Murdorno Sh)
+          // Khio (Ka + Hoshonto + Murdorno Sh)
         case '\u0995':
           if (i + 2 < len && s[i + 1] == '\u09CD' && s[i + 2] == '\u09BF') {
             if (i == 0) {
@@ -67,12 +67,12 @@ class BengaliNormalizer {
           }
           break;

-        // Nga to Anusvara
+          // Nga to Anusvara
         case '\u0999':
           s[i] = '\u0982';
           break;

-        // Ja Phala
+          // Ja Phala
         case '\u09AF':
           if (i - 2 == 0 && s[i - 1] == '\u09CD') {
             s[i - 1] = '\u09C7';
@@ -89,7 +89,7 @@ class BengaliNormalizer {
           }
           break;

-        // Ba Phalaa
+          // Ba Phalaa
         case '\u09AC':
           if ((i >= 1 && s[i - 1] != '\u09CD') || i == 0) {
             break;
@@ -109,7 +109,7 @@ class BengaliNormalizer {
           }
           break;

-        // Visarga
+          // Visarga
         case '\u0983':
           if (i == len - 1) {
             if (len <= 3) {
@@ -122,18 +122,18 @@ class BengaliNormalizer {
           }
           break;

-        // All sh
+          // All sh
         case '\u09B6':
         case '\u09B7':
           s[i] = '\u09B8';
           break;

-        // check na
+          // check na
         case '\u09A3':
           s[i] = '\u09A8';
           break;

-        // check ra
+          // check ra
         case '\u09DC':
         case '\u09DD':
           s[i] = '\u09B0';
@@ -747,70 +747,70 @@ class ClassicTokenizerImpl {
             /* Break so we don't hit fall-through warning: */
             break; /* ignore */
           }
-        // fall through
+          // fall through
         case 11:
           break;
         case 2:
           {
             return ALPHANUM;
           }
-        // fall through
+          // fall through
         case 12:
           break;
         case 3:
           {
             return CJ;
           }
-        // fall through
+          // fall through
         case 13:
           break;
         case 4:
           {
             return NUM;
           }
-        // fall through
+          // fall through
         case 14:
           break;
         case 5:
           {
             return HOST;
           }
-        // fall through
+          // fall through
         case 15:
           break;
         case 6:
           {
             return COMPANY;
           }
-        // fall through
+          // fall through
         case 16:
           break;
         case 7:
           {
             return APOSTROPHE;
           }
-        // fall through
+          // fall through
         case 17:
           break;
         case 8:
           {
             return ACRONYM_DEP;
           }
-        // fall through
+          // fall through
         case 18:
           break;
         case 9:
           {
             return ACRONYM;
           }
-        // fall through
+          // fall through
         case 19:
           break;
         case 10:
           {
             return EMAIL;
           }
-        // fall through
+          // fall through
         case 20:
           break;
         default:
@@ -53,18 +53,18 @@ public final class GreekLowerCaseFilter extends TokenFilter {

   private int lowerCase(int codepoint) {
     switch (codepoint) {
-      /* There are two lowercase forms of sigma:
-       * U+03C2: small final sigma (end of word)
-       * U+03C3: small sigma (otherwise)
-       *
-       * Standardize both to U+03C3
-       */
+        /* There are two lowercase forms of sigma:
+         * U+03C2: small final sigma (end of word)
+         * U+03C3: small sigma (otherwise)
+         *
+         * Standardize both to U+03C3
+         */
       case '\u03C2': /* small final sigma */
         return '\u03C3'; /* small sigma */

-      /* Some greek characters contain diacritics.
-       * This filter removes these, converting to the lowercase base form.
-       */
+        /* Some greek characters contain diacritics.
+         * This filter removes these, converting to the lowercase base form.
+         */

       case '\u0386': /* capital alpha with tonos */
       case '\u03AC': /* small alpha with tonos */
@@ -100,9 +100,9 @@ public final class GreekLowerCaseFilter extends TokenFilter {
       case '\u03CE': /* small omega with tonos */
         return '\u03C9'; /* small omega */

-      /* The previous implementation did the conversion below.
-       * Only implemented for backwards compatibility with old indexes.
-       */
+        /* The previous implementation did the conversion below.
+         * Only implemented for backwards compatibility with old indexes.
+         */

       case '\u03A2': /* reserved */
         return '\u03C2'; /* small final sigma */
@@ -456,7 +456,7 @@ class PorterStemmer {
         /* j >= 0 fixes Bug 2 */
         if (ends("ou")) break;
         return;
-      /* takes care of -ous */
+        /* takes care of -ous */
       case 's':
         if (ends("ism")) break;
         return;
@@ -67,7 +67,7 @@ public final class IrishLowerCaseFilter extends TokenFilter {
       case 'I':
       case 'O':
       case 'U':
-      // vowels with acute accent (fada)
+        // vowels with acute accent (fada)
       case '\u00c1':
       case '\u00c9':
       case '\u00cd':
@@ -47,18 +47,18 @@ class HindiNormalizer {

     for (int i = 0; i < len; i++) {
       switch (s[i]) {
-        // dead n -> bindu
+          // dead n -> bindu
         case '\u0928':
           if (i + 1 < len && s[i + 1] == '\u094D') {
             s[i] = '\u0902';
             len = delete(s, i + 1, len);
           }
           break;
-        // candrabindu -> bindu
+          // candrabindu -> bindu
         case '\u0901':
           s[i] = '\u0902';
           break;
-        // nukta deletions
+          // nukta deletions
         case '\u093C':
           len = delete(s, i, len);
           i--;
@@ -96,18 +96,18 @@ class HindiNormalizer {
         case '\u095F':
           s[i] = '\u092F';
           break;
-        // zwj/zwnj -> delete
+          // zwj/zwnj -> delete
         case '\u200D':
         case '\u200C':
           len = delete(s, i, len);
           i--;
           break;
-        // virama -> delete
+          // virama -> delete
         case '\u094D':
           len = delete(s, i, len);
           i--;
           break;
-        // chandra/short -> replace
+          // chandra/short -> replace
         case '\u0945':
         case '\u0946':
           s[i] = '\u0947';
@@ -127,7 +127,7 @@ class HindiNormalizer {
         case '\u0972':
           s[i] = '\u0905';
           break;
-        // long -> short ind. vowels
+          // long -> short ind. vowels
         case '\u0906':
           s[i] = '\u0905';
           break;
@@ -149,7 +149,7 @@ class HindiNormalizer {
         case '\u0914':
           s[i] = '\u0913';
           break;
-        // long -> short dep. vowels
+          // long -> short dep. vowels
         case '\u0940':
           s[i] = '\u093F';
           break;
@@ -194,7 +194,7 @@ public final class WordDelimiterIterator {

     int type = charType(text[current]);
     switch (type) {
-      // return ALPHA word type for both lower and upper
+        // return ALPHA word type for both lower and upper
       case LOWER:
       case UPPER:
         return ALPHA;
@@ -332,27 +332,27 @@ public final class WordDelimiterIterator {
       case Character.OTHER_NUMBER:
         return DIGIT;

-      // case Character.SPACE_SEPARATOR:
-      // case Character.LINE_SEPARATOR:
-      // case Character.PARAGRAPH_SEPARATOR:
-      // case Character.CONTROL:
-      // case Character.FORMAT:
-      // case Character.PRIVATE_USE:
+        // case Character.SPACE_SEPARATOR:
+        // case Character.LINE_SEPARATOR:
+        // case Character.PARAGRAPH_SEPARATOR:
+        // case Character.CONTROL:
+        // case Character.FORMAT:
+        // case Character.PRIVATE_USE:

       case Character.SURROGATE: // prevent splitting
         return ALPHA | DIGIT;

-      // case Character.DASH_PUNCTUATION:
-      // case Character.START_PUNCTUATION:
-      // case Character.END_PUNCTUATION:
-      // case Character.CONNECTOR_PUNCTUATION:
-      // case Character.OTHER_PUNCTUATION:
-      // case Character.MATH_SYMBOL:
-      // case Character.CURRENCY_SYMBOL:
-      // case Character.MODIFIER_SYMBOL:
-      // case Character.OTHER_SYMBOL:
-      // case Character.INITIAL_QUOTE_PUNCTUATION:
-      // case Character.FINAL_QUOTE_PUNCTUATION:
+        // case Character.DASH_PUNCTUATION:
+        // case Character.START_PUNCTUATION:
+        // case Character.END_PUNCTUATION:
+        // case Character.CONNECTOR_PUNCTUATION:
+        // case Character.OTHER_PUNCTUATION:
+        // case Character.MATH_SYMBOL:
+        // case Character.CURRENCY_SYMBOL:
+        // case Character.MODIFIER_SYMBOL:
+        // case Character.OTHER_SYMBOL:
+        // case Character.INITIAL_QUOTE_PUNCTUATION:
+        // case Character.FINAL_QUOTE_PUNCTUATION:

       default:
         return SUBWORD_DELIM;
@@ -38,25 +38,25 @@ class TeluguNormalizer {

     for (int i = 0; i < len; i++) {
       switch (s[i]) {
-        // candrabindu (ఀ and ఁ) -> bindu (ం)
+          // candrabindu (ఀ and ఁ) -> bindu (ం)
         case '\u0C00': // ఀ
         case '\u0C01': // ఁ
           s[i] = '\u0C02'; // ం
           break;
-        // delete visarga (ః)
+          // delete visarga (ః)
         case '\u0C03':
           len = delete(s, i, len);
           i--;
           break;

-        // zwj/zwnj -> delete
+          // zwj/zwnj -> delete
         case '\u200D':
         case '\u200C':
           len = delete(s, i, len);
           i--;
           break;

-        // long -> short vowels
+          // long -> short vowels
         case '\u0C14': // ఔ
           s[i] = '\u0C13'; // ఓ
           break;
@@ -73,7 +73,7 @@ class TeluguNormalizer {
           s[i] = '\u0C09'; // ఉ
           break;

-        // long -> short vowels matras
+          // long -> short vowels matras
         case '\u0C40': // ీ
           s[i] = '\u0C3F'; // ి
           break;
@@ -86,14 +86,14 @@ class TeluguNormalizer {
         case '\u0C4B': // ో
           s[i] = '\u0C4A'; // ొ
           break;
-        // decomposed dipthong (ె + ౖ) -> precomposed diphthong vowel sign (ై)
+          // decomposed dipthong (ె + ౖ) -> precomposed diphthong vowel sign (ై)
         case '\u0C46':
           if (i + 1 < len && s[i + 1] == '\u0C56') {
             s[i] = '\u0C48';
             len = delete(s, i + 1, len);
           }
           break;
-        // composed oo or au -> oo or au
+          // composed oo or au -> oo or au
         case '\u0C12':
           if (i + 1 < len && s[i + 1] == '\u0C55') {
             // (ఒ + ౕ) -> oo (ఓ)
@@ -61,12 +61,12 @@ public final class TurkishLowerCaseFilter extends TokenFilter {

       if (iOrAfter) { // all the special I turkish handling happens here.
         switch (ch) {
-          // remove COMBINING_DOT_ABOVE to mimic composed lowercase
+            // remove COMBINING_DOT_ABOVE to mimic composed lowercase
           case COMBINING_DOT_ABOVE:
             length = delete(buffer, i, length);
             continue;
-          // i itself, it depends if it is followed by COMBINING_DOT_ABOVE
-          // if it is, we will make it small i and later remove the dot
+            // i itself, it depends if it is followed by COMBINING_DOT_ABOVE
+            // if it is, we will make it small i and later remove the dot
           case LATIN_CAPITAL_LETTER_I:
             if (isBeforeDot(buffer, i + 1, length)) {
               buffer[i] = LATIN_SMALL_LETTER_I;
@@ -901,7 +901,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1; /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 47:
           break;
         case 2:
@@ -909,7 +909,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return ALPHANUM;
           }
-        // fall through
+          // fall through
         case 48:
           break;
         case 3:
@@ -920,7 +920,7 @@ class WikipediaTokenizerImpl {
             yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 49:
           break;
         case 4:
@@ -928,7 +928,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return CJ;
           }
-        // fall through
+          // fall through
         case 50:
           break;
         case 5:
@@ -936,7 +936,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1; /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 51:
           break;
         case 6:
@@ -945,7 +945,7 @@ class WikipediaTokenizerImpl {
             numWikiTokensSeen++;
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 52:
           break;
         case 7:
@@ -954,7 +954,7 @@ class WikipediaTokenizerImpl {
             numWikiTokensSeen++;
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 53:
           break;
         case 8:
@@ -962,7 +962,7 @@ class WikipediaTokenizerImpl {
             /* Break so we don't hit fall-through warning: */
             break; /* ignore */
           }
-        // fall through
+          // fall through
         case 54:
           break;
         case 9:
@@ -978,7 +978,7 @@ class WikipediaTokenizerImpl {
             numLinkToks++;
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 55:
           break;
         case 10:
@@ -988,7 +988,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 56:
           break;
         case 11:
@@ -997,7 +997,7 @@ class WikipediaTokenizerImpl {
             yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 57:
           break;
         case 12:
@@ -1007,7 +1007,7 @@ class WikipediaTokenizerImpl {
             yybegin(STRING);
             return currentTokType; /*italics*/
           }
-        // fall through
+          // fall through
         case 58:
           break;
         case 13:
@@ -1017,7 +1017,7 @@ class WikipediaTokenizerImpl {
             yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 59:
           break;
         case 14:
@@ -1026,7 +1026,7 @@ class WikipediaTokenizerImpl {
             numWikiTokensSeen++;
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 60:
           break;
         case 15:
@@ -1036,7 +1036,7 @@ class WikipediaTokenizerImpl {
             numWikiTokensSeen++;
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 61:
           break;
         case 16:
@@ -1046,7 +1046,7 @@ class WikipediaTokenizerImpl {
             yybegin(STRING); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 62:
           break;
         case 17:
@@ -1055,7 +1055,7 @@ class WikipediaTokenizerImpl {
             numWikiTokensSeen = 0;
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 63:
           break;
         case 18:
@@ -1063,7 +1063,7 @@ class WikipediaTokenizerImpl {
             /* Break so we don't hit fall-through warning: */
             break; /* ignore STRING */
           }
-        // fall through
+          // fall through
         case 64:
           break;
         case 19:
@@ -1072,7 +1072,7 @@ class WikipediaTokenizerImpl {
             numWikiTokensSeen++;
             return currentTokType; /* STRING ALPHANUM*/
           }
-        // fall through
+          // fall through
         case 65:
           break;
         case 20:
@@ -1083,7 +1083,7 @@ class WikipediaTokenizerImpl {
             yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 66:
           break;
         case 21:
@@ -1091,7 +1091,7 @@ class WikipediaTokenizerImpl {
             yybegin(STRING);
             return currentTokType; /*pipe*/
           }
-        // fall through
+          // fall through
         case 67:
           break;
         case 22:
@@ -1106,7 +1106,7 @@ class WikipediaTokenizerImpl {
             } /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 68:
           break;
         case 23:
@@ -1116,7 +1116,7 @@ class WikipediaTokenizerImpl {
             yybegin(DOUBLE_EQUALS_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 69:
           break;
         case 24:
@@ -1127,7 +1127,7 @@ class WikipediaTokenizerImpl {
             yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 70:
           break;
         case 25:
@@ -1138,7 +1138,7 @@ class WikipediaTokenizerImpl {
             yybegin(DOUBLE_BRACE_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 71:
           break;
         case 26:
@@ -1146,7 +1146,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 72:
           break;
         case 27:
@@ -1155,7 +1155,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 73:
           break;
         case 28:
@@ -1165,7 +1165,7 @@ class WikipediaTokenizerImpl {
             yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 74:
           break;
         case 29:
@@ -1175,7 +1175,7 @@ class WikipediaTokenizerImpl {
             yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 75:
           break;
         case 30:
@@ -1183,7 +1183,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 76:
           break;
         case 31:
@@ -1193,7 +1193,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break; /*end italics*/
           }
-        // fall through
+          // fall through
         case 77:
           break;
         case 32:
@@ -1204,7 +1204,7 @@ class WikipediaTokenizerImpl {
             yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 78:
           break;
         case 33:
@@ -1212,7 +1212,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return NUM;
           }
-        // fall through
+          // fall through
         case 79:
           break;
         case 34:
@@ -1220,7 +1220,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return COMPANY;
           }
-        // fall through
+          // fall through
         case 80:
           break;
         case 35:
@@ -1228,7 +1228,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return APOSTROPHE;
           }
-        // fall through
+          // fall through
         case 81:
           break;
         case 36:
@@ -1236,7 +1236,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return HOST;
           }
-        // fall through
+          // fall through
         case 82:
           break;
         case 37:
@@ -1245,7 +1245,7 @@ class WikipediaTokenizerImpl {
             yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 83:
           break;
         case 38:
@@ -1255,7 +1255,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break; /*end bold*/
           }
-        // fall through
+          // fall through
         case 84:
           break;
         case 39:
@@ -1265,7 +1265,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break; /*end sub header*/
           }
-        // fall through
+          // fall through
         case 85:
           break;
         case 40:
@@ -1273,7 +1273,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return ACRONYM;
           }
-        // fall through
+          // fall through
         case 86:
           break;
         case 41:
@@ -1281,7 +1281,7 @@ class WikipediaTokenizerImpl {
             positionInc = 1;
             return EMAIL;
           }
-        // fall through
+          // fall through
         case 87:
           break;
         case 42:
@@ -1291,7 +1291,7 @@ class WikipediaTokenizerImpl {
             yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */
             break; /*end bold italics*/
           }
-        // fall through
+          // fall through
         case 88:
           break;
         case 43:
@@ -1301,7 +1301,7 @@ class WikipediaTokenizerImpl {
             yybegin(EXTERNAL_LINK_STATE);
             return currentTokType;
           }
-        // fall through
+          // fall through
         case 89:
           break;
         case 44:
@@ -1312,7 +1312,7 @@ class WikipediaTokenizerImpl {
             yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 90:
           break;
         case 45:
@@ -1322,7 +1322,7 @@ class WikipediaTokenizerImpl {
             yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 91:
           break;
         case 46:
@@ -1333,7 +1333,7 @@ class WikipediaTokenizerImpl {
             yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */
             break;
           }
-        // fall through
+          // fall through
         case 92:
           break;
         default:
@@ -245,7 +245,7 @@ public class Diff {
           deletes++;
           x--;
           break;
-        // delete
+          // delete
         case Y:
           if (deletes != base) {
             result.append('D').append(deletes);
@@ -258,7 +258,7 @@ public class Diff {
           result.append('I');
           result.append(b.charAt(--y));
           break;
-        // insert
+          // insert
         case R:
           if (deletes != base) {
             result.append('D').append(deletes);
@@ -272,7 +272,7 @@ public class Diff {
           result.append(b.charAt(--y));
           x--;
           break;
-        // replace
+          // replace
         case D:
           if (deletes != base) {
             result.append('D').append(deletes);
@@ -388,10 +388,14 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
     // write the vector data to a temporary file
     DocsWithFieldSet docsWithField =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> writeByteVectorData(
-              tempVectorData, MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
-          case FLOAT32 -> writeVectorData(
-              tempVectorData, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
+          case BYTE ->
+              writeByteVectorData(
+                  tempVectorData,
+                  MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
+          case FLOAT32 ->
+              writeVectorData(
+                  tempVectorData,
+                  MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
         };
     CodecUtil.writeFooter(tempVectorData);
     IOUtils.close(tempVectorData);
@@ -638,18 +642,20 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
       throws IOException {
     int dim = fieldInfo.getVectorDimension();
     return switch (fieldInfo.getVectorEncoding()) {
-      case BYTE -> new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
-        @Override
-        public byte[] copyValue(byte[] value) {
-          return ArrayUtil.copyOfSubArray(value, 0, dim);
-        }
-      };
-      case FLOAT32 -> new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
-        @Override
-        public float[] copyValue(float[] value) {
-          return ArrayUtil.copyOfSubArray(value, 0, dim);
-        }
-      };
+      case BYTE ->
+          new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
+            @Override
+            public byte[] copyValue(byte[] value) {
+              return ArrayUtil.copyOfSubArray(value, 0, dim);
+            }
+          };
+      case FLOAT32 ->
+          new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
+            @Override
+            public float[] copyValue(float[] value) {
+              return ArrayUtil.copyOfSubArray(value, 0, dim);
+            }
+          };
     };
   }

@@ -663,12 +669,14 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
     DefaultFlatVectorScorer defaultFlatVectorScorer = new DefaultFlatVectorScorer();
     RandomVectorScorerSupplier scorerSupplier =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
-          case FLOAT32 -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
+          case BYTE ->
+              defaultFlatVectorScorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
+          case FLOAT32 ->
+              defaultFlatVectorScorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
         };
     hnswGraphBuilder =
         HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
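The vector-writer hunks here and below all share one shape: a `switch` expression over `VectorEncoding` dispatches to a byte or float code path, and the formatter now wraps each long arrow body onto its own line after the `->`. A self-contained sketch of that shape (hypothetical stand-in types and methods, not Lucene's API):

```java
import java.util.List;

// Hypothetical stand-in for the VectorEncoding dispatch seen in the writer
// hunks; the names mimic the pattern but are not Lucene's API.
class EncodingDispatchSketch {
  enum VectorEncoding {
    BYTE,
    FLOAT32
  }

  static int totalDimensions(VectorEncoding enc, List<byte[]> bytes, List<float[]> floats) {
    // google-java-format 1.23.0 breaks after "->" when the body is too long:
    return switch (enc) {
      case BYTE ->
          bytes.stream().mapToInt(vector -> vector.length).sum();
      case FLOAT32 ->
          floats.stream().mapToInt(vector -> vector.length).sum();
    };
  }
}
```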
@@ -414,10 +414,14 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
     // write the vector data to a temporary file
     DocsWithFieldSet docsWithField =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> writeByteVectorData(
-              tempVectorData, MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
-          case FLOAT32 -> writeVectorData(
-              tempVectorData, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
+          case BYTE ->
+              writeByteVectorData(
+                  tempVectorData,
+                  MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
+          case FLOAT32 ->
+              writeVectorData(
+                  tempVectorData,
+                  MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
         };
     CodecUtil.writeFooter(tempVectorData);
     IOUtils.close(tempVectorData);
@@ -477,10 +481,12 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
       }
       DocIdSetIterator mergedVectorIterator = null;
       switch (fieldInfo.getVectorEncoding()) {
-        case BYTE -> mergedVectorIterator =
-            KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
-        case FLOAT32 -> mergedVectorIterator =
-            KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
+        case BYTE ->
+            mergedVectorIterator =
+                KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
+        case FLOAT32 ->
+            mergedVectorIterator =
+                KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
       }
       graph =
           merger.merge(
@@ -680,18 +686,20 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
       throws IOException {
     int dim = fieldInfo.getVectorDimension();
     return switch (fieldInfo.getVectorEncoding()) {
-      case BYTE -> new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
-        @Override
-        public byte[] copyValue(byte[] value) {
-          return ArrayUtil.copyOfSubArray(value, 0, dim);
-        }
-      };
-      case FLOAT32 -> new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
-        @Override
-        public float[] copyValue(float[] value) {
-          return ArrayUtil.copyOfSubArray(value, 0, dim);
-        }
-      };
+      case BYTE ->
+          new FieldWriter<byte[]>(fieldInfo, M, beamWidth, infoStream) {
+            @Override
+            public byte[] copyValue(byte[] value) {
+              return ArrayUtil.copyOfSubArray(value, 0, dim);
+            }
+          };
+      case FLOAT32 ->
+          new FieldWriter<float[]>(fieldInfo, M, beamWidth, infoStream) {
+            @Override
+            public float[] copyValue(float[] value) {
+              return ArrayUtil.copyOfSubArray(value, 0, dim);
+            }
+          };
     };
   }

@@ -704,12 +712,14 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
     vectors = new ArrayList<>();
     RandomVectorScorerSupplier scorerSupplier =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
-          case FLOAT32 -> defaultFlatVectorScorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
+          case BYTE ->
+              defaultFlatVectorScorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
+          case FLOAT32 ->
+              defaultFlatVectorScorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
         };
     hnswGraphBuilder =
         HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
@@ -238,7 +238,7 @@ public class EnwikiContentSource extends ContentSource {
           time = null;
           id = null;
           break;
-        // intentional fall-through.
+          // intentional fall-through.
         case BODY:
         case DATE:
         case TITLE:
@@ -99,7 +99,7 @@ public class SpatialDocMaker extends DocMaker {
         return makeRPTStrategy(SPATIAL_FIELD, config, configMap, ctx);
       case "composite":
         return makeCompositeStrategy(config, configMap, ctx);
-      // TODO add more as-needed
+        // TODO add more as-needed
       default:
         throw new IllegalStateException("Unknown spatial.strategy: " + strategyName);
     }
@@ -230,12 +230,15 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
     // No need to use temporary file as we don't have to re-open for reading
     DocsWithFieldSet docsWithField =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> writeByteVectorData(
-              vectorData,
-              KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
-          case FLOAT32 -> writeVectorData(
-              vectorData,
-              KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
+          case BYTE ->
+              writeByteVectorData(
+                  vectorData,
+                  KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
+          case FLOAT32 ->
+              writeVectorData(
+                  vectorData,
+                  KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(
+                      fieldInfo, mergeState));
         };
     long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
     writeMeta(
@@ -259,12 +262,16 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
     // write the vector data to a temporary file
     DocsWithFieldSet docsWithField =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> writeByteVectorData(
-              tempVectorData,
-              KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState));
-          case FLOAT32 -> writeVectorData(
-              tempVectorData,
-              KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
+          case BYTE ->
+              writeByteVectorData(
+                  tempVectorData,
+                  KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(
+                      fieldInfo, mergeState));
+          case FLOAT32 ->
+              writeVectorData(
+                  tempVectorData,
+                  KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(
+                      fieldInfo, mergeState));
         };
     CodecUtil.writeFooter(tempVectorData);
     IOUtils.close(tempVectorData);
@@ -289,24 +296,26 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
     final IndexInput finalVectorDataInput = vectorDataInput;
     final RandomVectorScorerSupplier randomVectorScorerSupplier =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> vectorsScorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              new OffHeapByteVectorValues.DenseOffHeapVectorValues(
-                  fieldInfo.getVectorDimension(),
-                  docsWithField.cardinality(),
-                  finalVectorDataInput,
-                  fieldInfo.getVectorDimension() * Byte.BYTES,
-                  vectorsScorer,
-                  fieldInfo.getVectorSimilarityFunction()));
-          case FLOAT32 -> vectorsScorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
-                  fieldInfo.getVectorDimension(),
-                  docsWithField.cardinality(),
-                  finalVectorDataInput,
-                  fieldInfo.getVectorDimension() * Float.BYTES,
-                  vectorsScorer,
-                  fieldInfo.getVectorSimilarityFunction()));
+          case BYTE ->
+              vectorsScorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  new OffHeapByteVectorValues.DenseOffHeapVectorValues(
+                      fieldInfo.getVectorDimension(),
+                      docsWithField.cardinality(),
+                      finalVectorDataInput,
+                      fieldInfo.getVectorDimension() * Byte.BYTES,
+                      vectorsScorer,
+                      fieldInfo.getVectorSimilarityFunction()));
+          case FLOAT32 ->
+              vectorsScorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
+                      fieldInfo.getVectorDimension(),
+                      docsWithField.cardinality(),
+                      finalVectorDataInput,
+                      fieldInfo.getVectorDimension() * Float.BYTES,
+                      vectorsScorer,
+                      fieldInfo.getVectorSimilarityFunction()));
         };
     return new FlatCloseableRandomVectorScorerSupplier(
         () -> {
@@ -404,18 +413,20 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
     static FieldWriter<?> create(FieldInfo fieldInfo) {
       int dim = fieldInfo.getVectorDimension();
       return switch (fieldInfo.getVectorEncoding()) {
-        case BYTE -> new Lucene99FlatVectorsWriter.FieldWriter<byte[]>(fieldInfo) {
-          @Override
-          public byte[] copyValue(byte[] value) {
-            return ArrayUtil.copyOfSubArray(value, 0, dim);
-          }
-        };
-        case FLOAT32 -> new Lucene99FlatVectorsWriter.FieldWriter<float[]>(fieldInfo) {
-          @Override
-          public float[] copyValue(float[] value) {
-            return ArrayUtil.copyOfSubArray(value, 0, dim);
-          }
-        };
+        case BYTE ->
+            new Lucene99FlatVectorsWriter.FieldWriter<byte[]>(fieldInfo) {
+              @Override
+              public byte[] copyValue(byte[] value) {
+                return ArrayUtil.copyOfSubArray(value, 0, dim);
+              }
+            };
+        case FLOAT32 ->
+            new Lucene99FlatVectorsWriter.FieldWriter<float[]>(fieldInfo) {
+              @Override
+              public float[] copyValue(float[] value) {
+                return ArrayUtil.copyOfSubArray(value, 0, dim);
+              }
+            };
       };
     }

@@ -358,10 +358,12 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
       }
       DocIdSetIterator mergedVectorIterator = null;
      switch (fieldInfo.getVectorEncoding()) {
-        case BYTE -> mergedVectorIterator =
-            KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
-        case FLOAT32 -> mergedVectorIterator =
-            KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
+        case BYTE ->
+            mergedVectorIterator =
+                KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
+        case FLOAT32 ->
+            mergedVectorIterator =
+                KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
       }
       graph =
           merger.merge(
@@ -543,20 +545,22 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
       InfoStream infoStream)
       throws IOException {
     return switch (fieldInfo.getVectorEncoding()) {
-      case BYTE -> new FieldWriter<>(
-          scorer,
-          (FlatFieldVectorsWriter<byte[]>) flatFieldVectorsWriter,
-          fieldInfo,
-          M,
-          beamWidth,
-          infoStream);
-      case FLOAT32 -> new FieldWriter<>(
-          scorer,
-          (FlatFieldVectorsWriter<float[]>) flatFieldVectorsWriter,
-          fieldInfo,
-          M,
-          beamWidth,
-          infoStream);
+      case BYTE ->
+          new FieldWriter<>(
+              scorer,
+              (FlatFieldVectorsWriter<byte[]>) flatFieldVectorsWriter,
+              fieldInfo,
+              M,
+              beamWidth,
+              infoStream);
+      case FLOAT32 ->
+          new FieldWriter<>(
+              scorer,
+              (FlatFieldVectorsWriter<float[]>) flatFieldVectorsWriter,
+              fieldInfo,
+              M,
+              beamWidth,
+              infoStream);
     };
   }

@@ -572,16 +576,18 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
     this.fieldInfo = fieldInfo;
     RandomVectorScorerSupplier scorerSupplier =
         switch (fieldInfo.getVectorEncoding()) {
-          case BYTE -> scorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              RandomAccessVectorValues.fromBytes(
-                  (List<byte[]>) flatFieldVectorsWriter.getVectors(),
-                  fieldInfo.getVectorDimension()));
-          case FLOAT32 -> scorer.getRandomVectorScorerSupplier(
-              fieldInfo.getVectorSimilarityFunction(),
-              RandomAccessVectorValues.fromFloats(
-                  (List<float[]>) flatFieldVectorsWriter.getVectors(),
-                  fieldInfo.getVectorDimension()));
+          case BYTE ->
+              scorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  RandomAccessVectorValues.fromBytes(
+                      (List<byte[]>) flatFieldVectorsWriter.getVectors(),
+                      fieldInfo.getVectorDimension()));
+          case FLOAT32 ->
+              scorer.getRandomVectorScorerSupplier(
+                  fieldInfo.getVectorSimilarityFunction(),
+                  RandomAccessVectorValues.fromFloats(
+                      (List<float[]>) flatFieldVectorsWriter.getVectors(),
+                      fieldInfo.getVectorDimension()));
         };
     hnswGraphBuilder =
         HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
@@ -99,14 +99,20 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
       RandomAccessQuantizedByteVectorValues values) {
     return switch (sim) {
       case EUCLIDEAN -> new Euclidean(values, constMultiplier, targetBytes);
-      case COSINE, DOT_PRODUCT -> dotProductFactory(
-          targetBytes, offsetCorrection, constMultiplier, values, f -> Math.max((1 + f) / 2, 0));
-      case MAXIMUM_INNER_PRODUCT -> dotProductFactory(
-          targetBytes,
-          offsetCorrection,
-          constMultiplier,
-          values,
-          VectorUtil::scaleMaxInnerProductScore);
+      case COSINE, DOT_PRODUCT ->
+          dotProductFactory(
+              targetBytes,
+              offsetCorrection,
+              constMultiplier,
+              values,
+              f -> Math.max((1 + f) / 2, 0));
+      case MAXIMUM_INNER_PRODUCT ->
+          dotProductFactory(
+              targetBytes,
+              offsetCorrection,
+              constMultiplier,
+              values,
+              VectorUtil::scaleMaxInnerProductScore);
     };
   }

@@ -148,7 +148,7 @@ public class DateTools {
     calInstance.setTimeInMillis(time);

     switch (resolution) {
-      // NOTE: switch statement fall-through is deliberate
+        // NOTE: switch statement fall-through is deliberate
      case YEAR:
        calInstance.set(Calendar.MONTH, 0);
      case MONTH:
@@ -252,7 +252,7 @@ public abstract class FilteredTermsEnum extends TermsEnum {
       switch (accept(actualTerm)) {
         case YES_AND_SEEK:
           doSeek = true;
-        // term accepted, but we need to seek so fall-through
+          // term accepted, but we need to seek so fall-through
         case YES:
           // term accepted
           return actualTerm;
@@ -1034,10 +1034,12 @@ final class IndexingChain implements Accountable {
       int docID, PerField pf, VectorEncoding vectorEncoding, IndexableField field)
       throws IOException {
     switch (vectorEncoding) {
-      case BYTE -> ((KnnFieldVectorsWriter<byte[]>) pf.knnFieldVectorsWriter)
-          .addValue(docID, ((KnnByteVectorField) field).vectorValue());
-      case FLOAT32 -> ((KnnFieldVectorsWriter<float[]>) pf.knnFieldVectorsWriter)
-          .addValue(docID, ((KnnFloatVectorField) field).vectorValue());
+      case BYTE ->
+          ((KnnFieldVectorsWriter<byte[]>) pf.knnFieldVectorsWriter)
+              .addValue(docID, ((KnnByteVectorField) field).vectorValue());
+      case FLOAT32 ->
+          ((KnnFieldVectorsWriter<float[]>) pf.knnFieldVectorsWriter)
+              .addValue(docID, ((KnnFloatVectorField) field).vectorValue());
     }
   }

@@ -68,10 +68,10 @@ public record IOContext(
     Objects.requireNonNull(context, "context must not be null");
     Objects.requireNonNull(readAdvice, "readAdvice must not be null");
     switch (context) {
-      case MERGE -> Objects.requireNonNull(
-          mergeInfo, "mergeInfo must not be null if context is MERGE");
-      case FLUSH -> Objects.requireNonNull(
-          flushInfo, "flushInfo must not be null if context is FLUSH");
+      case MERGE ->
+          Objects.requireNonNull(mergeInfo, "mergeInfo must not be null if context is MERGE");
+      case FLUSH ->
+          Objects.requireNonNull(flushInfo, "flushInfo must not be null if context is FLUSH");
     }
     if ((context == Context.FLUSH || context == Context.MERGE)
         && readAdvice != ReadAdvice.SEQUENTIAL) {
@@ -185,7 +185,7 @@ public final class ArrayUtil {
         // round up to multiple of 8
         return (newSize + 7) & 0x7ffffff8;
       case 8:
-      // no rounding
+        // no rounding
       default:
         // odd (invalid?) size
         return newSize;
@@ -205,7 +205,7 @@ public final class ArrayUtil {
         // align with size of 5,7,9,11...
         return (newSize & 0x7ffffffe) + 1;
       case 8:
-      // no processing required
+        // no processing required
       default:
         // odd (invalid?) size
         return newSize;
@@ -180,10 +180,10 @@ public abstract class StringHelper {
     switch (len & 0x03) {
       case 3:
         k1 = (data[roundedEnd + 2] & 0xff) << 16;
-      // fallthrough
+        // fallthrough
       case 2:
         k1 |= (data[roundedEnd + 1] & 0xff) << 8;
-      // fallthrough
+        // fallthrough
       case 1:
         k1 |= (data[roundedEnd] & 0xff);
         k1 *= c1;
@@ -511,8 +511,9 @@ public final class UnicodeUtil {
       case 2 -> v = leadByte & 31; // 5 useful bits
       case 3 -> v = leadByte & 15; // 4 useful bits
       case 4 -> v = leadByte & 7; // 3 useful bits
-      default -> throw new IllegalArgumentException(
-          "Invalid UTF8 header byte: 0x" + Integer.toHexString(leadByte));
+      default ->
+          throw new IllegalArgumentException(
+              "Invalid UTF8 header byte: 0x" + Integer.toHexString(leadByte));
     }

     // TODO: this may read past utf8's limit.
@@ -817,7 +817,7 @@ public class RegExp {

   void toStringTree(StringBuilder b, String indent) {
     switch (kind) {
-      // binary
+        // binary
       case REGEXP_UNION:
       case REGEXP_CONCATENATION:
       case REGEXP_INTERSECTION:
@@ -827,7 +827,7 @@ public class RegExp {
         exp1.toStringTree(b, indent + " ");
         exp2.toStringTree(b, indent + " ");
         break;
-      // unary
+        // unary
       case REGEXP_OPTIONAL:
       case REGEXP_REPEAT:
       case REGEXP_COMPLEMENT:
@@ -40,10 +40,12 @@ public interface ScalarQuantizedVectorSimilarity {
       VectorSimilarityFunction sim, float constMultiplier, byte bits) {
     return switch (sim) {
       case EUCLIDEAN -> new Euclidean(constMultiplier);
-      case COSINE, DOT_PRODUCT -> new DotProduct(
-          constMultiplier, bits <= 4 ? VectorUtil::int4DotProduct : VectorUtil::dotProduct);
-      case MAXIMUM_INNER_PRODUCT -> new MaximumInnerProduct(
-          constMultiplier, bits <= 4 ? VectorUtil::int4DotProduct : VectorUtil::dotProduct);
+      case COSINE, DOT_PRODUCT ->
+          new DotProduct(
+              constMultiplier, bits <= 4 ? VectorUtil::int4DotProduct : VectorUtil::dotProduct);
+      case MAXIMUM_INNER_PRODUCT ->
+          new MaximumInnerProduct(
+              constMultiplier, bits <= 4 ? VectorUtil::int4DotProduct : VectorUtil::dotProduct);
     };
   }

@@ -52,8 +52,8 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
       case COSINE -> Optional.of(new CosineScorer(msInput, values, queryVector));
       case DOT_PRODUCT -> Optional.of(new DotProductScorer(msInput, values, queryVector));
       case EUCLIDEAN -> Optional.of(new EuclideanScorer(msInput, values, queryVector));
-      case MAXIMUM_INNER_PRODUCT -> Optional.of(
-          new MaxInnerProductScorer(msInput, values, queryVector));
+      case MAXIMUM_INNER_PRODUCT ->
+          Optional.of(new MaxInnerProductScorer(msInput, values, queryVector));
     };
   }

@@ -125,10 +125,10 @@ abstract class HnswGraphTestCase<T> extends LuceneTestCase {
       throws IOException {
     RandomAccessVectorValues vectorsCopy = vectors.copy();
     return switch (getVectorEncoding()) {
-      case BYTE -> flatVectorScorer.getRandomVectorScorer(
-          similarityFunction, vectorsCopy, (byte[]) query);
-      case FLOAT32 -> flatVectorScorer.getRandomVectorScorer(
-          similarityFunction, vectorsCopy, (float[]) query);
+      case BYTE ->
+          flatVectorScorer.getRandomVectorScorer(similarityFunction, vectorsCopy, (byte[]) query);
+      case FLOAT32 ->
+          flatVectorScorer.getRandomVectorScorer(similarityFunction, vectorsCopy, (float[]) query);
     };
   }

@@ -1238,17 +1238,19 @@ abstract class HnswGraphTestCase<T> extends LuceneTestCase {
          break;
        }
        switch (getVectorEncoding()) {
-          case BYTE -> assertArrayEquals(
-              "vectors do not match for doc=" + uDoc,
-              (byte[]) u.vectorValue(),
-              (byte[]) v.vectorValue());
-          case FLOAT32 -> assertArrayEquals(
-              "vectors do not match for doc=" + uDoc,
-              (float[]) u.vectorValue(),
-              (float[]) v.vectorValue(),
-              1e-4f);
-          default -> throw new IllegalArgumentException(
-              "unknown vector encoding: " + getVectorEncoding());
+          case BYTE ->
+              assertArrayEquals(
+                  "vectors do not match for doc=" + uDoc,
+                  (byte[]) u.vectorValue(),
+                  (byte[]) v.vectorValue());
+          case FLOAT32 ->
+              assertArrayEquals(
+                  "vectors do not match for doc=" + uDoc,
+                  (float[]) u.vectorValue(),
+                  (float[]) v.vectorValue(),
+                  1e-4f);
+          default ->
+              throw new IllegalArgumentException("unknown vector encoding: " + getVectorEncoding());
        }
      }
    }
@@ -197,7 +197,7 @@ public abstract class FilterSpans extends Spans {
           if (startPos != NO_MORE_POSITIONS) {
             break;
           }
-        // else fallthrough
+          // else fallthrough
         case NO_MORE_IN_CURRENT_DOC:
           startPos = -1;
           return false;
@@ -146,7 +146,7 @@ public class TestDocValuesFieldSources extends LuceneTestCase {
         case SORTED:
           values.ordVal(i); // no exception
           assertTrue(values.numOrd() >= 1);
-        // fall-through
+          // fall-through
         case BINARY:
           assertEquals(expected, values.objectVal(i));
           assertEquals(expected, values.strVal(i));
@@ -346,16 +346,16 @@ public class TestRandomSpatialOpFuzzyPrefixTree extends StrategyTestCase {
       case 0:
         queryShape = randomPoint();
         break;
-      // LUCENE-5549
-      // TODO debug: -Dtests.method=testWithin -Dtests.multiplier=3
-      //  -Dtests.seed=5F5294CE2E075A3E:AAD2F0F79288CA64
-      // case 1:case 2:case 3:
-      //   if (!indexedAtLeastOneShapePair) {
-      //     // avoids ShapePair.relate(ShapePair), which isn't reliable
-      //     queryShape = randomShapePairRect(!biasContains);
-      //     // invert biasContains for query side
-      //     break;
-      //   }
+        // LUCENE-5549
+        // TODO debug: -Dtests.method=testWithin -Dtests.multiplier=3
+        //  -Dtests.seed=5F5294CE2E075A3E:AAD2F0F79288CA64
+        // case 1:case 2:case 3:
+        //   if (!indexedAtLeastOneShapePair) {
+        //     // avoids ShapePair.relate(ShapePair), which isn't reliable
+        //     queryShape = randomShapePairRect(!biasContains);
+        //     // invert biasContains for query side
+        //     break;
+        //   }

       case 4:
         // choose an existing indexed shape
@@ -366,7 +366,7 @@ public class TestRandomSpatialOpFuzzyPrefixTree extends StrategyTestCase {
             break;
          }
        }
-      // fall-through
+        // fall-through

       default:
         queryShape = randomRectangle();
@@ -948,7 +948,7 @@ public class TestFuzzySuggester extends LuceneTestCase {
          }
          return builder.toString();
        }
-      // NOTE: fall through to delete:
+        // NOTE: fall through to delete:
       case 2:
         // Delete input[i]
         for (int j = i + 1; j < input.length; j++) {
@@ -99,8 +99,8 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
   protected void addRandomFields(Document doc) {
     switch (vectorEncoding) {
       case BYTE -> doc.add(new KnnByteVectorField("v2", randomVector8(30), similarityFunction));
-      case FLOAT32 -> doc.add(
-          new KnnFloatVectorField("v2", randomNormalizedVector(30), similarityFunction));
+      case FLOAT32 ->
+          doc.add(new KnnFloatVectorField("v2", randomNormalizedVector(30), similarityFunction));
     }
   }

@@ -1260,8 +1260,8 @@ public final class TestUtil {
         return new LuceneFixedGap();
       case 1:
         return new BlockTreeOrdsPostingsFormat();
-      // TODO: these don't actually support ords!
-      // case 2: return new FSTOrdPostingsFormat();
+        // TODO: these don't actually support ords!
+        // case 2: return new FSTOrdPostingsFormat();
       default:
         throw new AssertionError();
     }
@@ -8,7 +8,7 @@ ecj = "3.36.0"
 errorprone = "2.18.0"
 flexmark = "0.61.24"
 # @keep This is GJF version for spotless/ tidy.
-googleJavaFormat = "1.18.1"
+googleJavaFormat = "1.23.0"
 groovy = "3.0.21"
 hamcrest = "2.2"
 icu4j = "74.2"
@@ -80,6 +80,6 @@ forbiddenapis = "de.thetaphi.forbiddenapis:3.7"
 jacocolog = "org.barfuin.gradle.jacocolog:3.1.0"
 owasp-dependencycheck = "org.owasp.dependencycheck:7.2.0"
 randomizedtesting = "com.carrotsearch.gradle.randomizedtesting:0.0.6"
-spotless = "com.diffplug.spotless:6.5.2"
+spotless = "com.diffplug.spotless:6.9.1"
 undercouch-download = "de.undercouch.download:5.2.0"
 versionCatalogUpdate = "nl.littlerobots.version-catalog-update:0.8.4"