mirror of https://github.com/apache/lucene.git
LUCENE-2257: improve max per-segment term count limit from ~2.1B to ~274B
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@909352 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c0a1615db1
commit
5995714461
|
@ -107,6 +107,10 @@ Bug fixes
|
|||
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
|
||||
close. (Martin Traverso via Uwe Schindler)
|
||||
|
||||
* LUCENE-2257: Increase max number of unique terms in one segment to
|
||||
termIndexInterval (default 128) * ~2.1 billion = ~274 billion.
|
||||
(Tom Burton-West via Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-2128: Parallelized fetching document frequencies during weight
|
||||
|
|
|
@ -2548,11 +2548,12 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div class="section">
|
||||
<p>
|
||||
When referring to term numbers, Lucene's current
|
||||
implementation uses a Java <span class="codefrag">int</span>, which means
|
||||
the maximum number of unique terms in any single index
|
||||
segment is 2,147,483,648. This is technically not a
|
||||
limitation of the index file format, just of Lucene's
|
||||
current implementation.
|
||||
implementation uses a Java <span class="codefrag">int</span> to hold the
|
||||
term index, which means the maximum number of unique
|
||||
terms in any single index segment is ~2.1 billion times
|
||||
the term index interval (default 128) = ~274 billion.
|
||||
This is technically not a limitation of the index file
|
||||
format, just of Lucene's current implementation.
|
||||
</p>
|
||||
<p>
|
||||
Similarly, Lucene uses a Java <span class="codefrag">int</span> to refer
|
||||
|
|
|
@ -667,10 +667,10 @@ endobj
|
|||
>>
|
||||
endobj
|
||||
111 0 obj
|
||||
<< /Length 1801 /Filter [ /ASCII85Decode /FlateDecode ]
|
||||
<< /Length 1845 /Filter [ /ASCII85Decode /FlateDecode ]
|
||||
>>
|
||||
stream
|
||||
Gatm<>E@c%'Ro4HjK13fG\lPL<L(^M)=;1H]aSa"3di^?5e;fsk/+q5qDnr@j5@/#>)aB'A"VfQS_N=$kG,NXdH;*]f';idMD;tC&-V:5k;U.'/tp@-$LTH7^Q,1p?N)YM*ArPAGr!8(0!,&.e*ZcV$F&^;F6Ba=Bj-erc@iWHmO",>o\fW2U#KJ8%,]-0<;4\qcMW)dW(W4r')L'%gC?hm*f`HWnV%dbVZG#-_jUj#'P\O_oZ'&jIQr]%54&Rn#K1_F/7O2B740fG=lc[4Ro3YmceBq7Jn539+CM0NV+[Zoel+/JaFIhGpmP4I%\.R[ZC5-ce+e*fLJ4fAT\@3Qg<OUkg%TUHnGP,C=+#_'.pZ4H",NEsLTNJM(fBH(?KeXfo^Bc8fTuro@"@"D!KU38XjWeP#<ha-pVbZZknj=14l:`^lpdt5U*+CR:Rd:o1k!7K+CnUk"W%n9HHE4gBNuJ8`C+?h=TFac<%Jbl7LF%';\'KIXPn!+dA)t3eiE2m_tS#TS7_cUE%q?XTk5a=alQPQGf:S;O8pJoArGIg8D1^=@"9Xra$]:!Qj.`rXSB_"RPF;J`A:g<&58dgZ<6Y60G+Xe$CU3W``ZHtZ,gKj2a.5`>Gc1u-7W7h"cbCre?E8TM&At<U83)97qa[m#f:qI#Ki-S6k-)'9O'n)hi>8O!?\Zsm0q0)\(>&=We,.h1o`pZE7qOk1rFOCj3(%1n;J"B0i&M[BePZMTY8(,j_P+]_P5I?\J/(K8Z6DjQ_^H`SEO/<c%UDPoC1LgWl98*ZQ1]Y35)SuSu#ZX`icHJ?a0aBM$o43HuJb;OMG;$MM6UhF!JK?Ak#AJZ[A84JW*RL^jn2O7"f!cRE`_?KbQ6?b_61\#p$X"5_iJeHa38glO9^pJ:2R^=5R/3#XE]"\+C,=$RjI@3i)9d>Y">"_S/9Z@,81;O;nec;K.Sh35ViII!^N'h`d&Fa!oTbnAF-)SW:(h*^RW!ZkFMnf4=9B6Til(FqYcoO?tniTMb>_+7;LbV\NtQAdON4g_="n/dHr.9:I`img>pmXGCoL.5dT;_%U,hKC-2."mArlK#2QtTjDcU^OZijUGX1BS&fM_IqtMc4YW2pchD7rNX.bkiX:CO+B#XE_l!(M&o8,(lC&:VFoWIf2JGO'0#O'r5)PBdF3_]8*R*2#c0Gb-m"MMVl"<Q\)h\G'())cK.N`6a-%YMn%^1Ye$lp!7*.*f0%_Tkd\nG$RNi()\E3I*dq/R_VI+tT[^l@#-T'\c[&GE1s;J9gn?>4r1ri%&PJjQZZUW)F>7c#SGN$u)DUp-W7j\O%/N"t%)+rC6*J5b&i/rg?gf)qQ>0oC<>?l$4CDGTap@Wn1#_`e#([onhm(7l/DLQ].K%&"i%@qog]SY54_K@#]&\CdSEi2a6_A-\ptml007OI2_pGssbFA@6[laqdrV2`R)'V"YX4fS*\3f>,V>4#L01afe)u\m0--fn,sQ)-]FFs,o^4H.qb\/+2kb`6l+lrN;Y4Gh8JS#)F1%g,CT;[NS:Jn>_(cEbKaGProLPjEZK-n>=?sW2b%F]e/'&0&"]e\7%IOAG78TS[D<B6J_lP+`S*fEMr*BXh*5qda[qM%Kd&rep2">N8C+WmlWBfB"=LHK?[dUc'ujV5J\KMr?G`JG;6k+/'3XN&F5`o:W;koaM4seWJhHt0u4mEMOr5Bo>0`iiqk,6HmeKLq4s5I7W&`*]&'g1*_%_OUd!0NMK,:Za1Fo%!78.6-^dpe7i'q*bg[A:V/te//^*dZeQmd3b+3">0,"]'IfX.'6n&~>
|
||||
Gatm<>BAg]'RnB3n/LcAW02O@J/Pq745JTKec65F#:5KGZ*oni-#N@,hi;M#d?,Yd>@9L+Eb+E>^2oQXAs9^1ai=_X<r-kr8MS(g'LhkJ&@T?O5+sCo9('qO5>Q0'^&@dco^idiB%[<2Vm-.F?@O`+.)LNo/%e)!DF@c9H;iPf\X6.,28n**]:&i5-/YXjesG6chX''KWD??0@/BgBgPg/i+("o=j<n96JHW2\NCtH4;(--Hcu8[Ho3me+HoMW^Nj.ai3S6_S8e,ZT<EZQ[*"p'`+#gT(P2urT+CM6TM?VH5S#$TYAAL$dI)Tj*"^5eHhXIk!>2NX(P0'ODl8@)CV8'q/0JJ'qT,2dZj@eRhl2)eG5hql,[U'&GA5g]tBkWd"ht*C]:.X,IJD:$Y8*\o"9@*4#=aI`*i2[+QM-hWkQX:B7;*(I#aIE4Y-7]I@hlm_I=Ys^3AIVYCG^U?e;aZ/$Qjg2,^d?/3#mjXsjHcBla:tk5Zc1Wp9=8F?5!XlfGL:G5q0gNL#ZR+WL!h;L)l;MY.r[`SLg6)`G/k_:L[KZnU*72gF,.XiE3`m5#7f1@JS.WIo@efS#26'!,qgh`+lfY'KqWP4WnNG$TdmPA&P1bNXJEpZ\B5_g1KufB?AQgcV/oL3562M1$jDUZ1WCYN83`gae1`4ETh\sT\c[KpZmarCa$tR*@=Y;>DfdgNV-pfL=BX>945KHrX\,OeJpGQrNP66?Z4=7n(UJ]F!FJr.#+@(,[$AS,(A(1%\_5*=<PGsSO*;ZEEi+/(;=T%W.k=A%=B:L<*HkrMe:Q"K!o(Z8M"5fTp5;nc@?lk,O^Rbo0TFml-2!Z@S",OMUdbOmPgq&m$kpp(/s\/TK^i\n,l!8D/"_jCY\W;Bet,j)"#4k9Y,V3e.jkl?8C,=oI"U('Om*36V"Gb`8+F[?+[^`ihp$Zpfdkg:i#!bSJpjj$%L?d`6_#Kp=0B4+LVMVirQss;"@\*>SV2gop=(.1,XC28DF:0,;X?n:1j]'Wb.SUFW*)2i<_uNVVVkC$3J`Vrg?RV1/(db)*gG\kW84OJT(0*+D9"Le6*KPe'UFCGc%8Uf"`,Uc=9Bb\NVp6YGICZ%"(U$^I)i[3O21F*H=B\^-/',Lfd)SVJd[/QeTj/7!KC?b2Sr8-R&2$!%f9\.Z;KjY/\pF]/`6V/E$iU0aau[6U,bNl>lkro4uNKp7S4\bE2qFEc1sV3VnHsGg=oj%ebEhM2anjEk9A5(cdng-*M"4t5uKquQQ_a69:iC8#Ds(]^mtB.WYGadC0Adc)WACId3qH"_<63NQG`$BeI4^mk/[+:Hd>#e@!<rjJ$SJH#6O1o##9u9l)W:ZfsbXV+M,&X6(="C1>eWJq1$^qlA*I$M'_V<k0bqPl/2]ggobt6/(a%(H!7o$LRAr<aGH_$n5*Nt.*49\&/q<#V:KYBinB,M$4VidP;>g+Q_O?7#,E2C?@^ATK_ZF85?IOS2(1O_eSX_NLU4h_NH#b`4qeD:hJW!qA#HkL"n)KBH@-.LpP>=ki2,O,5a/`l#J;k\mu>9-Z2"<uSKA$uSF8t(>"?/*82.L2pc7M[%J+,^%=DgZ1dXbF=T#uc3+nuNVAp&*oI>jo`AQCR[td7;TqaI0,q(ah.jdG(2!m_[[pb3OpT`#>323Wj*LEW=Faj\2j3$aQbKW?iCt:Ss^4Pb5'-*c,geVDUOr:]L=E^(LLf<IncuH^"q'WVVKi%n&-Fh)tc^hYHL#BWt#t'qHTpGo9I+%V4UGf_^XO^[]a%=lBBh/Y$0oc[M^NB,OJ?GXh<)u6NY6Y$KXhG`+:!W*djgcNqfbL0&mZ^ps9@Co["2nl!!W~>
|
||||
endstream
|
||||
endobj
|
||||
112 0 obj
|
||||
|
@ -1206,80 +1206,80 @@ endobj
|
|||
xref
|
||||
0 153
|
||||
0000000000 65535 f
|
||||
0000054489 00000 n
|
||||
0000054695 00000 n
|
||||
0000054788 00000 n
|
||||
0000054533 00000 n
|
||||
0000054739 00000 n
|
||||
0000054832 00000 n
|
||||
0000000015 00000 n
|
||||
0000000071 00000 n
|
||||
0000001333 00000 n
|
||||
0000001453 00000 n
|
||||
0000001639 00000 n
|
||||
0000054940 00000 n
|
||||
0000054984 00000 n
|
||||
0000001774 00000 n
|
||||
0000055003 00000 n
|
||||
0000055047 00000 n
|
||||
0000001909 00000 n
|
||||
0000055069 00000 n
|
||||
0000055113 00000 n
|
||||
0000002046 00000 n
|
||||
0000055133 00000 n
|
||||
0000055177 00000 n
|
||||
0000002183 00000 n
|
||||
0000055199 00000 n
|
||||
0000055243 00000 n
|
||||
0000002320 00000 n
|
||||
0000055265 00000 n
|
||||
0000055309 00000 n
|
||||
0000002457 00000 n
|
||||
0000055331 00000 n
|
||||
0000055375 00000 n
|
||||
0000002594 00000 n
|
||||
0000055395 00000 n
|
||||
0000055439 00000 n
|
||||
0000002731 00000 n
|
||||
0000055459 00000 n
|
||||
0000055503 00000 n
|
||||
0000002868 00000 n
|
||||
0000055525 00000 n
|
||||
0000055569 00000 n
|
||||
0000003005 00000 n
|
||||
0000055590 00000 n
|
||||
0000055634 00000 n
|
||||
0000003142 00000 n
|
||||
0000055656 00000 n
|
||||
0000055700 00000 n
|
||||
0000003279 00000 n
|
||||
0000055722 00000 n
|
||||
0000055766 00000 n
|
||||
0000003416 00000 n
|
||||
0000055787 00000 n
|
||||
0000055831 00000 n
|
||||
0000003553 00000 n
|
||||
0000055853 00000 n
|
||||
0000055897 00000 n
|
||||
0000003690 00000 n
|
||||
0000055917 00000 n
|
||||
0000055961 00000 n
|
||||
0000003826 00000 n
|
||||
0000055981 00000 n
|
||||
0000056025 00000 n
|
||||
0000003963 00000 n
|
||||
0000056047 00000 n
|
||||
0000056091 00000 n
|
||||
0000004100 00000 n
|
||||
0000056113 00000 n
|
||||
0000056157 00000 n
|
||||
0000004237 00000 n
|
||||
0000056178 00000 n
|
||||
0000056222 00000 n
|
||||
0000004373 00000 n
|
||||
0000056244 00000 n
|
||||
0000056288 00000 n
|
||||
0000004510 00000 n
|
||||
0000056308 00000 n
|
||||
0000056352 00000 n
|
||||
0000004647 00000 n
|
||||
0000056374 00000 n
|
||||
0000056418 00000 n
|
||||
0000004783 00000 n
|
||||
0000056440 00000 n
|
||||
0000056484 00000 n
|
||||
0000004920 00000 n
|
||||
0000005673 00000 n
|
||||
0000005796 00000 n
|
||||
0000005872 00000 n
|
||||
0000056504 00000 n
|
||||
0000056548 00000 n
|
||||
0000006004 00000 n
|
||||
0000056570 00000 n
|
||||
0000056614 00000 n
|
||||
0000006137 00000 n
|
||||
0000056634 00000 n
|
||||
0000056678 00000 n
|
||||
0000006270 00000 n
|
||||
0000056699 00000 n
|
||||
0000056743 00000 n
|
||||
0000006403 00000 n
|
||||
0000056764 00000 n
|
||||
0000056808 00000 n
|
||||
0000006536 00000 n
|
||||
0000056829 00000 n
|
||||
0000056873 00000 n
|
||||
0000006669 00000 n
|
||||
0000056894 00000 n
|
||||
0000056938 00000 n
|
||||
0000006801 00000 n
|
||||
0000056959 00000 n
|
||||
0000057003 00000 n
|
||||
0000006934 00000 n
|
||||
0000009085 00000 n
|
||||
0000009193 00000 n
|
||||
|
@ -1317,47 +1317,47 @@ xref
|
|||
0000043768 00000 n
|
||||
0000045499 00000 n
|
||||
0000045609 00000 n
|
||||
0000047504 00000 n
|
||||
0000057024 00000 n
|
||||
0000047614 00000 n
|
||||
0000047814 00000 n
|
||||
0000048032 00000 n
|
||||
0000048238 00000 n
|
||||
0000048446 00000 n
|
||||
0000048614 00000 n
|
||||
0000048814 00000 n
|
||||
0000048972 00000 n
|
||||
0000049147 00000 n
|
||||
0000049410 00000 n
|
||||
0000049651 00000 n
|
||||
0000049780 00000 n
|
||||
0000049934 00000 n
|
||||
0000050088 00000 n
|
||||
0000050232 00000 n
|
||||
0000050382 00000 n
|
||||
0000050523 00000 n
|
||||
0000050758 00000 n
|
||||
0000050953 00000 n
|
||||
0000051193 00000 n
|
||||
0000051375 00000 n
|
||||
0000051548 00000 n
|
||||
0000051751 00000 n
|
||||
0000051939 00000 n
|
||||
0000052191 00000 n
|
||||
0000052332 00000 n
|
||||
0000052541 00000 n
|
||||
0000052727 00000 n
|
||||
0000052901 00000 n
|
||||
0000053146 00000 n
|
||||
0000053337 00000 n
|
||||
0000053543 00000 n
|
||||
0000053709 00000 n
|
||||
0000053823 00000 n
|
||||
0000053934 00000 n
|
||||
0000054046 00000 n
|
||||
0000054155 00000 n
|
||||
0000054262 00000 n
|
||||
0000054379 00000 n
|
||||
0000047548 00000 n
|
||||
0000057068 00000 n
|
||||
0000047658 00000 n
|
||||
0000047858 00000 n
|
||||
0000048076 00000 n
|
||||
0000048282 00000 n
|
||||
0000048490 00000 n
|
||||
0000048658 00000 n
|
||||
0000048858 00000 n
|
||||
0000049016 00000 n
|
||||
0000049191 00000 n
|
||||
0000049454 00000 n
|
||||
0000049695 00000 n
|
||||
0000049824 00000 n
|
||||
0000049978 00000 n
|
||||
0000050132 00000 n
|
||||
0000050276 00000 n
|
||||
0000050426 00000 n
|
||||
0000050567 00000 n
|
||||
0000050802 00000 n
|
||||
0000050997 00000 n
|
||||
0000051237 00000 n
|
||||
0000051419 00000 n
|
||||
0000051592 00000 n
|
||||
0000051795 00000 n
|
||||
0000051983 00000 n
|
||||
0000052235 00000 n
|
||||
0000052376 00000 n
|
||||
0000052585 00000 n
|
||||
0000052771 00000 n
|
||||
0000052945 00000 n
|
||||
0000053190 00000 n
|
||||
0000053381 00000 n
|
||||
0000053587 00000 n
|
||||
0000053753 00000 n
|
||||
0000053867 00000 n
|
||||
0000053978 00000 n
|
||||
0000054090 00000 n
|
||||
0000054199 00000 n
|
||||
0000054306 00000 n
|
||||
0000054423 00000 n
|
||||
trailer
|
||||
<<
|
||||
/Size 153
|
||||
|
@ -1365,5 +1365,5 @@ trailer
|
|||
/Info 4 0 R
|
||||
>>
|
||||
startxref
|
||||
57078
|
||||
57122
|
||||
%%EOF
|
||||
|
|
|
@ -109,7 +109,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
|||
return clone;
|
||||
}
|
||||
|
||||
final void seek(long pointer, int p, Term t, TermInfo ti)
|
||||
final void seek(long pointer, long p, Term t, TermInfo ti)
|
||||
throws IOException {
|
||||
input.seek(pointer);
|
||||
position = p;
|
||||
|
|
|
@ -177,7 +177,7 @@ final class TermInfosReader {
|
|||
|
||||
private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
|
||||
enumerator.seek(indexPointers[indexOffset],
|
||||
(indexOffset * totalIndexInterval) - 1,
|
||||
((long) indexOffset * totalIndexInterval) - 1,
|
||||
indexTerms[indexOffset], indexInfos[indexOffset]);
|
||||
}
|
||||
|
||||
|
@ -280,28 +280,6 @@ final class TermInfosReader {
|
|||
return true;
|
||||
}
|
||||
|
||||
/** Returns the nth term in the set. */
|
||||
final Term get(int position) throws IOException {
|
||||
if (size == 0) return null;
|
||||
|
||||
SegmentTermEnum enumerator = getThreadResources().termEnum;
|
||||
if (enumerator.term() != null &&
|
||||
position >= enumerator.position &&
|
||||
position < (enumerator.position + totalIndexInterval))
|
||||
return scanEnum(enumerator, position); // can avoid seek
|
||||
|
||||
seekEnum(enumerator, position/totalIndexInterval); // must seek
|
||||
return scanEnum(enumerator, position);
|
||||
}
|
||||
|
||||
private final Term scanEnum(SegmentTermEnum enumerator, int position) throws IOException {
|
||||
while(enumerator.position < position)
|
||||
if (!enumerator.next())
|
||||
return null;
|
||||
|
||||
return enumerator.term();
|
||||
}
|
||||
|
||||
private void ensureIndexIsRead() {
|
||||
if (indexTerms == null) {
|
||||
throw new IllegalStateException("terms index was not loaded when this reader was created");
|
||||
|
|
|
@ -1846,11 +1846,12 @@
|
|||
|
||||
<p>
|
||||
When referring to term numbers, Lucene's current
|
||||
implementation uses a Java <code>int</code>, which means
|
||||
the maximum number of unique terms in any single index
|
||||
segment is 2,147,483,648. This is technically not a
|
||||
limitation of the index file format, just of Lucene's
|
||||
current implementation.
|
||||
implementation uses a Java <code>int</code> to hold the
|
||||
term index, which means the maximum number of unique
|
||||
terms in any single index segment is ~2.1 billion times
|
||||
the term index interval (default 128) = ~274 billion.
|
||||
This is technically not a limitation of the index file
|
||||
format, just of Lucene's current implementation.
|
||||
</p>
|
||||
<p>
|
||||
Similarly, Lucene uses a Java <code>int</code> to refer
|
||||
|
|
Loading…
Reference in New Issue