368 lines
21 KiB
HTML
368 lines
21 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 353 – Using ssize_t as the index type | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0353/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 353 – Using ssize_t as the index type | peps.python.org'>
|
||
<meta property="og:description" content="In Python 2.4, indices of sequences are restricted to the C type int. On 64-bit machines, sequences therefore cannot use the full address space, and are restricted to 2**31 elements. This PEP proposes to change this, introducing a platform-specific inde...">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0353/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="In Python 2.4, indices of sequences are restricted to the C type int. On 64-bit machines, sequences therefore cannot use the full address space, and are restricted to 2**31 elements. This PEP proposes to change this, introducing a platform-specific inde...">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 353</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 353 – Using ssize_t as the index type</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Martin von Löwis &lt;martin at v.loewis.de&gt;</dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">18-Dec-2005</dd>
|
||
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-odd">2.5</dd>
|
||
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p></p></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#conversion-guidelines">Conversion guidelines</a></li>
|
||
<li><a class="reference internal" href="#discussion">Discussion</a><ul>
|
||
<li><a class="reference internal" href="#why-not-size-t">Why not size_t</a></li>
|
||
<li><a class="reference internal" href="#why-not-py-intptr-t">Why not Py_intptr_t</a></li>
|
||
<li><a class="reference internal" href="#doesn-t-this-break-much-code">Doesn’t this break much code?</a></li>
|
||
<li><a class="reference internal" href="#doesn-t-this-consume-too-much-memory">Doesn’t this consume too much memory?</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>In Python 2.4, indices of sequences are restricted to the C type
|
||
int. On 64-bit machines, sequences therefore cannot use the full
|
||
address space, and are restricted to 2**31 elements. This PEP proposes
|
||
to change this, introducing a platform-specific index type
|
||
Py_ssize_t. An implementation of the proposed change is in
|
||
<a class="reference external" href="http://svn.python.org/projects/python/branches/ssize_t">http://svn.python.org/projects/python/branches/ssize_t</a>.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>64-bit machines are becoming more popular, and the size of main memory
|
||
increases beyond 4GiB. On such machines, Python currently is limited,
|
||
in that sequences (strings, unicode objects, tuples, lists,
|
||
array.arrays, …) cannot contain more than 2GiElements.</p>
|
||
<p>Today, very few machines have memory to represent larger lists: as
|
||
each pointer is 8B (in a 64-bit machine), one needs 16GiB to just hold
|
||
the pointers of such a list; with data in the list, the memory
|
||
consumption grows even more. However, there are three container types
|
||
for which users request improvements today:</p>
|
||
<ul class="simple">
|
||
<li>strings (currently restricted to 2GiB)</li>
|
||
<li>mmap objects (likewise; plus the system typically
|
||
won’t keep the whole object in memory concurrently)</li>
|
||
<li>Numarray objects (from Numerical Python)</li>
|
||
</ul>
|
||
<p>As the proposed change will cause incompatibilities on 64-bit
|
||
machines, it should be carried out while such machines are not in wide
|
||
use (IOW, as early as possible).</p>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<p>A new type Py_ssize_t is introduced, which has the same size as the
|
||
compiler’s size_t type, but is signed. It will be a typedef for
|
||
ssize_t where available.</p>
|
||
<p>The internal representation of the length fields of all container
|
||
types is changed from int to ssize_t, for all types included in the
|
||
standard distribution. In particular, PyObject_VAR_HEAD is changed to
|
||
use Py_ssize_t, affecting all extension modules that use that macro.</p>
|
||
<p>All occurrences of index and length parameters and results are changed
|
||
to use Py_ssize_t, including the sequence slots in type objects, and
|
||
the buffer interface.</p>
|
||
<p>New conversion functions PyInt_FromSsize_t and PyInt_AsSsize_t are
|
||
introduced. PyInt_FromSsize_t will transparently return a long int
|
||
object if the value exceeds LONG_MAX; PyInt_AsSsize_t will
|
||
transparently process long int objects.</p>
|
||
<p>New function pointer typedefs ssizeargfunc, ssizessizeargfunc,
|
||
ssizeobjargproc, ssizessizeobjargproc, and lenfunc are introduced. The
|
||
buffer interface function types are now called readbufferproc,
|
||
writebufferproc, segcountproc, and charbufferproc.</p>
|
||
<p>A new conversion code ‘n’ is introduced for PyArg_ParseTuple,
|
||
Py_BuildValue, PyObject_CallFunction and PyObject_CallMethod.
|
||
This code operates on Py_ssize_t.</p>
|
||
<p>The conversion codes ‘s#’ and ‘t#’ will output Py_ssize_t
|
||
if the macro PY_SSIZE_T_CLEAN is defined before Python.h
|
||
is included, and continue to output int if that macro
|
||
isn’t defined.</p>
|
||
<p>At places where a conversion from size_t/Py_ssize_t to
|
||
int is necessary, the strategy for conversion is chosen
|
||
on a case-by-case basis (see next section).</p>
|
||
<p>To prevent loading extension modules that assume a 32-bit
|
||
size type into an interpreter that has a 64-bit size type,
|
||
Py_InitModule4 is renamed to Py_InitModule4_64.</p>
|
||
</section>
|
||
<section id="conversion-guidelines">
|
||
<h2><a class="toc-backref" href="#conversion-guidelines" role="doc-backlink">Conversion guidelines</a></h2>
|
||
<p>Module authors have the choice whether they support this PEP in their
|
||
code or not; if they support it, they have the choice of different
|
||
levels of compatibility.</p>
|
||
<p>If a module is not converted to support this PEP, it will continue to
|
||
work unmodified on a 32-bit system. On a 64-bit system, compile-time
|
||
errors and warnings might be issued, and the module might crash the
|
||
interpreter if the warnings are ignored.</p>
|
||
<p>Conversion of a module can either attempt to continue using int
|
||
indices, or use Py_ssize_t indices throughout.</p>
|
||
<p>If the module should continue to use int indices, care must be taken
|
||
when calling functions that return Py_ssize_t or size_t, in
|
||
particular, for functions that return the length of an object (this
|
||
includes the strlen function and the sizeof operator). A good compiler
|
||
will warn when a Py_ssize_t/size_t value is truncated into an int.
|
||
In these cases, three strategies are available:</p>
|
||
<ul>
|
||
<li>statically determine that the size can never exceed an int
|
||
(e.g. when taking the sizeof a struct, or the strlen of
|
||
a file pathname). In this case, write:<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">some_int</span> <span class="o">=</span> <span class="n">Py_SAFE_DOWNCAST</span><span class="p">(</span><span class="n">some_value</span><span class="p">,</span> <span class="n">Py_ssize_t</span><span class="p">,</span> <span class="nb">int</span><span class="p">);</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This will add an assertion in debug mode that the value
|
||
really fits into an int, and just add a cast otherwise.</p>
|
||
</li>
|
||
<li>statically determine that the value shouldn’t overflow an
|
||
int unless there is a bug in the C code somewhere. Test
|
||
whether the value is smaller than INT_MAX, and raise an
|
||
InternalError if it isn’t.</li>
|
||
<li>otherwise, check whether the value fits an int, and raise
|
||
a ValueError if it doesn’t.</li>
|
||
</ul>
|
||
<p>The same care must be taken for tp_as_sequence slots; in
|
||
addition, the signatures of these slots change, and the
|
||
slots must be explicitly recast (e.g. from intargfunc
|
||
to ssizeargfunc). Compatibility with previous Python
|
||
versions can be achieved with the test:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1">#if PY_VERSION_HEX &lt; 0x02050000 &amp;&amp; !defined(PY_SSIZE_T_MIN)</span>
|
||
<span class="n">typedef</span> <span class="nb">int</span> <span class="n">Py_ssize_t</span><span class="p">;</span>
|
||
<span class="c1">#define PY_SSIZE_T_MAX INT_MAX</span>
|
||
<span class="c1">#define PY_SSIZE_T_MIN INT_MIN</span>
|
||
<span class="c1">#endif</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>and then using Py_ssize_t in the rest of the code. For
|
||
the tp_as_sequence slots, additional typedefs might
|
||
be necessary; alternatively, by replacing:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">PyObject</span><span class="o">*</span> <span class="n">foo_item</span><span class="p">(</span><span class="n">struct</span> <span class="n">MyType</span><span class="o">*</span> <span class="n">obj</span><span class="p">,</span> <span class="nb">int</span> <span class="n">index</span><span class="p">)</span>
|
||
<span class="p">{</span>
|
||
<span class="o">...</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>with:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">PyObject</span><span class="o">*</span> <span class="n">foo_item</span><span class="p">(</span><span class="n">PyObject</span><span class="o">*</span> <span class="n">_obj</span><span class="p">,</span> <span class="n">Py_ssize_t</span> <span class="n">index</span><span class="p">)</span>
|
||
<span class="p">{</span>
|
||
<span class="n">struct</span> <span class="n">MyType</span><span class="o">*</span> <span class="n">obj</span> <span class="o">=</span> <span class="p">(</span><span class="n">struct</span> <span class="n">MyType</span><span class="o">*</span><span class="p">)</span><span class="n">_obj</span><span class="p">;</span>
|
||
<span class="o">...</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>it becomes possible to drop the cast entirely; the type
|
||
of foo_item should then match the sq_item slot in all
|
||
Python versions.</p>
|
||
<p>If the module should be extended to use Py_ssize_t indices, all usages
|
||
of the type int should be reviewed, to see whether it should be
|
||
changed to Py_ssize_t. The compiler will help in finding the spots,
|
||
but a manual review is still necessary.</p>
|
||
<p>Particular care must be taken for PyArg_ParseTuple calls:
|
||
they all need to be checked for s# and t# converters, and
|
||
PY_SSIZE_T_CLEAN must be defined before including Python.h
|
||
if the calls have been updated accordingly.</p>
|
||
<p>Fredrik Lundh has written a <a class="reference external" href="http://svn.effbot.python-hosting.com/stuff/sandbox/python/ssizecheck.py">scanner</a> which checks the code
|
||
of a C module for usage of APIs whose signature has changed.</p>
|
||
</section>
|
||
<section id="discussion">
|
||
<h2><a class="toc-backref" href="#discussion" role="doc-backlink">Discussion</a></h2>
|
||
<section id="why-not-size-t">
|
||
<h3><a class="toc-backref" href="#why-not-size-t" role="doc-backlink">Why not size_t</a></h3>
|
||
<p>An initial attempt to implement this feature tried to use
|
||
size_t. It quickly turned out that this cannot work: Python
|
||
uses negative indices in many places (to indicate counting
|
||
from the end). Even in places where size_t would be usable,
|
||
too many reformulations of code were necessary, e.g. in
|
||
loops like:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">for</span><span class="p">(</span><span class="n">index</span> <span class="o">=</span> <span class="n">length</span><span class="o">-</span><span class="mi">1</span><span class="p">;</span> <span class="n">index</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">index</span><span class="o">--</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This loop will never terminate if index is changed from
|
||
int to size_t.</p>
|
||
</section>
|
||
<section id="why-not-py-intptr-t">
|
||
<h3><a class="toc-backref" href="#why-not-py-intptr-t" role="doc-backlink">Why not Py_intptr_t</a></h3>
|
||
<p>Conceptually, Py_intptr_t and Py_ssize_t are different things:
|
||
Py_intptr_t needs to be the same size as void*, and Py_ssize_t
|
||
the same size as size_t. These could differ, e.g. on machines
|
||
where pointers have segment and offset. On current flat-address
|
||
space machines, there is no difference, so for all practical
|
||
purposes, Py_intptr_t would have worked as well.</p>
|
||
</section>
|
||
<section id="doesn-t-this-break-much-code">
|
||
<h3><a class="toc-backref" href="#doesn-t-this-break-much-code" role="doc-backlink">Doesn’t this break much code?</a></h3>
|
||
<p>With the changes proposed, code breakage is fairly
|
||
minimal. On a 32-bit system, no code will break, as
|
||
Py_ssize_t is just a typedef for int.</p>
|
||
<p>On a 64-bit system, the compiler will warn in many
|
||
places. If these warnings are ignored, the code will
|
||
continue to work as long as the container sizes don’t
|
||
exceed 2**31, i.e. it will work nearly as well as
|
||
it does currently. There are two exceptions to this
|
||
statement: if the extension module implements the
|
||
sequence protocol, it must be updated, or the calling
|
||
conventions will be wrong. The other exception is
|
||
the places where Py_ssize_t is output through a
|
||
pointer (rather than a return value); this applies
|
||
most notably to codecs and slice objects.</p>
|
||
<p>If the conversion of the code is made, the same code
|
||
can continue to work on earlier Python releases.</p>
|
||
</section>
|
||
<section id="doesn-t-this-consume-too-much-memory">
|
||
<h3><a class="toc-backref" href="#doesn-t-this-consume-too-much-memory" role="doc-backlink">Doesn’t this consume too much memory?</a></h3>
|
||
<p>One might think that using Py_ssize_t in all tuples,
|
||
strings, lists, etc. is a waste of space. This is
|
||
not true, though: on a 32-bit machine, there is no
|
||
change. On a 64-bit machine, the size of many
|
||
containers doesn’t change, e.g.</p>
|
||
<ul class="simple">
|
||
<li>in lists and tuples, a pointer immediately follows
|
||
the ob_size member. This means that the compiler
|
||
currently inserts 4 padding bytes; with the
|
||
change, these padding bytes become part of the size.</li>
|
||
<li>in strings, the ob_shash field follows ob_size.
|
||
This field is of type long, which is a 64-bit
|
||
type on most 64-bit systems (except Win64), so
|
||
the compiler inserts padding before it as well.</li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
<section id="open-issues">
|
||
<h2><a class="toc-backref" href="#open-issues" role="doc-backlink">Open Issues</a></h2>
|
||
<ul>
|
||
<li>Marc-Andre Lemburg commented that complete backwards
|
||
compatibility with existing source code should be
|
||
preserved. In particular, functions that have
|
||
Py_ssize_t* output arguments should continue to run
|
||
correctly even if the callers pass int*.<p>It is not clear what strategy could be used to implement
|
||
that requirement.</p>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0353.rst">https://github.com/python/peps/blob/main/peps/pep-0353.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0353.rst">2023-09-09 17:39:29 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#conversion-guidelines">Conversion guidelines</a></li>
|
||
<li><a class="reference internal" href="#discussion">Discussion</a><ul>
|
||
<li><a class="reference internal" href="#why-not-size-t">Why not size_t</a></li>
|
||
<li><a class="reference internal" href="#why-not-py-intptr-t">Why not Py_intptr_t</a></li>
|
||
<li><a class="reference internal" href="#doesn-t-this-break-much-code">Doesn’t this break much code?</a></li>
|
||
<li><a class="reference internal" href="#doesn-t-this-consume-too-much-memory">Doesn’t this consume too much memory?</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0353.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |