663 lines
46 KiB
HTML
663 lines
46 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 744 – JIT Compilation | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0744/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 744 – JIT Compilation | peps.python.org'>
|
||
<meta property="og:description" content="Earlier this year, an experimental “just-in-time” compiler was merged into CPython’s main development branch. While recent CPython releases have included other substantial internal changes, this addition represents a particularly significant departure f...">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0744/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="Earlier this year, an experimental “just-in-time” compiler was merged into CPython’s main development branch. While recent CPython releases have included other substantial internal changes, this addition represents a particularly significant departure f...">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 744</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 744 – JIT Compilation</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Brandt Bucher &lt;brandt at python.org&gt;,
|
||
Savannah Ostrowski &lt;savannahostrowski at gmail.com&gt;</dd>
|
||
<dt class="field-even">Discussions-To<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://discuss.python.org/t/pep-744-jit-compilation/50756">Discourse thread</a></dd>
|
||
<dt class="field-odd">Status<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Proposal under active discussion and revision">Draft</abbr></dd>
|
||
<dt class="field-even">Type<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Non-normative PEP containing background, guidelines or other information relevant to the Python ecosystem">Informational</abbr></dd>
|
||
<dt class="field-odd">Created<span class="colon">:</span></dt>
|
||
<dd class="field-odd">11-Apr-2024</dd>
|
||
<dt class="field-even">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-even">3.13</dd>
|
||
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><a class="reference external" href="https://discuss.python.org/t/pep-744-jit-compilation/50756" title="Discourse thread">11-Apr-2024</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#support">Support</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a><ul>
|
||
<li><a class="reference internal" href="#debugging">Debugging</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#security-implications">Security Implications</a><ul>
|
||
<li><a class="reference internal" href="#apple-silicon">Apple Silicon</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#how-to-teach-this">How to Teach This</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#rejected-ideas">Rejected Ideas</a><ul>
|
||
<li><a class="reference internal" href="#maintain-it-outside-of-cpython">Maintain it outside of CPython</a></li>
|
||
<li><a class="reference internal" href="#turn-it-on-by-default">Turn it on by default</a></li>
|
||
<li><a class="reference internal" href="#support-multiple-compiler-toolchains">Support multiple compiler toolchains</a></li>
|
||
<li><a class="reference internal" href="#compile-the-base-interpreter-s-bytecode">Compile the base interpreter’s bytecode</a></li>
|
||
<li><a class="reference internal" href="#add-gpu-support">Add GPU support</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a><ul>
|
||
<li><a class="reference internal" href="#speed">Speed</a></li>
|
||
<li><a class="reference internal" href="#memory">Memory</a></li>
|
||
<li><a class="reference internal" href="#dependencies">Dependencies</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#footnotes">Footnotes</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>Earlier this year, an <a class="reference external" href="https://github.com/python/cpython/pull/113465">experimental “just-in-time” compiler</a> was merged into CPython’s
|
||
<code class="docutils literal notranslate"><span class="pre">main</span></code> development branch. While recent CPython releases have included other
|
||
substantial internal changes, this addition represents a particularly
|
||
significant departure from the way CPython has traditionally executed Python
|
||
code. As such, it deserves wider discussion.</p>
|
||
<p>This PEP aims to summarize the design decisions behind this addition, the
|
||
current state of the implementation, and future plans for making the JIT a
|
||
permanent, non-experimental part of CPython. It does <em>not</em> seek to provide a
|
||
comprehensive overview of <em>how</em> the JIT works, instead focusing on the
|
||
particular advantages and disadvantages of the chosen approach, as well as
|
||
answering many questions that have been asked about the JIT since its
|
||
introduction.</p>
|
||
<p>Readers interested in learning more about the new JIT are encouraged to consult
|
||
the following resources:</p>
|
||
<ul class="simple">
|
||
<li>The <a class="reference external" href="https://youtu.be/HxSHIpEQRjs">presentation</a> which first introduced the
|
||
JIT at the 2023 CPython Core Developer Sprint. It includes relevant
|
||
background, a light technical introduction to the “copy-and-patch” technique
|
||
used, and an open discussion of its design amongst the core developers
|
||
present. Slides for this talk can be found on <a class="reference external" href="https://github.com/brandtbucher/brandtbucher/blob/master/2023/10/10/a_jit_compiler_for_cpython.pdf">GitHub</a>.</li>
|
||
<li>The <a class="reference external" href="https://dl.acm.org/doi/10.1145/3485513">open access paper</a> originally
|
||
describing copy-and-patch.</li>
|
||
<li>The <a class="reference external" href="https://sillycross.github.io/2023/05/12/2023-05-12">blog post</a> by the
|
||
paper’s author detailing the implementation of a copy-and-patch JIT compiler
|
||
for Lua. While this is a great low-level explanation of the approach, note
|
||
that it also incorporates other techniques and makes implementation decisions
|
||
that are not particularly relevant to CPython’s JIT.</li>
|
||
<li>The <a class="reference external" href="#reference-implementation">implementation</a> itself.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>Until this point, CPython has always executed Python code by compiling it to
|
||
bytecode, which is interpreted at runtime. This bytecode is a more-or-less
|
||
direct translation of the source code: it is untyped, and largely unoptimized.</p>
|
||
<p>Since the Python 3.11 release, CPython has used a “specializing adaptive
|
||
interpreter” (<a class="pep reference internal" href="../pep-0659/" title="PEP 659 – Specializing Adaptive Interpreter">PEP 659</a>), which <a class="reference external" href="https://youtu.be/shQtrn1v7sQ">rewrites these bytecode instructions in-place</a> with type-specialized versions as they run.
|
||
This new interpreter delivers significant performance improvements, despite the
|
||
fact that its optimization potential is limited by the boundaries of individual
|
||
bytecode instructions. It also collects a wealth of new profiling information:
|
||
the types flowing through a program, the memory layout of particular objects, and
|
||
what paths through the program are being executed the most. In other words,
|
||
<em>what</em> to optimize, and <em>how</em> to optimize it.</p>
|
||
<p>Since the Python 3.12 release, CPython has generated this interpreter from a
|
||
<a class="reference external" href="https://github.com/python/cpython/blob/main/Python/bytecodes.c">C-like domain-specific language</a> (DSL). In
|
||
addition to taming some of the complexity of the new adaptive interpreter, the
|
||
DSL also allows CPython’s maintainers to avoid hand-writing tedious boilerplate
|
||
code in many parts of the interpreter, compiler, and standard library that must
|
||
be kept in sync with the instruction definitions. This ability to generate large
|
||
amounts of runtime infrastructure from a single source of truth is not only
|
||
convenient for maintenance; it also unlocks many possibilities for expanding
|
||
CPython’s execution in new ways. For instance, it makes it feasible to
|
||
automatically generate tables for translating a sequence of instructions into an
|
||
equivalent sequence of smaller “micro-ops”, generate an optimizer for sequences
|
||
of these micro-ops, and even generate an entire second interpreter for executing
|
||
them.</p>
|
||
<p>In fact, since early in the Python 3.13 release cycle, all CPython builds have
|
||
included this exact micro-op translation, optimization, and execution machinery.
|
||
However, it is disabled by default; the overhead of interpreting even optimized
|
||
traces of micro-ops is just too large for most code. Heavier optimization
|
||
probably won’t improve the situation much either, since any efficiency gains
|
||
made by new optimizations will likely be offset by the interpretive overhead of
|
||
even smaller, more complex micro-ops.</p>
|
||
<p>The most obvious strategy to overcome this new bottleneck is to statically
|
||
compile these optimized traces. This presents opportunities to avoid several
|
||
sources of indirection and overhead introduced by interpretation. In particular,
|
||
it allows the removal of dispatch overhead between micro-ops (by replacing a
|
||
generic interpreter with a straight-line sequence of hot code), instruction
|
||
decoding overhead for individual micro-ops (by “burning” the values or addresses
|
||
of arguments, constants, and cached values directly into machine instructions),
|
||
and memory traffic (by moving data off of heap-allocated Python frames and into
|
||
physical hardware registers).</p>
|
||
<p>Since much of this data varies even between identical runs of a program and the
|
||
existing optimization pipeline makes heavy use of runtime profiling information,
|
||
it doesn’t make much sense to compile these traces ahead of time, and doing so would require a
|
||
substantial redesign of the existing specification and micro-op tracing infrastructure
|
||
that has already been implemented. As has been demonstrated for many other dynamic
|
||
languages (<a class="reference external" href="https://www.pypy.org">and even Python itself</a>), the most promising
|
||
approach is to compile the optimized micro-ops “just in time” for execution.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>Despite their reputation, JIT compilers are not magic “go faster” machines.
|
||
Developing and maintaining any sort of optimizing compiler for even a single
|
||
platform, let alone all of CPython’s most popular supported platforms, is an
|
||
incredibly complicated, expensive task. Using an existing compiler framework
|
||
like LLVM can make this task simpler, but only at the cost of introducing heavy
|
||
runtime dependencies and significantly higher JIT compilation overhead.</p>
|
||
<p>It’s clear that successfully compiling Python code at runtime requires not only
|
||
high-quality Python-specific optimizations for the code being run, <em>but also</em>
|
||
quick generation of efficient machine code for the optimized program. The Python
|
||
core development team has the necessary skills and experience for the former (a
|
||
middle-end tightly coupled to the interpreter), and copy-and-patch compilation
|
||
provides an attractive solution for the latter.</p>
|
||
<p>In a nutshell, copy-and-patch allows a high-quality template JIT compiler to be
|
||
generated from the same DSL used to generate the rest of the interpreter. For a
|
||
widely-used, volunteer-driven project like CPython, this benefit cannot be
|
||
overstated: CPython’s maintainers, by merely editing the bytecode definitions,
|
||
will also get the JIT backend updated “for free”, for <em>all</em> JIT-supported
|
||
platforms, at once. This is equally true whether instructions are being added,
|
||
modified, or removed.</p>
|
||
<p>Like the rest of the interpreter, the JIT compiler is generated at build time,
|
||
and has no runtime dependencies. It supports a wide range of platforms (see the
|
||
<a class="reference internal" href="#support">Support</a> section below), and has a comparatively low maintenance burden. In all,
|
||
the current implementation is made up of about 900 lines of build-time Python
|
||
code and 500 lines of runtime C code.</p>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<p>The JIT is currently not part of the default build configuration, and it is
|
||
likely to remain that way for the foreseeable future (though official binaries
|
||
may include it). That said, the JIT will become non-experimental once all of
|
||
the following conditions are met:</p>
|
||
<ol class="arabic simple">
|
||
<li>It provides a meaningful performance improvement for at least one popular
|
||
platform (realistically, on the order of 5%).</li>
|
||
<li>It can be built, distributed, and deployed with minimal disruption.</li>
|
||
<li>The Steering Council, upon request, has determined that it would provide more
|
||
value to the community if enabled than if disabled (considering tradeoffs
|
||
such as maintenance burden, memory usage, or the feasibility of alternate
|
||
designs).</li>
|
||
</ol>
|
||
<p>These criteria should be considered a starting point, and may be expanded over
|
||
time. For example, discussion of this PEP may reveal that additional
|
||
requirements (such as multiple committed maintainers, a security audit,
|
||
documentation in the devguide, support for out-of-process debugging, or a
|
||
runtime option to disable the JIT) should be added to this list.</p>
|
||
<p>Until the JIT is non-experimental, it should <em>not</em> be used in production, and
|
||
may be broken or removed at any time without warning.</p>
|
||
<p>Once the JIT is no longer experimental, it should be treated in much the same
|
||
way as other build options such as <code class="docutils literal notranslate"><span class="pre">--enable-optimizations</span></code> or <code class="docutils literal notranslate"><span class="pre">--with-lto</span></code>.
|
||
It may be a recommended (or even default) option for some platforms, and release
|
||
managers <em>may</em> choose to enable it in official releases.</p>
|
||
<section id="support">
|
||
<h3><a class="toc-backref" href="#support" role="doc-backlink">Support</a></h3>
|
||
<p>The JIT has been developed for all of <a class="pep reference internal" href="../pep-0011/" title="PEP 11 – CPython platform support">PEP 11</a>’s current tier one platforms,
|
||
most of its tier two platforms, and one of its tier three platforms.
|
||
Specifically, CPython’s <code class="docutils literal notranslate"><span class="pre">main</span></code> branch has <a class="reference external" href="https://github.com/python/cpython/blob/main/.github/workflows/jit.yml">CI</a>
|
||
building and testing the JIT for both release and debug builds on:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">aarch64-apple-darwin/clang</span></code></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">aarch64-pc-windows/msvc</span></code> <a class="footnote-reference brackets" href="#untested" id="id1">[1]</a></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">aarch64-unknown-linux-gnu/clang</span></code> <a class="footnote-reference brackets" href="#emulated" id="id2">[2]</a></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">aarch64-unknown-linux-gnu/gcc</span></code> <a class="footnote-reference brackets" href="#emulated" id="id3">[2]</a></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">i686-pc-windows-msvc/msvc</span></code></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">x86_64-apple-darwin/clang</span></code></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">x86_64-pc-windows-msvc/msvc</span></code></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">x86_64-unknown-linux-gnu/clang</span></code></li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">x86_64-unknown-linux-gnu/gcc</span></code></li>
|
||
</ul>
|
||
<p>It’s worth noting that some platforms, even future tier one platforms, may never
|
||
gain JIT support. This can be for a variety of reasons, including insufficient
|
||
LLVM support (<code class="docutils literal notranslate"><span class="pre">powerpc64le-unknown-linux-gnu/gcc</span></code>), inherent limitations of
|
||
the platform (<code class="docutils literal notranslate"><span class="pre">wasm32-unknown-wasi/clang</span></code>), or lack of developer interest
|
||
(<code class="docutils literal notranslate"><span class="pre">x86_64-unknown-freebsd/clang</span></code>).</p>
|
||
<p>Once JIT support for a platform is added (meaning, the JIT builds successfully
|
||
without displaying warnings to the user), it should be treated in much the same
|
||
way as <a class="pep reference internal" href="../pep-0011/" title="PEP 11 – CPython platform support">PEP 11</a> prescribes: it should have reliable CI/buildbots, and JIT
|
||
failures on tier one and tier two platforms should block releases. Though it’s
|
||
not necessary to update <a class="pep reference internal" href="../pep-0011/" title="PEP 11 – CPython platform support">PEP 11</a> to specify JIT support, it may be helpful to
|
||
do so anyway. Otherwise, a list of supported platforms should be maintained in
|
||
<a class="reference external" href="https://github.com/python/cpython/blob/main/Tools/jit/README.md">the JIT’s README</a>.</p>
|
||
<p>Since it should always be possible to build CPython without the JIT, removing
|
||
JIT support for a platform should <em>not</em> be considered a backwards-incompatible
|
||
change. However, if it is reasonable to do so, the normal deprecation process
|
||
should be followed as outlined in <a class="pep reference internal" href="../pep-0387/" title="PEP 387 – Backwards Compatibility Policy">PEP 387</a>.</p>
|
||
<p>The JIT’s build-time dependencies may be changed between releases, within
|
||
reason.</p>
|
||
</section>
|
||
</section>
|
||
<section id="backwards-compatibility">
|
||
<h2><a class="toc-backref" href="#backwards-compatibility" role="doc-backlink">Backwards Compatibility</a></h2>
|
||
<p>Due to the fact that the current interpreter and the JIT backend are both
|
||
generated from the same specification, the behavior of Python code should be
|
||
completely unchanged. In practice, observable differences that have been found
|
||
and fixed during testing have tended to be bugs in the existing micro-op
|
||
translation and optimization stages, rather than bugs in the copy-and-patch
|
||
step.</p>
|
||
<section id="debugging">
|
||
<h3><a class="toc-backref" href="#debugging" role="doc-backlink">Debugging</a></h3>
|
||
<p>Tools that profile and debug Python code will continue to work fine. This
|
||
includes in-process tools that use Python-provided functionality (like
|
||
<code class="docutils literal notranslate"><span class="pre">sys.monitoring</span></code>, <code class="docutils literal notranslate"><span class="pre">sys.settrace</span></code>, or <code class="docutils literal notranslate"><span class="pre">sys.setprofile</span></code>), as well as
|
||
out-of-process tools that walk Python frames from the interpreter state.</p>
|
||
<p>However, it appears that profilers and debuggers <em>for C code</em> are currently
|
||
unable to trace back through JIT frames. Working with leaf frames is possible
|
||
(this is how the JIT itself is debugged), though it is of limited utility due to
|
||
the absence of proper debugging information for JIT frames.</p>
|
||
<p>Since the code templates emitted by the JIT are compiled by Clang, it <em>may</em> be
|
||
possible to allow JIT frames to be traced through by simply modifying the
|
||
compiler flags to use frame pointers more carefully. It may also be possible to
|
||
harvest and emit the debugging information produced by Clang. Neither of these
|
||
ideas has been explored very deeply.</p>
|
||
<p>While this is an issue that <em>should</em> be fixed, fixing it is not a particularly
|
||
high priority at this time. This is probably a problem best explored by somebody
|
||
with more domain expertise in collaboration with those maintaining the JIT, who
|
||
have little experience with the inner workings of these tools.</p>
|
||
</section>
|
||
</section>
|
||
<section id="security-implications">
|
||
<h2><a class="toc-backref" href="#security-implications" role="doc-backlink">Security Implications</a></h2>
|
||
<p>This JIT, like any JIT, produces large amounts of executable data at runtime.
|
||
This introduces a potential new attack surface to CPython, since a malicious
|
||
actor capable of influencing the contents of this data is therefore capable of
|
||
executing arbitrary code. This is a <a class="reference external" href="https://en.wikipedia.org/wiki/Just-in-time_compilation#Security">well-known vulnerability</a> of JIT
|
||
compilers.</p>
|
||
<p>In order to mitigate this risk, the JIT has been written with best practices in
|
||
mind. In particular, the data in question is not exposed by the JIT compiler to
|
||
other parts of the program while it remains writable, and at <em>no</em> point is the
|
||
data both <a class="reference external" href="https://en.wikipedia.org/wiki/W%5EX">writable <em>and</em> executable</a>.</p>
|
||
<p>The nature of template-based JITs also seriously limits the kinds of code that
|
||
can be generated, further reducing the likelihood of a successful exploit. As an
|
||
additional precaution, the templates themselves are stored in static, read-only
|
||
memory.</p>
|
||
<p>However, it would be naive to assume that no possible vulnerabilities exist in
|
||
the JIT, especially at this early stage. The author is not a security expert,
|
||
but is available to join or work closely with the Python Security Response Team
|
||
to triage and fix security issues as they arise.</p>
|
||
<section id="apple-silicon">
|
||
<h3><a class="toc-backref" href="#apple-silicon" role="doc-backlink">Apple Silicon</a></h3>
|
||
<p>Though difficult to test without actually signing and packaging a macOS release,
|
||
it <em>appears</em> that macOS releases should <a class="reference external" href="https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon#Enable-the-JIT-Entitlement-for-the-Hardened-Runtime">enable the JIT Entitlement for the
|
||
Hardened Runtime</a>.</p>
|
||
<p>This shouldn’t make <em>installing</em> Python any harder, but may add additional steps
|
||
for release managers to perform.</p>
|
||
</section>
|
||
</section>
|
||
<section id="how-to-teach-this">
|
||
<h2><a class="toc-backref" href="#how-to-teach-this" role="doc-backlink">How to Teach This</a></h2>
|
||
<p>Choose the sections that best describe you:</p>
|
||
<ul class="simple">
|
||
<li><strong>If you are a Python programmer or end user…</strong><ul>
|
||
<li>…nothing changes for you. Nobody should be distributing JIT-enabled
|
||
CPython interpreters to you while it is still an experimental feature. Once
|
||
it is non-experimental, you will probably notice slightly better performance
|
||
and slightly higher memory usage. You shouldn’t be able to observe any other
|
||
changes.</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>If you maintain third-party packages…</strong><ul>
|
||
<li>…nothing changes for you. There are no API or ABI changes, and the JIT is
|
||
not exposed to third-party code. You shouldn’t need to change your CI
|
||
matrix, and you shouldn’t be able to observe differences in the way your
|
||
packages work when the JIT is enabled.</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>If you profile or debug Python code…</strong><ul>
|
||
<li>…nothing changes for you. All Python profiling and tracing functionality
|
||
remains.</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>If you profile or debug C code…</strong><ul>
|
||
<li>…currently, the ability to trace <em>through</em> JIT frames is limited. This may
|
||
cause issues if you need to observe the entire C call stack, rather than
|
||
just “leaf” frames. See the <a class="reference internal" href="#debugging">Debugging</a> section above for more information.</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>If you compile your own Python interpreter…</strong><ul>
|
||
<li>…if you don’t wish to build the JIT, you can simply ignore it. Otherwise,
|
||
you will need to <a class="reference external" href="https://github.com/python/cpython/blob/main/Tools/jit/README.md">install a compatible version of LLVM</a>, and
|
||
pass the appropriate flag to the build scripts. Your build may take up to a
|
||
minute longer. Note that the JIT should <em>not</em> be distributed to end users or
|
||
used in production while it is still in the experimental phase.</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>If you’re a maintainer of CPython (or a fork of CPython)…</strong><ul>
|
||
<li><strong>…and you change the bytecode definitions or the main interpreter
|
||
loop…</strong><ul>
|
||
<li>…in general, the JIT shouldn’t be much of an inconvenience to you
|
||
(depending on what you’re trying to do). The micro-op interpreter isn’t
|
||
going anywhere, and still offers a debugging experience similar to what
|
||
the main bytecode interpreter provides today. There is a moderate likelihood
|
||
that larger changes to the interpreter (such as adding new local
|
||
variables, changing error handling and deoptimization logic, or changing
|
||
the micro-op format) will require changes to the C template used to
|
||
generate the JIT, which is meant to mimic the main interpreter loop. You
|
||
may also occasionally just get unlucky and break JIT code generation,
|
||
which will require you to either modify the Python build scripts yourself,
|
||
or solicit the help of somebody more familiar with them (see below).</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>…and you work on the JIT itself…</strong><ul>
|
||
<li>…you hopefully already have a decent idea of what you’re getting
|
||
yourself into. You will be regularly modifying the Python build scripts,
|
||
the C template used to generate the JIT, and the C code that actually
|
||
makes up the runtime portion of the JIT. You will also be dealing with
|
||
all sorts of crashes, stepping over machine code in a debugger, staring at
|
||
COFF/ELF/Mach-O dumps, developing on a wide range of platforms, and
|
||
generally being the point of contact for the people changing the bytecode
|
||
when CI starts failing on their PRs (see above). Ideally, you’re at least
|
||
<em>familiar</em> with assembly, have taken a couple of courses with “compilers”
|
||
in their name, and have read a blog post or two about linkers.</li>
|
||
</ul>
|
||
</li>
|
||
<li><strong>…and you maintain other parts of CPython…</strong><ul>
|
||
<li>…nothing changes for you. You shouldn’t need to develop locally with JIT
|
||
builds. If you choose to do so (for example, to help reproduce and triage
|
||
JIT issues), your builds may take up to a minute longer each time the
|
||
relevant files are modified.</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
<section id="reference-implementation">
|
||
<h2><a class="toc-backref" href="#reference-implementation" role="doc-backlink">Reference Implementation</a></h2>
|
||
<p>Key parts of the implementation include:</p>
|
||
<ul class="simple">
|
||
<li><a class="reference external" href="https://github.com/python/cpython/blob/main/Tools/jit/README.md"><code class="docutils literal notranslate"><span class="pre">Tools/jit/README.md</span></code></a>: Instructions for how to build the JIT.</li>
|
||
<li><a class="reference external" href="https://github.com/python/cpython/blob/main/Python/jit.c"><code class="docutils literal notranslate"><span class="pre">Python/jit.c</span></code></a>: The entire runtime portion of the JIT compiler.</li>
|
||
<li><a class="reference external" href="https://gist.github.com/brandtbucher/9d3cc396dcb15d13f7e971175e987f3a"><code class="docutils literal notranslate"><span class="pre">jit_stencils.h</span></code></a>: An example of the JIT’s generated templates.</li>
|
||
<li><a class="reference external" href="https://github.com/python/cpython/blob/main/Tools/jit/template.c"><code class="docutils literal notranslate"><span class="pre">Tools/jit/template.c</span></code></a>: The code which is compiled to produce the JIT’s templates.</li>
|
||
<li><a class="reference external" href="https://github.com/python/cpython/blob/main/Tools/jit/_targets.py"><code class="docutils literal notranslate"><span class="pre">Tools/jit/_targets.py</span></code></a>: The code to compile and parse the templates at build time.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="rejected-ideas">
|
||
<h2><a class="toc-backref" href="#rejected-ideas" role="doc-backlink">Rejected Ideas</a></h2>
|
||
<section id="maintain-it-outside-of-cpython">
|
||
<h3><a class="toc-backref" href="#maintain-it-outside-of-cpython" role="doc-backlink">Maintain it outside of CPython</a></h3>
|
||
<p>While it is <em>probably</em> possible to maintain the JIT outside of CPython, its
|
||
implementation is tied tightly enough to the rest of the interpreter that
|
||
keeping it up-to-date would probably be more difficult than actually developing
|
||
the JIT itself. Additionally, contributors working on the existing micro-op
|
||
definitions and optimizations would need to modify and build two separate
|
||
projects to measure the effects of their changes under the JIT (whereas today,
|
||
infrastructure exists to do this automatically for any proposed change).</p>
|
||
<p>Releases of the separate “JIT” project would probably also need to correspond to
|
||
specific CPython pre-releases and patch releases, depending on exactly what
|
||
changes are present. Individual CPython commits between releases likely wouldn’t
|
||
have corresponding JIT releases at all, further complicating debugging efforts
|
||
(such as bisection to find breaking changes upstream).</p>
|
||
<p>Since the JIT is already quite stable, and the ultimate goal is for it to be a
|
||
non-experimental part of CPython, keeping it in <code class="docutils literal notranslate"><span class="pre">main</span></code> seems to be the best
|
||
path forward. With that said, the relevant code is organized in such a way that
|
||
the JIT can be easily “deleted” if it does not end up meeting its goals.</p>
|
||
</section>
|
||
<section id="turn-it-on-by-default">
|
||
<h3><a class="toc-backref" href="#turn-it-on-by-default" role="doc-backlink">Turn it on by default</a></h3>
|
||
<p>On the other hand, some have suggested that the JIT should be enabled by default
|
||
in its current form.</p>
|
||
<p>Again, it is important to remember that a JIT is not a magic “go faster”
|
||
machine; currently, the JIT is about as fast as the existing specializing
|
||
interpreter. This may sound underwhelming, but it is actually a fairly
|
||
significant achievement, and it’s the main reason why this approach was
|
||
considered viable enough to be merged into <code class="docutils literal notranslate"><span class="pre">main</span></code> for further development.</p>
|
||
<p>While the JIT provides significant gains over the existing micro-op interpreter,
|
||
it isn’t yet a clear win when always enabled (especially considering its
|
||
increased memory consumption and additional build-time dependencies). That’s the
|
||
purpose of this PEP: to clarify expectations about the objective criteria that
|
||
should be met in order to “flip the switch”.</p>
|
||
<p>At least for now, having this in <code class="docutils literal notranslate"><span class="pre">main</span></code>, but off by default, seems to be a
|
||
good compromise between always turning it on and not having it available at all.</p>
|
||
</section>
|
||
<section id="support-multiple-compiler-toolchains">
|
||
<h3><a class="toc-backref" href="#support-multiple-compiler-toolchains" role="doc-backlink">Support multiple compiler toolchains</a></h3>
|
||
<p>Clang is specifically needed because it’s the only C compiler with support for
|
||
guaranteed tail calls (<a class="reference external" href="https://clang.llvm.org/docs/AttributeReference.html#musttail"><code class="docutils literal notranslate"><span class="pre">musttail</span></code></a>), which are required by CPython’s
|
||
<a class="reference external" href="https://en.wikipedia.org/wiki/Continuation-passing_style#Tail_calls">continuation-passing-style</a> approach
|
||
to JIT compilation. Without it, the tail-recursive calls between templates could
|
||
result in unbounded C stack growth (and eventual overflow).</p>
|
||
<p>Since LLVM also includes other functionalities required by the JIT build process
|
||
(namely, utilities for object file parsing and disassembly), and additional
|
||
toolchains introduce additional testing and maintenance burden, it’s convenient
|
||
to only support one major version of one toolchain at this time.</p>
|
||
</section>
|
||
<section id="compile-the-base-interpreter-s-bytecode">
|
||
<h3><a class="toc-backref" href="#compile-the-base-interpreter-s-bytecode" role="doc-backlink">Compile the base interpreter’s bytecode</a></h3>
|
||
<p>Most of the prior art for copy-and-patch uses it as a fast baseline JIT, whereas
|
||
CPython’s JIT is using the technique to compile optimized micro-op traces.</p>
|
||
<p>In practice, the new JIT currently sits somewhere between the “baseline” and
|
||
“optimizing” compiler tiers of other dynamic language runtimes. This is because
|
||
CPython uses its specializing adaptive interpreter to collect runtime profiling
|
||
information, which is used to detect and optimize “hot” paths through the code.
|
||
This step is carried out using self-modifying code, a technique which is much
|
||
more difficult to implement with a JIT compiler.</p>
|
||
<p>While it’s <em>possible</em> to compile normal bytecode using copy-and-patch (in fact,
|
||
early prototypes predated the micro-op interpreter and did exactly this), it
|
||
just doesn’t seem to provide as much optimization potential as the more granular
|
||
micro-op format.</p>
|
||
</section>
|
||
<section id="add-gpu-support">
|
||
<h3><a class="toc-backref" href="#add-gpu-support" role="doc-backlink">Add GPU support</a></h3>
|
||
<p>The JIT is currently CPU-only. It does not, for example, offload NumPy array
|
||
computations to CUDA GPUs, as JITs like <a class="reference external" href="https://numba.pydata.org/numba-doc/latest/cuda/overview.html">Numba</a> do.</p>
|
||
<p>There is already a rich ecosystem of tools for accelerating these sorts of
|
||
specialized tasks, and CPython’s JIT is not intended to replace them. Instead,
|
||
it is meant to improve the performance of general-purpose Python code, which is
|
||
less likely to benefit from deeper GPU integration.</p>
|
||
</section>
|
||
</section>
|
||
<section id="open-issues">
|
||
<h2><a class="toc-backref" href="#open-issues" role="doc-backlink">Open Issues</a></h2>
|
||
<section id="speed">
|
||
<h3><a class="toc-backref" href="#speed" role="doc-backlink">Speed</a></h3>
|
||
<p>Currently, the JIT is <a class="reference external" href="https://github.com/faster-cpython/benchmarking-public/blob/main/configs.svg">about as fast as the existing specializing interpreter</a>
|
||
on most platforms. Improving this is obviously a top priority at this point,
|
||
since providing a significant performance gain is the entire motivation for
|
||
having a JIT at all. A number of proposed improvements are already underway, and
|
||
this ongoing work is being tracked in <a class="reference external" href="https://github.com/python/cpython/issues/115802">GH-115802</a>.</p>
|
||
</section>
|
||
<section id="memory">
|
||
<h3><a class="toc-backref" href="#memory" role="doc-backlink">Memory</a></h3>
|
||
<p>Because it allocates additional memory for executable machine code, the JIT does
|
||
use more memory than the existing interpreter at runtime. According to the
|
||
official benchmarks, the JIT currently uses about <a class="reference external" href="https://github.com/faster-cpython/benchmarking-public/blob/main/memory_configs.svg">10-20% more memory than the
|
||
base interpreter</a>.
|
||
The upper end of this range is due to <code class="docutils literal notranslate"><span class="pre">aarch64-apple-darwin</span></code>, which has larger
|
||
page sizes (and thus, a larger minimum allocation granularity).</p>
|
||
<p>However, these numbers should be taken with a grain of salt, as the benchmarks
|
||
themselves don’t actually have a very high baseline of memory usage. Since they
|
||
have a higher ratio of code to data, the JIT’s memory overhead is more
|
||
pronounced than it would be in a typical workload where memory pressure is more
|
||
likely to be a real concern.</p>
|
||
<p>Not much effort has been put into optimizing the JIT’s memory usage yet, so
|
||
these numbers likely represent a maximum that will be reduced over time.
|
||
Improving this is a medium priority, and is being tracked in <a class="reference external" href="https://github.com/python/cpython/issues/116017">GH-116017</a>. We may consider
|
||
exposing configurable parameters for limiting memory consumption in the
|
||
future, but no official APIs will be exposed until the JIT meets the
|
||
requirements to be considered non-experimental.</p>
|
||
<p>Earlier versions of the JIT had a more complicated memory allocation scheme
|
||
which imposed a number of fragile limitations on the size and layout of the
|
||
emitted code, and significantly bloated the memory footprint of the Python
|
||
executable. These issues are no longer present in the current design.</p>
|
||
</section>
|
||
<section id="dependencies">
|
||
<h3><a class="toc-backref" href="#dependencies" role="doc-backlink">Dependencies</a></h3>
|
||
<p>At the time of writing, the JIT has a build-time dependency on LLVM. LLVM
|
||
is used to compile individual micro-op instructions into blobs of machine code,
|
||
which are then linked together to form the JIT’s templates. These templates are
|
||
used to build CPython itself. The JIT has no runtime dependency on LLVM and is
|
||
therefore not at all exposed as a dependency to end users.</p>
|
||
<p>Building the JIT adds between 3 and 60 seconds to the build process, depending
|
||
on platform. It is only rebuilt whenever the generated files become out-of-date,
|
||
so only those who are actively developing the main interpreter loop will be
|
||
rebuilding it with any frequency.</p>
|
||
<p>Unlike many other generated files in CPython, the JIT’s generated files are not
|
||
tracked by Git. This is because they contain compiled binary code templates
|
||
specific to not only the host platform, but also the current build configuration
|
||
for that platform. As such, hosting them would require a significant engineering
|
||
effort in order to build and host dozens of large binary files for each commit
|
||
that changes the generated code. While perhaps feasible, this is not a priority,
|
||
since installing the required tools is not prohibitively difficult for most
|
||
people building CPython, and the build step is not particularly time-consuming.</p>
|
||
<p>Since some still remain interested in this possibility, discussion is being
|
||
tracked in <a class="reference external" href="https://github.com/python/cpython/issues/115869">GH-115869</a>.</p>
|
||
</section>
|
||
</section>
|
||
<section id="footnotes">
|
||
<h2><a class="toc-backref" href="#footnotes" role="doc-backlink">Footnotes</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="untested" role="doc-footnote">
|
||
<dt class="label">[<a href="#id1">1</a>]</dt>
|
||
<dd>Due to lack of available hardware, the JIT is built, but not
|
||
tested, for this platform.</aside>
|
||
<aside class="footnote brackets" id="emulated" role="doc-footnote">
|
||
<dt class="label">[2]<em> (<a href="#id2">1</a>, <a href="#id3">2</a>) </em></dt>
|
||
<dd>Due to lack of available hardware, the JIT is built using
|
||
cross-compilation and tested using hardware emulation for this platform. Some
|
||
tests are skipped because emulation causes them to fail. However, the JIT has
|
||
been successfully built and tested for this platform on non-emulated
|
||
hardware.</aside>
|
||
</aside>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document is placed in the public domain or under the CC0-1.0-Universal
|
||
license, whichever is more permissive.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0744.rst">https://github.com/python/peps/blob/main/peps/pep-0744.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0744.rst">2024-09-12 20:31:23 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#support">Support</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a><ul>
|
||
<li><a class="reference internal" href="#debugging">Debugging</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#security-implications">Security Implications</a><ul>
|
||
<li><a class="reference internal" href="#apple-silicon">Apple Silicon</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#how-to-teach-this">How to Teach This</a></li>
|
||
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
||
<li><a class="reference internal" href="#rejected-ideas">Rejected Ideas</a><ul>
|
||
<li><a class="reference internal" href="#maintain-it-outside-of-cpython">Maintain it outside of CPython</a></li>
|
||
<li><a class="reference internal" href="#turn-it-on-by-default">Turn it on by default</a></li>
|
||
<li><a class="reference internal" href="#support-multiple-compiler-toolchains">Support multiple compiler toolchains</a></li>
|
||
<li><a class="reference internal" href="#compile-the-base-interpreter-s-bytecode">Compile the base interpreter’s bytecode</a></li>
|
||
<li><a class="reference internal" href="#add-gpu-support">Add GPU support</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a><ul>
|
||
<li><a class="reference internal" href="#speed">Speed</a></li>
|
||
<li><a class="reference internal" href="#memory">Memory</a></li>
|
||
<li><a class="reference internal" href="#dependencies">Dependencies</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#footnotes">Footnotes</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0744.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |