696 lines
56 KiB
HTML
696 lines
56 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 574 – Pickle protocol 5 with out-of-band data | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-0574/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 574 – Pickle protocol 5 with out-of-band data | peps.python.org'>
|
||
<meta property="og:description" content="This PEP proposes to standardize a new pickle protocol version, and accompanying APIs to take full advantage of it:">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-0574/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="This PEP proposes to standardize a new pickle protocol version, and accompanying APIs to take full advantage of it:">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 574</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 574 – Pickle protocol 5 with out-of-band data</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Antoine Pitrou &lt;solipsis at pitrou.net&gt;</dd>
|
||
<dt class="field-even">BDFL-Delegate<span class="colon">:</span></dt>
|
||
<dd class="field-even">Alyssa Coghlan</dd>
|
||
<dt class="field-odd">Status<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-even">Type<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-odd">Created<span class="colon">:</span></dt>
|
||
<dd class="field-odd">23-Mar-2018</dd>
|
||
<dt class="field-even">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-even">3.8</dd>
|
||
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-odd">28-Mar-2018, 30-Apr-2019</dd>
|
||
<dt class="field-even">Resolution<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://mail.python.org/pipermail/python-dev/2019-May/157284.html">Python-Dev message</a></dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#example">Example</a></li>
|
||
<li><a class="reference internal" href="#producer-api">Producer API</a><ul>
|
||
<li><a class="reference internal" href="#picklebuffer-objects">PickleBuffer objects</a></li>
|
||
<li><a class="reference internal" href="#buffer-requirements">Buffer requirements</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#consumer-api">Consumer API</a></li>
|
||
<li><a class="reference internal" href="#protocol-changes">Protocol changes</a></li>
|
||
<li><a class="reference internal" href="#side-effects">Side effects</a><ul>
|
||
<li><a class="reference internal" href="#improved-in-band-performance">Improved in-band performance</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#caveats">Caveats</a><ul>
|
||
<li><a class="reference internal" href="#mutability">Mutability</a></li>
|
||
<li><a class="reference internal" href="#data-sharing">Data sharing</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rejected-alternatives">Rejected alternatives</a><ul>
|
||
<li><a class="reference internal" href="#using-the-existing-persistent-load-interface">Using the existing persistent load interface</a></li>
|
||
<li><a class="reference internal" href="#passing-a-sequence-of-buffers-in-buffer-callback">Passing a sequence of buffers in <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code></a></li>
|
||
<li><a class="reference internal" href="#allow-serializing-a-picklebuffer-in-protocol-4-and-earlier">Allow serializing a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> in protocol 4 and earlier</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#implementation">Implementation</a></li>
|
||
<li><a class="reference internal" href="#related-work">Related work</a></li>
|
||
<li><a class="reference internal" href="#acknowledgements">Acknowledgements</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP proposes to standardize a new pickle protocol version, and
|
||
accompanying APIs to take full advantage of it:</p>
|
||
<ol class="arabic simple">
|
||
<li>A new pickle protocol version (5) to cover the extra metadata needed
|
||
for out-of-band data buffers.</li>
|
||
<li>A new <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> type for <code class="docutils literal notranslate"><span class="pre">__reduce_ex__</span></code> implementations
|
||
to return out-of-band data buffers.</li>
|
||
<li>A new <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code> parameter when pickling, to handle out-of-band
|
||
data buffers.</li>
|
||
<li>A new <code class="docutils literal notranslate"><span class="pre">buffers</span></code> parameter when unpickling to provide out-of-band data
|
||
buffers.</li>
|
||
</ol>
|
||
<p>The PEP guarantees unchanged behaviour for anyone not using the new APIs.</p>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>The pickle protocol was originally designed in 1995 for on-disk persistency
|
||
of arbitrary Python objects. The performance of a 1995-era storage medium
|
||
probably made it irrelevant to focus on performance metrics such as
|
||
use of RAM bandwidth when copying temporary data before writing it to disk.</p>
|
||
<p>Nowadays the pickle protocol sees a growing use in applications where most
|
||
of the data isn’t ever persisted to disk (or, when it is, it uses a portable
|
||
format instead of a Python-specific one). Instead, pickle is being used to transmit
|
||
data and commands from one process to another, either on the same machine
|
||
or on multiple machines. Those applications will sometimes deal with very
|
||
large data (such as Numpy arrays or Pandas dataframes) that need to be
|
||
transferred around. For those applications, pickle is currently
|
||
wasteful as it imposes spurious memory copies of the data being serialized.</p>
|
||
<p>As a matter of fact, the standard <code class="docutils literal notranslate"><span class="pre">multiprocessing</span></code> module uses pickle
|
||
for serialization, and therefore also suffers from this problem when
|
||
sending large data to another process.</p>
|
||
<p>Third-party Python libraries, such as Dask <a class="footnote-reference brackets" href="#dask" id="id1">[1]</a>, PyArrow <a class="footnote-reference brackets" href="#pyarrow" id="id2">[4]</a>
|
||
and IPyParallel <a class="footnote-reference brackets" href="#ipyparallel" id="id3">[3]</a>, have started implementing alternative
|
||
serialization schemes with the explicit goal of avoiding copies on large
|
||
data. Implementing a new serialization scheme is difficult and often
|
||
leads to reduced generality (since many Python objects support pickle
|
||
but not the new serialization scheme). Falling back on pickle for
|
||
unsupported types is an option, but then you get back the spurious
|
||
memory copies you wanted to avoid in the first place. For example,
|
||
<code class="docutils literal notranslate"><span class="pre">dask</span></code> is able to avoid memory copies for Numpy arrays and
|
||
built-in containers thereof (such as lists or dicts containing Numpy
|
||
arrays), but if a large Numpy array is an attribute of a user-defined
|
||
object, <code class="docutils literal notranslate"><span class="pre">dask</span></code> will serialize the user-defined object as a pickle
|
||
stream, leading to memory copies.</p>
|
||
<p>The common theme of these third-party serialization efforts is to generate
|
||
a stream of object metadata (which contains pickle-like information about
|
||
the objects being serialized) and a separate stream of zero-copy buffer
|
||
objects for the payloads of large objects. Note that, in this scheme,
|
||
small objects such as ints can be dumped together with the metadata
|
||
stream. Refinements can include opportunistic compression of large data
|
||
depending on its type and layout, like <code class="docutils literal notranslate"><span class="pre">dask</span></code> does.</p>
|
||
<p>This PEP aims to make <code class="docutils literal notranslate"><span class="pre">pickle</span></code> usable in a way where large data is handled
|
||
as a separate stream of zero-copy buffers, letting the application handle
|
||
those buffers optimally.</p>
|
||
</section>
|
||
<section id="example">
|
||
<h2><a class="toc-backref" href="#example" role="doc-backlink">Example</a></h2>
|
||
<p>To keep the example simple and avoid requiring knowledge of third-party
|
||
libraries, we will focus here on a bytearray object (but the issue is
|
||
conceptually the same with more sophisticated objects such as Numpy arrays).
|
||
Like most objects, the bytearray object isn’t immediately understood by
|
||
the pickle module and must therefore specify its decomposition scheme.</p>
|
||
<p>Here is how a bytearray object currently decomposes for pickling:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">b</span><span class="o">.</span><span class="n">__reduce_ex__</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
|
||
<span class="go">(&lt;class 'bytearray'&gt;, (b'abc',), None)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This is because the <code class="docutils literal notranslate"><span class="pre">bytearray.__reduce_ex__</span></code> implementation reads
|
||
morally as follows:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">bytearray</span><span class="p">:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__reduce_ex__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">protocol</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">protocol</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="bp">self</span><span class="p">),),</span> <span class="kc">None</span>
|
||
<span class="c1"># Legacy code for earlier protocols omitted</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>In turn it produces the following pickle code:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pickletools</span><span class="o">.</span><span class="n">dis</span><span class="p">(</span><span class="n">pickletools</span><span class="o">.</span><span class="n">optimize</span><span class="p">(</span><span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="mi">4</span><span class="p">)))</span>
|
||
<span class="go"> 0: \x80 PROTO 4</span>
|
||
<span class="go"> 2: \x95 FRAME 30</span>
|
||
<span class="go"> 11: \x8c SHORT_BINUNICODE 'builtins'</span>
|
||
<span class="go"> 21: \x8c SHORT_BINUNICODE 'bytearray'</span>
|
||
<span class="go"> 32: \x93 STACK_GLOBAL</span>
|
||
<span class="go"> 33: C SHORT_BINBYTES b'abc'</span>
|
||
<span class="go"> 38: \x85 TUPLE1</span>
|
||
<span class="go"> 39: R REDUCE</span>
|
||
<span class="go"> 40: . STOP</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>(the call to <code class="docutils literal notranslate"><span class="pre">pickletools.optimize</span></code> above is only meant to make the
|
||
pickle stream more readable by removing the MEMOIZE opcodes)</p>
|
||
<p>We can notice several things about the bytearray’s payload (the sequence
|
||
of bytes <code class="docutils literal notranslate"><span class="pre">b'abc'</span></code>):</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytearray.__reduce_ex__</span></code> produces a first copy by instantiating a
|
||
new bytes object from the bytearray’s data.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">pickle.dumps</span></code> produces a second copy when inserting the contents of
|
||
that bytes object into the pickle stream, after the SHORT_BINBYTES opcode.</li>
|
||
<li>Furthermore, when deserializing the pickle stream, a temporary bytes
|
||
object is created when the SHORT_BINBYTES opcode is encountered (inducing
|
||
a data copy).</li>
|
||
</ul>
|
||
<p>What we really want is something like the following:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytearray.__reduce_ex__</span></code> produces a <em>view</em> of the bytearray’s data.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">pickle.dumps</span></code> doesn’t try to copy that data into the pickle stream
|
||
but instead passes the buffer view to its caller (which can decide on the
|
||
most efficient handling of that buffer).</li>
|
||
<li>When deserializing, <code class="docutils literal notranslate"><span class="pre">pickle.loads</span></code> takes the pickle stream and the
|
||
buffer view separately, and passes the buffer view directly to the
|
||
bytearray constructor.</li>
|
||
</ul>
|
||
<p>We see that several conditions are required for the above to work:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">__reduce__</span></code> or <code class="docutils literal notranslate"><span class="pre">__reduce_ex__</span></code> must be able to return <em>something</em>
|
||
that indicates a serializable no-copy buffer view.</li>
|
||
<li>The pickle protocol must be able to represent references to such buffer
|
||
views, instructing the unpickler that it may have to get the actual buffer
|
||
out of band.</li>
|
||
<li>The <code class="docutils literal notranslate"><span class="pre">pickle.Pickler</span></code> API must provide its caller with a way
|
||
to receive such buffer views while serializing.</li>
|
||
<li>The <code class="docutils literal notranslate"><span class="pre">pickle.Unpickler</span></code> API must similarly allow its caller to provide
|
||
the buffer views required for deserialization.</li>
|
||
<li>For compatibility, the pickle protocol must also be able to contain direct
|
||
serializations of such buffer views, such that current uses of the <code class="docutils literal notranslate"><span class="pre">pickle</span></code>
|
||
API don’t have to be modified if they are not concerned with memory copies.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="producer-api">
|
||
<h2><a class="toc-backref" href="#producer-api" role="doc-backlink">Producer API</a></h2>
|
||
<p>We are introducing a new type <code class="docutils literal notranslate"><span class="pre">pickle.PickleBuffer</span></code> which can be
|
||
instantiated from any buffer-supporting object, and is specifically meant
|
||
to be returned from <code class="docutils literal notranslate"><span class="pre">__reduce__</span></code> implementations:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">bytearray</span><span class="p">:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__reduce_ex__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">protocol</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">protocol</span> <span class="o">&gt;=</span> <span class="mi">5</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="p">(</span><span class="n">PickleBuffer</span><span class="p">(</span><span class="bp">self</span><span class="p">),),</span> <span class="kc">None</span>
|
||
<span class="c1"># Legacy code for earlier protocols omitted</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> is a simple wrapper that doesn’t have all the memoryview
|
||
semantics and functionality, but is specifically recognized by the <code class="docutils literal notranslate"><span class="pre">pickle</span></code>
|
||
module if protocol 5 or higher is enabled. It is an error to try to
|
||
serialize a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> with pickle protocol version 4 or earlier.</p>
|
||
<p>Only the raw <em>data</em> of the <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> will be considered by the
|
||
<code class="docutils literal notranslate"><span class="pre">pickle</span></code> module. Any type-specific <em>metadata</em> (such as shapes or
|
||
datatype) must be returned separately by the type’s <code class="docutils literal notranslate"><span class="pre">__reduce__</span></code>
|
||
implementation, as is already the case.</p>
|
||
<section id="picklebuffer-objects">
|
||
<h3><a class="toc-backref" href="#picklebuffer-objects" role="doc-backlink">PickleBuffer objects</a></h3>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> class supports a very simple Python API. Its constructor
|
||
takes a single <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a>-compatible object. <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code>
|
||
objects themselves support the buffer protocol, so consumers can
|
||
call <code class="docutils literal notranslate"><span class="pre">memoryview(...)</span></code> on them to get additional information
|
||
about the underlying buffer (such as the original type, shape, etc.).
|
||
In addition, <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> objects have the following methods:</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">raw()</span></code></p>
|
||
<blockquote>
|
||
<div>Return a memoryview of the raw memory bytes underlying the PickleBuffer,
|
||
erasing any shape, strides and format information. This is required to
|
||
handle Fortran-contiguous buffers correctly in the pure Python pickle
|
||
implementation.</div></blockquote>
|
||
<p><code class="docutils literal notranslate"><span class="pre">release()</span></code></p>
|
||
<blockquote>
|
||
<div>Release the PickleBuffer’s underlying buffer, making it unusable.</div></blockquote>
|
||
<p>On the C side, a simple API will be provided to create and inspect
|
||
PickleBuffer objects:</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">PyObject</span> <span class="pre">*PyPickleBuffer_FromObject(PyObject</span> <span class="pre">*obj)</span></code></p>
|
||
<blockquote>
|
||
<div>Create a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> object holding a view over the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a>-compatible
|
||
<em>obj</em>.</div></blockquote>
|
||
<p><code class="docutils literal notranslate"><span class="pre">PyPickleBuffer_Check(PyObject</span> <span class="pre">*obj)</span></code></p>
|
||
<blockquote>
|
||
<div>Return whether <em>obj</em> is a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> instance.</div></blockquote>
|
||
<p><code class="docutils literal notranslate"><span class="pre">const</span> <span class="pre">Py_buffer</span> <span class="pre">*PyPickleBuffer_GetBuffer(PyObject</span> <span class="pre">*picklebuf)</span></code></p>
|
||
<blockquote>
|
||
<div>Return a pointer to the internal <code class="docutils literal notranslate"><span class="pre">Py_buffer</span></code> owned by the <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code>
|
||
instance. An exception is raised if the buffer is released.</div></blockquote>
|
||
<p><code class="docutils literal notranslate"><span class="pre">int</span> <span class="pre">PyPickleBuffer_Release(PyObject</span> <span class="pre">*picklebuf)</span></code></p>
|
||
<blockquote>
|
||
<div>Release the <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> instance’s underlying buffer.</div></blockquote>
|
||
</section>
|
||
<section id="buffer-requirements">
|
||
<h3><a class="toc-backref" href="#buffer-requirements" role="doc-backlink">Buffer requirements</a></h3>
|
||
<p><code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> can wrap any kind of buffer, including non-contiguous
|
||
buffers. However, it is required that <code class="docutils literal notranslate"><span class="pre">__reduce__</span></code> only returns a
|
||
contiguous <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> (<em>contiguity</em> here is meant in the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a>
|
||
sense: either C-ordered or Fortran-ordered). Non-contiguous buffers
|
||
will raise an error when pickled.</p>
|
||
<p>This restriction is primarily an ease-of-implementation issue for the
|
||
<code class="docutils literal notranslate"><span class="pre">pickle</span></code> module, but also for other consumers of out-of-band buffers.
|
||
The simplest solution on the provider side is to return a contiguous
|
||
copy of a non-contiguous buffer; a sophisticated provider, though, may
|
||
decide instead to return a sequence of contiguous sub-buffers.</p>
|
||
</section>
|
||
</section>
|
||
<section id="consumer-api">
|
||
<h2><a class="toc-backref" href="#consumer-api" role="doc-backlink">Consumer API</a></h2>
|
||
<p><code class="docutils literal notranslate"><span class="pre">pickle.Pickler.__init__</span></code> and <code class="docutils literal notranslate"><span class="pre">pickle.dumps</span></code> are augmented with an additional
|
||
<code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code> parameter:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Pickler</span><span class="p">:</span>
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">...</span><span class="p">,</span> <span class="n">buffer_callback</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> If *buffer_callback* is None (the default), buffer views are</span>
|
||
<span class="sd"> serialized into *file* as part of the pickle stream.</span>
|
||
|
||
<span class="sd"> If *buffer_callback* is not None, then it can be called any number</span>
|
||
<span class="sd"> of times with a buffer view. If the callback returns a false value</span>
|
||
<span class="sd"> (such as None), the given buffer is out-of-band; otherwise the</span>
|
||
<span class="sd"> buffer is serialized in-band, i.e. inside the pickle stream.</span>
|
||
|
||
<span class="sd"> The callback should arrange to store or transmit out-of-band buffers</span>
|
||
<span class="sd"> without changing their order.</span>
|
||
|
||
<span class="sd"> It is an error if *buffer_callback* is not None and *protocol* is</span>
|
||
<span class="sd"> None or smaller than 5.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="o">...</span><span class="p">,</span> <span class="n">buffer_callback</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> See above for *buffer_callback*.</span>
|
||
<span class="sd"> """</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">pickle.Unpickler.__init__</span></code> and <code class="docutils literal notranslate"><span class="pre">pickle.loads</span></code> are augmented with an
|
||
additional <code class="docutils literal notranslate"><span class="pre">buffers</span></code> parameter:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Unpickler</span><span class="p">:</span>
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="o">...</span><span class="p">,</span> <span class="n">buffers</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> If *buffers* is not None, it should be an iterable of buffer-enabled</span>
|
||
<span class="sd"> objects that is consumed each time the pickle stream references</span>
|
||
<span class="sd"> an out-of-band buffer view. Such buffers have been given in order</span>
|
||
<span class="sd"> to the *buffer_callback* of a Pickler object.</span>
|
||
|
||
<span class="sd"> If *buffers* is None (the default), then the buffers are taken</span>
|
||
<span class="sd"> from the pickle stream, assuming they are serialized there.</span>
|
||
<span class="sd"> It is an error for *buffers* to be None if the pickle stream</span>
|
||
<span class="sd"> was produced with a non-None *buffer_callback*.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">pickle</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="o">...</span><span class="p">,</span> <span class="n">buffers</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> See above for *buffers*.</span>
|
||
<span class="sd"> """</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="protocol-changes">
|
||
<h2><a class="toc-backref" href="#protocol-changes" role="doc-backlink">Protocol changes</a></h2>
|
||
<p>Three new opcodes are introduced:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">BYTEARRAY8</span></code> creates a bytearray from the data following it in the pickle
|
||
stream and pushes it on the stack (just like <code class="docutils literal notranslate"><span class="pre">BINBYTES8</span></code> does for bytes
|
||
objects);</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">NEXT_BUFFER</span></code> fetches a buffer from the <code class="docutils literal notranslate"><span class="pre">buffers</span></code> iterable and pushes
|
||
it on the stack;</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">READONLY_BUFFER</span></code> makes a readonly view of the top of the stack.</li>
|
||
</ul>
|
||
<p>When pickling encounters a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code>, that buffer can be considered
|
||
in-band or out-of-band depending on the following conditions:</p>
|
||
<ul class="simple">
|
||
<li>if no <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code> is given, the buffer is in-band;</li>
|
||
<li>if a <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code> is given, it is called with the buffer. If the
|
||
callback returns a true value, the buffer is in-band; if the callback
|
||
returns a false value, the buffer is out-of-band.</li>
|
||
</ul>
|
||
<p>An in-band buffer is serialized as follows:</p>
|
||
<ul class="simple">
|
||
<li>If the buffer is writable, it is serialized into the pickle stream as if
|
||
it were a <code class="docutils literal notranslate"><span class="pre">bytearray</span></code> object.</li>
|
||
<li>If the buffer is readonly, it is serialized into the pickle stream as if
|
||
it were a <code class="docutils literal notranslate"><span class="pre">bytes</span></code> object.</li>
|
||
</ul>
|
||
<p>An out-of-band buffer is serialized as follows:</p>
|
||
<ul class="simple">
|
||
<li>If the buffer is writable, a <code class="docutils literal notranslate"><span class="pre">NEXT_BUFFER</span></code> opcode is appended to the
|
||
pickle stream.</li>
|
||
<li>If the buffer is readonly, a <code class="docutils literal notranslate"><span class="pre">NEXT_BUFFER</span></code> opcode is appended to the
|
||
pickle stream, followed by a <code class="docutils literal notranslate"><span class="pre">READONLY_BUFFER</span></code> opcode.</li>
|
||
</ul>
|
||
<p>The distinction between readonly and writable buffers is motivated below
|
||
(see “Mutability”).</p>
|
||
</section>
|
||
<section id="side-effects">
|
||
<h2><a class="toc-backref" href="#side-effects" role="doc-backlink">Side effects</a></h2>
|
||
<section id="improved-in-band-performance">
|
||
<h3><a class="toc-backref" href="#improved-in-band-performance" role="doc-backlink">Improved in-band performance</a></h3>
|
||
<p>Even in-band pickling can be improved by returning a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code>
|
||
instance from <code class="docutils literal notranslate"><span class="pre">__reduce_ex__</span></code>, as one copy is avoided on the serialization
|
||
path <a class="footnote-reference brackets" href="#arrow-pickle5-benchmark" id="id4">[10]</a> <a class="footnote-reference brackets" href="#numpy-pickle5-benchmark" id="id5">[12]</a>.</p>
|
||
</section>
|
||
</section>
|
||
<section id="caveats">
|
||
<h2><a class="toc-backref" href="#caveats" role="doc-backlink">Caveats</a></h2>
|
||
<section id="mutability">
|
||
<h3><a class="toc-backref" href="#mutability" role="doc-backlink">Mutability</a></h3>
|
||
<p><a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a> buffers can be readonly or writable. Some objects,
|
||
such as Numpy arrays, need to be backed by a mutable buffer for full
|
||
operation. Pickle consumers that use the <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code> and <code class="docutils literal notranslate"><span class="pre">buffers</span></code>
|
||
arguments will have to be careful to recreate mutable buffers. When doing
|
||
I/O, this implies using buffer-passing API variants such as <code class="docutils literal notranslate"><span class="pre">readinto</span></code>
|
||
(which are also often preferable for performance).</p>
|
||
</section>
|
||
<section id="data-sharing">
|
||
<h3><a class="toc-backref" href="#data-sharing" role="doc-backlink">Data sharing</a></h3>
|
||
<p>If you pickle and then unpickle an object in the same process, passing
|
||
out-of-band buffer views, then the unpickled object may be backed by the
|
||
same buffer as the original pickled object.</p>
|
||
<p>For example, it might be reasonable to implement reduction of a Numpy array
|
||
as follows (crucial metadata such as shapes is omitted for simplicity):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">ndarray</span><span class="p">:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__reduce_ex__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">protocol</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">protocol</span> <span class="o">==</span> <span class="mi">5</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">numpy</span><span class="o">.</span><span class="n">frombuffer</span><span class="p">,</span> <span class="p">(</span><span class="n">PickleBuffer</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
||
<span class="c1"># Legacy code for earlier protocols omitted</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Then simply passing the PickleBuffer around from <code class="docutils literal notranslate"><span class="pre">dumps</span></code> to <code class="docutils literal notranslate"><span class="pre">loads</span></code>
|
||
will produce a new Numpy array sharing the same underlying memory as the
|
||
original Numpy object (and, incidentally, keeping it alive):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||
<span class="gp">>>> </span><span class="n">a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">a</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="go">0.0</span>
|
||
<span class="gp">>>> </span><span class="n">buffers</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">buffer_callback</span><span class="o">=</span><span class="n">buffers</span><span class="o">.</span><span class="n">append</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">b</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">buffers</span><span class="o">=</span><span class="n">buffers</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">b</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">42</span>
|
||
<span class="gp">>>> </span><span class="n">a</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="go">42.0</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This won’t happen with the traditional <code class="docutils literal notranslate"><span class="pre">pickle</span></code> API (i.e. without passing
|
||
<code class="docutils literal notranslate"><span class="pre">buffers</span></code> and <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code> parameters), because then the buffer view
|
||
is serialized inside the pickle stream with a copy.</p>
|
||
</section>
|
||
</section>
|
||
<section id="rejected-alternatives">
|
||
<h2><a class="toc-backref" href="#rejected-alternatives" role="doc-backlink">Rejected alternatives</a></h2>
|
||
<section id="using-the-existing-persistent-load-interface">
|
||
<h3><a class="toc-backref" href="#using-the-existing-persistent-load-interface" role="doc-backlink">Using the existing persistent load interface</a></h3>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">pickle</span></code> persistence interface is a way of storing references to
|
||
designated objects in the pickle stream while handling their actual
|
||
serialization out of band. For example, one might consider the following
|
||
for zero-copy serialization of bytearrays:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">MyPickle</span><span class="p">(</span><span class="n">pickle</span><span class="o">.</span><span class="n">Pickler</span><span class="p">):</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">buffers</span> <span class="o">=</span> <span class="p">[]</span>
|
||
|
||
<span class="k">def</span> <span class="nf">persistent_id</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="nb">bytearray</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">index</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">buffers</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">buffers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="p">(</span><span class="s1">'bytearray'</span><span class="p">,</span> <span class="n">index</span><span class="p">)</span>
|
||
|
||
|
||
<span class="k">class</span> <span class="nc">MyUnpickle</span><span class="p">(</span><span class="n">pickle</span><span class="o">.</span><span class="n">Unpickler</span><span class="p">):</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="n">buffers</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">buffers</span> <span class="o">=</span> <span class="n">buffers</span>
|
||
|
||
<span class="k">def</span> <span class="nf">persistent_load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pid</span><span class="p">):</span>
|
||
<span class="n">type_tag</span><span class="p">,</span> <span class="n">index</span> <span class="o">=</span> <span class="n">pid</span>
|
||
<span class="k">if</span> <span class="n">type_tag</span> <span class="o">==</span> <span class="s1">'bytearray'</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">buffers</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">assert</span> <span class="mi">0</span> <span class="c1"># unexpected type</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This mechanism has two drawbacks:</p>
|
||
<ul>
|
||
<li>Each <code class="docutils literal notranslate"><span class="pre">pickle</span></code> consumer must reimplement <code class="docutils literal notranslate"><span class="pre">Pickler</span></code> and <code class="docutils literal notranslate"><span class="pre">Unpickler</span></code>
|
||
subclasses, with custom code for each type of interest. Essentially,
|
||
N pickle consumers end up each implementing custom code for M producers.
|
||
This is difficult (especially for sophisticated types such as Numpy
|
||
arrays) and poorly scalable.</li>
|
||
<li>Each object encountered by the pickle module (even simple built-in objects
|
||
such as ints and strings) triggers a call to the user’s <code class="docutils literal notranslate"><span class="pre">persistent_id()</span></code>
|
||
method, leading to a possible performance drop compared to nominal.<p>(The Python 2 <code class="docutils literal notranslate"><span class="pre">cPickle</span></code> module supported an undocumented
|
||
<code class="docutils literal notranslate"><span class="pre">inst_persistent_id()</span></code> hook that was only called on non-built-in types;
|
||
it was added in 1997 in order to alleviate the performance issue of
|
||
calling <code class="docutils literal notranslate"><span class="pre">persistent_id</span></code>, presumably at ZODB’s request.)</p>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
<section id="passing-a-sequence-of-buffers-in-buffer-callback">
|
||
<h3><a class="toc-backref" href="#passing-a-sequence-of-buffers-in-buffer-callback" role="doc-backlink">Passing a sequence of buffers in <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code></a></h3>
|
||
<p>By passing a sequence of buffers, rather than a single buffer, we would
|
||
potentially save on function call overhead in case a large number
|
||
of buffers are produced during serialization. This would need
|
||
additional support in the Pickler to save buffers before calling the
|
||
callback. However, it would also prevent the buffer callback from returning
|
||
a boolean to indicate whether a buffer is to be serialized in-band or
|
||
out-of-band.</p>
|
||
<p>We consider that having a large number of buffers to serialize is an
|
||
unlikely case, and decided to pass a single buffer to the buffer callback.</p>
|
||
</section>
|
||
<section id="allow-serializing-a-picklebuffer-in-protocol-4-and-earlier">
|
||
<h3><a class="toc-backref" href="#allow-serializing-a-picklebuffer-in-protocol-4-and-earlier" role="doc-backlink">Allow serializing a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> in protocol 4 and earlier</a></h3>
|
||
<p>If we were to allow serializing a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> in protocols 4 and earlier,
|
||
it would actually make a supplementary memory copy when the buffer is mutable.
|
||
Indeed, a mutable <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> would serialize as a bytearray object
|
||
in those protocols (that is a first copy), and serializing the bytearray
|
||
object would call <code class="docutils literal notranslate"><span class="pre">bytearray.__reduce_ex__</span></code> which returns a bytes object
|
||
(that is a second copy).</p>
|
||
<p>To prevent <code class="docutils literal notranslate"><span class="pre">__reduce__</span></code> implementors from introducing involuntary
|
||
performance regressions, we decided to reject <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> when
|
||
the protocol is smaller than 5. This forces implementors to switch to
|
||
<code class="docutils literal notranslate"><span class="pre">__reduce_ex__</span></code> and implement protocol-dependent serialization, taking
|
||
advantage of the best path for each protocol (or at least treat protocol
|
||
5 and upwards separately from protocols 4 and downwards).</p>
|
||
</section>
|
||
</section>
|
||
<section id="implementation">
|
||
<h2><a class="toc-backref" href="#implementation" role="doc-backlink">Implementation</a></h2>
|
||
<p>The PEP was initially implemented in the author’s GitHub fork <a class="footnote-reference brackets" href="#pickle5-git" id="id6">[6]</a>.
|
||
It was later merged into Python 3.8 <a class="footnote-reference brackets" href="#pickle5-pr" id="id7">[7]</a>.</p>
|
||
<p>A backport for Python 3.6 and 3.7 is downloadable from PyPI
|
||
<a class="footnote-reference brackets" href="#pickle5-pypi" id="id8">[8]</a>.</p>
|
||
<p>Support for pickle protocol 5 and out-of-band buffers was added to Numpy
|
||
<a class="footnote-reference brackets" href="#numpy-pickle5-pr" id="id9">[11]</a>.</p>
|
||
<p>Support for pickle protocol 5 and out-of-band buffers was added to the Apache
|
||
Arrow Python bindings <a class="footnote-reference brackets" href="#arrow-pickle5-pr" id="id10">[9]</a>.</p>
|
||
</section>
|
||
<section id="related-work">
|
||
<h2><a class="toc-backref" href="#related-work" role="doc-backlink">Related work</a></h2>
|
||
<p>Dask.distributed implements a custom zero-copy serialization with fallback
|
||
to pickle <a class="footnote-reference brackets" href="#dask-serialization" id="id11">[2]</a>.</p>
|
||
<p>PyArrow implements zero-copy component-based serialization for a few
|
||
selected types <a class="footnote-reference brackets" href="#pyarrow-serialization" id="id12">[5]</a>.</p>
|
||
<p><a class="pep reference internal" href="../pep-0554/" title="PEP 554 – Multiple Interpreters in the Stdlib">PEP 554</a> proposes hosting multiple interpreters in a single process, with
|
||
provisions for transferring buffers between interpreters as a communication
|
||
scheme.</p>
|
||
</section>
|
||
<section id="acknowledgements">
|
||
<h2><a class="toc-backref" href="#acknowledgements" role="doc-backlink">Acknowledgements</a></h2>
|
||
<p>Thanks to the following people for early feedback: Alyssa Coghlan, Olivier
|
||
Grisel, Stefan Krah, MinRK, Matt Rocklin, Eric Snow.</p>
|
||
<p>Thanks to Pierre Glaser and Olivier Grisel for experimenting with the
|
||
implementation.</p>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="dask" role="doc-footnote">
|
||
<dt class="label">[<a href="#id1">1</a>]</dt>
|
||
<dd>Dask.distributed – A lightweight library for distributed computing
|
||
in Python
|
||
<a class="reference external" href="https://distributed.readthedocs.io/">https://distributed.readthedocs.io/</a></aside>
|
||
<aside class="footnote brackets" id="dask-serialization" role="doc-footnote">
|
||
<dt class="label">[<a href="#id11">2</a>]</dt>
|
||
<dd>Dask.distributed custom serialization
|
||
<a class="reference external" href="https://distributed.readthedocs.io/en/latest/serialization.html">https://distributed.readthedocs.io/en/latest/serialization.html</a></aside>
|
||
<aside class="footnote brackets" id="ipyparallel" role="doc-footnote">
|
||
<dt class="label">[<a href="#id3">3</a>]</dt>
|
||
<dd>IPyParallel – Using IPython for parallel computing
|
||
<a class="reference external" href="https://ipyparallel.readthedocs.io/">https://ipyparallel.readthedocs.io/</a></aside>
|
||
<aside class="footnote brackets" id="pyarrow" role="doc-footnote">
|
||
<dt class="label">[<a href="#id2">4</a>]</dt>
|
||
<dd>PyArrow – A cross-language development platform for in-memory data
|
||
<a class="reference external" href="https://arrow.apache.org/docs/python/">https://arrow.apache.org/docs/python/</a></aside>
|
||
<aside class="footnote brackets" id="pyarrow-serialization" role="doc-footnote">
|
||
<dt class="label">[<a href="#id12">5</a>]</dt>
|
||
<dd>PyArrow IPC and component-based serialization
|
||
<a class="reference external" href="https://arrow.apache.org/docs/python/ipc.html#component-based-serialization">https://arrow.apache.org/docs/python/ipc.html#component-based-serialization</a></aside>
|
||
<aside class="footnote brackets" id="pickle5-git" role="doc-footnote">
|
||
<dt class="label">[<a href="#id6">6</a>]</dt>
|
||
<dd><code class="docutils literal notranslate"><span class="pre">pickle5</span></code> branch on GitHub
|
||
<a class="reference external" href="https://github.com/pitrou/cpython/tree/pickle5">https://github.com/pitrou/cpython/tree/pickle5</a></aside>
|
||
<aside class="footnote brackets" id="pickle5-pr" role="doc-footnote">
|
||
<dt class="label">[<a href="#id7">7</a>]</dt>
|
||
<dd>PEP 574 Pull Request on GitHub
|
||
<a class="reference external" href="https://github.com/python/cpython/pull/7076">https://github.com/python/cpython/pull/7076</a></aside>
|
||
<aside class="footnote brackets" id="pickle5-pypi" role="doc-footnote">
|
||
<dt class="label">[<a href="#id8">8</a>]</dt>
|
||
<dd><code class="docutils literal notranslate"><span class="pre">pickle5</span></code> project on PyPI
|
||
<a class="reference external" href="https://pypi.org/project/pickle5/">https://pypi.org/project/pickle5/</a></aside>
|
||
<aside class="footnote brackets" id="arrow-pickle5-pr" role="doc-footnote">
|
||
<dt class="label">[<a href="#id10">9</a>]</dt>
|
||
<dd>Pull request: Experimental zero-copy pickling in Apache Arrow
|
||
<a class="reference external" href="https://github.com/apache/arrow/pull/2161">https://github.com/apache/arrow/pull/2161</a></aside>
|
||
<aside class="footnote brackets" id="arrow-pickle5-benchmark" role="doc-footnote">
|
||
<dt class="label">[<a href="#id4">10</a>]</dt>
|
||
<dd>Benchmark zero-copy pickling in Apache Arrow
|
||
<a class="reference external" href="https://github.com/apache/arrow/pull/2161#issuecomment-407859213">https://github.com/apache/arrow/pull/2161#issuecomment-407859213</a></aside>
|
||
<aside class="footnote brackets" id="numpy-pickle5-pr" role="doc-footnote">
|
||
<dt class="label">[<a href="#id9">11</a>]</dt>
|
||
<dd>Pull request: Support pickle protocol 5 in Numpy
|
||
<a class="reference external" href="https://github.com/numpy/numpy/pull/12011">https://github.com/numpy/numpy/pull/12011</a></aside>
|
||
<aside class="footnote brackets" id="numpy-pickle5-benchmark" role="doc-footnote">
|
||
<dt class="label">[<a href="#id5">12</a>]</dt>
|
||
<dd>Benchmark pickling Numpy arrays with different pickle protocols
|
||
<a class="reference external" href="https://github.com/numpy/numpy/issues/11161#issuecomment-424035962">https://github.com/numpy/numpy/issues/11161#issuecomment-424035962</a></aside>
|
||
</aside>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed into the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils">
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0574.rst">https://github.com/python/peps/blob/main/peps/pep-0574.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0574.rst">2023-10-11 12:05:51 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
||
<li><a class="reference internal" href="#example">Example</a></li>
|
||
<li><a class="reference internal" href="#producer-api">Producer API</a><ul>
|
||
<li><a class="reference internal" href="#picklebuffer-objects">PickleBuffer objects</a></li>
|
||
<li><a class="reference internal" href="#buffer-requirements">Buffer requirements</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#consumer-api">Consumer API</a></li>
|
||
<li><a class="reference internal" href="#protocol-changes">Protocol changes</a></li>
|
||
<li><a class="reference internal" href="#side-effects">Side effects</a><ul>
|
||
<li><a class="reference internal" href="#improved-in-band-performance">Improved in-band performance</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#caveats">Caveats</a><ul>
|
||
<li><a class="reference internal" href="#mutability">Mutability</a></li>
|
||
<li><a class="reference internal" href="#data-sharing">Data sharing</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rejected-alternatives">Rejected alternatives</a><ul>
|
||
<li><a class="reference internal" href="#using-the-existing-persistent-load-interface">Using the existing persistent load interface</a></li>
|
||
<li><a class="reference internal" href="#passing-a-sequence-of-buffers-in-buffer-callback">Passing a sequence of buffers in <code class="docutils literal notranslate"><span class="pre">buffer_callback</span></code></a></li>
|
||
<li><a class="reference internal" href="#allow-serializing-a-picklebuffer-in-protocol-4-and-earlier">Allow serializing a <code class="docutils literal notranslate"><span class="pre">PickleBuffer</span></code> in protocol 4 and earlier</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#implementation">Implementation</a></li>
|
||
<li><a class="reference internal" href="#related-work">Related work</a></li>
|
||
<li><a class="reference internal" href="#acknowledgements">Acknowledgements</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0574.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |