417 lines
34 KiB
HTML
417 lines
34 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 3137 – Immutable Bytes and Mutable Buffer | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-3137/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 3137 – Immutable Bytes and Mutable Buffer | peps.python.org'>
|
||
<meta property="og:description" content="After releasing Python 3.0a1 with a mutable bytes type, pressure mounted to add a way to represent immutable bytes. Gregory P. Smith proposed a patch that would allow making a bytes object temporarily immutable by requesting that the data be locked usi...">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-3137/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="After releasing Python 3.0a1 with a mutable bytes type, pressure mounted to add a way to represent immutable bytes. Gregory P. Smith proposed a patch that would allow making a bytes object temporarily immutable by requesting that the data be locked usi...">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 3137</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 3137 – Immutable Bytes and Mutable Buffer</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Guido van Rossum &lt;guido at python.org&gt;</dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">26-Sep-2007</dd>
|
||
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-odd">3.0</dd>
|
||
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-even">26-Sep-2007, 30-Sep-2007</dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#introduction">Introduction</a></li>
|
||
<li><a class="reference internal" href="#advantages">Advantages</a></li>
|
||
<li><a class="reference internal" href="#naming">Naming</a><ul>
|
||
<li><a class="reference internal" href="#summary">Summary</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#literal-notations">Literal Notations</a></li>
|
||
<li><a class="reference internal" href="#functionality">Functionality</a><ul>
|
||
<li><a class="reference internal" href="#pep-3118-buffer-api">PEP 3118 Buffer API</a></li>
|
||
<li><a class="reference internal" href="#constructors">Constructors</a></li>
|
||
<li><a class="reference internal" href="#comparisons">Comparisons</a></li>
|
||
<li><a class="reference internal" href="#slicing">Slicing</a></li>
|
||
<li><a class="reference internal" href="#indexing">Indexing</a></li>
|
||
<li><a class="reference internal" href="#str-and-repr">Str() and Repr()</a></li>
|
||
<li><a class="reference internal" href="#operators">Operators</a></li>
|
||
<li><a class="reference internal" href="#methods">Methods</a></li>
|
||
<li><a class="reference internal" href="#bytes-and-the-str-type">Bytes and the Str Type</a></li>
|
||
<li><a class="reference internal" href="#the-basestring-type">The <code class="docutils literal notranslate"><span class="pre">basestring</span></code> Type</a></li>
|
||
<li><a class="reference internal" href="#pickling">Pickling</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="introduction">
|
||
<h2><a class="toc-backref" href="#introduction" role="doc-backlink">Introduction</a></h2>
|
||
<p>After releasing Python 3.0a1 with a mutable bytes type, pressure
|
||
mounted to add a way to represent immutable bytes. Gregory P. Smith
|
||
proposed a patch that would allow making a bytes object temporarily
|
||
immutable by requesting that the data be locked using the new buffer
|
||
API from <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a>. This did not seem the right approach to me.</p>
|
||
<p>Jeffrey Yasskin, with the help of Adam Hupp, then prepared a patch to
|
||
make the bytes type immutable (by crudely removing all mutating APIs)
|
||
and fix the fall-out in the test suite. This showed that there aren’t
|
||
all that many places that depend on the mutability of bytes, with the
|
||
exception of code that builds up a return value from small pieces.</p>
|
||
<p>Thinking through the consequences, and noticing that using the array
|
||
module as an ersatz mutable bytes type is far from ideal, and
|
||
recalling a proposal put forward earlier by Talin, I floated the
|
||
suggestion to have both a mutable and an immutable bytes type. (This
|
||
had been brought up before, but until seeing the evidence of Jeffrey’s
|
||
patch I wasn’t open to the suggestion.)</p>
|
||
<p>Moreover, a possible implementation strategy became clear: use the old
|
||
PyString implementation, stripped down to remove locale support and
|
||
implicit conversions to/from Unicode, for the immutable bytes type,
|
||
and keep the new PyBytes implementation as the mutable bytes type.</p>
|
||
<p>The ensuing discussion made it clear that the idea is welcome but
|
||
needs to be specified more precisely. Hence this PEP.</p>
|
||
</section>
|
||
<section id="advantages">
|
||
<h2><a class="toc-backref" href="#advantages" role="doc-backlink">Advantages</a></h2>
|
||
<p>One advantage of having an immutable bytes type is that code objects
|
||
can use these. It also makes it possible to efficiently create hash
|
||
tables using bytes for keys; this may be useful when parsing protocols
|
||
like HTTP or SMTP which are based on bytes representing text.</p>
|
||
<p>Porting code that manipulates binary data (or encoded text) in Python
|
||
2.x will be easier using the new design than using the original 3.0
|
||
design with mutable bytes; simply replace <code class="docutils literal notranslate"><span class="pre">str</span></code> with <code class="docutils literal notranslate"><span class="pre">bytes</span></code> and
|
||
change ‘…’ literals into b’…’ literals.</p>
|
||
</section>
|
||
<section id="naming">
|
||
<h2><a class="toc-backref" href="#naming" role="doc-backlink">Naming</a></h2>
|
||
<p>I propose the following type names at the Python level:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytes</span></code> is an immutable array of bytes (PyString)</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytearray</span></code> is a mutable array of bytes (PyBytes)</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">memoryview</span></code> is a bytes view on another object (PyMemory)</li>
|
||
</ul>
|
||
<p>The old type named <code class="docutils literal notranslate"><span class="pre">buffer</span></code> is so similar to the new type
|
||
<code class="docutils literal notranslate"><span class="pre">memoryview</span></code>, introduced by <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a>, that it is redundant. The rest
|
||
of this PEP doesn’t discuss the functionality of <code class="docutils literal notranslate"><span class="pre">memoryview</span></code>; it is
|
||
just mentioned here to justify getting rid of the old <code class="docutils literal notranslate"><span class="pre">buffer</span></code> type.
|
||
(An earlier version of this PEP proposed <code class="docutils literal notranslate"><span class="pre">buffer</span></code> as the new name
|
||
for PyBytes; in the end this name was deemed too confusing given the
|
||
many other uses of the word buffer.)</p>
|
||
<p>While eventually it makes sense to change the C API names, this PEP
|
||
maintains the old C API names, which should be familiar to all.</p>
|
||
<section id="summary">
|
||
<h3><a class="toc-backref" href="#summary" role="doc-backlink">Summary</a></h3>
|
||
<p>Here’s a simple ASCII-art table summarizing the type names in various
|
||
Python versions:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">+--------------+-------------+------------+--------------------------+</span>
|
||
<span class="o">|</span> <span class="n">C</span> <span class="n">name</span> <span class="o">|</span> <span class="mf">2.</span><span class="n">x</span> <span class="nb">repr</span> <span class="o">|</span> <span class="mf">3.0</span><span class="n">a1</span> <span class="nb">repr</span> <span class="o">|</span> <span class="mf">3.0</span><span class="n">a2</span> <span class="nb">repr</span> <span class="o">|</span>
|
||
<span class="o">+--------------+-------------+------------+--------------------------+</span>
|
||
<span class="o">|</span> <span class="n">PyUnicode</span> <span class="o">|</span> <span class="n">unicode</span> <span class="sa">u</span><span class="s1">''</span> <span class="o">|</span> <span class="nb">str</span> <span class="s1">''</span> <span class="o">|</span> <span class="nb">str</span> <span class="s1">''</span> <span class="o">|</span>
|
||
<span class="o">|</span> <span class="n">PyString</span> <span class="o">|</span> <span class="nb">str</span> <span class="s1">''</span> <span class="o">|</span> <span class="n">str8</span> <span class="n">s</span><span class="s1">''</span> <span class="o">|</span> <span class="nb">bytes</span> <span class="sa">b</span><span class="s1">''</span> <span class="o">|</span>
|
||
<span class="o">|</span> <span class="n">PyBytes</span> <span class="o">|</span> <span class="n">N</span><span class="o">/</span><span class="n">A</span> <span class="o">|</span> <span class="nb">bytes</span> <span class="sa">b</span><span class="s1">''</span> <span class="o">|</span> <span class="nb">bytearray</span> <span class="nb">bytearray</span><span class="p">(</span><span class="sa">b</span><span class="s1">''</span><span class="p">)</span> <span class="o">|</span>
|
||
<span class="o">|</span> <span class="n">PyBuffer</span> <span class="o">|</span> <span class="n">buffer</span> <span class="o">|</span> <span class="n">buffer</span> <span class="o">|</span> <span class="n">N</span><span class="o">/</span><span class="n">A</span> <span class="o">|</span>
|
||
<span class="o">|</span> <span class="n">PyMemoryView</span> <span class="o">|</span> <span class="n">N</span><span class="o">/</span><span class="n">A</span> <span class="o">|</span> <span class="nb">memoryview</span> <span class="o">|</span> <span class="nb">memoryview</span> <span class="o">&lt;...&gt;</span> <span class="o">|</span>
|
||
<span class="o">+--------------+-------------+------------+--------------------------+</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="literal-notations">
|
||
<h2><a class="toc-backref" href="#literal-notations" role="doc-backlink">Literal Notations</a></h2>
|
||
<p>The b’…’ notation introduced in Python 3.0a1 returns an immutable
|
||
bytes object, whatever variation is used. To create a mutable array
|
||
of bytes, use bytearray(b’…’) or bytearray([…]). The latter form
|
||
takes a list of integers in range(256).</p>
|
||
</section>
|
||
<section id="functionality">
|
||
<h2><a class="toc-backref" href="#functionality" role="doc-backlink">Functionality</a></h2>
|
||
<section id="pep-3118-buffer-api">
|
||
<h3><a class="toc-backref" href="#pep-3118-buffer-api" role="doc-backlink">PEP 3118 Buffer API</a></h3>
|
||
<p>Both bytes and bytearray implement the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a> buffer API. The bytes
|
||
type only implements read-only requests; the bytearray type allows
|
||
writable and data-locked requests as well. The element data type is
|
||
always ‘B’ (i.e. unsigned byte).</p>
|
||
</section>
|
||
<section id="constructors">
|
||
<h3><a class="toc-backref" href="#constructors" role="doc-backlink">Constructors</a></h3>
|
||
<p>There are five forms of constructors, applicable to both bytes and
|
||
bytearray:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytes(&lt;bytes&gt;)</span></code>, <code class="docutils literal notranslate"><span class="pre">bytes(&lt;bytearray&gt;)</span></code>, <code class="docutils literal notranslate"><span class="pre">bytearray(&lt;bytes&gt;)</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">bytearray(&lt;bytearray&gt;)</span></code>: simple copying constructors, with the
|
||
note that <code class="docutils literal notranslate"><span class="pre">bytes(&lt;bytes&gt;)</span></code> might return its (immutable)
|
||
argument, but <code class="docutils literal notranslate"><span class="pre">bytearray(&lt;bytearray&gt;)</span></code> always makes a copy.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytes(&lt;str&gt;,</span> <span class="pre">&lt;encoding&gt;[,</span> <span class="pre">&lt;errors&gt;])</span></code>, <code class="docutils literal notranslate"><span class="pre">bytearray(&lt;str&gt;,</span>
|
||
<span class="pre">&lt;encoding&gt;[,</span> <span class="pre">&lt;errors&gt;])</span></code>: encode a text string. Note that the
|
||
<code class="docutils literal notranslate"><span class="pre">str.encode()</span></code> method returns an <em>immutable</em> bytes object. The
|
||
&lt;encoding&gt; argument is mandatory; &lt;errors&gt; is optional.
|
||
&lt;encoding&gt; and &lt;errors&gt;, if given, must be <code class="docutils literal notranslate"><span class="pre">str</span></code> instances.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytes(&lt;memory</span> <span class="pre">view&gt;)</span></code>, <code class="docutils literal notranslate"><span class="pre">bytearray(&lt;memory</span> <span class="pre">view&gt;)</span></code>: construct
|
||
a bytes or bytearray object from anything that implements the PEP
|
||
3118 buffer API.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytes(&lt;iterable</span> <span class="pre">of</span> <span class="pre">ints&gt;)</span></code>, <code class="docutils literal notranslate"><span class="pre">bytearray(&lt;iterable</span> <span class="pre">of</span> <span class="pre">ints&gt;)</span></code>:
|
||
construct a bytes or bytearray object from a stream of integers in
|
||
range(256).</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">bytes(&lt;int&gt;)</span></code>, <code class="docutils literal notranslate"><span class="pre">bytearray(&lt;int&gt;)</span></code>: construct a
|
||
zero-initialized bytes or bytearray object of a given length.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="comparisons">
|
||
<h3><a class="toc-backref" href="#comparisons" role="doc-backlink">Comparisons</a></h3>
|
||
<p>The bytes and bytearray types are comparable with each other and
|
||
orderable, so that e.g. b’abc’ == bytearray(b’abc’) &lt; b’abd’.</p>
|
||
<p>Comparing either type to a str object for equality returns False
|
||
regardless of the contents of either operand. Ordering comparisons
|
||
with str raise TypeError. This is all conformant to the standard
|
||
rules for comparison and ordering between objects of incompatible
|
||
types.</p>
|
||
<p>(<strong>Note:</strong> in Python 3.0a1, comparing a bytes instance with a str
|
||
instance would raise TypeError, on the premise that this would catch
|
||
the occasional mistake quicker, especially in code ported from Python
|
||
2.x. However, a long discussion on the python-3000 list pointed out
|
||
so many problems with this that it is clearly a bad idea, to be rolled
|
||
back in 3.0a2 regardless of the fate of the rest of this PEP.)</p>
|
||
</section>
|
||
<section id="slicing">
|
||
<h3><a class="toc-backref" href="#slicing" role="doc-backlink">Slicing</a></h3>
|
||
<p>Slicing a bytes object returns a bytes object. Slicing a bytearray
|
||
object returns a bytearray object.</p>
|
||
<p>Slice assignment to a bytearray object accepts anything that
|
||
implements the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a> buffer API, or an iterable of integers in
|
||
range(256).</p>
|
||
</section>
|
||
<section id="indexing">
|
||
<h3><a class="toc-backref" href="#indexing" role="doc-backlink">Indexing</a></h3>
|
||
<p>Indexing bytes and bytearray returns small ints (like the bytes type in
|
||
3.0a1, and like lists or array.array(‘B’)).</p>
|
||
<p>Assignment to an item of a bytearray object accepts an int in
|
||
range(256). (To assign from a bytes sequence, use a slice
|
||
assignment.)</p>
|
||
</section>
|
||
<section id="str-and-repr">
|
||
<h3><a class="toc-backref" href="#str-and-repr" role="doc-backlink">Str() and Repr()</a></h3>
|
||
<p>The str() and repr() functions return the same thing for these
|
||
objects. The repr() of a bytes object returns a b’…’ style literal.
|
||
The repr() of a bytearray returns a string of the form “bytearray(b’…’)”.</p>
|
||
</section>
|
||
<section id="operators">
|
||
<h3><a class="toc-backref" href="#operators" role="doc-backlink">Operators</a></h3>
|
||
<p>The following operators are implemented by the bytes and bytearray
|
||
types, except where mentioned:</p>
|
||
<ul class="simple">
|
||
<li><code class="docutils literal notranslate"><span class="pre">b1</span> <span class="pre">+</span> <span class="pre">b2</span></code>: concatenation. With mixed bytes/bytearray operands,
|
||
the return type is that of the first argument (this seems arbitrary
|
||
until you consider how <code class="docutils literal notranslate"><span class="pre">+=</span></code> works).</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">b1</span> <span class="pre">+=</span> <span class="pre">b2</span></code>: mutates b1 if it is a bytearray object.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">b</span> <span class="pre">*</span> <span class="pre">n</span></code>, <code class="docutils literal notranslate"><span class="pre">n</span> <span class="pre">*</span> <span class="pre">b</span></code>: repetition; n must be an integer.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">b</span> <span class="pre">*=</span> <span class="pre">n</span></code>: mutates b if it is a bytearray object.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">b1</span> <span class="pre">in</span> <span class="pre">b2</span></code>, <code class="docutils literal notranslate"><span class="pre">b1</span> <span class="pre">not</span> <span class="pre">in</span> <span class="pre">b2</span></code>: substring test; b1 can be any
|
||
object implementing the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a> buffer API.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">i</span> <span class="pre">in</span> <span class="pre">b</span></code>, <code class="docutils literal notranslate"><span class="pre">i</span> <span class="pre">not</span> <span class="pre">in</span> <span class="pre">b</span></code>: single-byte membership test; i must
|
||
be an integer (if it is a length-1 bytes array, it is considered
|
||
to be a substring test, with the same outcome).</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">len(b)</span></code>: the number of bytes.</li>
|
||
<li><code class="docutils literal notranslate"><span class="pre">hash(b)</span></code>: the hash value; only implemented by the bytes type.</li>
|
||
</ul>
|
||
<p>Note that the % operator is <em>not</em> implemented. It does not appear
|
||
worth the complexity.</p>
|
||
</section>
|
||
<section id="methods">
|
||
<h3><a class="toc-backref" href="#methods" role="doc-backlink">Methods</a></h3>
|
||
<p>The following methods are implemented by bytes as well as bytearray, with
|
||
similar semantics. They accept anything that implements the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a>
|
||
buffer API for bytes arguments, and return the same type as the object
|
||
whose method is called (“self”):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">.</span><span class="n">capitalize</span><span class="p">(),</span> <span class="o">.</span><span class="n">center</span><span class="p">(),</span> <span class="o">.</span><span class="n">count</span><span class="p">(),</span> <span class="o">.</span><span class="n">decode</span><span class="p">(),</span> <span class="o">.</span><span class="n">endswith</span><span class="p">(),</span>
|
||
<span class="o">.</span><span class="n">expandtabs</span><span class="p">(),</span> <span class="o">.</span><span class="n">find</span><span class="p">(),</span> <span class="o">.</span><span class="n">index</span><span class="p">(),</span> <span class="o">.</span><span class="n">isalnum</span><span class="p">(),</span> <span class="o">.</span><span class="n">isalpha</span><span class="p">(),</span> <span class="o">.</span><span class="n">isdigit</span><span class="p">(),</span>
|
||
<span class="o">.</span><span class="n">islower</span><span class="p">(),</span> <span class="o">.</span><span class="n">isspace</span><span class="p">(),</span> <span class="o">.</span><span class="n">istitle</span><span class="p">(),</span> <span class="o">.</span><span class="n">isupper</span><span class="p">(),</span> <span class="o">.</span><span class="n">join</span><span class="p">(),</span> <span class="o">.</span><span class="n">ljust</span><span class="p">(),</span>
|
||
<span class="o">.</span><span class="n">lower</span><span class="p">(),</span> <span class="o">.</span><span class="n">lstrip</span><span class="p">(),</span> <span class="o">.</span><span class="n">partition</span><span class="p">(),</span> <span class="o">.</span><span class="n">replace</span><span class="p">(),</span> <span class="o">.</span><span class="n">rfind</span><span class="p">(),</span> <span class="o">.</span><span class="n">rindex</span><span class="p">(),</span>
|
||
<span class="o">.</span><span class="n">rjust</span><span class="p">(),</span> <span class="o">.</span><span class="n">rpartition</span><span class="p">(),</span> <span class="o">.</span><span class="n">rsplit</span><span class="p">(),</span> <span class="o">.</span><span class="n">rstrip</span><span class="p">(),</span> <span class="o">.</span><span class="n">split</span><span class="p">(),</span>
|
||
<span class="o">.</span><span class="n">splitlines</span><span class="p">(),</span> <span class="o">.</span><span class="n">startswith</span><span class="p">(),</span> <span class="o">.</span><span class="n">strip</span><span class="p">(),</span> <span class="o">.</span><span class="n">swapcase</span><span class="p">(),</span> <span class="o">.</span><span class="n">title</span><span class="p">(),</span>
|
||
<span class="o">.</span><span class="n">translate</span><span class="p">(),</span> <span class="o">.</span><span class="n">upper</span><span class="p">(),</span> <span class="o">.</span><span class="n">zfill</span><span class="p">()</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This is exactly the set of methods present on the str type in Python
|
||
2.x, with the exclusion of .encode(). The signatures and semantics
|
||
are the same too. However, whenever character classes like letter,
|
||
whitespace, lower case are used, the ASCII definitions of these
|
||
classes are used. (The Python 2.x str type uses the definitions from
|
||
the current locale, settable through the locale module.) The
|
||
.encode() method is left out because of the more strict definitions of
|
||
encoding and decoding in Python 3000: encoding always takes a Unicode
|
||
string and returns a bytes sequence, and decoding always takes a bytes
|
||
sequence and returns a Unicode string.</p>
|
||
<p>In addition, both types implement the class method <code class="docutils literal notranslate"><span class="pre">.fromhex()</span></code>,
|
||
which constructs an object from a string containing hexadecimal values
|
||
(with or without spaces between the bytes).</p>
|
||
<p>The bytearray type implements these additional methods from the
|
||
MutableSequence ABC (see <a class="pep reference internal" href="../pep-3119/" title="PEP 3119 – Introducing Abstract Base Classes">PEP 3119</a>):</p>
|
||
<blockquote>
|
||
<div>.extend(), .insert(), .append(), .reverse(), .pop(), .remove().</div></blockquote>
|
||
</section>
|
||
<section id="bytes-and-the-str-type">
|
||
<h3><a class="toc-backref" href="#bytes-and-the-str-type" role="doc-backlink">Bytes and the Str Type</a></h3>
|
||
<p>Like the bytes type in Python 3.0a1, and unlike the relationship
|
||
between str and unicode in Python 2.x, attempts to mix bytes (or
|
||
bytearray) objects and str objects without specifying an encoding will
|
||
raise a TypeError exception. (However, comparing bytes/bytearray and
|
||
str objects for equality will simply return False; see the section on
|
||
Comparisons above.)</p>
|
||
<p>Conversions between bytes or bytearray objects and str objects must
|
||
always be explicit, using an encoding. There are two equivalent APIs:
|
||
<code class="docutils literal notranslate"><span class="pre">str(b,</span> <span class="pre">&lt;encoding&gt;[,</span> <span class="pre">&lt;errors&gt;])</span></code> is equivalent to
|
||
<code class="docutils literal notranslate"><span class="pre">b.decode(&lt;encoding&gt;[,</span> <span class="pre">&lt;errors&gt;])</span></code>, and
|
||
<code class="docutils literal notranslate"><span class="pre">bytes(s,</span> <span class="pre">&lt;encoding&gt;[,</span> <span class="pre">&lt;errors&gt;])</span></code> is equivalent to
|
||
<code class="docutils literal notranslate"><span class="pre">s.encode(&lt;encoding&gt;[,</span> <span class="pre">&lt;errors&gt;])</span></code>.</p>
|
||
<p>There is one exception: we can convert from bytes (or bytearray) to str
|
||
without specifying an encoding by writing <code class="docutils literal notranslate"><span class="pre">str(b)</span></code>. This produces
|
||
the same result as <code class="docutils literal notranslate"><span class="pre">repr(b)</span></code>. This exception is necessary because
|
||
of the general promise that <em>any</em> object can be printed, and printing
|
||
is just a special case of conversion to str. There is however no
|
||
promise that printing a bytes object interprets the individual bytes
|
||
as characters (unlike in Python 2.x).</p>
|
||
<p>The str type currently implements the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a> buffer API. While this
|
||
is perhaps occasionally convenient, it is also potentially confusing,
|
||
because the bytes accessed via the buffer API represent a
|
||
platform-dependent encoding: depending on the platform byte order and
|
||
a compile-time configuration option, the encoding could be UTF-16-BE,
|
||
UTF-16-LE, UTF-32-BE, or UTF-32-LE. Worse, a different implementation
|
||
of the str type might completely change the bytes representation,
|
||
e.g. to UTF-8, or even make it impossible to access the data as a
|
||
contiguous array of bytes at all. Therefore, the <a class="pep reference internal" href="../pep-3118/" title="PEP 3118 – Revising the buffer protocol">PEP 3118</a> buffer API
|
||
will be removed from the str type.</p>
|
||
</section>
|
||
<section id="the-basestring-type">
|
||
<h3><a class="toc-backref" href="#the-basestring-type" role="doc-backlink">The <code class="docutils literal notranslate"><span class="pre">basestring</span></code> Type</a></h3>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">basestring</span></code> type will be removed from the language. Code that
|
||
used to say <code class="docutils literal notranslate"><span class="pre">isinstance(x,</span> <span class="pre">basestring)</span></code> should be changed to use
|
||
<code class="docutils literal notranslate"><span class="pre">isinstance(x,</span> <span class="pre">str)</span></code> instead.</p>
|
||
</section>
|
||
<section id="pickling">
|
||
<h3><a class="toc-backref" href="#pickling" role="doc-backlink">Pickling</a></h3>
|
||
<p>Left as an exercise for the reader.</p>
|
||
</section>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-3137.rst">https://github.com/python/peps/blob/main/peps/pep-3137.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-3137.rst">2023-09-09 17:39:29 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#introduction">Introduction</a></li>
|
||
<li><a class="reference internal" href="#advantages">Advantages</a></li>
|
||
<li><a class="reference internal" href="#naming">Naming</a><ul>
|
||
<li><a class="reference internal" href="#summary">Summary</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#literal-notations">Literal Notations</a></li>
|
||
<li><a class="reference internal" href="#functionality">Functionality</a><ul>
|
||
<li><a class="reference internal" href="#pep-3118-buffer-api">PEP 3118 Buffer API</a></li>
|
||
<li><a class="reference internal" href="#constructors">Constructors</a></li>
|
||
<li><a class="reference internal" href="#comparisons">Comparisons</a></li>
|
||
<li><a class="reference internal" href="#slicing">Slicing</a></li>
|
||
<li><a class="reference internal" href="#indexing">Indexing</a></li>
|
||
<li><a class="reference internal" href="#str-and-repr">Str() and Repr()</a></li>
|
||
<li><a class="reference internal" href="#operators">Operators</a></li>
|
||
<li><a class="reference internal" href="#methods">Methods</a></li>
|
||
<li><a class="reference internal" href="#bytes-and-the-str-type">Bytes and the Str Type</a></li>
|
||
<li><a class="reference internal" href="#the-basestring-type">The <code class="docutils literal notranslate"><span class="pre">basestring</span></code> Type</a></li>
|
||
<li><a class="reference internal" href="#pickling">Pickling</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-3137.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |