601 lines
36 KiB
HTML
601 lines
36 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 3127 – Integer Literal Support and Syntax | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-3127/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 3127 – Integer Literal Support and Syntax | peps.python.org'>
|
||
<meta property="og:description" content="This PEP proposes changes to the Python core to rationalize the treatment of string literal representations of integers in different radices (bases). These changes are targeted at Python 3.0, but the backward-compatible parts of the changes should be a...">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-3127/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="This PEP proposes changes to the Python core to rationalize the treatment of string literal representations of integers in different radices (bases). These changes are targeted at Python 3.0, but the backward-compatible parts of the changes should be a...">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 3127</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 3127 – Integer Literal Support and Syntax</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Patrick Maupin &lt;pmaupin at gmail.com&gt;</dd>
|
||
<dt class="field-even">Discussions-To<span class="colon">:</span></dt>
|
||
<dd class="field-even"><a class="reference external" href="https://mail.python.org/pipermail/python-3000/">Python-3000 list</a></dd>
|
||
<dt class="field-odd">Status<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-even">Type<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-odd">Created<span class="colon">:</span></dt>
|
||
<dd class="field-odd">14-Mar-2007</dd>
|
||
<dt class="field-even">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-even">3.0</dd>
|
||
<dt class="field-odd">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-odd">18-Mar-2007</dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#grammar-specification">Grammar specification</a></li>
|
||
<li><a class="reference internal" href="#int-specification">int() specification</a></li>
|
||
<li><a class="reference internal" href="#long-specification">long() specification</a></li>
|
||
<li><a class="reference internal" href="#tokenizer-exception-handling">Tokenizer exception handling</a></li>
|
||
<li><a class="reference internal" href="#int-exception-handling">int() exception handling</a></li>
|
||
<li><a class="reference internal" href="#oct-function">oct() function</a></li>
|
||
<li><a class="reference internal" href="#output-formatting">Output formatting</a></li>
|
||
<li><a class="reference internal" href="#transition-from-2-6-to-3-0">Transition from 2.6 to 3.0</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
||
<li><a class="reference internal" href="#background">Background</a></li>
|
||
<li><a class="reference internal" href="#removal-of-old-octal-syntax">Removal of old octal syntax</a></li>
|
||
<li><a class="reference internal" href="#supported-radices">Supported radices</a></li>
|
||
<li><a class="reference internal" href="#syntax-for-supported-radices">Syntax for supported radices</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP proposes changes to the Python core to rationalize
|
||
the treatment of string literal representations of integers
|
||
in different radices (bases). These changes are targeted at
|
||
Python 3.0, but the backward-compatible parts of the changes
|
||
should be added to Python 2.6, so that all valid 3.0 integer
|
||
literals will also be valid in 2.6.</p>
|
||
<p>The proposal is that:</p>
|
||
<ol class="loweralpha simple">
|
||
<li>octal literals must now be specified
|
||
with a leading “0o” or “0O” instead of “0”;</li>
|
||
<li>binary literals are now supported via a
|
||
leading “0b” or “0B”; and</li>
|
||
<li>provision will be made for binary numbers in
|
||
string formatting.</li>
|
||
</ol>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>This PEP was motivated by two different issues:</p>
|
||
<ul class="simple">
|
||
<li>The default octal representation of integers is silently confusing
|
||
to people unfamiliar with C-like languages. It is extremely easy
|
||
to inadvertently create an integer object with the wrong value,
|
||
because ‘013’ means ‘decimal 11’, not ‘decimal 13’, to the Python
|
||
language itself, which is not the meaning that most humans would
|
||
assign to this literal.</li>
|
||
<li>Some Python users have a strong desire for binary support in
|
||
the language.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<section id="grammar-specification">
|
||
<h3><a class="toc-backref" href="#grammar-specification" role="doc-backlink">Grammar specification</a></h3>
|
||
<p>The grammar will be changed. For Python 2.6, the changed and
|
||
new token definitions will be:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">integer</span> <span class="p">:</span><span class="o">:=</span> <span class="n">decimalinteger</span> <span class="o">|</span> <span class="n">octinteger</span> <span class="o">|</span> <span class="n">hexinteger</span> <span class="o">|</span>
|
||
<span class="n">bininteger</span> <span class="o">|</span> <span class="n">oldoctinteger</span>
|
||
|
||
<span class="n">octinteger</span> <span class="p">:</span><span class="o">:=</span> <span class="s2">"0"</span> <span class="p">(</span><span class="s2">"o"</span> <span class="o">|</span> <span class="s2">"O"</span><span class="p">)</span> <span class="n">octdigit</span><span class="o">+</span>
|
||
|
||
<span class="n">bininteger</span> <span class="p">:</span><span class="o">:=</span> <span class="s2">"0"</span> <span class="p">(</span><span class="s2">"b"</span> <span class="o">|</span> <span class="s2">"B"</span><span class="p">)</span> <span class="n">bindigit</span><span class="o">+</span>
|
||
|
||
<span class="n">oldoctinteger</span> <span class="p">:</span><span class="o">:=</span> <span class="s2">"0"</span> <span class="n">octdigit</span><span class="o">+</span>
|
||
|
||
<span class="n">bindigit</span> <span class="p">:</span><span class="o">:=</span> <span class="s2">"0"</span> <span class="o">|</span> <span class="s2">"1"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>For Python 3.0, “oldoctinteger” will not be supported, and
|
||
an exception will be raised if a literal has a leading “0” and
|
||
a second character which is a digit.</p>
|
||
<p>For both versions, this will require changes to PyLong_FromString
|
||
as well as the grammar.</p>
|
||
<p>The documentation will have to be changed as well: grammar.txt,
|
||
as well as the integer literal section of the reference manual.</p>
|
||
<p><a class="pep reference internal" href="../pep-0306/" title="PEP 306 – How to Change Python’s Grammar">PEP 306</a> should be checked for other issues, and that PEP should
|
||
be updated if the procedure described therein is insufficient.</p>
|
||
</section>
|
||
<section id="int-specification">
|
||
<h3><a class="toc-backref" href="#int-specification" role="doc-backlink">int() specification</a></h3>
|
||
<p>int(s, 0) will also match the new grammar definition.</p>
|
||
<p>This should happen automatically with the changes to
|
||
PyLong_FromString required for the grammar change.</p>
|
||
<p>Also the documentation for int() should be changed to explain
|
||
that int(s) operates identically to int(s, 10), and the word
|
||
“guess” should be removed from the description of int(s, 0).</p>
|
||
</section>
|
||
<section id="long-specification">
|
||
<h3><a class="toc-backref" href="#long-specification" role="doc-backlink">long() specification</a></h3>
|
||
<p>For Python 2.6, the long() implementation and documentation
|
||
should be changed to reflect the new grammar.</p>
|
||
</section>
|
||
<section id="tokenizer-exception-handling">
|
||
<h3><a class="toc-backref" href="#tokenizer-exception-handling" role="doc-backlink">Tokenizer exception handling</a></h3>
|
||
<p>If an invalid token contains a leading “0”, the exception
|
||
error message should be more informative than the current
|
||
“SyntaxError: invalid token”. It should explain that decimal
|
||
numbers may not have a leading zero, and that octal numbers
|
||
require an “o” after the leading zero.</p>
|
||
</section>
|
||
<section id="int-exception-handling">
|
||
<h3><a class="toc-backref" href="#int-exception-handling" role="doc-backlink">int() exception handling</a></h3>
|
||
<p>The ValueError raised for any call to int() with a string
|
||
should at least explicitly contain the base in the error
|
||
message, e.g.:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="ne">ValueError</span><span class="p">:</span> <span class="n">invalid</span> <span class="n">literal</span> <span class="k">for</span> <span class="n">base</span> <span class="mi">8</span> <span class="nb">int</span><span class="p">():</span> <span class="mi">09</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="oct-function">
|
||
<h3><a class="toc-backref" href="#oct-function" role="doc-backlink">oct() function</a></h3>
|
||
<p>oct() should be updated to output ‘0o’ in front of
|
||
the octal digits (for 3.0, and 2.6 compatibility mode).</p>
|
||
</section>
|
||
<section id="output-formatting">
|
||
<h3><a class="toc-backref" href="#output-formatting" role="doc-backlink">Output formatting</a></h3>
|
||
<p>In 3.0, the string % operator alternate syntax for the ‘o’
|
||
option will need to be updated to add ‘0o’ in front,
|
||
instead of ‘0’. In 2.6, alternate octal formatting will
|
||
continue to add only ‘0’. In neither 2.6 nor 3.0 will
|
||
the % operator support binary output. This is because
|
||
binary output is already supported by <a class="pep reference internal" href="../pep-3101/" title="PEP 3101 – Advanced String Formatting">PEP 3101</a>
|
||
(str.format), which is the preferred string formatting
|
||
method.</p>
|
||
</section>
|
||
<section id="transition-from-2-6-to-3-0">
|
||
<h3><a class="toc-backref" href="#transition-from-2-6-to-3-0" role="doc-backlink">Transition from 2.6 to 3.0</a></h3>
|
||
<p>The 2to3 translator will have to insert ‘o’ into any
|
||
octal string literal.</p>
|
||
<p>The Py3K compatible option to Python 2.6 should cause
|
||
attempts to use oldoctinteger literals to raise an
|
||
exception.</p>
|
||
</section>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>Most of the discussion on these issues occurred on the Python-3000
|
||
mailing list starting 14-Mar-2007, prompted by an observation that
|
||
the average human being would be completely mystified upon finding
|
||
that prepending a “0” to a string of digits changes the meaning of
|
||
that digit string entirely.</p>
|
||
<p>It was pointed out during this discussion that a similar, but shorter,
|
||
discussion on the subject occurred in January 2006, prompted by a
|
||
discovery of the same issue.</p>
|
||
<section id="background">
|
||
<h3><a class="toc-backref" href="#background" role="doc-backlink">Background</a></h3>
|
||
<p>For historical reasons, Python’s string representation of integers
|
||
in different bases (radices), for string formatting and token
|
||
literals, borrows heavily from C. <a class="footnote-reference brackets" href="#id6" id="id1">[1]</a> <a class="footnote-reference brackets" href="#id7" id="id2">[2]</a> Usage has shown that
|
||
the historical method of specifying an octal number is confusing,
|
||
and also that it would be nice to have additional support for binary
|
||
literals.</p>
|
||
<p>Throughout this document, unless otherwise noted, discussions about
|
||
the string representation of integers relate to these features:</p>
|
||
<ul class="simple">
|
||
<li>Literal integer tokens, as used by normal module compilation,
|
||
by eval(), and by int(token, 0). (int(token) and int(token, 2-36)
|
||
are not modified by this proposal.)<ul>
|
||
<li>Under 2.6, long() is treated the same as int()</li>
|
||
</ul>
|
||
</li>
|
||
<li>Formatting of integers into strings, either via the % string
|
||
operator or the new <a class="pep reference internal" href="../pep-3101/" title="PEP 3101 – Advanced String Formatting">PEP 3101</a> advanced string formatting method.</li>
|
||
</ul>
|
||
<p>It is presumed that:</p>
|
||
<ul class="simple">
|
||
<li>All of these features should have an identical set
|
||
of supported radices, for consistency.</li>
|
||
<li>Python source code syntax and int(mystring, 0) should
|
||
continue to share identical behavior.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="removal-of-old-octal-syntax">
|
||
<h3><a class="toc-backref" href="#removal-of-old-octal-syntax" role="doc-backlink">Removal of old octal syntax</a></h3>
|
||
<p>This PEP proposes that the ability to specify an octal number by
|
||
using a leading zero will be removed from the language in Python 3.0
|
||
(and the Python 3.0 preview mode of 2.6), and that a SyntaxError will
|
||
be raised whenever a leading “0” is immediately followed by another
|
||
digit.</p>
|
||
<p>During the present discussion, it was almost universally agreed that:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="nb">eval</span><span class="p">(</span><span class="s1">'010'</span><span class="p">)</span> <span class="o">==</span> <span class="mi">8</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>should no longer be true, because that is confusing to new users.
|
||
It was also proposed that:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="nb">eval</span><span class="p">(</span><span class="s1">'0010'</span><span class="p">)</span> <span class="o">==</span> <span class="mi">10</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>should become true, but that is much more contentious, because it is so
|
||
inconsistent with usage in other computer languages that mistakes are
|
||
likely to be made.</p>
|
||
<p>Almost all currently popular computer languages, including C/C++,
|
||
Java, Perl, and JavaScript, treat a sequence of digits with a
|
||
leading zero as an octal number. Proponents of treating these
|
||
numbers as decimal instead have a very valid point – as discussed
|
||
in <a class="reference internal" href="#supported-radices">Supported radices</a>, below, the entire non-computer world uses
|
||
decimal numbers almost exclusively. There is ample anecdotal
|
||
evidence that many people are dismayed and confused if they
|
||
are confronted with non-decimal radices.</p>
|
||
<p>However, in most situations, most people do not write gratuitous
|
||
zeros in front of their decimal numbers. The primary exception is
|
||
when an attempt is being made to line up columns of numbers. But
|
||
since <a class="pep reference internal" href="../pep-0008/" title="PEP 8 – Style Guide for Python Code">PEP 8</a> specifically discourages the use of spaces to try to
|
||
align Python code, one would suspect the same argument should apply
|
||
to the use of leading zeros for the same purpose.</p>
|
||
<p>Finally, although the email discussion often focused on whether anybody
|
||
actually <em>uses</em> octal any more, and whether we should cater to those
|
||
old-timers in any case, that is almost entirely beside the point.</p>
|
||
<p>Assume the rare complete newcomer to computing who <em>does</em>, either
|
||
occasionally or as a matter of habit, use leading zeros for decimal
|
||
numbers. Python could either:</p>
|
||
<ol class="loweralpha simple">
|
||
<li>silently do the wrong thing with their numbers, as it does now;</li>
|
||
<li>immediately disabuse them of the notion that this is viable syntax
|
||
(and yes, the SyntaxWarning should be more gentle than it
|
||
currently is, but that is a subject for a different PEP); or</li>
|
||
<li>let them continue to think that computers are happy with
|
||
multi-digit decimal integers which start with “0”.</li>
|
||
</ol>
|
||
<p>Some people passionately believe that (c) is the correct answer,
|
||
and they would be absolutely right if we could be sure that new
|
||
users will never blossom and grow and start writing AJAX applications.</p>
|
||
<p>So while a new Python user may (currently) be mystified at the
|
||
delayed discovery that their numbers don’t work properly, we can
|
||
fix it by explaining to them immediately that Python doesn’t like
|
||
leading zeros (hopefully with a reasonable message!), or we can
|
||
delegate this teaching experience to the JavaScript interpreter
|
||
in the browser, and let them try to debug their issue there.</p>
|
||
</section>
|
||
<section id="supported-radices">
|
||
<h3><a class="toc-backref" href="#supported-radices" role="doc-backlink">Supported radices</a></h3>
|
||
<p>This PEP proposes that the supported radices for the Python
|
||
language will be 2, 8, 10, and 16.</p>
|
||
<p>Once it is agreed that the old syntax for octal (radix 8) representation
|
||
of integers must be removed from the language, the next obvious
|
||
question is “Do we actually need a way to specify (and display)
|
||
numbers in octal?”</p>
|
||
<p>This question is quickly followed by “What radices does the language
|
||
need to support?” Because computers are so adept at doing what you
|
||
tell them to, a tempting answer in the discussion was “all of them.”
|
||
This answer has obviously been given before – the int() constructor
|
||
will accept an explicit radix with a value between 2 and 36, inclusive,
|
||
with the latter number bearing a suspicious arithmetic similarity to
|
||
the sum of the number of numeric digits and the number of same-case
|
||
letters in the ASCII alphabet.</p>
|
||
<p>But the best argument for inclusion will have a use-case to back
|
||
it up, so the idea of supporting all radices was quickly rejected,
|
||
and the only radices left with any real support were decimal,
|
||
hexadecimal, octal, and binary.</p>
|
||
<p>Just because a particular radix has a vocal supporter on the
|
||
mailing list does not mean that it really should be in the
|
||
language, so the rest of this section is a treatise on the
|
||
utility of these particular radices, vs. other possible choices.</p>
|
||
<p>Humans use other numeric bases constantly. If I tell you that
|
||
it is 12:30 PM, I have communicated quantitative information
|
||
arguably composed of <em>three</em> separate bases (12, 60, and 2),
|
||
only one of which is in the “agreed” list above. But the
|
||
<em>communication</em> of that information used two decimal digits
|
||
each for the base 12 and base 60 information, and, perversely,
|
||
two letters for information which could have fit in a single
|
||
decimal digit.</p>
|
||
<p>So, in general, humans communicate “normal” (non-computer)
|
||
numerical information either via names (AM, PM, January, …)
|
||
or via use of decimal notation. Obviously, names are
|
||
seldom used for large sets of items, so decimal is used for
|
||
everything else. There are studies which attempt to explain
|
||
why this is so, typically reaching the expected conclusion
|
||
that the Arabic numeral system is well-suited to human
|
||
cognition. <a class="footnote-reference brackets" href="#id8" id="id3">[3]</a></p>
|
||
<p>There is even support in the history of the design of
|
||
computers to indicate that decimal notation is the correct
|
||
way for computers to communicate with humans. One of
|
||
the first modern computers, ENIAC <a class="footnote-reference brackets" href="#id9" id="id4">[4]</a> computed in decimal,
|
||
even though there were already existing computers which
|
||
operated in binary.</p>
|
||
<p>Decimal computer operation was important enough
|
||
that many computers, including the ubiquitous PC, have
|
||
instructions designed to operate on “binary coded decimal”
|
||
(BCD) <a class="footnote-reference brackets" href="#id10" id="id5">[5]</a>, a representation which devotes 4 bits to each
|
||
decimal digit. These instructions date from a time when the
|
||
most strenuous calculations ever performed on many numbers
|
||
were the calculations actually required to perform textual
|
||
I/O with them. It is possible to display BCD without having
|
||
to perform a divide/remainder operation on every displayed
|
||
digit, and this was a huge computational win when most
|
||
hardware didn’t have fast divide capability. Another factor
|
||
contributing to the use of BCD is that, with BCD calculations,
|
||
rounding will happen exactly the same way that a human would
|
||
do it, so BCD is still sometimes used in fields like finance,
|
||
despite the computational and storage superiority of binary.</p>
|
||
<p>So, if it weren’t for the fact that computers themselves
|
||
normally use binary for efficient computation and data
|
||
storage, string representations of integers would probably
|
||
always be in decimal.</p>
|
||
<p>Unfortunately, computer hardware doesn’t think like humans,
|
||
so programmers and hardware engineers must often resort to
|
||
thinking like the computer, which means that it is important
|
||
for Python to have the ability to communicate binary data
|
||
in a form that is understandable to humans.</p>
|
||
<p>The requirement that the binary data notation must be cognitively
|
||
easy for humans to process means that it should contain an integral
|
||
number of binary digits (bits) per symbol, while otherwise
|
||
conforming quite closely to the standard tried-and-true decimal
|
||
notation (position indicates power, larger magnitude on the left,
|
||
not too many symbols in the alphabet, etc.).</p>
|
||
<p>The obvious “sweet spot” for this binary data notation is
|
||
thus octal, which packs the largest integral number of bits
|
||
possible into a single symbol chosen from the Arabic numeral
|
||
alphabet.</p>
|
||
<p>In fact, some computer architectures, such as the PDP8 and the
|
||
8080/Z80, were defined in terms of octal, in the sense of arranging
|
||
the bitfields of instructions in groups of three, and using
|
||
octal representations to describe the instruction set.</p>
|
||
<p>Even today, octal is important because of bit-packed structures
|
||
which consist of 3 bits per field, such as Unix file permission
|
||
masks.</p>
|
||
<p>But octal has a drawback when used for larger numbers. The
|
||
number of bits per symbol, while integral, is not itself
|
||
a power of two. This limitation (given that the word size
|
||
of most computers these days is a power of two) has resulted
|
||
in hexadecimal, which is more popular than octal despite the
|
||
fact that it requires a 60% larger alphabet than decimal,
|
||
because each symbol contains 4 bits.</p>
|
||
<p>Some numbers, such as Unix file permission masks, are easily
|
||
decoded by humans when represented in octal, but difficult to
|
||
decode in hexadecimal, while other numbers are much easier for
|
||
humans to handle in hexadecimal.</p>
|
||
<p>Unfortunately, there are also binary numbers used in computers
|
||
which are not very well communicated in either hexadecimal or
|
||
octal. Thankfully, fewer people have to deal with these on a
|
||
regular basis, but on the other hand, this means that several
|
||
people on the discussion list questioned the wisdom of adding
|
||
a straight binary representation to Python.</p>
|
||
<p>One example of where these numbers are very useful is in
|
||
reading and writing hardware registers. Sometimes hardware
|
||
designers will eschew human readability and opt for address
|
||
space efficiency, by packing multiple bit fields into a single
|
||
hardware register at unaligned bit locations, and it is tedious
|
||
and error-prone for a human to reconstruct a 5-bit field which
|
||
consists of the upper 3 bits of one hex digit, and the lower 2
|
||
bits of the next hex digit.</p>
|
||
<p>Even if the ability of Python to communicate binary information
|
||
to humans is only useful for a small technical subset of the
|
||
population, it is exactly that population subset which contains
|
||
most, if not all, members of the Python core team, so even straight
|
||
binary, the least useful of these notations, has several enthusiastic
|
||
supporters and few, if any, staunch opponents, among the Python community.</p>
|
||
</section>
|
||
<section id="syntax-for-supported-radices">
|
||
<h3><a class="toc-backref" href="#syntax-for-supported-radices" role="doc-backlink">Syntax for supported radices</a></h3>
|
||
<p>This proposal is to use a “0o” prefix with either uppercase
|
||
or lowercase “o” for octal, and a “0b” prefix with either
|
||
uppercase or lowercase “b” for binary.</p>
|
||
<p>There was strong support for not supporting uppercase, but
|
||
this is a separate subject for a different PEP, as ‘j’ for
|
||
complex numbers, ‘e’ for exponent, and ‘r’ for raw string
|
||
(to name a few) already support uppercase.</p>
|
||
<p>The syntax for delimiting the different radices received a lot of
|
||
attention in the discussion on Python-3000. There are several
|
||
(sometimes conflicting) requirements and “nice-to-haves” for
|
||
this syntax:</p>
|
||
<ul class="simple">
|
||
<li>It should be as compatible with other languages and
|
||
previous versions of Python as is reasonable, both
|
||
for the input syntax and for the output (e.g. string
|
||
% operator) syntax.</li>
|
||
<li>It should be as obvious to the casual observer as
|
||
possible.</li>
|
||
<li>It should be easy to visually distinguish integers
|
||
formatted in the different bases.</li>
|
||
</ul>
|
||
<p>Proposed syntaxes included things like arbitrary radix prefixes,
|
||
such as 16r100 (256 in hexadecimal), and radix suffixes, similar
|
||
to the 100h assembler-style suffix. The debate on whether the
|
||
letter “O” could be used for octal was intense – an uppercase
|
||
“O” looks suspiciously similar to a zero in some fonts. Suggestions
|
||
were made to use a “c” (the second letter of “oCtal”), or even
|
||
to use a “t” for “ocTal” and an “n” for “biNary” to go along
|
||
with the “x” for “heXadecimal”.</p>
|
||
<p>For the string % operator, “o” was already being used to denote
|
||
octal. Binary formatting is not being added to the % operator
|
||
because <a class="pep reference internal" href="../pep-3101/" title="PEP 3101 – Advanced String Formatting">PEP 3101</a> (Advanced String Formatting) already supports
|
||
binary, and % formatting will be deprecated in the future.</p>
|
||
<p>At the end of the day, since uppercase “O” can look like a zero
|
||
and uppercase “B” can look like an 8, it was decided that these
|
||
prefixes should be lowercase only, but, like ‘r’ for raw string,
|
||
that can be a preference or style-guide issue.</p>
|
||
</section>
|
||
</section>
|
||
<section id="open-issues">
|
||
<h2><a class="toc-backref" href="#open-issues" role="doc-backlink">Open Issues</a></h2>
|
||
<p>It was suggested in the discussion that lowercase should be used
|
||
for all numeric and string special modifiers, such as ‘x’ for
|
||
hexadecimal, ‘r’ for raw strings, ‘e’ for exponentiation, and
|
||
‘j’ for complex numbers. This is an issue for a separate PEP.</p>
|
||
<p>This PEP takes no position on uppercase or lowercase for input,
|
||
just noting that, for consistency, if uppercase is not to be
|
||
removed from input parsing for other letters, it should be
|
||
added for octal and binary, and documenting the changes under
|
||
this assumption, as there is not yet a PEP about the case issue.</p>
|
||
<p>Output formatting may be a different story – there is already
|
||
ample precedent for case sensitivity in the output format string,
|
||
and there would need to be a consensus that there is a valid
|
||
use-case for the “alternate form” of the string % operator
|
||
to support uppercase ‘B’ or ‘O’ characters for binary or
|
||
octal output. Currently, <a class="pep reference internal" href="../pep-3101/" title="PEP 3101 – Advanced String Formatting">PEP 3101</a> does not even support this
|
||
alternate capability, and the hex() function does not allow
|
||
the programmer to specify the case of the ‘x’ character.</p>
|
||
<p>There are still some strong feelings that ‘0123’ should be
|
||
allowed as a literal decimal in Python 3.0. If this is the
|
||
right thing to do, this can easily be covered in an additional
|
||
PEP. This proposal only takes the first step of making ‘0123’
|
||
not be a valid octal number, for reasons covered in the rationale.</p>
|
||
<p>Is there (or should there be) an option for the 2to3 translator
|
||
which only makes the 2.6-compatible changes? Should this be
|
||
run on 2.6 library code before the 2.6 release?</p>
|
||
<p>Should a bin() function which matches hex() and oct() be added?</p>
|
||
<p>Is hex() really that useful once we have advanced string formatting?</p>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="id6" role="doc-footnote">
|
||
<dt class="label">[<a href="#id1">1</a>]</dt>
|
||
<dd>GNU libc manual printf integer format conversions
|
||
(<a class="reference external" href="http://www.gnu.org/software/libc/manual/html_node/Integer-Conversions.html">http://www.gnu.org/software/libc/manual/html_node/Integer-Conversions.html</a>)</aside>
|
||
<aside class="footnote brackets" id="id7" role="doc-footnote">
|
||
<dt class="label">[<a href="#id2">2</a>]</dt>
|
||
<dd>Python string formatting operations
|
||
(<a class="reference external" href="http://docs.python.org/library/stdtypes.html#string-formatting-operations">http://docs.python.org/library/stdtypes.html#string-formatting-operations</a>)</aside>
|
||
<aside class="footnote brackets" id="id8" role="doc-footnote">
|
||
<dt class="label">[<a href="#id3">3</a>]</dt>
|
||
<dd>The Representation of Numbers, Jiajie Zhang and Donald A. Norman
|
||
(<a class="reference external" href="http://acad88.sahs.uth.tmc.edu/research/publications/Number-Representation.pdf">http://acad88.sahs.uth.tmc.edu/research/publications/Number-Representation.pdf</a>)</aside>
|
||
<aside class="footnote brackets" id="id9" role="doc-footnote">
|
||
<dt class="label">[<a href="#id4">4</a>]</dt>
|
||
<dd>ENIAC page at Wikipedia
|
||
(<a class="reference external" href="http://en.wikipedia.org/wiki/ENIAC">http://en.wikipedia.org/wiki/ENIAC</a>)</aside>
|
||
<aside class="footnote brackets" id="id10" role="doc-footnote">
|
||
<dt class="label">[<a href="#id5">5</a>]</dt>
|
||
<dd>BCD page at Wikipedia
|
||
(<a class="reference external" href="http://en.wikipedia.org/wiki/Binary-coded_decimal">http://en.wikipedia.org/wiki/Binary-coded_decimal</a>)</aside>
|
||
</aside>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-3127.rst">https://github.com/python/peps/blob/main/peps/pep-3127.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-3127.rst">2025-02-01 08:59:27 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
||
<li><a class="reference internal" href="#grammar-specification">Grammar specification</a></li>
|
||
<li><a class="reference internal" href="#int-specification">int() specification</a></li>
|
||
<li><a class="reference internal" href="#long-specification">long() specification</a></li>
|
||
<li><a class="reference internal" href="#tokenizer-exception-handling">Tokenizer exception handling</a></li>
|
||
<li><a class="reference internal" href="#int-exception-handling">int() exception handling</a></li>
|
||
<li><a class="reference internal" href="#oct-function">oct() function</a></li>
|
||
<li><a class="reference internal" href="#output-formatting">Output formatting</a></li>
|
||
<li><a class="reference internal" href="#transition-from-2-6-to-3-0">Transition from 2.6 to 3.0</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
||
<li><a class="reference internal" href="#background">Background</a></li>
|
||
<li><a class="reference internal" href="#removal-of-old-octal-syntax">Removal of old octal syntax</a></li>
|
||
<li><a class="reference internal" href="#supported-radices">Supported radices</a></li>
|
||
<li><a class="reference internal" href="#syntax-for-supported-radices">Syntax for supported radices</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#open-issues">Open Issues</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-3127.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |