197 lines
11 KiB
HTML
197 lines
11 KiB
HTML
|
|
|||
|
<!DOCTYPE html>
|
|||
|
<html lang="en">
|
|||
|
<head>
|
|||
|
<meta charset="utf-8">
|
|||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|||
|
<meta name="color-scheme" content="light dark">
|
|||
|
<title>PEP 3120 – Using UTF-8 as the default source encoding | peps.python.org</title>
|
|||
|
<link rel="shortcut icon" href="../_static/py.png">
|
|||
|
<link rel="canonical" href="https://peps.python.org/pep-3120/">
|
|||
|
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
|||
|
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
|||
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
|||
|
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
|||
|
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
|||
|
<meta property="og:title" content='PEP 3120 – Using UTF-8 as the default source encoding | peps.python.org'>
|
|||
|
<meta property="og:description" content="Python Enhancement Proposals (PEPs)">
|
|||
|
<meta property="og:type" content="website">
|
|||
|
<meta property="og:url" content="https://peps.python.org/pep-3120/">
|
|||
|
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
|||
|
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
|||
|
<meta property="og:image:alt" content="Python PEPs">
|
|||
|
<meta property="og:image:width" content="200">
|
|||
|
<meta property="og:image:height" content="200">
|
|||
|
<meta name="description" content="Python Enhancement Proposals (PEPs)">
|
|||
|
<meta name="theme-color" content="#3776ab">
|
|||
|
</head>
|
|||
|
<body>
|
|||
|
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
|||
|
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
|||
|
<title>Following system colour scheme</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
|||
|
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<circle cx="12" cy="12" r="9"></circle>
|
|||
|
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
|||
|
<title>Selected dark colour scheme</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
|||
|
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
|||
|
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
|||
|
<title>Selected light colour scheme</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
|||
|
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<circle cx="12" cy="12" r="5"></circle>
|
|||
|
<line x1="12" y1="1" x2="12" y2="3"></line>
|
|||
|
<line x1="12" y1="21" x2="12" y2="23"></line>
|
|||
|
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
|||
|
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
|||
|
<line x1="1" y1="12" x2="3" y2="12"></line>
|
|||
|
<line x1="21" y1="12" x2="23" y2="12"></line>
|
|||
|
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
|||
|
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
</svg>
|
|||
|
<script>
|
|||
|
|
|||
|
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
|||
|
</script>
|
|||
|
<section id="pep-page-section">
|
|||
|
<header>
|
|||
|
<h1>Python Enhancement Proposals</h1>
|
|||
|
<ul class="breadcrumbs">
|
|||
|
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
|||
|
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
|||
|
<li>PEP 3120</li>
|
|||
|
</ul>
|
|||
|
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
|||
|
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
|||
|
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
|||
|
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
|||
|
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
|||
|
</button>
|
|||
|
</header>
|
|||
|
<article>
|
|||
|
<section id="pep-content">
|
|||
|
<h1 class="page-title">PEP 3120 – Using UTF-8 as the default source encoding</h1>
|
|||
|
<dl class="rfc2822 field-list simple">
|
|||
|
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
|||
|
<dd class="field-odd">Martin von Löwis &lt;martin at v.loewis.de&gt;</dd>
|
|||
|
<dt class="field-even">Status<span class="colon">:</span></dt>
|
|||
|
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
|||
|
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
|||
|
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
|||
|
<dt class="field-even">Created<span class="colon">:</span></dt>
|
|||
|
<dd class="field-even">15-Apr-2007</dd>
|
|||
|
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
|||
|
<dd class="field-odd">3.0</dd>
|
|||
|
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
|||
|
<dd class="field-even"><p></p></dd>
|
|||
|
</dl>
|
|||
|
<hr class="docutils" />
|
|||
|
<section id="contents">
|
|||
|
<details><summary>Table of Contents</summary><ul class="simple">
|
|||
|
<li><a class="reference internal" href="#specification">Specification</a></li>
|
|||
|
<li><a class="reference internal" href="#a-bit-of-history">A Bit of History</a></li>
|
|||
|
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
|||
|
<li><a class="reference internal" href="#implementation">Implementation</a></li>
|
|||
|
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
|||
|
</ul>
|
|||
|
</details></section>
|
|||
|
<section id="specification">
|
|||
|
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
|||
|
<p>This PEP proposes to change the default source encoding from ASCII to
|
|||
|
UTF-8. Support for alternative source encodings (<a class="pep reference internal" href="../pep-0263/" title="PEP 263 – Defining Python Source Code Encodings">PEP 263</a>) continues to
|
|||
|
exist; an explicit encoding declaration takes precedence over the
|
|||
|
default.</p>
|
|||
|
</section>
|
|||
|
<section id="a-bit-of-history">
|
|||
|
<h2><a class="toc-backref" href="#a-bit-of-history" role="doc-backlink">A Bit of History</a></h2>
|
|||
|
<p>In Python 1, the source encoding was unspecified, except that the
|
|||
|
source encoding had to be a superset of the system’s basic execution
|
|||
|
character set (i.e. an ASCII superset, on most systems). The source
|
|||
|
encoding was only relevant for the lexis itself (bytes representing
|
|||
|
letters for keywords, identifiers, punctuation, line breaks, etc.).
|
|||
|
The contents of a string literal were copied literally from the file
|
|||
|
on source.</p>
|
|||
|
<p>In Python 2.0, the source encoding changed to Latin-1 as a side effect
|
|||
|
of introducing Unicode. For Unicode string literals, the characters
|
|||
|
were still copied literally from the source file, but widened on a
|
|||
|
character-by-character basis. As Unicode gives a fixed interpretation
|
|||
|
to code points, this algorithm effectively fixed a source encoding, at
|
|||
|
least for files containing non-ASCII characters in Unicode literals.</p>
|
|||
|
<p><a class="pep reference internal" href="../pep-0263/" title="PEP 263 – Defining Python Source Code Encodings">PEP 263</a> identified the problem that you can use only those Unicode
|
|||
|
characters in a Unicode literal which are also in Latin-1, and
|
|||
|
introduced a syntax for declaring the source encoding. If no source
|
|||
|
encoding was given, the default should be ASCII. For compatibility
|
|||
|
with Python 2.0 and 2.1, files were interpreted as Latin-1 for a
|
|||
|
transitional period. This transition ended with Python 2.5, which
|
|||
|
gives an error if non-ASCII characters are encountered and no source
|
|||
|
encoding is declared.</p>
|
|||
|
</section>
|
|||
|
<section id="rationale">
|
|||
|
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
|||
|
<p>With <a class="pep reference internal" href="../pep-0263/" title="PEP 263 – Defining Python Source Code Encodings">PEP 263</a>, using arbitrary non-ASCII characters in a Python file is
|
|||
|
possible, but tedious. One has to explicitly add an encoding
|
|||
|
declaration. Even though some editors (like IDLE and Emacs) support
|
|||
|
the declarations of <a class="pep reference internal" href="../pep-0263/" title="PEP 263 – Defining Python Source Code Encodings">PEP 263</a>, many editors still do not (and never
|
|||
|
will); users have to explicitly adjust the encoding which the editor
|
|||
|
assumes on a file-by-file basis.</p>
|
|||
|
<p>When the default encoding is changed to UTF-8, adding non-ASCII text
|
|||
|
to Python files becomes easier and more portable: On some systems,
|
|||
|
editors will automatically choose UTF-8 when saving text (e.g. on Unix
|
|||
|
systems where the locale uses UTF-8). On other systems, editors will
|
|||
|
guess the encoding when reading the file, and UTF-8 is easy to
|
|||
|
guess. Yet other editors support associating a default encoding with a
|
|||
|
file extension, allowing users to associate .py with UTF-8.</p>
|
|||
|
<p>For Python 2, an important reason for using non-UTF-8 encodings was
|
|||
|
that byte string literals would be in the source encoding at run-time,
|
|||
|
making it possible to then output them to a file or render them to the user
|
|||
|
as-is. With Python 3, all strings will be Unicode strings, so the
|
|||
|
original encoding of the source will have no impact at run-time.</p>
|
|||
|
</section>
|
|||
|
<section id="implementation">
|
|||
|
<h2><a class="toc-backref" href="#implementation" role="doc-backlink">Implementation</a></h2>
|
|||
|
<p>The parser needs to be changed to accept bytes &gt; 127 if no source
|
|||
|
encoding is specified; instead of giving an error, it needs to check
|
|||
|
that the bytes are well-formed UTF-8 (decoding is not necessary,
|
|||
|
as the parser converts all source code to UTF-8, anyway).</p>
|
|||
|
<p>IDLE needs to be changed to use UTF-8 as the default encoding.</p>
|
|||
|
</section>
|
|||
|
<section id="copyright">
|
|||
|
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
|||
|
<p>This document has been placed in the public domain.</p>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<hr class="docutils" />
|
|||
|
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-3120.rst">https://github.com/python/peps/blob/main/peps/pep-3120.rst</a></p>
|
|||
|
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-3120.rst">2023-09-09 17:39:29 GMT</a></p>
|
|||
|
|
|||
|
</article>
|
|||
|
<nav id="pep-sidebar">
|
|||
|
<h2>Contents</h2>
|
|||
|
<ul>
|
|||
|
<li><a class="reference internal" href="#specification">Specification</a></li>
|
|||
|
<li><a class="reference internal" href="#a-bit-of-history">A Bit of History</a></li>
|
|||
|
<li><a class="reference internal" href="#rationale">Rationale</a></li>
|
|||
|
<li><a class="reference internal" href="#implementation">Implementation</a></li>
|
|||
|
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
|||
|
</ul>
|
|||
|
|
|||
|
<br>
|
|||
|
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-3120.rst">Page Source (GitHub)</a>
|
|||
|
</nav>
|
|||
|
</section>
|
|||
|
<script src="../_static/colour_scheme.js"></script>
|
|||
|
<script src="../_static/wrap_tables.js"></script>
|
|||
|
<script src="../_static/sticky_banner.js"></script>
|
|||
|
</body>
|
|||
|
</html>
|