<!DOCTYPE html>
|
|||
|
<html lang="en">
|
|||
|
<head>
|
|||
|
<meta charset="utf-8">
|
|||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|||
|
<meta name="color-scheme" content="light dark">
|
|||
|
<title>PEP 597 – Add optional EncodingWarning | peps.python.org</title>
|
|||
|
<link rel="shortcut icon" href="../_static/py.png">
|
|||
|
<link rel="canonical" href="https://peps.python.org/pep-0597/">
|
|||
|
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
|||
|
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
|||
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
|||
|
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
|||
|
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
|||
|
<meta property="og:title" content='PEP 597 – Add optional EncodingWarning | peps.python.org'>
|
|||
|
<meta property="og:description" content="Add a new warning category EncodingWarning. It is emitted when the encoding argument to open() is omitted and the default locale-specific encoding is used.">
|
|||
|
<meta property="og:type" content="website">
|
|||
|
<meta property="og:url" content="https://peps.python.org/pep-0597/">
|
|||
|
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
|||
|
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
|||
|
<meta property="og:image:alt" content="Python PEPs">
|
|||
|
<meta property="og:image:width" content="200">
|
|||
|
<meta property="og:image:height" content="200">
|
|||
|
<meta name="description" content="Add a new warning category EncodingWarning. It is emitted when the encoding argument to open() is omitted and the default locale-specific encoding is used.">
|
|||
|
<meta name="theme-color" content="#3776ab">
|
|||
|
</head>
|
|||
|
<body>
|
|||
|
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
|||
|
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
|||
|
<title>Following system colour scheme</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
|||
|
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<circle cx="12" cy="12" r="9"></circle>
|
|||
|
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
|||
|
<title>Selected dark colour scheme</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
|||
|
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
|||
|
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
|||
|
<title>Selected light colour scheme</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
|||
|
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<circle cx="12" cy="12" r="5"></circle>
|
|||
|
<line x1="12" y1="1" x2="12" y2="3"></line>
|
|||
|
<line x1="12" y1="21" x2="12" y2="23"></line>
|
|||
|
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
|||
|
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
|||
|
<line x1="1" y1="12" x2="3" y2="12"></line>
|
|||
|
<line x1="21" y1="12" x2="23" y2="12"></line>
|
|||
|
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
|||
|
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
</svg>
|
|||
|
<script>
|
|||
|
|
|||
|
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
|||
|
</script>
|
|||
|
<section id="pep-page-section">
|
|||
|
<header>
|
|||
|
<h1>Python Enhancement Proposals</h1>
|
|||
|
<ul class="breadcrumbs">
|
|||
|
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
|||
|
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
|||
|
<li>PEP 597</li>
|
|||
|
</ul>
|
|||
|
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
|||
|
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
|||
|
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
|||
|
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
|||
|
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
|||
|
</button>
|
|||
|
</header>
|
|||
|
<article>
|
|||
|
<section id="pep-content">
|
|||
|
<h1 class="page-title">PEP 597 – Add optional EncodingWarning</h1>
|
|||
|
<dl class="rfc2822 field-list simple">
|
|||
|
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
|||
|
<dd class="field-odd">Inada Naoki &lt;songofacandy at gmail.com&gt;</dd>
|
|||
|
<dt class="field-even">Status<span class="colon">:</span></dt>
|
|||
|
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
|||
|
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
|||
|
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
|||
|
<dt class="field-even">Created<span class="colon">:</span></dt>
|
|||
|
<dd class="field-even">05-Jun-2019</dd>
|
|||
|
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
|||
|
<dd class="field-odd">3.10</dd>
|
|||
|
</dl>
|
|||
|
<hr class="docutils" />
|
|||
|
<section id="contents">
|
|||
|
<details><summary>Table of Contents</summary><ul class="simple">
|
|||
|
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
|||
|
<li><a class="reference internal" href="#motivation">Motivation</a><ul>
|
|||
|
<li><a class="reference internal" href="#using-the-default-encoding-is-a-common-mistake">Using the default encoding is a common mistake</a></li>
|
|||
|
<li><a class="reference internal" href="#explicit-way-to-use-locale-specific-encoding">Explicit way to use locale-specific encoding</a></li>
|
|||
|
<li><a class="reference internal" href="#prepare-to-change-the-default-encoding-to-utf-8">Prepare to change the default encoding to UTF-8</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
|||
|
<li><a class="reference internal" href="#encodingwarning"><code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code></a></li>
|
|||
|
<li><a class="reference internal" href="#options-to-enable-the-warning">Options to enable the warning</a></li>
|
|||
|
<li><a class="reference internal" href="#encoding-locale"><code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code></a></li>
|
|||
|
<li><a class="reference internal" href="#io-text-encoding"><code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code></a></li>
|
|||
|
<li><a class="reference internal" href="#affected-standard-library-modules">Affected standard library modules</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
|||
|
<li><a class="reference internal" href="#opt-in-warning">Opt-in warning</a></li>
|
|||
|
<li><a class="reference internal" href="#locale-is-not-a-codec-alias">“locale” is not a codec alias</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#backward-compatibility">Backward Compatibility</a></li>
|
|||
|
<li><a class="reference internal" href="#forward-compatibility">Forward Compatibility</a></li>
|
|||
|
<li><a class="reference internal" href="#how-to-teach-this">How to Teach This</a><ul>
|
|||
|
<li><a class="reference internal" href="#for-new-users">For new users</a></li>
|
|||
|
<li><a class="reference internal" href="#for-experienced-users">For experienced users</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
|||
|
<li><a class="reference internal" href="#discussions">Discussions</a></li>
|
|||
|
<li><a class="reference internal" href="#references">References</a></li>
|
|||
|
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
|||
|
</ul>
|
|||
|
</details></section>
|
|||
|
<section id="abstract">
|
|||
|
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
|||
|
<p>Add a new warning category <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code>. It is emitted when the
|
|||
|
<code class="docutils literal notranslate"><span class="pre">encoding</span></code> argument to <code class="docutils literal notranslate"><span class="pre">open()</span></code> is omitted and the default
|
|||
|
locale-specific encoding is used.</p>
|
|||
|
<p>The warning is disabled by default. A new <code class="docutils literal notranslate"><span class="pre">-X</span> <span class="pre">warn_default_encoding</span></code>
|
|||
|
command-line option and a new <code class="docutils literal notranslate"><span class="pre">PYTHONWARNDEFAULTENCODING</span></code> environment
|
|||
|
variable can be used to enable it.</p>
|
|||
|
<p>A <code class="docutils literal notranslate"><span class="pre">"locale"</span></code> argument value for <code class="docutils literal notranslate"><span class="pre">encoding</span></code> is added too. It
|
|||
|
explicitly specifies that the locale encoding should be used, silencing
|
|||
|
the warning.</p>
|
|||
|
</section>
|
|||
|
<section id="motivation">
|
|||
|
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
|||
|
<section id="using-the-default-encoding-is-a-common-mistake">
|
|||
|
<h3><a class="toc-backref" href="#using-the-default-encoding-is-a-common-mistake" role="doc-backlink">Using the default encoding is a common mistake</a></h3>
|
|||
|
<p>Developers using macOS or Linux may forget that the default encoding
|
|||
|
is not always UTF-8.</p>
|
|||
|
<p>For example, using <code class="docutils literal notranslate"><span class="pre">long_description</span> <span class="pre">=</span> <span class="pre">open("README.md").read()</span></code> in
|
|||
|
<code class="docutils literal notranslate"><span class="pre">setup.py</span></code> is a common mistake. Many Windows users cannot install
|
|||
|
such packages if there is at least one non-ASCII character
|
|||
|
(e.g. emoji, author names, copyright symbols, and the like)
|
|||
|
in their UTF-8-encoded <code class="docutils literal notranslate"><span class="pre">README.md</span></code> file.</p>
|
|||
|
<p>Of the 4000 most downloaded packages from PyPI, 489 use non-ASCII
|
|||
|
characters in their README, and 82 fail to install from source on
|
|||
|
non-UTF-8 locales due to not specifying an encoding for a non-ASCII
|
|||
|
file. <a class="footnote-reference brackets" href="#id10" id="id1">[1]</a></p>
|
|||
|
<p>Another example is <code class="docutils literal notranslate"><span class="pre">logging.basicConfig(filename="log.txt")</span></code>.
|
|||
|
Some users might expect it to use UTF-8 by default, but the locale
|
|||
|
encoding is actually what is used. <a class="footnote-reference brackets" href="#id11" id="id2">[2]</a></p>
|
|||
|
<p>Even Python experts may assume that the default encoding is UTF-8.
|
|||
|
This creates bugs that only happen on Windows; see <a class="footnote-reference brackets" href="#id12" id="id3">[3]</a>, <a class="footnote-reference brackets" href="#id13" id="id4">[4]</a>, <a class="footnote-reference brackets" href="#id14" id="id5">[5]</a>,
|
|||
|
and <a class="footnote-reference brackets" href="#id15" id="id6">[6]</a> for example.</p>
|
|||
|
<p>Emitting a warning when the <code class="docutils literal notranslate"><span class="pre">encoding</span></code> argument is omitted will help
|
|||
|
find such mistakes.</p>
|
|||
|
</section>
|
|||
|
<section id="explicit-way-to-use-locale-specific-encoding">
|
|||
|
<h3><a class="toc-backref" href="#explicit-way-to-use-locale-specific-encoding" role="doc-backlink">Explicit way to use locale-specific encoding</a></h3>
|
|||
|
<p><code class="docutils literal notranslate"><span class="pre">open(filename)</span></code> isn’t explicit about which encoding is expected:</p>
|
|||
|
<ul class="simple">
|
|||
|
<li>If ASCII is assumed, this isn’t a bug, but may result in decreased
|
|||
|
performance on Windows, particularly with non-Latin-1 locale encodings</li>
|
|||
|
<li>If UTF-8 is assumed, this may be a bug or a platform-specific script</li>
|
|||
|
<li>If the locale encoding is assumed, the behavior is as expected
|
|||
|
(but could change if future versions of Python modify the default)</li>
|
|||
|
</ul>
|
|||
|
<p>From this point of view, <code class="docutils literal notranslate"><span class="pre">open(filename)</span></code> is not readable code.</p>
|
|||
|
<p><code class="docutils literal notranslate"><span class="pre">encoding=locale.getpreferredencoding(False)</span></code> can be used to
|
|||
|
specify the locale encoding explicitly, but it is too long and easy
|
|||
|
to misuse (e.g. one can forget to pass <code class="docutils literal notranslate"><span class="pre">False</span></code> as its argument).</p>
|
|||
|
<p>This PEP provides an explicit way to specify the locale encoding.</p>
|
|||
|
</section>
|
|||
|
<section id="prepare-to-change-the-default-encoding-to-utf-8">
|
|||
|
<h3><a class="toc-backref" href="#prepare-to-change-the-default-encoding-to-utf-8" role="doc-backlink">Prepare to change the default encoding to UTF-8</a></h3>
|
|||
|
<p>Since UTF-8 has become the de-facto standard text encoding,
|
|||
|
we might default to it for opening files in the future.</p>
|
|||
|
<p>However, such a change will affect many applications and libraries.
|
|||
|
If we start emitting <code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code> everywhere the <code class="docutils literal notranslate"><span class="pre">encoding</span></code>
|
|||
|
argument is omitted, it will be too noisy and painful.</p>
|
|||
|
<p>Although this PEP doesn’t propose changing the default encoding,
|
|||
|
it will help enable that change by:</p>
|
|||
|
<ul class="simple">
|
|||
|
<li>Reducing the number of omitted <code class="docutils literal notranslate"><span class="pre">encoding</span></code> arguments in libraries
|
|||
|
before we start emitting a <code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code> by default.</li>
|
|||
|
<li>Allowing users to pass <code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code> to suppress
|
|||
|
the current warning and any <code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code> added in the future,
|
|||
|
as well as retaining consistent behavior if later Python versions
|
|||
|
change the default, ensuring support for any Python version &gt;=3.10.</li>
|
|||
|
</ul>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<section id="specification">
|
|||
|
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
|||
|
<section id="encodingwarning">
|
|||
|
<h3><a class="toc-backref" href="#encodingwarning" role="doc-backlink"><code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code></a></h3>
|
|||
|
<p>Add a new <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> warning class as a subclass of
|
|||
|
<code class="docutils literal notranslate"><span class="pre">Warning</span></code>. It is emitted when the <code class="docutils literal notranslate"><span class="pre">encoding</span></code> argument is omitted and
|
|||
|
the default locale-specific encoding is used.</p>
|
|||
|
</section>
|
|||
|
<section id="options-to-enable-the-warning">
|
|||
|
<h3><a class="toc-backref" href="#options-to-enable-the-warning" role="doc-backlink">Options to enable the warning</a></h3>
|
|||
|
<p>The <code class="docutils literal notranslate"><span class="pre">-X</span> <span class="pre">warn_default_encoding</span></code> option and the
|
|||
|
<code class="docutils literal notranslate"><span class="pre">PYTHONWARNDEFAULTENCODING</span></code> environment variable are added. They
|
|||
|
are used to enable <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code>.</p>
|
|||
|
<p><code class="docutils literal notranslate"><span class="pre">sys.flags.warn_default_encoding</span></code> is also added. The flag is true when
|
|||
|
<code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> is enabled.</p>
|
|||
|
<p>When the flag is set, <code class="docutils literal notranslate"><span class="pre">io.TextIOWrapper()</span></code>, <code class="docutils literal notranslate"><span class="pre">open()</span></code> and other
|
|||
|
modules using them will emit <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> when the <code class="docutils literal notranslate"><span class="pre">encoding</span></code>
|
|||
|
argument is omitted.</p>
|
|||
|
<p>Since <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> is a subclass of <code class="docutils literal notranslate"><span class="pre">Warning</span></code>, it is
|
|||
|
shown by default (if the <code class="docutils literal notranslate"><span class="pre">warn_default_encoding</span></code> flag is set), unlike
|
|||
|
<code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code>.</p>
|
|||
|
</section>
|
|||
|
<section id="encoding-locale">
|
|||
|
<h3><a class="toc-backref" href="#encoding-locale" role="doc-backlink"><code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code></a></h3>
|
|||
|
<p><code class="docutils literal notranslate"><span class="pre">io.TextIOWrapper</span></code> will accept <code class="docutils literal notranslate"><span class="pre">"locale"</span></code> as a valid argument to
|
|||
|
<code class="docutils literal notranslate"><span class="pre">encoding</span></code>. It has the same meaning as the current <code class="docutils literal notranslate"><span class="pre">encoding=None</span></code>,
|
|||
|
except that <code class="docutils literal notranslate"><span class="pre">io.TextIOWrapper</span></code> doesn’t emit <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> when
|
|||
|
<code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code> is specified.</p>
|
|||
|
</section>
|
|||
|
<section id="io-text-encoding">
|
|||
|
<h3><a class="toc-backref" href="#io-text-encoding" role="doc-backlink"><code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code></a></h3>
|
|||
|
<p><code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code> is a helper for functions with an
|
|||
|
<code class="docutils literal notranslate"><span class="pre">encoding=None</span></code> parameter that pass it to <code class="docutils literal notranslate"><span class="pre">io.TextIOWrapper()</span></code> or
|
|||
|
<code class="docutils literal notranslate"><span class="pre">open()</span></code>.</p>
|
|||
|
<p>A pure Python implementation will look like this:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">text_encoding</span><span class="p">(</span><span class="n">encoding</span><span class="p">,</span> <span class="n">stacklevel</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
|||
|
<span class="w"> </span><span class="sd">"""A helper function to choose the text encoding.</span>
|
|||
|
|
|||
|
<span class="sd"> When *encoding* is not None, just return it.</span>
|
|||
|
<span class="sd"> Otherwise, return the default text encoding (i.e. "locale").</span>
|
|||
|
|
|||
|
<span class="sd"> This function emits an EncodingWarning if *encoding* is None and</span>
|
|||
|
<span class="sd"> sys.flags.warn_default_encoding is true.</span>
|
|||
|
|
|||
|
<span class="sd"> This function can be used in APIs with an encoding=None parameter</span>
|
|||
|
<span class="sd"> that pass it to TextIOWrapper or open.</span>
|
|||
|
<span class="sd"> However, please consider using encoding="utf-8" for new APIs.</span>
|
|||
|
<span class="sd"> """</span>
|
|||
|
<span class="k">if</span> <span class="n">encoding</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|||
|
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">flags</span><span class="o">.</span><span class="n">warn_default_encoding</span><span class="p">:</span>
|
|||
|
<span class="kn">import</span> <span class="nn">warnings</span>
|
|||
|
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
|
|||
|
<span class="s2">"'encoding' argument not specified."</span><span class="p">,</span>
|
|||
|
<span class="ne">EncodingWarning</span><span class="p">,</span> <span class="n">stacklevel</span> <span class="o">+</span> <span class="mi">2</span><span class="p">)</span>
|
|||
|
<span class="n">encoding</span> <span class="o">=</span> <span class="s2">"locale"</span>
|
|||
|
<span class="k">return</span> <span class="n">encoding</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>For example, <code class="docutils literal notranslate"><span class="pre">pathlib.Path.read_text()</span></code> can use it like this:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">read_text</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|||
|
<span class="n">encoding</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">text_encoding</span><span class="p">(</span><span class="n">encoding</span><span class="p">)</span>
|
|||
|
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s1">'r'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="n">errors</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
|||
|
<span class="k">return</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>By using <code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code>, <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> is emitted for
|
|||
|
the caller of <code class="docutils literal notranslate"><span class="pre">read_text()</span></code> instead of <code class="docutils literal notranslate"><span class="pre">read_text()</span></code> itself.</p>
|
|||
|
</section>
|
|||
|
<section id="affected-standard-library-modules">
|
|||
|
<h3><a class="toc-backref" href="#affected-standard-library-modules" role="doc-backlink">Affected standard library modules</a></h3>
|
|||
|
<p>Many standard library modules will be affected by this change.</p>
|
|||
|
<p>Most APIs accepting <code class="docutils literal notranslate"><span class="pre">encoding=None</span></code> will use <code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code>
|
|||
|
as written in the previous section.</p>
|
|||
|
<p>Where using the locale encoding as the default encoding is reasonable,
|
|||
|
<code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code> will be used instead. For example,
|
|||
|
the <code class="docutils literal notranslate"><span class="pre">subprocess</span></code> module will use the locale encoding as the default
|
|||
|
for pipes.</p>
|
|||
|
<p>Many tests use <code class="docutils literal notranslate"><span class="pre">open()</span></code> without <code class="docutils literal notranslate"><span class="pre">encoding</span></code> specified to read
|
|||
|
ASCII text files. They should be rewritten with <code class="docutils literal notranslate"><span class="pre">encoding="ascii"</span></code>.</p>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<section id="rationale">
|
|||
|
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
|||
|
<section id="opt-in-warning">
|
|||
|
<h3><a class="toc-backref" href="#opt-in-warning" role="doc-backlink">Opt-in warning</a></h3>
|
|||
|
<p>Although <code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code> is suppressed by default, always
|
|||
|
emitting <code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code> when the <code class="docutils literal notranslate"><span class="pre">encoding</span></code> argument is
|
|||
|
omitted would be too noisy.</p>
|
|||
|
<p>Noisy warnings may lead developers to dismiss the
|
|||
|
<code class="docutils literal notranslate"><span class="pre">DeprecationWarning</span></code>.</p>
|
|||
|
</section>
|
|||
|
<section id="locale-is-not-a-codec-alias">
|
|||
|
<h3><a class="toc-backref" href="#locale-is-not-a-codec-alias" role="doc-backlink">“locale” is not a codec alias</a></h3>
|
|||
|
<p>We don’t add “locale” as a codec alias because the locale can be
|
|||
|
changed at runtime.</p>
|
|||
|
<p>Additionally, <code class="docutils literal notranslate"><span class="pre">TextIOWrapper</span></code> checks <code class="docutils literal notranslate"><span class="pre">os.device_encoding()</span></code>
|
|||
|
when <code class="docutils literal notranslate"><span class="pre">encoding=None</span></code>. This behavior cannot be implemented in
|
|||
|
a codec.</p>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<section id="backward-compatibility">
|
|||
|
<h2><a class="toc-backref" href="#backward-compatibility" role="doc-backlink">Backward Compatibility</a></h2>
|
|||
|
<p>The new warning is not emitted by default, so this PEP is 100%
|
|||
|
backwards-compatible.</p>
|
|||
|
</section>
|
|||
|
<section id="forward-compatibility">
|
|||
|
<h2><a class="toc-backref" href="#forward-compatibility" role="doc-backlink">Forward Compatibility</a></h2>
|
|||
|
<p>Passing <code class="docutils literal notranslate"><span class="pre">"locale"</span></code> as the argument to <code class="docutils literal notranslate"><span class="pre">encoding</span></code> is not
|
|||
|
forward-compatible. Code using it will not work on Python older than
|
|||
|
3.10, and will instead raise <code class="docutils literal notranslate"><span class="pre">LookupError:</span> <span class="pre">unknown</span> <span class="pre">encoding:</span> <span class="pre">locale</span></code>.</p>
|
|||
|
<p>Until developers can drop Python 3.9 support, <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code>
|
|||
|
can only be used for finding missing <code class="docutils literal notranslate"><span class="pre">encoding="utf-8"</span></code> arguments.</p>
|
|||
|
</section>
|
|||
|
<section id="how-to-teach-this">
|
|||
|
<h2><a class="toc-backref" href="#how-to-teach-this" role="doc-backlink">How to Teach This</a></h2>
|
|||
|
<section id="for-new-users">
|
|||
|
<h3><a class="toc-backref" href="#for-new-users" role="doc-backlink">For new users</a></h3>
|
|||
|
<p>Since <code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code> is used to write cross-platform code,
|
|||
|
there is no need to teach it to new users.</p>
|
|||
|
<p>We can just recommend using UTF-8 for text files and using
|
|||
|
<code class="docutils literal notranslate"><span class="pre">encoding="utf-8"</span></code> when opening them.</p>
|
|||
|
</section>
|
|||
|
<section id="for-experienced-users">
|
|||
|
<h3><a class="toc-backref" href="#for-experienced-users" role="doc-backlink">For experienced users</a></h3>
|
|||
|
<p>Using <code class="docutils literal notranslate"><span class="pre">open(filename)</span></code> to read text files encoded in UTF-8 is a
|
|||
|
common mistake. It may not work on Windows because UTF-8 is not the
|
|||
|
default encoding.</p>
|
|||
|
<p>You can use <code class="docutils literal notranslate"><span class="pre">-X</span> <span class="pre">warn_default_encoding</span></code> or
|
|||
|
<code class="docutils literal notranslate"><span class="pre">PYTHONWARNDEFAULTENCODING=1</span></code> to find this type of mistake.</p>
|
|||
|
<p>Omitting the <code class="docutils literal notranslate"><span class="pre">encoding</span></code> argument is not a bug when opening text files
|
|||
|
encoded in the locale encoding, but <code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code> is recommended
|
|||
|
in Python 3.10 and later because it is more explicit.</p>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<section id="reference-implementation">
|
|||
|
<h2><a class="toc-backref" href="#reference-implementation" role="doc-backlink">Reference Implementation</a></h2>
|
|||
|
<p><a class="reference external" href="https://github.com/python/cpython/pull/19481">https://github.com/python/cpython/pull/19481</a></p>
|
|||
|
</section>
|
|||
|
<section id="discussions">
|
|||
|
<h2><a class="toc-backref" href="#discussions" role="doc-backlink">Discussions</a></h2>
|
|||
|
<p>The latest discussion thread is:
|
|||
|
<a class="reference external" href="https://mail.python.org/archives/list/python-dev@python.org/thread/SFYUP2TWD5JZ5KDLVSTZ44GWKVY4YNCV/">https://mail.python.org/archives/list/python-dev@python.org/thread/SFYUP2TWD5JZ5KDLVSTZ44GWKVY4YNCV/</a></p>
|
|||
|
<ul class="simple">
|
|||
|
<li>Why not implement this in linters?<ul>
|
|||
|
<li><code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code> and <code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code> must be implemented
|
|||
|
in Python.</li>
|
|||
|
<li>It is difficult to find all callers of functions wrapping
|
|||
|
<code class="docutils literal notranslate"><span class="pre">open()</span></code> or <code class="docutils literal notranslate"><span class="pre">TextIOWrapper()</span></code> (see the <code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code>
|
|||
|
section).</li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li>Many developers will not use the option.<ul>
|
|||
|
<li>Some will, and report the warnings to libraries they use,
|
|||
|
so the option is worth it even if many developers don’t enable it.</li>
|
|||
|
<li>For example, I found <a class="footnote-reference brackets" href="#id16" id="id7">[7]</a> and <a class="footnote-reference brackets" href="#id17" id="id8">[8]</a> by running
|
|||
|
<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">-U</span> <span class="pre">pip</span></code>, and <a class="footnote-reference brackets" href="#id18" id="id9">[9]</a> by running <code class="docutils literal notranslate"><span class="pre">tox</span></code>
|
|||
|
with the reference implementation. This demonstrates how this
|
|||
|
option can be used to find potential issues.</li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
</ul>
|
|||
|
</section>
|
|||
|
<section id="references">
|
|||
|
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
|||
|
<aside class="footnote-list brackets">
|
|||
|
<aside class="footnote brackets" id="id10" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id1">1</a>]</dt>
|
|||
|
<dd>“Packages can’t be installed when encoding is not UTF-8”
|
|||
|
(<a class="reference external" href="https://github.com/methane/pep597-pypi-ascii">https://github.com/methane/pep597-pypi-ascii</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id11" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id2">2</a>]</dt>
|
|||
|
<dd>“Logging - Inconsistent behaviour when handling unicode”
|
|||
|
(<a class="reference external" href="https://bugs.python.org/issue37111">https://bugs.python.org/issue37111</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id12" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id3">3</a>]</dt>
|
|||
|
<dd>Packaging tutorial in packaging.python.org didn’t specify
|
|||
|
encoding to read a <code class="docutils literal notranslate"><span class="pre">README.md</span></code>
|
|||
|
(<a class="reference external" href="https://github.com/pypa/packaging.python.org/pull/682">https://github.com/pypa/packaging.python.org/pull/682</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id13" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id4">4</a>]</dt>
|
|||
|
<dd><code class="docutils literal notranslate"><span class="pre">json.tool</span></code> had used locale encoding to read JSON files.
|
|||
|
(<a class="reference external" href="https://bugs.python.org/issue33684">https://bugs.python.org/issue33684</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id14" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id5">5</a>]</dt>
|
|||
|
<dd>site: Potential UnicodeDecodeError when handling pth file
|
|||
|
(<a class="reference external" href="https://bugs.python.org/issue43214">https://bugs.python.org/issue43214</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id15" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id6">6</a>]</dt>
|
|||
|
<dd>pypa/pip: “Installing packages fails if Python 3 installed
|
|||
|
into path with non-ASCII characters”
|
|||
|
(<a class="reference external" href="https://github.com/pypa/pip/issues/9054">https://github.com/pypa/pip/issues/9054</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id16" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id7">7</a>]</dt>
|
|||
|
<dd>“site: Potential UnicodeDecodeError when handling pth file”
|
|||
|
(<a class="reference external" href="https://bugs.python.org/issue43214">https://bugs.python.org/issue43214</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id17" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id8">8</a>]</dt>
|
|||
|
<dd>“[pypa/pip] Use <code class="docutils literal notranslate"><span class="pre">encoding</span></code> option or binary mode for open()”
|
|||
|
(<a class="reference external" href="https://github.com/pypa/pip/pull/9608">https://github.com/pypa/pip/pull/9608</a>)</aside>
|
|||
|
<aside class="footnote brackets" id="id18" role="doc-footnote">
|
|||
|
<dt class="label">[<a href="#id9">9</a>]</dt>
|
|||
|
<dd>“Possible UnicodeError caused by missing encoding="utf-8"”
|
|||
|
(<a class="reference external" href="https://github.com/tox-dev/tox/issues/1908">https://github.com/tox-dev/tox/issues/1908</a>)</aside>
|
|||
|
</aside>
|
|||
|
</section>
|
|||
|
<section id="copyright">
|
|||
|
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
|||
|
<p>This document is placed in the public domain or under the
|
|||
|
CC0-1.0-Universal license, whichever is more permissive.</p>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<hr class="docutils" />
|
|||
|
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-0597.rst">https://github.com/python/peps/blob/main/peps/pep-0597.rst</a></p>
|
|||
|
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-0597.rst">2023-09-09 17:39:29 GMT</a></p>
|
|||
|
|
|||
|
</article>
|
|||
|
<nav id="pep-sidebar">
|
|||
|
<h2>Contents</h2>
|
|||
|
<ul>
|
|||
|
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
|||
|
<li><a class="reference internal" href="#motivation">Motivation</a><ul>
|
|||
|
<li><a class="reference internal" href="#using-the-default-encoding-is-a-common-mistake">Using the default encoding is a common mistake</a></li>
|
|||
|
<li><a class="reference internal" href="#explicit-way-to-use-locale-specific-encoding">Explicit way to use locale-specific encoding</a></li>
|
|||
|
<li><a class="reference internal" href="#prepare-to-change-the-default-encoding-to-utf-8">Prepare to change the default encoding to UTF-8</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#specification">Specification</a><ul>
|
|||
|
<li><a class="reference internal" href="#encodingwarning"><code class="docutils literal notranslate"><span class="pre">EncodingWarning</span></code></a></li>
|
|||
|
<li><a class="reference internal" href="#options-to-enable-the-warning">Options to enable the warning</a></li>
|
|||
|
<li><a class="reference internal" href="#encoding-locale"><code class="docutils literal notranslate"><span class="pre">encoding="locale"</span></code></a></li>
|
|||
|
<li><a class="reference internal" href="#io-text-encoding"><code class="docutils literal notranslate"><span class="pre">io.text_encoding()</span></code></a></li>
|
|||
|
<li><a class="reference internal" href="#affected-standard-library-modules">Affected standard library modules</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
|||
|
<li><a class="reference internal" href="#opt-in-warning">Opt-in warning</a></li>
|
|||
|
<li><a class="reference internal" href="#locale-is-not-a-codec-alias">“locale” is not a codec alias</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#backward-compatibility">Backward Compatibility</a></li>
|
|||
|
<li><a class="reference internal" href="#forward-compatibility">Forward Compatibility</a></li>
|
|||
|
<li><a class="reference internal" href="#how-to-teach-this">How to Teach This</a><ul>
|
|||
|
<li><a class="reference internal" href="#for-new-users">For new users</a></li>
|
|||
|
<li><a class="reference internal" href="#for-experienced-users">For experienced users</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#reference-implementation">Reference Implementation</a></li>
|
|||
|
<li><a class="reference internal" href="#discussions">Discussions</a></li>
|
|||
|
<li><a class="reference internal" href="#references">References</a></li>
|
|||
|
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
|||
|
</ul>
|
|||
|
|
|||
|
<br>
|
|||
|
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-0597.rst">Page Source (GitHub)</a>
|
|||
|
</nav>
|
|||
|
</section>
|
|||
|
<script src="../_static/colour_scheme.js"></script>
|
|||
|
<script src="../_static/wrap_tables.js"></script>
|
|||
|
<script src="../_static/sticky_banner.js"></script>
|
|||
|
</body>
|
|||
|
</html>
|