396 lines
27 KiB
HTML
396 lines
27 KiB
HTML
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<meta name="color-scheme" content="light dark">
|
||
<title>PEP 3138 – String representation in Python 3000 | peps.python.org</title>
|
||
<link rel="shortcut icon" href="../_static/py.png">
|
||
<link rel="canonical" href="https://peps.python.org/pep-3138/">
|
||
<link rel="stylesheet" href="../_static/style.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/mq.css" type="text/css">
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" media="(prefers-color-scheme: light)" id="pyg-light">
|
||
<link rel="stylesheet" href="../_static/pygments_dark.css" type="text/css" media="(prefers-color-scheme: dark)" id="pyg-dark">
|
||
<link rel="alternate" type="application/rss+xml" title="Latest PEPs" href="https://peps.python.org/peps.rss">
|
||
<meta property="og:title" content='PEP 3138 – String representation in Python 3000 | peps.python.org'>
|
||
<meta property="og:description" content="This PEP proposes a new string representation form for Python 3000. In Python prior to Python 3000, the repr() built-in function converted arbitrary objects to printable ASCII strings for debugging and logging. For Python 3000, a wider range of charact...">
|
||
<meta property="og:type" content="website">
|
||
<meta property="og:url" content="https://peps.python.org/pep-3138/">
|
||
<meta property="og:site_name" content="Python Enhancement Proposals (PEPs)">
|
||
<meta property="og:image" content="https://peps.python.org/_static/og-image.png">
|
||
<meta property="og:image:alt" content="Python PEPs">
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="description" content="This PEP proposes a new string representation form for Python 3000. In Python prior to Python 3000, the repr() built-in function converted arbitrary objects to printable ASCII strings for debugging and logging. For Python 3000, a wider range of charact...">
|
||
<meta name="theme-color" content="#3776ab">
|
||
</head>
|
||
<body>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Following system colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="9"></circle>
|
||
<path d="M12 3v18m0-12l4.65-4.65M12 14.3l7.37-7.37M12 19.6l8.85-8.85"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected dark colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"></path>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24" pointer-events="all">
|
||
<title>Selected light colour scheme</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
|
||
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
<script>
|
||
|
||
document.documentElement.dataset.colour_scheme = localStorage.getItem("colour_scheme") || "auto"
|
||
</script>
|
||
<section id="pep-page-section">
|
||
<header>
|
||
<h1>Python Enhancement Proposals</h1>
|
||
<ul class="breadcrumbs">
|
||
<li><a href="https://www.python.org/" title="The Python Programming Language">Python</a> » </li>
|
||
<li><a href="../pep-0000/">PEP Index</a> » </li>
|
||
<li>PEP 3138</li>
|
||
</ul>
|
||
<button id="colour-scheme-cycler" onClick="setColourScheme(nextColourScheme())">
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg aria-hidden="true" class="colour-scheme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
<span class="visually-hidden">Toggle light / dark / auto colour theme</span>
|
||
</button>
|
||
</header>
|
||
<article>
|
||
<section id="pep-content">
|
||
<h1 class="page-title">PEP 3138 – String representation in Python 3000</h1>
|
||
<dl class="rfc2822 field-list simple">
|
||
<dt class="field-odd">Author<span class="colon">:</span></dt>
|
||
<dd class="field-odd">Atsuo Ishimoto &lt;ishimoto at gembook.org&gt;</dd>
|
||
<dt class="field-even">Status<span class="colon">:</span></dt>
|
||
<dd class="field-even"><abbr title="Accepted and implementation complete, or no longer active">Final</abbr></dd>
|
||
<dt class="field-odd">Type<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><abbr title="Normative PEP with a new feature for Python, implementation change for CPython or interoperability standard for the ecosystem">Standards Track</abbr></dd>
|
||
<dt class="field-even">Created<span class="colon">:</span></dt>
|
||
<dd class="field-even">05-May-2008</dd>
|
||
<dt class="field-odd">Python-Version<span class="colon">:</span></dt>
|
||
<dd class="field-odd">3.0</dd>
|
||
<dt class="field-even">Post-History<span class="colon">:</span></dt>
|
||
<dd class="field-even">05-May-2008, 05-Jun-2008</dd>
|
||
</dl>
|
||
<hr class="docutils" />
|
||
<section id="contents">
|
||
<details><summary>Table of Contents</summary><ul class="simple">
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
||
<li><a class="reference internal" href="#alternate-solutions">Alternate Solutions</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#rejected-proposals">Rejected Proposals</a></li>
|
||
<li><a class="reference internal" href="#implementation">Implementation</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
</details></section>
|
||
<section id="abstract">
|
||
<h2><a class="toc-backref" href="#abstract" role="doc-backlink">Abstract</a></h2>
|
||
<p>This PEP proposes a new string representation form for Python 3000.
|
||
In Python prior to Python 3000, the repr() built-in function converted
|
||
arbitrary objects to printable ASCII strings for debugging and
|
||
logging. For Python 3000, a wider range of characters, based on the
|
||
Unicode standard, should be considered ‘printable’.</p>
|
||
</section>
|
||
<section id="motivation">
|
||
<h2><a class="toc-backref" href="#motivation" role="doc-backlink">Motivation</a></h2>
|
||
<p>The current repr() converts 8-bit strings to ASCII using the following
|
||
algorithm.</p>
|
||
<ul class="simple">
|
||
<li>Convert CR, LF, TAB and ‘\’ to ‘\r’, ‘\n’, ‘\t’, ‘\\’.</li>
|
||
<li>Convert other non-printable characters (0x00-0x1f, 0x7f) and
|
||
non-ASCII characters (>= 0x80) to ‘\xXX’.</li>
|
||
<li>Backslash-escape quote characters (apostrophe, ') and add the quote
|
||
character at the beginning and the end.</li>
|
||
</ul>
|
||
<p>For Unicode strings, the following additional conversions are done.</p>
|
||
<ul class="simple">
|
||
<li>Convert leading surrogate pair characters without trailing character
|
||
(0xd800-0xdbff, but not followed by 0xdc00-0xdfff) to ‘\uXXXX’.</li>
|
||
<li>Convert 16-bit characters (>= 0x100) to ‘\uXXXX’.</li>
|
||
<li>Convert 21-bit characters (>= 0x10000) and surrogate pair characters
|
||
to ‘\U00xxxxxx’.</li>
|
||
</ul>
|
||
<p>This algorithm converts any string to printable ASCII, and repr() is
|
||
used as a handy and safe way to print strings for debugging or for
|
||
logging. Although all non-ASCII characters are escaped, this does not
|
||
matter when most of the string’s characters are ASCII. But for other
|
||
languages, such as Japanese where most characters in a string are not
|
||
ASCII, this is very inconvenient.</p>
|
||
<p>We can use <code class="docutils literal notranslate"><span class="pre">print(aJapaneseString)</span></code> to get a readable string, but we
|
||
don’t have a similar workaround for printing strings from collections
|
||
such as lists or tuples. <code class="docutils literal notranslate"><span class="pre">print(listOfJapaneseStrings)</span></code> uses repr()
|
||
to build the string to be printed, so the resulting strings are always
|
||
hex-escaped. Or when <code class="docutils literal notranslate"><span class="pre">open(japaneseFilename)</span></code> raises an exception,
|
||
the error message is something like <code class="docutils literal notranslate"><span class="pre">IOError:</span> <span class="pre">[Errno</span> <span class="pre">2]</span> <span class="pre">No</span> <span class="pre">such</span> <span class="pre">file</span>
|
||
<span class="pre">or</span> <span class="pre">directory:</span> <span class="pre">'\u65e5\u672c\u8a9e'</span></code>, which isn’t helpful.</p>
|
||
<p>Python 3000 has a lot of nice features for non-Latin users such as
|
||
non-ASCII identifiers, so it would be helpful if Python could also
|
||
progress in a similar way for printable output.</p>
|
||
<p>Some users might be concerned that such output will mess up their
|
||
console if they print binary data like images. But this is unlikely
|
||
to happen in practice because bytes and strings are different types in
|
||
Python 3000, so printing an image to the console won’t mess it up.</p>
|
||
<p>This issue was once discussed by Hye-Shik Chang <a class="footnote-reference brackets" href="#id3" id="id1">[1]</a>, but was rejected.</p>
|
||
</section>
|
||
<section id="specification">
|
||
<h2><a class="toc-backref" href="#specification" role="doc-backlink">Specification</a></h2>
|
||
<ul class="simple">
|
||
<li>Add a new function to the Python C API <code class="docutils literal notranslate"><span class="pre">int</span> <span class="pre">Py_UNICODE_ISPRINTABLE</span>
|
||
<span class="pre">(Py_UNICODE</span> <span class="pre">ch)</span></code>. This function returns 0 if repr() should escape
|
||
the Unicode character <code class="docutils literal notranslate"><span class="pre">ch</span></code>; otherwise it returns 1. Characters
|
||
that should be escaped are defined in the Unicode character database
|
||
as:<ul>
|
||
<li>Cc (Other, Control)</li>
|
||
<li>Cf (Other, Format)</li>
|
||
<li>Cs (Other, Surrogate)</li>
|
||
<li>Co (Other, Private Use)</li>
|
||
<li>Cn (Other, Not Assigned)</li>
|
||
<li>Zl (Separator, Line), refers to LINE SEPARATOR (’\u2028’).</li>
|
||
<li>Zp (Separator, Paragraph), refers to PARAGRAPH SEPARATOR
|
||
(’\u2029’).</li>
|
||
<li>Zs (Separator, Space) other than ASCII space (’\x20’). Characters
|
||
in this category should be escaped to avoid ambiguity.</li>
|
||
</ul>
|
||
</li>
|
||
<li>The algorithm to build repr() strings should be changed to:<ul>
|
||
<li>Convert CR, LF, TAB and ‘\’ to ‘\r’, ‘\n’, ‘\t’, ‘\\’.</li>
|
||
<li>Convert non-printable ASCII characters (0x00-0x1f, 0x7f) to
|
||
‘\xXX’.</li>
|
||
<li>Convert leading surrogate pair characters without trailing
|
||
character (0xd800-0xdbff, but not followed by 0xdc00-0xdfff) to
|
||
‘\uXXXX’.</li>
|
||
<li>Convert non-printable characters (Py_UNICODE_ISPRINTABLE() returns
|
||
0) to ‘\xXX’, ‘\uXXXX’ or ‘\U00xxxxxx’.</li>
|
||
<li>Backslash-escape quote characters (apostrophe, 0x27) and add a
|
||
quote character at the beginning and the end.</li>
|
||
</ul>
|
||
</li>
|
||
<li>Set the Unicode error-handler for sys.stderr to ‘backslashreplace’
|
||
by default.</li>
|
||
<li>Add a new function to the Python C API <code class="docutils literal notranslate"><span class="pre">PyObject</span> <span class="pre">*PyObject_ASCII</span>
|
||
<span class="pre">(PyObject</span> <span class="pre">*o)</span></code>. This function converts any python object to a
|
||
string using PyObject_Repr() and then hex-escapes all non-ASCII
|
||
characters. <code class="docutils literal notranslate"><span class="pre">PyObject_ASCII()</span></code> generates the same string as
|
||
<code class="docutils literal notranslate"><span class="pre">PyObject_Repr()</span></code> in Python 2.</li>
|
||
<li>Add a new built-in function, <code class="docutils literal notranslate"><span class="pre">ascii()</span></code>. This function converts
|
||
any python object to a string using repr() and then hex-escapes all
|
||
non-ASCII characters. <code class="docutils literal notranslate"><span class="pre">ascii()</span></code> generates the same string as
|
||
<code class="docutils literal notranslate"><span class="pre">repr()</span></code> in Python 2.</li>
|
||
<li>Add a <code class="docutils literal notranslate"><span class="pre">'%a'</span></code> string format operator. <code class="docutils literal notranslate"><span class="pre">'%a'</span></code> converts any python
|
||
object to a string using repr() and then hex-escapes all non-ASCII
|
||
characters. The <code class="docutils literal notranslate"><span class="pre">'%a'</span></code> format operator generates the same string
|
||
as <code class="docutils literal notranslate"><span class="pre">'%r'</span></code> in Python 2. Also, add <code class="docutils literal notranslate"><span class="pre">'!a'</span></code> conversion flags to the
|
||
<code class="docutils literal notranslate"><span class="pre">string.format()</span></code> method and add <code class="docutils literal notranslate"><span class="pre">'%A'</span></code> operator to the
|
||
PyUnicode_FromFormat(). They convert any object to an ASCII string
|
||
as the <code class="docutils literal notranslate"><span class="pre">'%a'</span></code> string format operator does.</li>
|
||
<li>Add an <code class="docutils literal notranslate"><span class="pre">isprintable()</span></code> method to the string type.
|
||
<code class="docutils literal notranslate"><span class="pre">str.isprintable()</span></code> returns False if repr() would escape any
|
||
character in the string; otherwise returns True. The
|
||
<code class="docutils literal notranslate"><span class="pre">isprintable()</span></code> method calls the <code class="docutils literal notranslate"><span class="pre">Py_UNICODE_ISPRINTABLE()</span></code>
|
||
function internally.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="rationale">
|
||
<h2><a class="toc-backref" href="#rationale" role="doc-backlink">Rationale</a></h2>
|
||
<p>The repr() in Python 3000 should be Unicode, not ASCII based, just
|
||
like Python 3000 strings. Also, conversion should not be affected by
|
||
the locale setting, because the locale is not necessarily the same as
|
||
the output device’s locale. For example, it is common for a daemon
|
||
process to be invoked in an ASCII setting, but writes UTF-8 to its log
|
||
files. Also, web applications might want to report the error
|
||
information in more readable form based on the HTML page’s encoding.</p>
|
||
<p>Characters not supported by the user’s console could be hex-escaped on
|
||
printing, by the Unicode encoder’s error-handler. If the
|
||
error-handler of the output file is ‘backslashreplace’, such
|
||
characters are hex-escaped without raising UnicodeEncodeError. For
|
||
example, if the default encoding is ASCII, <code class="docutils literal notranslate"><span class="pre">print('Hello</span> <span class="pre">¢')</span></code> will
|
||
print ‘Hello \xa2’. If the encoding is ISO-8859-1, ‘Hello ¢’ will be
|
||
printed.</p>
|
||
<p>The default error-handler for sys.stdout is ‘strict’. Other
|
||
applications reading the output might not understand hex-escaped
|
||
characters, so unsupported characters should be trapped when writing.
|
||
If unsupported characters must be escaped, the error-handler should be
|
||
changed explicitly. Unlike sys.stdout, sys.stderr doesn’t raise
|
||
UnicodeEncodeError by default, because the default error-handler is
|
||
‘backslashreplace’. So printing error messages containing non-ASCII
|
||
characters to sys.stderr will not raise an exception. Also,
|
||
information about uncaught exceptions (exception object, traceback) is
|
||
printed by the interpreter without raising exceptions.</p>
|
||
<section id="alternate-solutions">
|
||
<h3><a class="toc-backref" href="#alternate-solutions" role="doc-backlink">Alternate Solutions</a></h3>
|
||
<p>To help debugging in non-Latin languages without changing repr(),
|
||
other suggestions were made.</p>
|
||
<ul>
|
||
<li>Supply a tool to print lists or dicts.<p>Strings to be printed for debugging are not only contained by lists
|
||
or dicts, but also in many other types of object. File objects
|
||
contain a file name in Unicode, exception objects contain a message
|
||
in Unicode, etc. These strings should be printed in readable form
|
||
when repr()ed. It is unlikely to be possible to implement a tool to
|
||
print all possible object types.</p>
|
||
</li>
|
||
<li>Use sys.displayhook and sys.excepthook.<p>For interactive sessions, we can write hooks to restore hex escaped
|
||
characters to the original characters. But these hooks are called
|
||
only when printing the result of evaluating an expression entered in
|
||
an interactive Python session, and don’t work for the <code class="docutils literal notranslate"><span class="pre">print()</span></code>
|
||
function, for non-interactive sessions or for <code class="docutils literal notranslate"><span class="pre">logging.debug("%r",</span>
|
||
<span class="pre">...)</span></code>, etc.</p>
|
||
</li>
|
||
<li>Subclass sys.stdout and sys.stderr.<p>It is difficult to implement a subclass to restore hex-escaped
|
||
characters since there isn’t enough information left by the time
|
||
it’s a string to undo the escaping correctly in all cases. For
|
||
example, <code class="docutils literal notranslate"><span class="pre">print("\\"+"u0041")</span></code> should be printed as ‘\u0041’, not
|
||
‘A’. But there is no chance to tell file objects apart.</p>
|
||
</li>
|
||
<li>Make the encoding used by unicode_repr() adjustable, and make the
|
||
existing repr() the default.<p>With adjustable repr(), the result of using repr() is unpredictable
|
||
and would make it impossible to write correct code involving repr().
|
||
And if current repr() is the default, then the old convention
|
||
remains intact and users may expect ASCII strings as the result of
|
||
repr(). Third party applications or libraries could be confused
|
||
when a custom repr() function is used.</p>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
<section id="backwards-compatibility">
|
||
<h2><a class="toc-backref" href="#backwards-compatibility" role="doc-backlink">Backwards Compatibility</a></h2>
|
||
<p>Changing repr() may break some existing code, especially testing code.
|
||
Five of Python’s regression tests fail with this modification. If you
|
||
need repr() strings without non-ASCII characters, as in Python 2, you can
|
||
use the following function.</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">repr_ascii</span><span class="p">(</span><span class="n">obj</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">"ASCII"</span><span class="p">,</span> <span class="s2">"backslashreplace"</span><span class="p">),</span> <span class="s2">"ASCII"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>For logging or for debugging, the following code can raise
|
||
UnicodeEncodeError.</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">log</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s2">"logfile"</span><span class="p">,</span> <span class="s2">"w"</span><span class="p">)</span>
|
||
<span class="n">log</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">data</span><span class="p">))</span> <span class="c1"># UnicodeEncodeError will be raised</span>
|
||
<span class="c1"># if data contains unsupported characters.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>To avoid exceptions being raised, you can explicitly specify the
|
||
error-handler.</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">log</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s2">"logfile"</span><span class="p">,</span> <span class="s2">"w"</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">"backslashreplace"</span><span class="p">)</span>
|
||
<span class="n">log</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">data</span><span class="p">))</span> <span class="c1"># Unsupported characters will be escaped.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>For a console that uses a Unicode-based encoding, for example,
|
||
en_US.utf8 or de_DE.utf8, the backslashreplace trick doesn’t work and
|
||
no printable characters are escaped. This will cause a problem
|
||
of similarly drawing characters in Western, Greek and Cyrillic
|
||
languages. These languages use similar (but different) alphabets
|
||
(descended from a common ancestor) and contain letters that look
|
||
similar but have different character codes. For example, it is hard
|
||
to distinguish Latin ‘a’, ‘e’ and ‘o’ from Cyrillic ‘а’, ‘е’ and ‘о’.
|
||
(The visual representation, of course, very much depends on the fonts
|
||
used but usually these letters are almost indistinguishable.) To
|
||
avoid the problem, the user can adjust the terminal encoding to get a
|
||
result suitable for their environment.</p>
|
||
</section>
|
||
<section id="rejected-proposals">
|
||
<h2><a class="toc-backref" href="#rejected-proposals" role="doc-backlink">Rejected Proposals</a></h2>
|
||
<ul>
|
||
<li>Add encoding and errors arguments to the builtin print() function,
|
||
with defaults of sys.getfilesystemencoding() and ‘backslashreplace’.<p>Complicated to implement, and in general, this is not seen as a good
|
||
idea. <a class="footnote-reference brackets" href="#id4" id="id2">[2]</a></p>
|
||
</li>
|
||
<li>Use character names to escape characters, instead of hex character
|
||
codes. For example, <code class="docutils literal notranslate"><span class="pre">repr('\u03b1')</span></code> can be converted to
|
||
<code class="docutils literal notranslate"><span class="pre">"\N{GREEK</span> <span class="pre">SMALL</span> <span class="pre">LETTER</span> <span class="pre">ALPHA}"</span></code>.<p>Using character names can be very verbose compared to hex-escape.
|
||
e.g., <code class="docutils literal notranslate"><span class="pre">repr("\ufbf9")</span></code> is converted to <code class="docutils literal notranslate"><span class="pre">"\N{ARABIC</span> <span class="pre">LIGATURE</span>
|
||
<span class="pre">UIGHUR</span> <span class="pre">KIRGHIZ</span> <span class="pre">YEH</span> <span class="pre">WITH</span> <span class="pre">HAMZA</span> <span class="pre">ABOVE</span> <span class="pre">WITH</span> <span class="pre">ALEF</span> <span class="pre">MAKSURA</span> <span class="pre">ISOLATED</span>
|
||
<span class="pre">FORM}"</span></code>.</p>
|
||
</li>
|
||
<li>Default error-handler of sys.stdout should be ‘backslashreplace’.<p>Stuff written to stdout might be consumed by another program that
|
||
might misinterpret the \ escapes. For interactive sessions, it is
|
||
possible to make the ‘backslashreplace’ error-handler the default,
|
||
but this may add confusion of the kind “it works in interactive mode
|
||
but not when redirecting to a file”.</p>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
<section id="implementation">
|
||
<h2><a class="toc-backref" href="#implementation" role="doc-backlink">Implementation</a></h2>
|
||
<p>The author wrote a patch in <a class="reference external" href="http://bugs.python.org/issue2630">http://bugs.python.org/issue2630</a>; this was
|
||
committed to the Python 3.0 branch in revision 64138 on 06-11-2008.</p>
|
||
</section>
|
||
<section id="references">
|
||
<h2><a class="toc-backref" href="#references" role="doc-backlink">References</a></h2>
|
||
<aside class="footnote-list brackets">
|
||
<aside class="footnote brackets" id="id3" role="doc-footnote">
|
||
<dt class="label">[<a href="#id1">1</a>]</dt>
|
||
<dd>Multibyte string on string::string_print
|
||
(<a class="reference external" href="http://bugs.python.org/issue479898">http://bugs.python.org/issue479898</a>)</aside>
|
||
<aside class="footnote brackets" id="id4" role="doc-footnote">
|
||
<dt class="label">[<a href="#id2">2</a>]</dt>
|
||
<dd>[Python-3000] Displaying strings containing unicode escapes
|
||
(<a class="reference external" href="https://mail.python.org/pipermail/python-3000/2008-April/013366.html">https://mail.python.org/pipermail/python-3000/2008-April/013366.html</a>)</aside>
|
||
</aside>
|
||
</section>
|
||
<section id="copyright">
|
||
<h2><a class="toc-backref" href="#copyright" role="doc-backlink">Copyright</a></h2>
|
||
<p>This document has been placed in the public domain.</p>
|
||
</section>
|
||
</section>
|
||
<hr class="docutils" />
|
||
<p>Source: <a class="reference external" href="https://github.com/python/peps/blob/main/peps/pep-3138.rst">https://github.com/python/peps/blob/main/peps/pep-3138.rst</a></p>
|
||
<p>Last modified: <a class="reference external" href="https://github.com/python/peps/commits/main/peps/pep-3138.rst">2023-09-09 17:39:29 GMT</a></p>
|
||
|
||
</article>
|
||
<nav id="pep-sidebar">
|
||
<h2>Contents</h2>
|
||
<ul>
|
||
<li><a class="reference internal" href="#abstract">Abstract</a></li>
|
||
<li><a class="reference internal" href="#motivation">Motivation</a></li>
|
||
<li><a class="reference internal" href="#specification">Specification</a></li>
|
||
<li><a class="reference internal" href="#rationale">Rationale</a><ul>
|
||
<li><a class="reference internal" href="#alternate-solutions">Alternate Solutions</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#backwards-compatibility">Backwards Compatibility</a></li>
|
||
<li><a class="reference internal" href="#rejected-proposals">Rejected Proposals</a></li>
|
||
<li><a class="reference internal" href="#implementation">Implementation</a></li>
|
||
<li><a class="reference internal" href="#references">References</a></li>
|
||
<li><a class="reference internal" href="#copyright">Copyright</a></li>
|
||
</ul>
|
||
|
||
<br>
|
||
<a id="source" href="https://github.com/python/peps/blob/main/peps/pep-3138.rst">Page Source (GitHub)</a>
|
||
</nav>
|
||
</section>
|
||
<script src="../_static/colour_scheme.js"></script>
|
||
<script src="../_static/wrap_tables.js"></script>
|
||
<script src="../_static/sticky_banner.js"></script>
|
||
</body>
|
||
</html> |