From 4b14e34f00c7de0e1bba65540276f7d788779e05 Mon Sep 17 00:00:00 2001
From: Guido van Rossum <guido@python.org>
Date: Thu, 15 Mar 2007 18:05:48 +0000
Subject: [PATCH] PEP 3116 - new I/O, by Mike Verdone and Daniel Stutzbach;
 converted from HTML by Jason Orendorff.

---
 pep-0000.txt |   4 +
 pep-3116.txt | 460 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 464 insertions(+)
 create mode 100644 pep-3116.txt

diff --git a/pep-0000.txt b/pep-0000.txt
index d9924eb6d..87d8f0d3b 100644
--- a/pep-0000.txt
+++ b/pep-0000.txt
@@ -114,6 +114,7 @@ Index by Category
  S  3108  Standard Library Reorganization              Cannon
  S  3114  Renaming iterator.next() to iterator.__next__() Yee
  S  3115  Metaclasses in Python 3000                   Talin
+ S  3116  New I/O                                      Stutzbach, Verdone, GvR
 
 
  Finished PEPs (done, implemented in Subversion)
@@ -466,6 +467,7 @@ Numerical Index
  SA 3113  Removal of Tuple Parameter Unpacking         Cannon
  S  3114  Renaming iterator.next() to iterator.__next__() Yee
  S  3115  Metaclasses in Python 3000                   Talin
+ S  3116  New I/O                                      Stutzbach, Verdone, GvR
 
 
 Key
@@ -568,10 +570,12 @@ Owners
     Seo, Jiwon               seojiwon@gmail.com
     Smith, Kevin D.          Kevin.Smith@theMorgue.org
     Stein, Greg              gstein@lyra.org
+    Stutzbach, Daniel        daniel.stutzbach@gmail.com
     Suzi, Roman              rnd@onego.ru
     Talin                    talin at acm.org
     Taschuk, Steven          staschuk@telusplanet.net
     Tirosh, Oren             oren at hishome.net
+    Verdone, Mike            mike.verdone@gmail.com
     Warnes, Gregory R.       warnes@users.sourceforge.net
     Warsaw, Barry            barry@python.org
     Way, Terence             terry@wayforward.net
diff --git a/pep-3116.txt b/pep-3116.txt
new file mode 100644
index 000000000..0089d1330
--- /dev/null
+++ b/pep-3116.txt
@@ -0,0 +1,460 @@
+PEP: 3116
+Title: New I/O
+Version: $Revision$
+Last-Modified: $Date$
+Author: Daniel Stutzbach, Mike Verdone, Guido van Rossum
+Status: Draft
+Type: Standards Track
+Content-type: text/x-rst
+Created: 26-Feb-2007
+Post-History: 26-Feb-2007
+Python-Version: 3.0
+
+Rationale and Goals
+===================
+
+Python allows for a variety of stream-like (a.k.a. file-like) objects
+that can be used via ``read()`` and ``write()`` calls.  Anything that
+provides ``read()`` and ``write()`` is stream-like.  However, more
+exotic and extremely useful functions like ``readline()`` or
+``seek()`` may or may not be available on every stream-like object.
+Python needs a specification for basic byte-based I/O streams to which
+we can add buffering and text-handling features.
+
+Once we have a defined raw byte-based I/O interface, we can add
+buffering and text handling layers on top of any byte-based I/O class.
+The same buffering and text handling logic can be used for files,
+sockets, byte arrays, or custom I/O classes developed by Python
+programmers.  Developing a standard definition of a stream lets us
+separate stream-based operations like ``read()`` and ``write()`` from
+implementation specific operations like ``fileno()`` and ``isatty()``.
+It encourages programmers to write code that uses streams as streams
+and not require that all streams support file-specific or
+socket-specific operations.
+
+The new I/O spec is intended to be similar to the Java I/O libraries,
+but generally less confusing.  Programmers who don't want to muck
+about in the new I/O world can expect that the ``open()`` factory
+method will produce an object backwards-compatible with old-style file
+objects.
+
+
+Specification
+=============
+
+The Python I/O Library will consist of three layers: a raw I/O layer,
+a buffered I/O layer, and a text I/O layer.  Each layer is defined by
+an abstract base class, which may have multiple implementations.  The
+raw I/O and buffered I/O layers deal with units of bytes, while the
+text I/O layer deals with units of characters.
+
+
+Raw I/O
+=======
+
+The abstract base class for raw I/O is RawIOBase.  It has several
+methods which are wrappers around the appropriate operating system
+calls.  If one of these functions would not make sense on the object,
+the implementation must raise an IOError exception.  For example, if a
+file is opened read-only, the ``.write()`` method will raise an
+``IOError``.  As another example, if the object represents a socket,
+then ``.seek()``, ``.tell()``, and ``.truncate()`` will raise an
+``IOError``.  Generally, a call to one of these functions maps to
+exactly one operating system call.
+
+    ``.read(n: int) -> bytes``
+
+       Read up to ``n`` bytes from the object and return them.  Fewer
+       than ``n`` bytes may be returned if the operating system call
+       returns fewer than ``n`` bytes.  If 0 bytes are returned, this
+       indicates end of file.  If the object is in non-blocking mode
+       and no bytes are available, the call returns ``None``.
+
+    ``.readinto(b: bytes) -> int``
+
+       Read up to ``n`` bytes from the object and stores them in
+       ``b``, returning the number of bytes read.  Like .read, fewer
+       than ``n`` bytes may be read, and 0 indicates end of file.
+       ``None`` is returned if a non-blocking object has no bytes
+       available.
+
+    ``.write(b: bytes) -> int``
+
+        Returns number of bytes written, which may be ``< len(b)``.
+
+    ``.seek(pos: int, whence: int = 0) -> None``
+
+    ``.tell() -> int``
+
+    ``.truncate(n: int = None) -> None``
+
+    ``.close() -> None``
+
+Additionally, it defines a few other methods:
+
+    ``.readable() -> bool``
+
+       Returns ``True`` if the object was opened for reading,
+       ``False`` otherwise.  If ``False``, ``.read()`` will raise an
+       ``IOError`` if called.
+
+    ``.writable() -> bool``
+
+       Returns ``True`` if the object was opened write writing,
+       ``False`` otherwise.  If ``False``, ``.write()`` and
+       ``.truncate()`` will raise an ``IOError`` if called.
+
+    ``.seekable() -> bool``
+
+       Returns ``True`` if the object supports random access (such as
+       disk files), or ``False`` if the object only supports
+       sequential access (such as sockets, pipes, and ttys).  If
+       ``False``, ``.seek()``, ``.tell()``, and ``.truncate()`` will
+       raise an IOError if called.
+
+    ``.__enter__() -> ContextManager``
+
+       Context management protocol.  Returns ``self``.
+
+    ``.__exit__(...) -> None``
+
+       Context management protocol.  Same as ``.close()``.
+
+If and only if a ``RawIOBase`` implementation operates on an
+underlying file descriptor, it must additionally provide a
+``.fileno()`` member function.  This could be defined specifically by
+the implementation, or a mix-in class could be used (need to decide
+about this).
+
+    ``.fileno() -> int``
+
+       Returns the underlying file descriptor (an integer)
+
+Initially, three implementations will be provided that implement the
+``RawIOBase`` interface: ``FileIO``, ``SocketIO``, and ``ByteIO``
+(also ``MMapIO``?).  Each implementation must determine whether the
+object supports random access as the information provided by the user
+may not be sufficient (consider ``open("/dev/tty", "rw")`` or
+``open("/tmp/named-pipe", "rw")``).  As an example, ``FileIO`` can
+determine this by calling the ``seek()`` system call; if it returns an
+error, the object does not support random access.  Each implementation
+may provided additional methods appropriate to its type.  The
+``ByteIO`` object is analogous to Python 2's ``cStringIO`` library,
+but operating on the new bytes type instead of strings.
+
+
+Buffered I/O
+============
+
+The next layer is the Buffered I/O layer which provides more efficient
+access to file-like objects.  The abstract base class for all Buffered
+I/O implementations is ``BufferedIOBase``, which provides similar methods
+to RawIOBase:
+
+    ``.read(n: int = -1) -> bytes``
+
+       Returns the next ``n`` bytes from the object.  It may return
+       fewer than ``n`` bytes if end-of-file is reached or the object is
+       non-blocking.  0 bytes indicates end-of-file.  This method may
+       make multiple calls to ``RawIOBase.read()`` to gather the bytes,
+       or may make no calls to ``RawIOBase.read()`` if all of the needed
+       bytes are already buffered.
+
+    ``.readinto(b: bytes) -> int``
+
+    ``.write(b: bytes) -> None``
+
+       Write ``b`` bytes to the buffer.  The bytes are not guaranteed to
+       be written to the Raw I/O object immediately; they may be
+       buffered.
+
+    ``.seek(pos: int, whence: int = 0) -> int``
+
+    ``.tell() -> int``
+
+    ``.truncate(pos: int = None) -> None``
+
+    ``.flush() -> None``
+
+    ``.close() -> None``
+
+    ``.readable() -> bool``
+
+    ``.writable() -> bool``
+
+    ``.seekable() -> bool``
+
+    ``.__enter__() -> ContextManager``
+
+    ``.__exit__(...) -> None``
+
+Additionally, the abstract base class provides one member variable:
+
+    ``.raw``
+
+       A reference to the underlying ``RawIOBase`` object.
+
+The ``BufferedIOBase`` methods signatures are mostly identical to that
+of ``RawIOBase`` (exceptions: ``write()`` returns ``None``,
+``read()``'s argument is optional), but may have different semantics.
+In particular, ``BufferedIOBase`` implementations may read more data
+than requested or delay writing data using buffers.  For the most
+part, this will be transparent to the user (unless, for example, they
+open the same file through a different descriptor).  Also, raw reads
+may return a short read without any particular reason; buffered reads
+will only return a short read if EOF is reached; and raw writes may
+return a short count (even when non-blocking I/O is not enabled!),
+while buffered writes will raise ``IOError`` when not all bytes could
+be written or buffered.
+
+There are four implementations of the ``BufferedIOBase`` abstract base
+class, described below.
+
+
+``BufferedReader``
+------------------
+
+The ``BufferedReader`` implementation is for sequential-access
+read-only objects.  Its ``.flush()`` method is a no-op.
+
+
+``BufferedWriter``
+------------------
+
+The ``BufferedWriter`` implementation is for sequential-access
+write-only objects.  Its ``.flush()`` method forces all cached data to
+be written to the underlying RawIOBase object.
+
+
+``BufferedRWPair``
+------------------
+
+The ``BufferedRWPair`` implementation is for sequential-access
+read-write objects such as sockets and ttys.  As the read and write
+streams of these objects are completely independent, it could be
+implemented by simply incorporating a ``BufferedReader`` and
+``BufferedWriter`` instance.  It provides a ``.flush()`` method that
+has the same semantics as a ``BufferedWriter``'s ``.flush()`` method.
+
+
+``BufferedRandom``
+------------------
+
+The ``BufferedRandom`` implementation is for all random-access
+objects, whether they are read-only, write-only, or read-write.
+Compared to the previous classes that operate on sequential-access
+objects, the ``BufferedRandom`` class must contend with the user
+calling ``.seek()`` to reposition the stream.  Therefore, an instance
+of ``BufferedRandom`` must keep track of both the logical and true
+position within the object.  It provides a ``.flush()`` method that
+forces all cached write data to be written to the underlying
+``RawIOBase`` object and all cached read data to be forgotten (so that
+future reads are forced to go back to the disk).
+
+*Q: Do we want to mandate in the specification that switching between
+reading to writing on a read-write object implies a .flush()?  Or is
+that an implementation convenience that users should not rely on?*
+
+For a read-only ``BufferedRandom`` object, ``.writable()`` returns
+``False`` and the ``.write()`` and ``.truncate()`` methods throw
+``IOError``.
+
+For a write-only ``BufferedRandom`` object, ``.readable()`` returns
+``False`` and the ``.read()`` method throws ``IOError``.
+
+
+Text I/O
+========
+
+The text I/O layer provides functions to read and write strings from
+streams.  Some new features include universal newlines and character
+set encoding and decoding.  The Text I/O layer is defined by a
+``TextIOBase`` abstract base class.  It provides several methods that
+are similar to the ``BufferedIOBase`` methods, but operate on a
+per-character basis instead of a per-byte basis.  These methods are:
+
+    ``.read(n: int = -1) -> str``
+
+    ``.write(s: str) -> None``
+
+``TextIOBase`` implementations also provide several methods that are
+pass-throughs to the underlaying ``BufferedIOBase`` objects:
+
+    ``.seek(pos: int, whence: int = 0) -> None``
+
+    ``.tell() -> int``
+
+    ``.truncate(pos: int = None) -> None``
+
+    ``.flush() -> None``
+
+    ``.close() -> None``
+
+    ``.readable() -> bool``
+
+    ``.writable() -> bool``
+
+    ``.seekable() -> bool``
+
+``TextIOBase`` class implementations additionally provide the
+following methods:
+
+    ``.readline() -> str``
+
+        Read until newline or EOF and return the line, or ``""`` if
+        EOF hit immediately.
+
+    ``.__iter__() -> Iterator``
+
+        Returns an iterator that returns lines from the file (which
+        happens to be ``self``).
+
+    ``.next() -> str``
+
+        Same as ``readline()`` except raises ``StopIteration`` if EOF
+        hit immediately.
+
+Two implementations will be provided by the Python library.  The
+primary implementation, ``TextIOWrapper``, wraps a Buffered I/O
+object.  Each ``TextIOWrapper`` object has a property named
+"``.buffer``" that provides a reference to the underlying
+``BufferedIOBase`` object.  Its initializer has the following
+signature:
+
+    ``.__init__(self, buffer, encoding=None, newline=None)``
+
+        ``buffer`` is a reference to the ``BufferedIOBase`` object to
+        be wrapped with the ``TextIOWrapper``.  ``encoding`` refers to
+        an encoding to be used for translating between the
+        byte-representation and character-representation.  If it is
+        ``None``, then the system's locale setting will be used as the
+        default.  ``newline`` can be ``None``, ``'\n'``, or ``'\r\n'``
+        (all other values are illegal); it indicates the translation
+        for ``'\n'`` characters written.  If ``None``, a
+        system-specific default is chosen, i.e., ``'\r\n'`` on Windows
+        and ``'\n'`` on Unix/Linux.  Setting ``newline='\n'`` on input
+        means that no CRLF translation is done; lines ending in
+        ``'\r\n'`` will be returned as ``'\r\n'``.
+
+Another implementation, ``StringIO``, creates a file-like ``TextIO``
+implementation without an underlying Buffered I/O object.  While
+similar functionality could be provided by wrapping a ``BytesIO``
+object in a ``TextIOWrapper``, the ``StringIO`` object allows for much
+greater efficiency as it does not need to actually performing encoding
+and decoding.  A String I/O object can just store the encoded string
+as-is.  The ``StringIO`` object's ``__init__`` signature takes an
+optional string specifying the initial value; the initial position is
+always 0.  It does not support encodings or newline translations; you
+always read back exactly the characters you wrote.
+
+
+Unicode encoding/decoding Issues
+--------------------------------
+
+We should allow passing an error-handling argument whenever an
+encoding is accepted, and we should allow changing the error-handling
+setting later.  The behavior of Text I/O operations in the face of
+Unicode problems and ambiguities (e.g. diacritics, surrogates, invalid
+bytes in an encoding) should be the same as that of the unicode
+``encode()``/``decode()`` methods.  ``UnicodeError`` may be raised.
+
+Implementation note: we should be able to reuse much of the
+infrastructure provided by the ``codecs`` module.  If it doesn't
+provide the exact APIs we need, we should refactor it to avoid
+reinventing the wheel.
+
+
+Non-blocking I/O
+================
+
+Non-blocking I/O is fully supported on the Raw I/O level only.  If a
+raw object is in non-blocking mode and an operation would block, then
+``.read()`` and ``.readinto()`` return ``None``, while ``.write()``
+returns 0.  In order to put an object in object in non-blocking mode,
+the user must extract the fileno and do it by hand.
+
+At the Buffered I/O and Text I/O layers, if a read or write fails due
+a non-blocking condition, they raise an ``IOError`` with ``errno`` set
+to ``EAGAIN``.
+
+Originally, we considered propagating up the Raw I/O behavior, but
+many corner cases and problems were raised.  To address these issues,
+significant changes would need to have been made to the Buffered I/O
+and Text I/O layers.  For example, what should ``.flush()`` do on a
+Buffered non-blocking object?  How would the user instruct the object
+to "Write as much as you can from your buffer, but don't block"?  A
+non-blocking ``.flush()`` that doesn't necessarily flush all available
+data is counter-intuitive.  Since non-blocking and blocking objects
+would have such different semantics at these layers, it was agreed to
+abandon efforts to combine them into a single type.
+
+
+The ``open()`` Built-in Function
+================================
+
+The ``open()`` built-in function is specified by the following
+pseudo-code::
+
+    def open(filename, mode="r", buffering=None, *, encoding=None):
+        assert isinstance(filename, str)
+        assert isinstance(mode, str)
+        assert buffering is None or isinstance(buffering, int)
+        assert encoding is None or isinstance(encoding, str)
+        modes = set(mode)
+        if modes - set("arwb+t") or len(mode) > len(modes):
+            raise ValueError("invalid mode: %r" % mode)
+        reading = "r" in modes
+        writing = "w" in modes
+        binary = "b" in modes
+        appending = "a" in modes
+        updating = "+" in modes
+        text = "t" in modes or not binary
+        if text and binary:
+            raise ValueError("can't have text and binary mode at once")
+        if reading + writing + appending > 1:
+            raise ValueError("can't have read/write/append mode at once")
+        if not (reading or writing or appending):
+            raise ValueError("must have exactly one of read/write/append mode")
+        if binary and encoding is not None:
+            raise ValueError("binary modes doesn't take an encoding")
+        # XXX Need to spec the signature for FileIO()
+        raw = FileIO(filename, mode)
+        if buffering is None:
+            buffering = 8*1024  # International standard buffer size
+            # XXX Try setting it to fstat().st_blksize
+        if buffering < 0:
+            raise ValueError("invalid buffering size")
+        if buffering == 0:
+            if binary:
+                return raw
+            raise ValueError("can't have unbuffered text I/O")
+        if updating:
+            buffer = BufferedRandom(raw, buffering)
+        elif writing or appending:
+            buffer = BufferedWriter(raw, buffering)
+        else:
+            assert reading
+            buffer = BufferedReader(raw, buffering)
+        if binary:
+            return buffer
+        assert text
+        # XXX Need to do something about universal newlines?
+        textio = TextIOWrapper(buffer)
+        return textio
+
+
+Copyright
+=========
+
+This document has been placed in the public domain.
+
+
+
+..
+   Local Variables:
+   mode: indented-text
+   indent-tabs-mode: nil
+   sentence-end-double-space: t
+   fill-column: 70
+   coding: utf-8
+   End: