From c96856e95c6fdd6ca87c1fcc8650266f84935920 Mon Sep 17 00:00:00 2001 From: "Phillip J. Eby" Date: Fri, 1 Oct 2004 20:03:01 +0000 Subject: [PATCH] Clarifications and citations as requested by Mark Nottingham, and discussed further at: http://mail.python.org/pipermail/web-sig/2004-September/000917.html Also, add Mark to the Acknowledgments, since his input has now touched a substantial number of paragraphs in the PEP. :) --- pep-0333.txt | 291 +++++++++++++++++++++++++++++---------------------- 1 file changed, 167 insertions(+), 124 deletions(-) diff --git a/pep-0333.txt b/pep-0333.txt index 19ad85d1f..b4b5cff43 100644 --- a/pep-0333.txt +++ b/pep-0333.txt @@ -167,8 +167,8 @@ other is a class:: def simple_app(environ, start_response): """Simplest possible application object""" status = '200 OK' - headers = [('Content-type','text/plain')] - start_response(status, headers) + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) return ['Hello world!\n'] @@ -193,8 +193,8 @@ other is a class:: def __iter__(self): status = '200 OK' - headers = [('Content-type','text/plain')] - self.start(status, headers) + response_headers = [('Content-type','text/plain')] + self.start(status, response_headers) yield "Hello world!\n" @@ -237,16 +237,16 @@ server. elif not headers_sent: # Before the first output, send the stored headers - status, headers = headers_sent[:] = headers_set + status, response_headers = headers_sent[:] = headers_set sys.stdout.write('Status: %s\r\n' % status) - for header in headers: + for header in response_headers: sys.stdout.write('%s: %s\r\n' % header) sys.stdout.write('\r\n') sys.stdout.write(data) sys.stdout.flush() - def start_response(status,headers,exc_info=None): + def start_response(status,response_headers,exc_info=None): if exc_info: try: if headers_sent: @@ -257,7 +257,7 @@ server. 
elif headers_sent: raise AssertionError("Headers already sent!") - headers_set[:] = [status,headers] + headers_set[:] = [status,response_headers] return write result = application(environ, start_response) @@ -356,19 +356,22 @@ a block boundary.) transform_ok = [] - def start_latin(status,headers,exc_info=None): - - for name,value in headers: + def start_latin(status,response_headers,exc_info=None): + + # Reset ok flag, in case this is a repeat call + transform_ok[:]=[] + + for name,value in response_headers: if name.lower()=='content-type' and value=='text/plain': transform_ok.append(True) # Strip content-length if present, else it'll be wrong - headers = [(name,value) - for name,value in headers + response_headers = [(name,value) + for name,value in response_headers if name.lower()<>'content-length' ] break - write = start_response(status,headers,exc_info) + write = start_response(status,response_headers,exc_info) if transform_ok: def write_latin(data): @@ -407,13 +410,14 @@ to a convention that will be described below. The ``start_response`` parameter is a callable accepting two required positional arguments, and one optional argument. For the sake -of illustration, we have named these arguments ``status``, ``headers``, -and ``exc_info``, but they are not required to have these names, and -the application **must** invoke the ``start_response`` callable using -positional arguments (e.g. ``start_response(status,headers)``). +of illustration, we have named these arguments ``status``, +``response_headers``, and ``exc_info``, but they are not required to +have these names, and the application **must** invoke the +``start_response`` callable using positional arguments (e.g. +``start_response(status,response_headers)``). The ``status`` parameter is a status string of the form -``"999 Message here"``, and ``headers`` is a list of +``"999 Message here"``, and ``response_headers`` is a list of ``(header_name,header_value)`` tuples describing the HTTP response header. 
The optional ``exc_info`` parameter is described below in the sections on `The start_response() Callable`_ and `Error Handling`_. @@ -428,19 +432,29 @@ APIs; it should not be used by new applications or frameworks if it can be avoided. See the `Buffering and Streaming`_ section for more details.) -The application object must return an iterable yielding strings. -(For example, it could be a generator-iterator that yields strings, -or it could be a sequence such as a list of strings.) The server -or gateway must transmit these strings to the client in an -unbuffered fashion, completing the transmission of each string -before requesting another one. (See the `Buffering and Streaming`_ -section below for more on how application output must be handled.) +When called by the server, the application object must return an +iterable yielding zero or more strings. This can be accomplished in a +variety of ways, such as by returning a list of strings, or by the +application being a generator function that yields strings, or +by the application being a class whose instances are iterable. +Regardless of how it is accomplished, the application object must +always return an iterable yielding zero or more strings. -The server or gateway must not modify supplied strings in any way; -they must be treated as binary byte sequences with no character -interpretation, line ending changes, or other modification. The -application is responsible for ensuring that the string(s) to be -written are in a format suitable for the client. +The server or gateway must transmit the yielded strings to the client +in an unbuffered fashion, completing the transmission of each string +before requesting another one. (In other words, applications +**should** perform their own buffering. See the `Buffering and +Streaming`_ section below for more on how application output must be +handled.) 
+ +The server or gateway should treat the yielded strings as binary byte +sequences: in particular, it should ensure that line endings are +not altered. The application is responsible for ensuring that the +string(s) to be written are in a format suitable for the client. (The +server or gateway **may** apply HTTP transfer encodings, or perform +other transformations for the purpose of implementing HTTP features +such as byte-range transmission. See `Other HTTP Features`_, below, +for more details.) If a call to ``len(iterable)`` succeeds, the server must be able to rely on the result being accurate. That is, if the iterable @@ -518,17 +532,20 @@ unless their value would be an empty string, in which case they can never be empty strings, and so are always required. ``HTTP_`` Variables - Variables corresponding to the client-supplied HTTP headers (i.e., - variables whose names begin with ``"HTTP_"``). The presence or + Variables corresponding to the client-supplied HTTP request headers + (i.e., variables whose names begin with ``"HTTP_"``). The presence or absence of these variables should correspond with the presence or absence of the appropriate HTTP header in the request. -In general, a server or gateway **should** attempt to provide as many -other CGI variables as are applicable, including e.g. the nonstandard -SSL variables such as ``HTTPS=on``, if an SSL connection is in effect. -However, an application that uses any CGI variables other than the ones -listed above are necessarily non-portable to web servers that do not -support the relevant extensions. +A server or gateway **should** attempt to provide as many other CGI +variables as are applicable. In addition, if SSL is in use, the server +or gateway **should** also provide as many of the Apache SSL environment +variables [5]_ as are applicable, such as ``HTTPS=on`` and +``SSL_PROTOCOL``. 
Note, however, an application that uses any CGI +variables other than the ones listed above is necessarily non-portable +to web servers that do not support the relevant extensions. (For +example, web servers that do not publish files will not be able to +provide a meaningful ``DOCUMENT_ROOT`` or ``PATH_TRANSLATED``.) A WSGI-compliant server or gateway **should** document what variables it provides, along with their definitions as appropriate. Applications @@ -542,7 +559,8 @@ if they are present at all. It is a violation of this specification for a CGI variable's value to be of any type other than ``str``. In addition to the CGI-defined variables, the ``environ`` dictionary -must also contain the following WSGI-defined variables: +**may** also contain arbitrary operating-system "environment variables", +and **must** contain the following WSGI-defined variables: ===================== =============================================== Variable Value @@ -555,20 +573,21 @@ Variable Value invoked. Normally, this will have the value ``"http"`` or ``"https"``, as appropriate. -``wsgi.input`` An input stream from which the HTTP request - body can be read. (The server or gateway may - perform reads on-demand as requested by the - application, or it may pre-read the client's - request body and buffer it in-memory or on - disk, or use any other technique for providing - such an input stream, according to its - preference.) +``wsgi.input`` An input stream (file-like object) from which + the HTTP request body can be read. (The server + or gateway may perform reads on-demand as + requested by the application, or it may pre- + read the client's request body and buffer it + in-memory or on disk, or use any other + technique for providing such an input stream, + according to its preference.) -``wsgi.errors`` An output stream to which error output can be - written, for the purpose of recording program - or other errors in a standardized and possibly - centralized location. For many servers, this - will be the server's main error log.
For many servers, this - will be the server's main error log. +``wsgi.errors`` An output stream (file-like object) to which + error output can be written, for the purpose of + recording program or other errors in a + standardized and possibly centralized location. + For many servers, this will be the server's + main error log. Alternatively, this may be ``sys.stderr``, or a log file of some sort. The server's @@ -578,22 +597,22 @@ Variable Value supply different error streams to different applications, if this is desired. -``wsgi.multithread`` This value should be true if the application - object may be simultaneously invoked by another - thread in the same process, and false - otherwise. - -``wsgi.multiprocess`` This value should be true if an equivalent +``wsgi.multithread`` This value should evaluate true if the application object may be simultaneously - invoked by another process, and false - otherwise. + invoked by another thread in the same process, + and should evaluate false otherwise. -``wsgi.run_once`` This value should be true if the server/gateway - expects (but does not guarantee!) that the - application will only be invoked this one time - during the life of its containing process. - Normally, this will only be true for a gateway - based on CGI (or something similar). +``wsgi.multiprocess`` This value should evaluate true if an + equivalent application object may be + simultaneously invoked by another process, + and should evaluate false otherwise. + +``wsgi.run_once`` This value should evaluate true if the server + or gateway expects (but does not guarantee!) + that the application will only be invoked this + one time during the life of its containing + process. Normally, this will only be true for + a gateway based on CGI (or something similar). 
===================== =============================================== Finally, the ``environ`` dictionary may also contain server-defined @@ -639,8 +658,8 @@ Reference, except for these notes as listed in the table above: both caller and implementer. The application is free not to supply it, and the server or gateway is free to ignore it. -4. Since the ``errors`` stream may not be rewound, a container is - free to forward write operations immediately, without buffering. +4. Since the ``errors`` stream may not be rewound, servers and gateways + are free to forward write operations immediately, without buffering. In this case, the ``flush()`` method may be a no-op. Portable applications, however, cannot assume that output is unbuffered or that ``flush()`` is a no-op. They must call ``flush()`` if @@ -660,7 +679,7 @@ The ``start_response()`` Callable --------------------------------- The second parameter passed to the application object is a callable -of the form ``start_response(status,headers,exc_info=None)``. +of the form ``start_response(status,response_headers,exc_info=None)``. (As with all WSGI callables, the arguments must be supplied positionally, not by keyword.) The ``start_response`` callable is used to begin the HTTP response, and it must return a @@ -668,26 +687,32 @@ used to begin the HTTP response, and it must return a section, below). The ``status`` argument is an HTTP "status" string like ``"200 OK"`` -or ``"404 Not Found"``. The string **must not** contain control -characters, and must not be terminated with a carriage return, -linefeed, or combination thereof. +or ``"404 Not Found"``. That is, it is a string consisting of a +Status-Code and a Reason-Phrase, in that order and separated by a +single space, with no surrounding whitespace or other characters. +(See RFC 2616, Section 6.1.1 for more information.) The string +**must not** contain control characters, and must not be terminated +with a carriage return, linefeed, or combination thereof. 
-The ``headers`` argument is a list of ``(header_name,header_value)`` -tuples. It must be a Python list; i.e. ``type(headers) is -ListType)``, and the server **may** change its contents in any way -it desires. Each ``header_name`` must be a valid HTTP header name, -without a trailing colon or other punctuation. Each ``header_value`` -**must not** include *any* control characters, including carriage -returns or linefeeds, either embedded or at the end. (These -requirements are to minimize the complexity of any parsing that must -be performed by servers, gateways, and intermediate response +The ``response_headers`` argument is a list of ``(header_name, +header_value)`` tuples. It must be a Python list; i.e. +``type(response_headers) is ListType``, and the server **may** change +its contents in any way it desires. Each ``header_name`` must be a +valid HTTP header field-name (as defined by RFC 2616, Section 4.2), +without a trailing colon or other punctuation. + +Each ``header_value`` **must not** include *any* control characters, +including carriage returns or linefeeds, either embedded or at the end. +(These requirements are to minimize the complexity of any parsing that +must be performed by servers, gateways, and intermediate response processors that need to inspect or modify response headers.) In general, the server or gateway is responsible for ensuring that correct headers are sent to the client: if the application omits -a needed header, the server or gateway *should* add it. For example, -the HTTP ``Date:`` and ``Server:`` headers would normally be supplied -by the server or gateway. +a header required by HTTP (or other relevant specifications that are in +effect), the server or gateway **must** add it. For example, the HTTP +``Date:`` and ``Server:`` headers would normally be supplied by the +server or gateway. 
(A reminder for server/gateway authors: HTTP header names are case-insensitive, so be sure to take that into consideration when @@ -699,23 +724,33 @@ or any headers that would affect the persistence of the client's connection to the web server. These features are the exclusive province of the actual web server, and a server or gateway **should** consider it a fatal error for an application to attempt -using them, and raise an error if they are supplied to +sending them, and raise an error if they are supplied to ``start_response()``. (For more specifics on "hop-by-hop" features and headers, please see the `Other HTTP Features`_ section below.) The ``start_response`` callable **must not** actually transmit the -HTTP headers. It must store them until the first ``write`` call, or -until after the first iteration of the application return value that -yields a non-empty string. This is to ensure that buffered and -asynchronous applications can replace their originally intended output -with error output, up until the last possible moment. +response headers. Instead, it must store them for the server or +gateway to transmit **only** after the first iteration of the +application return value that yields a non-empty string, or upon +the application's first invocation of the ``write()`` callable. In +other words, response headers must not be sent until there is actual +body data available, or until the application's returned iterable is +exhausted. (The only possible exception to this rule is if the +response headers explicitly include a ``Content-Length`` of zero.) + +This delaying of response header transmission is to ensure that buffered +and asynchronous applications can replace their originally intended +output with error output, up until the last possible moment. For +example, the application may need to change the response status from +"200 OK" to "500 Internal Error", if an error occurs while the body is +being generated within an application buffer. 
The ``exc_info`` argument, if supplied, must be a Python ``sys.exc_info()`` tuple. This argument should be supplied by the application only if ``start_response`` is being called by an error handler. If ``exc_info`` is supplied, and no HTTP headers have been output yet, ``start_response`` should replace the currently-stored -HTTP headers with the newly-supplied ones, thus allowing the +HTTP response headers with the newly-supplied ones, thus allowing the application to "change its mind" about the output when an error has occurred. @@ -747,7 +782,7 @@ parameter beyond the duration of the function's execution, to avoid creating a circular reference through the traceback and frames involved. The simplest way to do this is something like:: - def start_response(status,headers,exc_info=None): + def start_response(status,response_headers,exc_info=None): if exc_info: try: # do stuff w/exc_info here @@ -795,18 +830,15 @@ Buffering and Streaming Generally speaking, applications will achieve the best throughput by buffering their (modestly-sized) output and sending it all at -once. When this is the case, applications **should** simply -return a single-element iterable containing their entire output as -a single string. +once. This is a common approach in existing frameworks such as +Zope: the output is buffered in a StringIO or similar object, then +transmitted all at once, along with the response headers. -(In addition to improved performance, buffering all of an application's -output has an advantage for error handling: the buffered output can -be discarded and replaced by an error page, rather than dumping an -error message in the middle of some partially-completed output. For -this and other reasons, many existing Python frameworks already -accumulate their output for a single write, unless the application -explicitly requests streaming, or the expected output is larger than -practical for buffering (e.g. multi-megabyte PDFs).) 
+The corresponding approach in WSGI is for the application to simply +return a single-element iterable (such as a list) containing the +response body as a single string. This is the recommended approach +for the vast majority of application functions, that render +HTML pages whose text easily fits in memory. For large files, however, or for specialized uses of HTTP streaming (such as multipart "server push"), an application may need to provide @@ -815,7 +847,7 @@ memory). It's also sometimes the case that part of a response may be time-consuming to produce, but it would be useful to send ahead the portion of the response that precedes it. -In these cases, applications **should** return an iterator (usually +In these cases, applications will usually return an iterator (often a generator-iterator) that produces the output in a block-by-block fashion. These blocks may be broken to coincide with multipart boundaries (for "server push"), or just before time-consuming @@ -894,11 +926,10 @@ returned by the ``start_response`` callable. New WSGI applications and frameworks **should not** use the ``write()`` callable if it is possible to avoid doing so. The ``write()`` callable is strictly a hack to support imperative -streaming APIs. In general, applications should either be -internally buffered, or produce iterable output, as this makes -it possible for web servers to interleave other tasks in the -same Python thread, potentially providing better throughput for -the server as a whole.
The ``write()`` callable is returned by the ``start_response()`` callable, and it accepts a single parameter: a string to be @@ -909,11 +940,15 @@ passed-in string was either completely sent to the client, or that it is buffered for transmission while the application proceeds forward. -An application **may** return a non-empty iterable even if it -invokes ``write()``, and that output must be treated normally -by the server or gateway (i.e., it must be sent or queued -immediately). Applications **must not** invoke ``write()`` -from within their return iterable. +An application **must** return an iterable object, even if it +uses ``write()`` to produce all or part of its response body. +The returned iterable **may** be empty (i.e. yield no non-empty +strings), but if it *does* yield non-empty strings, that output +must be treated normally by the server or gateway (i.e., it must be +sent or queued immediately). Applications **must not** invoke +``write()`` from within their return iterable, and therefore any +strings yielded by the iterable are transmitted after all strings +passed to ``write()`` have been sent to the client. Unicode Issues @@ -926,9 +961,9 @@ strings, not Unicode objects. The result of using a Unicode object where a string object is required, is undefined. Note also that strings passed to ``start_response()`` as a status or -as headers **must** follow RFC 2616 with respect to encoding. That -is, they must either be ISO-8859-1 characters, or use RFC 2047 MIME -encoding. +as response headers **must** follow RFC 2616 with respect to encoding. +That is, they must either be ISO-8859-1 characters, or use RFC 2047 +MIME encoding. On Python platforms where the ``str`` or ``StringType`` type is in fact Unicode-based (e.g. 
Jython, IronPython, Python 3000, etc.), all @@ -964,15 +999,15 @@ of its use:: try: # regular application code here status = "200 Froody" - headers = [("content-type","text/plain")] - start_response(status, headers) + response_headers = [("content-type","text/plain")] + start_response(status, response_headers) return ["normal body goes here"] except: # XXX should trap runtime issues like MemoryError, KeyboardInterrupt # in a separate handler before this bare 'except:'... status = "500 Oops" - headers = [("content-type","text/plain")] - start_response(status, headers, sys.exc_info()) + response_headers = [("content-type","text/plain")] + start_response(status, response_headers, sys.exc_info()) return ["error body goes here"] If no output has been written when an exception occurs, the call to @@ -1041,8 +1076,9 @@ changes that do not alter the effective semantics of the application's response. It is always possible for the application developer to add middleware components to supply additional features, so server/gateway developers should be conservative in their implementation. In a sense, -a server should consider itself to be like an HTTP "proxy server", with -the application being an HTTP "origin server". +a server should consider itself to be like an HTTP "gateway server", +with the application being an HTTP "origin server". (See RFC 2616, +section 1.3, for the definition of these terms.) However, because WSGI servers and applications do not communicate via HTTP, what RFC 2616 calls "hop-by-hop" headers do not apply to WSGI @@ -1564,6 +1600,10 @@ thoughtful feedback made this revised draft possible. Especially: older versions of Python" section, as well as the optional ``wsgi.file_wrapper`` facility. +* Mark Nottingham, who reviewed the spec extensively for issues with + HTTP RFC compliance, especially with regard to HTTP/1.1 features that + I didn't even know existed until he pointed them out. + References ========== @@ -1580,6 +1620,9 @@ References .. 
[4] "End-to-end and Hop-by-hop Headers" -- HTTP/1.1, Section 13.5.1 (http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.5.1) +.. [5] mod_ssl Reference, "Environment Variables" + (http://www.modssl.org/docs/2.8/ssl_reference.html#ToC25) + Copyright =========