From 76e638005087d4bbfd07432928b98c395ee7d5a4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 17 Feb 2012 02:20:31 +0100 Subject: [PATCH] PEP 410: complete motivation section, add timespec type, rephrase os.stat() and datetime.datetime() sections --- pep-0410.txt | 115 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 15 deletions(-) diff --git a/pep-0410.txt b/pep-0410.txt index a4cbb9d6c..1ab8fbc82 100644 --- a/pep-0410.txt +++ b/pep-0410.txt @@ -22,9 +22,8 @@ Motivation ========== Python 2.3 introduced float timestamps to support subsecond resolutions. -os.stat() uses float timestamps by default since Python 2.5. - -Python 3.3 introduced functions supporting nanosecond resolutions: +os.stat() uses float timestamps by default since Python 2.5. Python 3.3 +introduced functions supporting nanosecond resolutions: * os module: stat(), utimensat(), futimens() * time module: clock_gettime(), clock_getres(), wallclock() @@ -33,10 +32,27 @@ The Python float type uses binary64 format of the IEEE 754 standard. With a resolution of 1 nanosecond (10\ :sup:`-9`), float timestamps lose precision for values bigger than 2\ :sup:`24` seconds (194 days: 1970-07-14 for an Epoch timestamp). +Nanosecond resolution is required to set the exact modification time on +filesystems supporting nanosecond timestamps (e.g. ext4, btrfs, NTFS, ...). It +also helps to compare the modification time of two files when checking which +one is newer. Examples: copy a file and its modification time using +shutil.copystat(), create a TAR archive with the tarfile module, manage a +mailbox with the mailbox module, etc. + +An arbitrary resolution is preferred over a fixed resolution (like nanosecond) +to avoid changing the API when a better resolution is required. 
For +example, the NTP protocol uses a resolution of 2\ :sup:`-32` second +(approximately 2.3 x 10\ :sup:`-10` second), whereas the NTP protocol version +4 uses a resolution of 2\ :sup:`-64` second (5.4 x 10\ :sup:`-20` second). + .. note:: - With a resolution of 1 microsecond (10\ :sup:`-6`), float timestamps lose precision - for values bigger than 2\ :sup:`33` seconds (272 years: 2242-03-16 for an Epoch - timestamp). + With a resolution of 1 microsecond (10\ :sup:`-6`), float timestamps lose + precision for values bigger than 2\ :sup:`33` seconds (272 years: 2242-03-16 + for an Epoch timestamp). + + With a resolution of 100 nanoseconds (10\ :sup:`-7`), float timestamps lose + precision for values bigger than 2\ :sup:`29` seconds (17 years: 1987-01-05 + for an Epoch timestamp). Specification @@ -123,8 +139,9 @@ with the Python license. datetime.datetime ----------------- -Most functions returning timestamps don't have a known starting point or -timezone information, and so cannot be converted to datetime.datetime. +Except os.stat(), time.time() and time.clock_gettime(time.CLOCK_REALTIME), all +time functions have an unspecified starting point and no timezone information, +and so cannot be converted to datetime.datetime. datetime.datetime only supports microsecond resolution, but can be enhanced to support nanosecond. @@ -193,6 +210,60 @@ for float or if the base 10 for Decimal. In other cases, frequency = base\ :sup:`exponent` must be computed again to convert a timestamp as float or Decimal. Storing directly the frequency in the denominator is simpler. +timespec structure +------------------ + +A resolution of one nanosecond is enough to support all current C functions. A +timespec type can be added to store a timestamp with a nanosecond resolution. 
+Basic example supporting addition, subtraction and coercion to float:: + + class timespec(tuple): + def __new__(cls, sec, nsec): + if not isinstance(sec, int): + raise TypeError + if not isinstance(nsec, int): + raise TypeError + asec, nsec = divmod(nsec, 10 ** 9) + sec += asec + obj = tuple.__new__(cls, (sec, nsec)) + obj.sec = sec + obj.nsec = nsec + return obj + + def __float__(self): + return self.sec + self.nsec * 1e-9 + + def total_nanoseconds(self): + return self.sec * 10 ** 9 + self.nsec + + def __add__(self, other): + if not isinstance(other, timespec): + raise TypeError + ns_sum = self.total_nanoseconds() + other.total_nanoseconds() + return timespec(*divmod(ns_sum, 10 ** 9)) + + def __sub__(self, other): + if not isinstance(other, timespec): + raise TypeError + ns_diff = self.total_nanoseconds() - other.total_nanoseconds() + return timespec(*divmod(ns_diff, 10 ** 9)) + + def __str__(self): + if self.sec < 0 and self.nsec: + sec = abs(1 + self.sec) + nsec = 10**9 - self.nsec + return '-%i.%09u' % (sec, nsec) + else: + return '%i.%09u' % (self.sec, self.nsec) + + def __repr__(self): + return '<timespec(%s, %s)>' % (self.sec, self.nsec) + +The timespec type is similar to the `Tuple of integer, variant (A) +<#tuple-of-integers>`_ type, except that it supports arithmetic. + +The timespec type was rejected because it only supports nanosecond resolution. + Alternatives: API design ======================== @@ -244,14 +315,27 @@ introduced later if compelling use cases are discovered. Add new fields to os.stat ------------------------- -It was proposed to add 3 fields to os.stat() structure to get nanoseconds of -timestamps. +To get the creation, modification and access time of a file with a nanosecond +resolution, three fields can be added to the os.stat() structure. -Populating the extra fields is time consuming. If new fields are available by -default, any call to os.stat() would be slower. 
If new fields are optional, the -stat structure would have a variable number of fields, which can be surprising. +The new fields can be timestamps with nanosecond resolution (tuple of integers, +timespec structure, Decimal, etc.) or the nanosecond part of each timestamp. -Anyway, this approach does not help with the time module. +If the new fields are timestamps with nanosecond resolution, populating the +extra fields would be time consuming. Any call to os.stat() would be slower, +even if os.stat() is only called to check if the file exists. A parameter can +be added to os.stat() to make these fields optional, but a structure with a +variable number of fields can be problematic. + +If the new fields only contain the fractional part (nanoseconds), os.stat() +would be efficient. These fields would always be present and so set to zero if +the operating system does not support subsecond resolution. Splitting a +timestamp in two parts, seconds and nanoseconds, is similar to the `timespec +type <#timespec-structure>`_ and `tuple of integers <#tuple-of-integers>`_, and so has the +same drawbacks. + +Adding new fields to the os.stat() structure does not solve the nanosecond +issue in other modules (e.g. time). Add a boolean argument ---------------------- @@ -274,10 +358,11 @@ Add new functions for each type, examples: * time.clock_decimal() * time.time_decimal() * os.stat_decimal() + * os.stat_timespec() * etc. Adding a new function for each function creating timestamps duplicate a lot -of time. +of code. Links