diff --git a/pep-0440.txt b/pep-0440.txt
index 28c8eb816..a2add4c5e 100644
--- a/pep-0440.txt
+++ b/pep-0440.txt
@@ -94,6 +94,11 @@ this scheme but MUST also include the normalizations specified below.
 Installation tools MAY warn the user when non-compliant or ambiguous versions
 are detected.
 
+See also `Appendix B : Parsing version strings with regular expressions` which
+provides a regular expression to check strict conformance with the canonical
+format, as well as a more permissive regular expression accepting inputs that
+may require subsequent normalization.
+
 Public version identifiers are separated into up to five segments:
 
 * Epoch segment: ``N!``
@@ -1575,6 +1580,62 @@ Metadata v2.0 guidelines versus setuptools::
     Projects with No Compatible Versions: 498/47114 (1.06%)
     Projects with Differing Latest Version: 688/47114 (1.46%)
 
+Appendix B : Parsing version strings with regular expressions
+=============================================================
+
+As noted earlier in the `Public version identifiers` section, published
+version identifiers SHOULD use the canonical format. This section provides
+regular expressions that can be used to test whether a version is already
+in that form, and if it's not, extract the various components for subsequent
+normalization.
+
+To test whether a version identifier is in the canonical format, you can use
+the following function::
+
+    import re
+    def is_canonical(version):
+        return re.match(r'^([1-9]\d*!)?(0|[1-9]\d*)(\.(0|[1-9]\d*))*((a|b|rc)(0|[1-9]\d*))?(\.post(0|[1-9]\d*))?(\.dev(0|[1-9]\d*))?$', version) is not None
+
+To extract the components of a version identifier, use the following regular
+expression (as defined by the `packaging <https://github.com/pypa/packaging/tree/master/packaging>`_
+project)::
+
+    VERSION_PATTERN = r"""
+        v?
+        (?:
+            (?:(?P<epoch>[0-9]+)!)?                           # epoch
+            (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
+            (?P<pre>                                          # pre-release
+                [-_\.]?
+                (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+                [-_\.]?
+                (?P<pre_n>[0-9]+)?
+            )?
+            (?P<post>                                         # post release
+                (?:-(?P<post_n1>[0-9]+))
+                |
+                (?:
+                    [-_\.]?
+                    (?P<post_l>post|rev|r)
+                    [-_\.]?
+                    (?P<post_n2>[0-9]+)?
+                )
+            )?
+            (?P<dev>                                          # dev release
+                [-_\.]?
+                (?P<dev_l>dev)
+                [-_\.]?
+                (?P<dev_n>[0-9]+)?
+            )?
+        )
+        (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+    """
+
+    _regex = re.compile(
+        r"^\s*" + VERSION_PATTERN + r"\s*$",
+        re.VERBOSE | re.IGNORECASE,
+    )
+
 
 Copyright
 =========