From d56d683bd101c764e4f31d875ac6d3aaaa6b5234 Mon Sep 17 00:00:00 2001 From: Taliesin Sisson Date: Wed, 26 Oct 2016 10:12:29 +0100 Subject: [PATCH] Add vendoring for unicode: govendor fetch golang.org/x/text/encoding/unicode --- vendor/github.com/stretchr/testify/LICENSE | 22 + vendor/golang.org/x/text/LICENSE | 27 + vendor/golang.org/x/text/PATENTS | 22 + vendor/golang.org/x/text/encoding/encoding.go | 335 ++++ .../text/encoding/internal/identifier/gen.go | 137 ++ .../internal/identifier/identifier.go | 81 + .../text/encoding/internal/identifier/mib.go | 1621 +++++++++++++++++ .../x/text/encoding/internal/internal.go | 75 + .../x/text/encoding/unicode/override.go | 82 + .../x/text/encoding/unicode/unicode.go | 434 +++++ vendor/golang.org/x/text/internal/gen/code.go | 339 ++++ vendor/golang.org/x/text/internal/gen/gen.go | 281 +++ .../internal/utf8internal/utf8internal.go | 87 + vendor/golang.org/x/text/runes/cond.go | 187 ++ vendor/golang.org/x/text/runes/runes.go | 355 ++++ .../golang.org/x/text/transform/transform.go | 705 +++++++ vendor/golang.org/x/text/unicode/cldr/base.go | 100 + vendor/golang.org/x/text/unicode/cldr/cldr.go | 130 ++ .../golang.org/x/text/unicode/cldr/collate.go | 359 ++++ .../golang.org/x/text/unicode/cldr/decode.go | 171 ++ .../golang.org/x/text/unicode/cldr/makexml.go | 400 ++++ .../golang.org/x/text/unicode/cldr/resolve.go | 602 ++++++ .../golang.org/x/text/unicode/cldr/slice.go | 144 ++ vendor/golang.org/x/text/unicode/cldr/xml.go | 1456 +++++++++++++++ vendor/google.golang.org/cloud/LICENSE | 202 ++ vendor/vendor.json | 6 + 26 files changed, 8360 insertions(+) create mode 100644 vendor/github.com/stretchr/testify/LICENSE create mode 100644 vendor/golang.org/x/text/LICENSE create mode 100644 vendor/golang.org/x/text/PATENTS create mode 100644 vendor/golang.org/x/text/encoding/encoding.go create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/gen.go create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/identifier.go create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/mib.go create mode 100644 vendor/golang.org/x/text/encoding/internal/internal.go create mode 100644 vendor/golang.org/x/text/encoding/unicode/override.go create mode 100644 vendor/golang.org/x/text/encoding/unicode/unicode.go create mode 100644 vendor/golang.org/x/text/internal/gen/code.go create mode 100644 vendor/golang.org/x/text/internal/gen/gen.go create mode 100644 vendor/golang.org/x/text/internal/utf8internal/utf8internal.go create mode 100644 vendor/golang.org/x/text/runes/cond.go create mode 100644 vendor/golang.org/x/text/runes/runes.go create mode 100644 vendor/golang.org/x/text/transform/transform.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/base.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/cldr.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/collate.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/decode.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/makexml.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/resolve.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/slice.go create mode 100644 vendor/golang.org/x/text/unicode/cldr/xml.go create mode 100644 vendor/google.golang.org/cloud/LICENSE diff --git a/vendor/github.com/stretchr/testify/LICENSE b/vendor/github.com/stretchr/testify/LICENSE new file mode 100644 index 000000000..473b670a7 --- /dev/null +++ b/vendor/github.com/stretchr/testify/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2012 - 2013 Mat Ryer and Tyler Bunnell + +Please consider promoting this project if you find it useful. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT +OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/golang.org/x/text/LICENSE b/vendor/golang.org/x/text/LICENSE new file mode 100644 index 000000000..6a66aea5e --- /dev/null +++ b/vendor/golang.org/x/text/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/text/PATENTS b/vendor/golang.org/x/text/PATENTS new file mode 100644 index 000000000..733099041 --- /dev/null +++ b/vendor/golang.org/x/text/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/text/encoding/encoding.go b/vendor/golang.org/x/text/encoding/encoding.go new file mode 100644 index 000000000..221f175c0 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/encoding.go @@ -0,0 +1,335 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package encoding defines an interface for character encodings, such as Shift +// JIS and Windows 1252, that can convert to and from UTF-8. +// +// Encoding implementations are provided in other packages, such as +// golang.org/x/text/encoding/charmap and +// golang.org/x/text/encoding/japanese. +package encoding // import "golang.org/x/text/encoding" + +import ( + "errors" + "io" + "strconv" + "unicode/utf8" + + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/transform" +) + +// TODO: +// - There seems to be some inconsistency in when decoders return errors +// and when not. Also documentation seems to suggest they shouldn't return +// errors at all (except for UTF-16). +// - Encoders seem to rely on or at least benefit from the input being in NFC +// normal form. Perhaps add an example how users could prepare their output. + +// Encoding is a character set encoding that can be transformed to and from +// UTF-8. +type Encoding interface { + // NewDecoder returns a Decoder. + NewDecoder() *Decoder + + // NewEncoder returns an Encoder. + NewEncoder() *Encoder +} + +// A Decoder converts bytes to UTF-8. It implements transform.Transformer. +// +// Transforming source bytes that are not of that encoding will not result in an +// error per se. Each byte that cannot be transcoded will be represented in the +// output by the UTF-8 encoding of '\uFFFD', the replacement rune. +type Decoder struct { + transform.Transformer + + // This forces external creators of Decoders to use names in struct + // initializers, allowing for future extendibility without having to break + // code. + _ struct{} +} + +// Bytes converts the given encoded bytes to UTF-8. It returns the converted +// bytes or nil, err if any error occurred. +func (d *Decoder) Bytes(b []byte) ([]byte, error) { + b, _, err := transform.Bytes(d, b) + if err != nil { + return nil, err + } + return b, nil +} + +// String converts the given encoded string to UTF-8. It returns the converted +// string or "", err if any error occurred. +func (d *Decoder) String(s string) (string, error) { + s, _, err := transform.String(d, s) + if err != nil { + return "", err + } + return s, nil +} + +// Reader wraps another Reader to decode its bytes. +// +// The Decoder may not be used for any other operation as long as the returned +// Reader is in use. +func (d *Decoder) Reader(r io.Reader) io.Reader { + return transform.NewReader(r, d) +} + +// An Encoder converts bytes from UTF-8. It implements transform.Transformer. +// +// Each rune that cannot be transcoded will result in an error. In this case, +// the transform will consume all source byte up to, not including the offending +// rune. Transforming source bytes that are not valid UTF-8 will be replaced by +// `\uFFFD`. To return early with an error instead, use transform.Chain to +// preprocess the data with a UTF8Validator. +type Encoder struct { + transform.Transformer + + // This forces external creators of Encoders to use names in struct + // initializers, allowing for future extendibility without having to break + // code. + _ struct{} +} + +// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if +// any error occurred. +func (e *Encoder) Bytes(b []byte) ([]byte, error) { + b, _, err := transform.Bytes(e, b) + if err != nil { + return nil, err + } + return b, nil +} + +// String converts a string from UTF-8. It returns the converted string or +// "", err if any error occurred. +func (e *Encoder) String(s string) (string, error) { + s, _, err := transform.String(e, s) + if err != nil { + return "", err + } + return s, nil +} + +// Writer wraps another Writer to encode its UTF-8 output. +// +// The Encoder may not be used for any other operation as long as the returned +// Writer is in use. +func (e *Encoder) Writer(w io.Writer) io.Writer { + return transform.NewWriter(w, e) +} + +// ASCIISub is the ASCII substitute character, as recommended by +// http://unicode.org/reports/tr36/#Text_Comparison +const ASCIISub = '\x1a' + +// Nop is the nop encoding. Its transformed bytes are the same as the source +// bytes; it does not replace invalid UTF-8 sequences. +var Nop Encoding = nop{} + +type nop struct{} + +func (nop) NewDecoder() *Decoder { + return &Decoder{Transformer: transform.Nop} +} +func (nop) NewEncoder() *Encoder { + return &Encoder{Transformer: transform.Nop} +} + +// Replacement is the replacement encoding. Decoding from the replacement +// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to +// the replacement encoding yields the same as the source bytes except that +// invalid UTF-8 is converted to '\uFFFD'. +// +// It is defined at http://encoding.spec.whatwg.org/#replacement +var Replacement Encoding = replacement{} + +type replacement struct{} + +func (replacement) NewDecoder() *Decoder { + return &Decoder{Transformer: replacementDecoder{}} +} + +func (replacement) NewEncoder() *Encoder { + return &Encoder{Transformer: replacementEncoder{}} +} + +func (replacement) ID() (mib identifier.MIB, other string) { + return identifier.Replacement, "" +} + +type replacementDecoder struct{ transform.NopResetter } + +func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if len(dst) < 3 { + return 0, 0, transform.ErrShortDst + } + if atEOF { + const fffd = "\ufffd" + dst[0] = fffd[0] + dst[1] = fffd[1] + dst[2] = fffd[2] + nDst = 3 + } + return nDst, len(src), nil +} + +type replacementEncoder struct{ transform.NopResetter } + +func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + r, size := rune(0), 0 + + for ; nSrc < len(src); nSrc += size { + r = rune(src[nSrc]) + + // Decode a 1-byte rune. + if r < utf8.RuneSelf { + size = 1 + + } else { + // Decode a multi-byte rune. + r, size = utf8.DecodeRune(src[nSrc:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + r = '\ufffd' + } + } + + if nDst+utf8.RuneLen(r) > len(dst) { + err = transform.ErrShortDst + break + } + nDst += utf8.EncodeRune(dst[nDst:], r) + } + return nDst, nSrc, err +} + +// HTMLEscapeUnsupported wraps encoders to replace source runes outside the +// repertoire of the destination encoding with HTML escape sequences. +// +// This wrapper exists to comply to URL and HTML forms requiring a +// non-terminating legacy encoder. The produced sequences may lead to data +// loss as they are indistinguishable from legitimate input. To avoid this +// issue, use UTF-8 encodings whenever possible. +func HTMLEscapeUnsupported(e *Encoder) *Encoder { + return &Encoder{Transformer: &errorHandler{e, errorToHTML}} +} + +// ReplaceUnsupported wraps encoders to replace source runes outside the +// repertoire of the destination encoding with an encoding-specific +// replacement. +// +// This wrapper is only provided for backwards compatibility and legacy +// handling. Its use is strongly discouraged. Use UTF-8 whenever possible. +func ReplaceUnsupported(e *Encoder) *Encoder { + return &Encoder{Transformer: &errorHandler{e, errorToReplacement}} +} + +type errorHandler struct { + *Encoder + handler func(dst []byte, r rune, err repertoireError) (n int, ok bool) +} + +// TODO: consider making this error public in some form. +type repertoireError interface { + Replacement() byte +} + +func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF) + for err != nil { + rerr, ok := err.(repertoireError) + if !ok { + return nDst, nSrc, err + } + r, sz := utf8.DecodeRune(src[nSrc:]) + n, ok := h.handler(dst[nDst:], r, rerr) + if !ok { + return nDst, nSrc, transform.ErrShortDst + } + err = nil + nDst += n + if nSrc += sz; nSrc < len(src) { + var dn, sn int + dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF) + nDst += dn + nSrc += sn + } + } + return nDst, nSrc, err +} + +func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) { + buf := [8]byte{} + b := strconv.AppendUint(buf[:0], uint64(r), 10) + if n = len(b) + len("&#;"); n >= len(dst) { + return 0, false + } + dst[0] = '&' + dst[1] = '#' + dst[copy(dst[2:], b)+2] = ';' + return n, true +} + +func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) { + if len(dst) == 0 { + return 0, false + } + dst[0] = err.Replacement() + return 1, true +} + +// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8. +var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8") + +// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first +// input byte that is not valid UTF-8. +var UTF8Validator transform.Transformer = utf8Validator{} + +type utf8Validator struct{ transform.NopResetter } + +func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + n := len(src) + if n > len(dst) { + n = len(dst) + } + for i := 0; i < n; { + if c := src[i]; c < utf8.RuneSelf { + dst[i] = c + i++ + continue + } + _, size := utf8.DecodeRune(src[i:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + err = ErrInvalidUTF8 + if !atEOF && !utf8.FullRune(src[i:]) { + err = transform.ErrShortSrc + } + return i, i, err + } + if i+size > len(dst) { + return i, i, transform.ErrShortDst + } + for ; size > 0; size-- { + dst[i] = src[i] + i++ + } + } + if len(src) > len(dst) { + err = transform.ErrShortDst + } + return n, n, err +} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go new file mode 100644 index 000000000..0c8eba7e5 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go @@ -0,0 +1,137 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "log" + "strings" + + "golang.org/x/text/internal/gen" +) + +type registry struct { + XMLName xml.Name `xml:"registry"` + Updated string `xml:"updated"` + Registry []struct { + ID string `xml:"id,attr"` + Record []struct { + Name string `xml:"name"` + Xref []struct { + Type string `xml:"type,attr"` + Data string `xml:"data,attr"` + } `xml:"xref"` + Desc struct { + Data string `xml:",innerxml"` + // Any []struct { + // Data string `xml:",chardata"` + // } `xml:",any"` + // Data string `xml:",chardata"` + } `xml:"description,"` + MIB string `xml:"value"` + Alias []string `xml:"alias"` + MIME string `xml:"preferred_alias"` + } `xml:"record"` + } `xml:"registry"` +} + +func main() { + r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") + reg := ®istry{} + if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { + log.Fatalf("Error decoding charset registry: %v", err) + } + if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { + log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) + } + + w := &bytes.Buffer{} + fmt.Fprintf(w, "const (\n") + for _, rec := range reg.Registry[0].Record { + constName := "" + for _, a := range rec.Alias { + if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { + // Some of the constant definitions have comments in them. Strip those. + constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) + } + } + if constName == "" { + switch rec.MIB { + case "2085": + constName = "HZGB2312" // Not listed as alias for some reason. + default: + log.Fatalf("No cs alias defined for %s.", rec.MIB) + } + } + if rec.MIME != "" { + rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) + } + fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) + if len(rec.Desc.Data) > 0 { + fmt.Fprint(w, "// ") + d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) + inElem := true + attr := "" + for { + t, err := d.Token() + if err != nil { + if err != io.EOF { + log.Fatal(err) + } + break + } + switch x := t.(type) { + case xml.CharData: + attr = "" // Don't need attribute info. + a := bytes.Split([]byte(x), []byte("\n")) + for i, b := range a { + if b = bytes.TrimSpace(b); len(b) != 0 { + if !inElem && i > 0 { + fmt.Fprint(w, "\n// ") + } + inElem = false + fmt.Fprintf(w, "%s ", string(b)) + } + } + case xml.StartElement: + if x.Name.Local == "xref" { + inElem = true + use := false + for _, a := range x.Attr { + if a.Name.Local == "type" { + use = use || a.Value != "person" + } + if a.Name.Local == "data" && use { + attr = a.Value + " " + } + } + } + case xml.EndElement: + inElem = false + fmt.Fprint(w, attr) + } + } + fmt.Fprint(w, "\n") + } + for _, x := range rec.Xref { + switch x.Type { + case "rfc": + fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) + case "uri": + fmt.Fprintf(w, "// Reference: %s\n", x.Data) + } + } + fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) + fmt.Fprintln(w) + } + fmt.Fprintln(w, ")") + + gen.WriteGoFile("mib.go", "identifier", w.Bytes()) +} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go new file mode 100644 index 000000000..2a2da0ef2 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go @@ -0,0 +1,81 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen.go + +// Package identifier defines the contract between implementations of Encoding +// and Index by defining identifiers that uniquely identify standardized coded +// character sets (CCS) and character encoding schemes (CES), which we will +// together refer to as encodings, for which Encoding implementations provide +// converters to and from UTF-8. This package is typically only of concern to +// implementers of Indexes and Encodings. +// +// One part of the identifier is the MIB code, which is defined by IANA and +// uniquely identifies a CCS or CES. Each code is associated with data that +// references authorities, official documentation as well as aliases and MIME +// names. +// +// Not all CESs are covered by the IANA registry. The "other" string that is +// returned by ID can be used to identify other character sets or versions of +// existing ones. +// +// It is recommended that each package that provides a set of Encodings provide +// the All and Common variables to reference all supported encodings and +// commonly used subset. This allows Index implementations to include all +// available encodings without explicitly referencing or knowing about them. +package identifier + +// Note: this package is internal, but could be made public if there is a need +// for writing third-party Indexes and Encodings. + +// References: +// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt +// - http://www.iana.org/assignments/character-sets/character-sets.xhtml +// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib +// - http://www.ietf.org/rfc/rfc2978.txt +// - http://www.unicode.org/reports/tr22/ +// - http://www.w3.org/TR/encoding/ +// - http://www.w3.org/TR/encoding/indexes/encodings.json +// - https://encoding.spec.whatwg.org/ +// - https://tools.ietf.org/html/rfc6657#section-5 + +// Interface can be implemented by Encodings to define the CCS or CES for which +// it implements conversions. +type Interface interface { + // ID returns an encoding identifier. Exactly one of the mib and other + // values should be non-zero. + // + // In the usual case it is only necessary to indicate the MIB code. The + // other string can be used to specify encodings for which there is no MIB, + // such as "x-mac-dingbat". + // + // The other string may only contain the characters a-z, A-Z, 0-9, - and _. + ID() (mib MIB, other string) + + // NOTE: the restrictions on the encoding are to allow extending the syntax + // with additional information such as versions, vendors and other variants. +} + +// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds +// some identifiers for some encodings that are not covered by the IANA +// standard. +// +// See http://www.iana.org/assignments/ianacharset-mib. +type MIB uint16 + +// These additional MIB types are not defined in IANA. They are added because +// they are common and defined within the text repo. +const ( + // Unofficial marks the start of encodings not registered by IANA. + Unofficial MIB = 10000 + iota + + // Replacement is the WhatWG replacement encoding. + Replacement + + // XUserDefined is the code for x-user-defined. + XUserDefined + + // MacintoshCyrillic is the code for x-mac-cyrillic. + MacintoshCyrillic +) diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go new file mode 100644 index 000000000..915abfa29 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go @@ -0,0 +1,1621 @@ +// This file was generated by go generate; DO NOT EDIT + +package identifier + +const ( + // ASCII is the MIB identifier with IANA name US-ASCII (MIME: US-ASCII). + // + // ANSI X3.4-1986 + // Reference: RFC2046 + ASCII MIB = 3 + + // ISOLatin1 is the MIB identifier with IANA name ISO_8859-1:1987 (MIME: ISO-8859-1). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin1 MIB = 4 + + // ISOLatin2 is the MIB identifier with IANA name ISO_8859-2:1987 (MIME: ISO-8859-2). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin2 MIB = 5 + + // ISOLatin3 is the MIB identifier with IANA name ISO_8859-3:1988 (MIME: ISO-8859-3). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin3 MIB = 6 + + // ISOLatin4 is the MIB identifier with IANA name ISO_8859-4:1988 (MIME: ISO-8859-4). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin4 MIB = 7 + + // ISOLatinCyrillic is the MIB identifier with IANA name ISO_8859-5:1988 (MIME: ISO-8859-5). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatinCyrillic MIB = 8 + + // ISOLatinArabic is the MIB identifier with IANA name ISO_8859-6:1987 (MIME: ISO-8859-6). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatinArabic MIB = 9 + + // ISOLatinGreek is the MIB identifier with IANA name ISO_8859-7:1987 (MIME: ISO-8859-7). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1947 + // Reference: RFC1345 + ISOLatinGreek MIB = 10 + + // ISOLatinHebrew is the MIB identifier with IANA name ISO_8859-8:1988 (MIME: ISO-8859-8). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatinHebrew MIB = 11 + + // ISOLatin5 is the MIB identifier with IANA name ISO_8859-9:1989 (MIME: ISO-8859-9). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin5 MIB = 12 + + // ISOLatin6 is the MIB identifier with IANA name ISO-8859-10 (MIME: ISO-8859-10). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin6 MIB = 13 + + // ISOTextComm is the MIB identifier with IANA name ISO_6937-2-add. + // + // ISO-IR: International Register of Escape Sequences and ISO 6937-2:1983 + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOTextComm MIB = 14 + + // HalfWidthKatakana is the MIB identifier with IANA name JIS_X0201. + // + // JIS X 0201-1976. One byte only, this is equivalent to + // JIS/Roman (similar to ASCII) plus eight-bit half-width + // Katakana + // Reference: RFC1345 + HalfWidthKatakana MIB = 15 + + // JISEncoding is the MIB identifier with IANA name JIS_Encoding. + // + // JIS X 0202-1991. Uses ISO 2022 escape sequences to + // shift code sets as documented in JIS X 0202-1991. + JISEncoding MIB = 16 + + // ShiftJIS is the MIB identifier with IANA name Shift_JIS (MIME: Shift_JIS). + // + // This charset is an extension of csHalfWidthKatakana by + // adding graphic characters in JIS X 0208. The CCS's are + // JIS X0201:1997 and JIS X0208:1997. The + // complete definition is shown in Appendix 1 of JIS + // X0208:1997. + // This charset can be used for the top-level media type "text". + ShiftJIS MIB = 17 + + // EUCPkdFmtJapanese is the MIB identifier with IANA name Extended_UNIX_Code_Packed_Format_for_Japanese (MIME: EUC-JP). + // + // Standardized by OSF, UNIX International, and UNIX Systems + // Laboratories Pacific. Uses ISO 2022 rules to select + // code set 0: US-ASCII (a single 7-bit byte set) + // code set 1: JIS X0208-1990 (a double 8-bit byte set) + // restricted to A0-FF in both bytes + // code set 2: Half Width Katakana (a single 7-bit byte set) + // requiring SS2 as the character prefix + // code set 3: JIS X0212-1990 (a double 7-bit byte set) + // restricted to A0-FF in both bytes + // requiring SS3 as the character prefix + EUCPkdFmtJapanese MIB = 18 + + // EUCFixWidJapanese is the MIB identifier with IANA name Extended_UNIX_Code_Fixed_Width_for_Japanese. + // + // Used in Japan. Each character is 2 octets. + // code set 0: US-ASCII (a single 7-bit byte set) + // 1st byte = 00 + // 2nd byte = 20-7E + // code set 1: JIS X0208-1990 (a double 7-bit byte set) + // restricted to A0-FF in both bytes + // code set 2: Half Width Katakana (a single 7-bit byte set) + // 1st byte = 00 + // 2nd byte = A0-FF + // code set 3: JIS X0212-1990 (a double 7-bit byte set) + // restricted to A0-FF in + // the first byte + // and 21-7E in the second byte + EUCFixWidJapanese MIB = 19 + + // ISO4UnitedKingdom is the MIB identifier with IANA name BS_4730. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO4UnitedKingdom MIB = 20 + + // ISO11SwedishForNames is the MIB identifier with IANA name SEN_850200_C. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO11SwedishForNames MIB = 21 + + // ISO15Italian is the MIB identifier with IANA name IT. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO15Italian MIB = 22 + + // ISO17Spanish is the MIB identifier with IANA name ES. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO17Spanish MIB = 23 + + // ISO21German is the MIB identifier with IANA name DIN_66003. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO21German MIB = 24 + + // ISO60Norwegian1 is the MIB identifier with IANA name NS_4551-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO60Norwegian1 MIB = 25 + + // ISO69French is the MIB identifier with IANA name NF_Z_62-010. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO69French MIB = 26 + + // ISO10646UTF1 is the MIB identifier with IANA name ISO-10646-UTF-1. + // + // Universal Transfer Format (1), this is the multibyte + // encoding, that subsets ASCII-7. It does not have byte + // ordering issues. + ISO10646UTF1 MIB = 27 + + // ISO646basic1983 is the MIB identifier with IANA name ISO_646.basic:1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO646basic1983 MIB = 28 + + // INVARIANT is the MIB identifier with IANA name INVARIANT. + // + // Reference: RFC1345 + INVARIANT MIB = 29 + + // ISO2IntlRefVersion is the MIB identifier with IANA name ISO_646.irv:1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO2IntlRefVersion MIB = 30 + + // NATSSEFI is the MIB identifier with IANA name NATS-SEFI. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSSEFI MIB = 31 + + // NATSSEFIADD is the MIB identifier with IANA name NATS-SEFI-ADD. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSSEFIADD MIB = 32 + + // NATSDANO is the MIB identifier with IANA name NATS-DANO. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSDANO MIB = 33 + + // NATSDANOADD is the MIB identifier with IANA name NATS-DANO-ADD. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSDANOADD MIB = 34 + + // ISO10Swedish is the MIB identifier with IANA name SEN_850200_B. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO10Swedish MIB = 35 + + // KSC56011987 is the MIB identifier with IANA name KS_C_5601-1987. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + KSC56011987 MIB = 36 + + // ISO2022KR is the MIB identifier with IANA name ISO-2022-KR (MIME: ISO-2022-KR). + // + // rfc1557 (see also KS_C_5601-1987) + // Reference: RFC1557 + ISO2022KR MIB = 37 + + // EUCKR is the MIB identifier with IANA name EUC-KR (MIME: EUC-KR). + // + // rfc1557 (see also KS_C_5861-1992) + // Reference: RFC1557 + EUCKR MIB = 38 + + // ISO2022JP is the MIB identifier with IANA name ISO-2022-JP (MIME: ISO-2022-JP). + // + // rfc1468 (see also rfc2237 ) + // Reference: RFC1468 + ISO2022JP MIB = 39 + + // ISO2022JP2 is the MIB identifier with IANA name ISO-2022-JP-2 (MIME: ISO-2022-JP-2). + // + // rfc1554 + // Reference: RFC1554 + ISO2022JP2 MIB = 40 + + // ISO13JISC6220jp is the MIB identifier with IANA name JIS_C6220-1969-jp. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO13JISC6220jp MIB = 41 + + // ISO14JISC6220ro is the MIB identifier with IANA name JIS_C6220-1969-ro. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO14JISC6220ro MIB = 42 + + // ISO16Portuguese is the MIB identifier with IANA name PT. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO16Portuguese MIB = 43 + + // ISO18Greek7Old is the MIB identifier with IANA name greek7-old. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO18Greek7Old MIB = 44 + + // ISO19LatinGreek is the MIB identifier with IANA name latin-greek. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO19LatinGreek MIB = 45 + + // ISO25French is the MIB identifier with IANA name NF_Z_62-010_(1973). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO25French MIB = 46 + + // ISO27LatinGreek1 is the MIB identifier with IANA name Latin-greek-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO27LatinGreek1 MIB = 47 + + // ISO5427Cyrillic is the MIB identifier with IANA name ISO_5427. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO5427Cyrillic MIB = 48 + + // ISO42JISC62261978 is the MIB identifier with IANA name JIS_C6226-1978. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO42JISC62261978 MIB = 49 + + // ISO47BSViewdata is the MIB identifier with IANA name BS_viewdata. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO47BSViewdata MIB = 50 + + // ISO49INIS is the MIB identifier with IANA name INIS. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO49INIS MIB = 51 + + // ISO50INIS8 is the MIB identifier with IANA name INIS-8. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO50INIS8 MIB = 52 + + // ISO51INISCyrillic is the MIB identifier with IANA name INIS-cyrillic. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO51INISCyrillic MIB = 53 + + // ISO54271981 is the MIB identifier with IANA name ISO_5427:1981. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO54271981 MIB = 54 + + // ISO5428Greek is the MIB identifier with IANA name ISO_5428:1980. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO5428Greek MIB = 55 + + // ISO57GB1988 is the MIB identifier with IANA name GB_1988-80. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO57GB1988 MIB = 56 + + // ISO58GB231280 is the MIB identifier with IANA name GB_2312-80. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO58GB231280 MIB = 57 + + // ISO61Norwegian2 is the MIB identifier with IANA name NS_4551-2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO61Norwegian2 MIB = 58 + + // ISO70VideotexSupp1 is the MIB identifier with IANA name videotex-suppl. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO70VideotexSupp1 MIB = 59 + + // ISO84Portuguese2 is the MIB identifier with IANA name PT2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO84Portuguese2 MIB = 60 + + // ISO85Spanish2 is the MIB identifier with IANA name ES2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO85Spanish2 MIB = 61 + + // ISO86Hungarian is the MIB identifier with IANA name MSZ_7795.3. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO86Hungarian MIB = 62 + + // ISO87JISX0208 is the MIB identifier with IANA name JIS_C6226-1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO87JISX0208 MIB = 63 + + // ISO88Greek7 is the MIB identifier with IANA name greek7. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO88Greek7 MIB = 64 + + // ISO89ASMO449 is the MIB identifier with IANA name ASMO_449. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO89ASMO449 MIB = 65 + + // ISO90 is the MIB identifier with IANA name iso-ir-90. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO90 MIB = 66 + + // ISO91JISC62291984a is the MIB identifier with IANA name JIS_C6229-1984-a. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO91JISC62291984a MIB = 67 + + // ISO92JISC62991984b is the MIB identifier with IANA name JIS_C6229-1984-b. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO92JISC62991984b MIB = 68 + + // ISO93JIS62291984badd is the MIB identifier with IANA name JIS_C6229-1984-b-add. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO93JIS62291984badd MIB = 69 + + // ISO94JIS62291984hand is the MIB identifier with IANA name JIS_C6229-1984-hand. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO94JIS62291984hand MIB = 70 + + // ISO95JIS62291984handadd is the MIB identifier with IANA name JIS_C6229-1984-hand-add. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO95JIS62291984handadd MIB = 71 + + // ISO96JISC62291984kana is the MIB identifier with IANA name JIS_C6229-1984-kana. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO96JISC62291984kana MIB = 72 + + // ISO2033 is the MIB identifier with IANA name ISO_2033-1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO2033 MIB = 73 + + // ISO99NAPLPS is the MIB identifier with IANA name ANSI_X3.110-1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO99NAPLPS MIB = 74 + + // ISO102T617bit is the MIB identifier with IANA name T.61-7bit. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO102T617bit MIB = 75 + + // ISO103T618bit is the MIB identifier with IANA name T.61-8bit. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO103T618bit MIB = 76 + + // ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic. + // + // ISO registry + // (formerly ECMA + // registry ) + ISO111ECMACyrillic MIB = 77 + + // ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO121Canadian1 MIB = 78 + + // ISO122Canadian2 is the MIB identifier with IANA name CSA_Z243.4-1985-2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO122Canadian2 MIB = 79 + + // ISO123CSAZ24341985gr is the MIB identifier with IANA name CSA_Z243.4-1985-gr. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO123CSAZ24341985gr MIB = 80 + + // ISO88596E is the MIB identifier with IANA name ISO_8859-6-E (MIME: ISO-8859-6-E). + // + // rfc1556 + // Reference: RFC1556 + ISO88596E MIB = 81 + + // ISO88596I is the MIB identifier with IANA name ISO_8859-6-I (MIME: ISO-8859-6-I). + // + // rfc1556 + // Reference: RFC1556 + ISO88596I MIB = 82 + + // ISO128T101G2 is the MIB identifier with IANA name T.101-G2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO128T101G2 MIB = 83 + + // ISO88598E is the MIB identifier with IANA name ISO_8859-8-E (MIME: ISO-8859-8-E). + // + // rfc1556 + // Reference: RFC1556 + ISO88598E MIB = 84 + + // ISO88598I is the MIB identifier with IANA name ISO_8859-8-I (MIME: ISO-8859-8-I). + // + // rfc1556 + // Reference: RFC1556 + ISO88598I MIB = 85 + + // ISO139CSN369103 is the MIB identifier with IANA name CSN_369103. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO139CSN369103 MIB = 86 + + // ISO141JUSIB1002 is the MIB identifier with IANA name JUS_I.B1.002. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO141JUSIB1002 MIB = 87 + + // ISO143IECP271 is the MIB identifier with IANA name IEC_P27-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO143IECP271 MIB = 88 + + // ISO146Serbian is the MIB identifier with IANA name JUS_I.B1.003-serb. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO146Serbian MIB = 89 + + // ISO147Macedonian is the MIB identifier with IANA name JUS_I.B1.003-mac. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO147Macedonian MIB = 90 + + // ISO150GreekCCITT is the MIB identifier with IANA name greek-ccitt. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO150GreekCCITT MIB = 91 + + // ISO151Cuba is the MIB identifier with IANA name NC_NC00-10:81. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO151Cuba MIB = 92 + + // ISO6937Add is the MIB identifier with IANA name ISO_6937-2-25. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO6937Add MIB = 93 + + // ISO153GOST1976874 is the MIB identifier with IANA name GOST_19768-74. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO153GOST1976874 MIB = 94 + + // ISO8859Supp is the MIB identifier with IANA name ISO_8859-supp. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO8859Supp MIB = 95 + + // ISO10367Box is the MIB identifier with IANA name ISO_10367-box. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO10367Box MIB = 96 + + // ISO158Lap is the MIB identifier with IANA name latin-lap. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO158Lap MIB = 97 + + // ISO159JISX02121990 is the MIB identifier with IANA name JIS_X0212-1990. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO159JISX02121990 MIB = 98 + + // ISO646Danish is the MIB identifier with IANA name DS_2089. + // + // Danish Standard, DS 2089, February 1974 + // Reference: RFC1345 + ISO646Danish MIB = 99 + + // USDK is the MIB identifier with IANA name us-dk. + // + // Reference: RFC1345 + USDK MIB = 100 + + // DKUS is the MIB identifier with IANA name dk-us. + // + // Reference: RFC1345 + DKUS MIB = 101 + + // KSC5636 is the MIB identifier with IANA name KSC5636. + // + // Reference: RFC1345 + KSC5636 MIB = 102 + + // Unicode11UTF7 is the MIB identifier with IANA name UNICODE-1-1-UTF-7. + // + // rfc1642 + // Reference: RFC1642 + Unicode11UTF7 MIB = 103 + + // ISO2022CN is the MIB identifier with IANA name ISO-2022-CN. + // + // rfc1922 + // Reference: RFC1922 + ISO2022CN MIB = 104 + + // ISO2022CNEXT is the MIB identifier with IANA name ISO-2022-CN-EXT. + // + // rfc1922 + // Reference: RFC1922 + ISO2022CNEXT MIB = 105 + + // UTF8 is the MIB identifier with IANA name UTF-8. + // + // rfc3629 + // Reference: RFC3629 + UTF8 MIB = 106 + + // ISO885913 is the MIB identifier with IANA name ISO-8859-13. + // + // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-13 http://www.iana.org/assignments/charset-reg/ISO-8859-13 + ISO885913 MIB = 109 + + // ISO885914 is the MIB identifier with IANA name ISO-8859-14. + // + // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-14 + ISO885914 MIB = 110 + + // ISO885915 is the MIB identifier with IANA name ISO-8859-15. + // + // ISO + // Please see: http://www.iana.org/assignments/charset-reg/ISO-8859-15 + ISO885915 MIB = 111 + + // ISO885916 is the MIB identifier with IANA name ISO-8859-16. + // + // ISO + ISO885916 MIB = 112 + + // GBK is the MIB identifier with IANA name GBK. + // + // Chinese IT Standardization Technical Committee + // Please see: http://www.iana.org/assignments/charset-reg/GBK + GBK MIB = 113 + + // GB18030 is the MIB identifier with IANA name GB18030. + // + // Chinese IT Standardization Technical Committee + // Please see: http://www.iana.org/assignments/charset-reg/GB18030 + GB18030 MIB = 114 + + // OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15. + // + // Fujitsu-Siemens standard mainframe EBCDIC encoding + // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15 + OSDEBCDICDF0415 MIB = 115 + + // OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV. + // + // Fujitsu-Siemens standard mainframe EBCDIC encoding + // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV + OSDEBCDICDF03IRV MIB = 116 + + // OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1. + // + // Fujitsu-Siemens standard mainframe EBCDIC encoding + // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1 + OSDEBCDICDF041 MIB = 117 + + // ISO115481 is the MIB identifier with IANA name ISO-11548-1. + // + // See http://www.iana.org/assignments/charset-reg/ISO-11548-1 + ISO115481 MIB = 118 + + // KZ1048 is the MIB identifier with IANA name KZ-1048. + // + // See http://www.iana.org/assignments/charset-reg/KZ-1048 + KZ1048 MIB = 119 + + // Unicode is the MIB identifier with IANA name ISO-10646-UCS-2. + // + // the 2-octet Basic Multilingual Plane, aka Unicode + // this needs to specify network byte order: the standard + // does not specify (it is a 16-bit integer space) + Unicode MIB = 1000 + + // UCS4 is the MIB identifier with IANA name ISO-10646-UCS-4. + // + // the full code space. (same comment about byte order, + // these are 31-bit numbers. + UCS4 MIB = 1001 + + // UnicodeASCII is the MIB identifier with IANA name ISO-10646-UCS-Basic. + // + // ASCII subset of Unicode. Basic Latin = collection 1 + // See ISO 10646, Appendix A + UnicodeASCII MIB = 1002 + + // UnicodeLatin1 is the MIB identifier with IANA name ISO-10646-Unicode-Latin1. + // + // ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 + // Supplement = collections 1 and 2. See ISO 10646, + // Appendix A. See rfc1815 . + UnicodeLatin1 MIB = 1003 + + // UnicodeJapanese is the MIB identifier with IANA name ISO-10646-J-1. + // + // ISO 10646 Japanese, see rfc1815 . + UnicodeJapanese MIB = 1004 + + // UnicodeIBM1261 is the MIB identifier with IANA name ISO-Unicode-IBM-1261. + // + // IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261 + UnicodeIBM1261 MIB = 1005 + + // UnicodeIBM1268 is the MIB identifier with IANA name ISO-Unicode-IBM-1268. + // + // IBM Latin-4 Extended Presentation Set, GCSGID: 1268 + UnicodeIBM1268 MIB = 1006 + + // UnicodeIBM1276 is the MIB identifier with IANA name ISO-Unicode-IBM-1276. + // + // IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276 + UnicodeIBM1276 MIB = 1007 + + // UnicodeIBM1264 is the MIB identifier with IANA name ISO-Unicode-IBM-1264. + // + // IBM Arabic Presentation Set, GCSGID: 1264 + UnicodeIBM1264 MIB = 1008 + + // UnicodeIBM1265 is the MIB identifier with IANA name ISO-Unicode-IBM-1265. + // + // IBM Hebrew Presentation Set, GCSGID: 1265 + UnicodeIBM1265 MIB = 1009 + + // Unicode11 is the MIB identifier with IANA name UNICODE-1-1. + // + // rfc1641 + // Reference: RFC1641 + Unicode11 MIB = 1010 + + // SCSU is the MIB identifier with IANA name SCSU. + // + // SCSU See http://www.iana.org/assignments/charset-reg/SCSU + SCSU MIB = 1011 + + // UTF7 is the MIB identifier with IANA name UTF-7. + // + // rfc2152 + // Reference: RFC2152 + UTF7 MIB = 1012 + + // UTF16BE is the MIB identifier with IANA name UTF-16BE. + // + // rfc2781 + // Reference: RFC2781 + UTF16BE MIB = 1013 + + // UTF16LE is the MIB identifier with IANA name UTF-16LE. + // + // rfc2781 + // Reference: RFC2781 + UTF16LE MIB = 1014 + + // UTF16 is the MIB identifier with IANA name UTF-16. + // + // rfc2781 + // Reference: RFC2781 + UTF16 MIB = 1015 + + // CESU8 is the MIB identifier with IANA name CESU-8. + // + // http://www.unicode.org/unicode/reports/tr26 + CESU8 MIB = 1016 + + // UTF32 is the MIB identifier with IANA name UTF-32. + // + // http://www.unicode.org/unicode/reports/tr19/ + UTF32 MIB = 1017 + + // UTF32BE is the MIB identifier with IANA name UTF-32BE. + // + // http://www.unicode.org/unicode/reports/tr19/ + UTF32BE MIB = 1018 + + // UTF32LE is the MIB identifier with IANA name UTF-32LE. + // + // http://www.unicode.org/unicode/reports/tr19/ + UTF32LE MIB = 1019 + + // BOCU1 is the MIB identifier with IANA name BOCU-1. + // + // http://www.unicode.org/notes/tn6/ + BOCU1 MIB = 1020 + + // Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. + // + // Extended ISO 8859-1 Latin-1 for Windows 3.0. + // PCL Symbol Set id: 9U + Windows30Latin1 MIB = 2000 + + // Windows31Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.1-Latin-1. + // + // Extended ISO 8859-1 Latin-1 for Windows 3.1. + // PCL Symbol Set id: 19U + Windows31Latin1 MIB = 2001 + + // Windows31Latin2 is the MIB identifier with IANA name ISO-8859-2-Windows-Latin-2. + // + // Extended ISO 8859-2. Latin-2 for Windows 3.1. + // PCL Symbol Set id: 9E + Windows31Latin2 MIB = 2002 + + // Windows31Latin5 is the MIB identifier with IANA name ISO-8859-9-Windows-Latin-5. + // + // Extended ISO 8859-9. Latin-5 for Windows 3.1 + // PCL Symbol Set id: 5T + Windows31Latin5 MIB = 2003 + + // HPRoman8 is the MIB identifier with IANA name hp-roman8. + // + // LaserJet IIP Printer User's Manual, + // HP part no 33471-90901, Hewlet-Packard, June 1989. + // Reference: RFC1345 + HPRoman8 MIB = 2004 + + // AdobeStandardEncoding is the MIB identifier with IANA name Adobe-Standard-Encoding. + // + // PostScript Language Reference Manual + // PCL Symbol Set id: 10J + AdobeStandardEncoding MIB = 2005 + + // VenturaUS is the MIB identifier with IANA name Ventura-US. + // + // Ventura US. ASCII plus characters typically used in + // publishing, like pilcrow, copyright, registered, trade mark, + // section, dagger, and double dagger in the range A0 (hex) + // to FF (hex). + // PCL Symbol Set id: 14J + VenturaUS MIB = 2006 + + // VenturaInternational is the MIB identifier with IANA name Ventura-International. + // + // Ventura International. ASCII plus coded characters similar + // to Roman8. + // PCL Symbol Set id: 13J + VenturaInternational MIB = 2007 + + // DECMCS is the MIB identifier with IANA name DEC-MCS. + // + // VAX/VMS User's Manual, + // Order Number: AI-Y517A-TE, April 1986. + // Reference: RFC1345 + DECMCS MIB = 2008 + + // PC850Multilingual is the MIB identifier with IANA name IBM850. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PC850Multilingual MIB = 2009 + + // PC8DanishNorwegian is the MIB identifier with IANA name PC8-Danish-Norwegian. + // + // PC Danish Norwegian + // 8-bit PC set for Danish Norwegian + // PCL Symbol Set id: 11U + PC8DanishNorwegian MIB = 2012 + + // PC862LatinHebrew is the MIB identifier with IANA name IBM862. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PC862LatinHebrew MIB = 2013 + + // PC8Turkish is the MIB identifier with IANA name PC8-Turkish. + // + // PC Latin Turkish. PCL Symbol Set id: 9T + PC8Turkish MIB = 2014 + + // IBMSymbols is the MIB identifier with IANA name IBM-Symbols. + // + // Presentation Set, CPGID: 259 + IBMSymbols MIB = 2015 + + // IBMThai is the MIB identifier with IANA name IBM-Thai. + // + // Presentation Set, CPGID: 838 + IBMThai MIB = 2016 + + // HPLegal is the MIB identifier with IANA name HP-Legal. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 1U + HPLegal MIB = 2017 + + // HPPiFont is the MIB identifier with IANA name HP-Pi-font. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 15U + HPPiFont MIB = 2018 + + // HPMath8 is the MIB identifier with IANA name HP-Math8. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 8M + HPMath8 MIB = 2019 + + // HPPSMath is the MIB identifier with IANA name Adobe-Symbol-Encoding. + // + // PostScript Language Reference Manual + // PCL Symbol Set id: 5M + HPPSMath MIB = 2020 + + // HPDesktop is the MIB identifier with IANA name HP-DeskTop. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 7J + HPDesktop MIB = 2021 + + // VenturaMath is the MIB identifier with IANA name Ventura-Math. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 6M + VenturaMath MIB = 2022 + + // MicrosoftPublishing is the MIB identifier with IANA name Microsoft-Publishing. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 6J + MicrosoftPublishing MIB = 2023 + + // Windows31J is the MIB identifier with IANA name Windows-31J. + // + // Windows Japanese. A further extension of Shift_JIS + // to include NEC special characters (Row 13), NEC + // selection of IBM extensions (Rows 89 to 92), and IBM + // extensions (Rows 115 to 119). The CCS's are + // JIS X0201:1997, JIS X0208:1997, and these extensions. + // This charset can be used for the top-level media type "text", + // but it is of limited or specialized use (see rfc2278 ). + // PCL Symbol Set id: 19K + Windows31J MIB = 2024 + + // GB2312 is the MIB identifier with IANA name GB2312 (MIME: GB2312). + // + // Chinese for People's Republic of China (PRC) mixed one byte, + // two byte set: + // 20-7E = one byte ASCII + // A1-FE = two byte PRC Kanji + // See GB 2312-80 + // PCL Symbol Set Id: 18C + GB2312 MIB = 2025 + + // Big5 is the MIB identifier with IANA name Big5 (MIME: Big5). + // + // Chinese for Taiwan Multi-byte set. + // PCL Symbol Set Id: 18T + Big5 MIB = 2026 + + // Macintosh is the MIB identifier with IANA name macintosh. + // + // The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991 + // Reference: RFC1345 + Macintosh MIB = 2027 + + // IBM037 is the MIB identifier with IANA name IBM037. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM037 MIB = 2028 + + // IBM038 is the MIB identifier with IANA name IBM038. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM038 MIB = 2029 + + // IBM273 is the MIB identifier with IANA name IBM273. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM273 MIB = 2030 + + // IBM274 is the MIB identifier with IANA name IBM274. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM274 MIB = 2031 + + // IBM275 is the MIB identifier with IANA name IBM275. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM275 MIB = 2032 + + // IBM277 is the MIB identifier with IANA name IBM277. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM277 MIB = 2033 + + // IBM278 is the MIB identifier with IANA name IBM278. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM278 MIB = 2034 + + // IBM280 is the MIB identifier with IANA name IBM280. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM280 MIB = 2035 + + // IBM281 is the MIB identifier with IANA name IBM281. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM281 MIB = 2036 + + // IBM284 is the MIB identifier with IANA name IBM284. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM284 MIB = 2037 + + // IBM285 is the MIB identifier with IANA name IBM285. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM285 MIB = 2038 + + // IBM290 is the MIB identifier with IANA name IBM290. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM290 MIB = 2039 + + // IBM297 is the MIB identifier with IANA name IBM297. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM297 MIB = 2040 + + // IBM420 is the MIB identifier with IANA name IBM420. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990, + // IBM NLS RM p 11-11 + // Reference: RFC1345 + IBM420 MIB = 2041 + + // IBM423 is the MIB identifier with IANA name IBM423. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM423 MIB = 2042 + + // IBM424 is the MIB identifier with IANA name IBM424. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM424 MIB = 2043 + + // PC8CodePage437 is the MIB identifier with IANA name IBM437. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PC8CodePage437 MIB = 2011 + + // IBM500 is the MIB identifier with IANA name IBM500. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM500 MIB = 2044 + + // IBM851 is the MIB identifier with IANA name IBM851. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM851 MIB = 2045 + + // PCp852 is the MIB identifier with IANA name IBM852. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PCp852 MIB = 2010 + + // IBM855 is the MIB identifier with IANA name IBM855. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM855 MIB = 2046 + + // IBM857 is the MIB identifier with IANA name IBM857. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM857 MIB = 2047 + + // IBM860 is the MIB identifier with IANA name IBM860. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM860 MIB = 2048 + + // IBM861 is the MIB identifier with IANA name IBM861. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM861 MIB = 2049 + + // IBM863 is the MIB identifier with IANA name IBM863. + // + // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 + // Reference: RFC1345 + IBM863 MIB = 2050 + + // IBM864 is the MIB identifier with IANA name IBM864. + // + // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 + // Reference: RFC1345 + IBM864 MIB = 2051 + + // IBM865 is the MIB identifier with IANA name IBM865. + // + // IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987) + // Reference: RFC1345 + IBM865 MIB = 2052 + + // IBM868 is the MIB identifier with IANA name IBM868. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM868 MIB = 2053 + + // IBM869 is the MIB identifier with IANA name IBM869. + // + // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 + // Reference: RFC1345 + IBM869 MIB = 2054 + + // IBM870 is the MIB identifier with IANA name IBM870. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM870 MIB = 2055 + + // IBM871 is the MIB identifier with IANA name IBM871. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM871 MIB = 2056 + + // IBM880 is the MIB identifier with IANA name IBM880. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM880 MIB = 2057 + + // IBM891 is the MIB identifier with IANA name IBM891. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM891 MIB = 2058 + + // IBM903 is the MIB identifier with IANA name IBM903. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM903 MIB = 2059 + + // IBBM904 is the MIB identifier with IANA name IBM904. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBBM904 MIB = 2060 + + // IBM905 is the MIB identifier with IANA name IBM905. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM905 MIB = 2061 + + // IBM918 is the MIB identifier with IANA name IBM918. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM918 MIB = 2062 + + // IBM1026 is the MIB identifier with IANA name IBM1026. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM1026 MIB = 2063 + + // IBMEBCDICATDE is the MIB identifier with IANA name EBCDIC-AT-DE. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + IBMEBCDICATDE MIB = 2064 + + // EBCDICATDEA is the MIB identifier with IANA name EBCDIC-AT-DE-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICATDEA MIB = 2065 + + // EBCDICCAFR is the MIB identifier with IANA name EBCDIC-CA-FR. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICCAFR MIB = 2066 + + // EBCDICDKNO is the MIB identifier with IANA name EBCDIC-DK-NO. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICDKNO MIB = 2067 + + // EBCDICDKNOA is the MIB identifier with IANA name EBCDIC-DK-NO-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICDKNOA MIB = 2068 + + // EBCDICFISE is the MIB identifier with IANA name EBCDIC-FI-SE. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICFISE MIB = 2069 + + // EBCDICFISEA is the MIB identifier with IANA name EBCDIC-FI-SE-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICFISEA MIB = 2070 + + // EBCDICFR is the MIB identifier with IANA name EBCDIC-FR. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICFR MIB = 2071 + + // EBCDICIT is the MIB identifier with IANA name EBCDIC-IT. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICIT MIB = 2072 + + // EBCDICPT is the MIB identifier with IANA name EBCDIC-PT. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICPT MIB = 2073 + + // EBCDICES is the MIB identifier with IANA name EBCDIC-ES. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICES MIB = 2074 + + // EBCDICESA is the MIB identifier with IANA name EBCDIC-ES-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICESA MIB = 2075 + + // EBCDICESS is the MIB identifier with IANA name EBCDIC-ES-S. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICESS MIB = 2076 + + // EBCDICUK is the MIB identifier with IANA name EBCDIC-UK. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICUK MIB = 2077 + + // EBCDICUS is the MIB identifier with IANA name EBCDIC-US. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICUS MIB = 2078 + + // Unknown8BiT is the MIB identifier with IANA name UNKNOWN-8BIT. + // + // Reference: RFC1428 + Unknown8BiT MIB = 2079 + + // Mnemonic is the MIB identifier with IANA name MNEMONIC. + // + // rfc1345 , also known as "mnemonic+ascii+38" + // Reference: RFC1345 + Mnemonic MIB = 2080 + + // Mnem is the MIB identifier with IANA name MNEM. + // + // rfc1345 , also known as "mnemonic+ascii+8200" + // Reference: RFC1345 + Mnem MIB = 2081 + + // VISCII is the MIB identifier with IANA name VISCII. + // + // rfc1456 + // Reference: RFC1456 + VISCII MIB = 2082 + + // VIQR is the MIB identifier with IANA name VIQR. + // + // rfc1456 + // Reference: RFC1456 + VIQR MIB = 2083 + + // KOI8R is the MIB identifier with IANA name KOI8-R (MIME: KOI8-R). + // + // rfc1489 , based on GOST-19768-74, ISO-6937/8, + // INIS-Cyrillic, ISO-5427. + // Reference: RFC1489 + KOI8R MIB = 2084 + + // HZGB2312 is the MIB identifier with IANA name HZ-GB-2312. + // + // rfc1842 , rfc1843 rfc1843 rfc1842 + HZGB2312 MIB = 2085 + + // IBM866 is the MIB identifier with IANA name IBM866. + // + // IBM NLDG Volume 2 (SE09-8002-03) August 1994 + IBM866 MIB = 2086 + + // PC775Baltic is the MIB identifier with IANA name IBM775. + // + // HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996 + PC775Baltic MIB = 2087 + + // KOI8U is the MIB identifier with IANA name KOI8-U. + // + // rfc2319 + // Reference: RFC2319 + KOI8U MIB = 2088 + + // IBM00858 is the MIB identifier with IANA name IBM00858. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM00858 + IBM00858 MIB = 2089 + + // IBM00924 is the MIB identifier with IANA name IBM00924. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM00924 + IBM00924 MIB = 2090 + + // IBM01140 is the MIB identifier with IANA name IBM01140. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01140 + IBM01140 MIB = 2091 + + // IBM01141 is the MIB identifier with IANA name IBM01141. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01141 + IBM01141 MIB = 2092 + + // IBM01142 is the MIB identifier with IANA name IBM01142. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01142 + IBM01142 MIB = 2093 + + // IBM01143 is the MIB identifier with IANA name IBM01143. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01143 + IBM01143 MIB = 2094 + + // IBM01144 is the MIB identifier with IANA name IBM01144. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01144 + IBM01144 MIB = 2095 + + // IBM01145 is the MIB identifier with IANA name IBM01145. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01145 + IBM01145 MIB = 2096 + + // IBM01146 is the MIB identifier with IANA name IBM01146. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01146 + IBM01146 MIB = 2097 + + // IBM01147 is the MIB identifier with IANA name IBM01147. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01147 + IBM01147 MIB = 2098 + + // IBM01148 is the MIB identifier with IANA name IBM01148. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01148 + IBM01148 MIB = 2099 + + // IBM01149 is the MIB identifier with IANA name IBM01149. + // + // IBM See http://www.iana.org/assignments/charset-reg/IBM01149 + IBM01149 MIB = 2100 + + // Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS. + // + // See http://www.iana.org/assignments/charset-reg/Big5-HKSCS + Big5HKSCS MIB = 2101 + + // IBM1047 is the MIB identifier with IANA name IBM1047. + // + // IBM1047 (EBCDIC Latin 1/Open Systems) http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf + IBM1047 MIB = 2102 + + // PTCP154 is the MIB identifier with IANA name PTCP154. + // + // See http://www.iana.org/assignments/charset-reg/PTCP154 + PTCP154 MIB = 2103 + + // Amiga1251 is the MIB identifier with IANA name Amiga-1251. + // + // See http://www.amiga.ultranet.ru/Amiga-1251.html + Amiga1251 MIB = 2104 + + // KOI7switched is the MIB identifier with IANA name KOI7-switched. + // + // See http://www.iana.org/assignments/charset-reg/KOI7-switched + KOI7switched MIB = 2105 + + // BRF is the MIB identifier with IANA name BRF. + // + // See http://www.iana.org/assignments/charset-reg/BRF + BRF MIB = 2106 + + // TSCII is the MIB identifier with IANA name TSCII. + // + // See http://www.iana.org/assignments/charset-reg/TSCII + TSCII MIB = 2107 + + // CP51932 is the MIB identifier with IANA name CP51932. + // + // See http://www.iana.org/assignments/charset-reg/CP51932 + CP51932 MIB = 2108 + + // Windows874 is the MIB identifier with IANA name windows-874. + // + // See http://www.iana.org/assignments/charset-reg/windows-874 + Windows874 MIB = 2109 + + // Windows1250 is the MIB identifier with IANA name windows-1250. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1250 + Windows1250 MIB = 2250 + + // Windows1251 is the MIB identifier with IANA name windows-1251. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1251 + Windows1251 MIB = 2251 + + // Windows1252 is the MIB identifier with IANA name windows-1252. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1252 + Windows1252 MIB = 2252 + + // Windows1253 is the MIB identifier with IANA name windows-1253. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1253 + Windows1253 MIB = 2253 + + // Windows1254 is the MIB identifier with IANA name windows-1254. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1254 + Windows1254 MIB = 2254 + + // Windows1255 is the MIB identifier with IANA name windows-1255. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1255 + Windows1255 MIB = 2255 + + // Windows1256 is the MIB identifier with IANA name windows-1256. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1256 + Windows1256 MIB = 2256 + + // Windows1257 is the MIB identifier with IANA name windows-1257. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1257 + Windows1257 MIB = 2257 + + // Windows1258 is the MIB identifier with IANA name windows-1258. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1258 + Windows1258 MIB = 2258 + + // TIS620 is the MIB identifier with IANA name TIS-620. + // + // Thai Industrial Standards Institute (TISI) + TIS620 MIB = 2259 + + // CP50220 is the MIB identifier with IANA name CP50220. + // + // See http://www.iana.org/assignments/charset-reg/CP50220 + CP50220 MIB = 2260 +) diff --git a/vendor/golang.org/x/text/encoding/internal/internal.go b/vendor/golang.org/x/text/encoding/internal/internal.go new file mode 100644 index 000000000..75a5fd165 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/internal.go @@ -0,0 +1,75 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package internal contains code that is shared among encoding implementations. +package internal + +import ( + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/transform" +) + +// Encoding is an implementation of the Encoding interface that adds the String +// and ID methods to an existing encoding. +type Encoding struct { + encoding.Encoding + Name string + MIB identifier.MIB +} + +// _ verifies that Encoding implements identifier.Interface. +var _ identifier.Interface = (*Encoding)(nil) + +func (e *Encoding) String() string { + return e.Name +} + +func (e *Encoding) ID() (mib identifier.MIB, other string) { + return e.MIB, "" +} + +// SimpleEncoding is an Encoding that combines two Transformers. +type SimpleEncoding struct { + Decoder transform.Transformer + Encoder transform.Transformer +} + +func (e *SimpleEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: e.Decoder} +} + +func (e *SimpleEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: e.Encoder} +} + +// FuncEncoding is an Encoding that combines two functions returning a new +// Transformer. +type FuncEncoding struct { + Decoder func() transform.Transformer + Encoder func() transform.Transformer +} + +func (e FuncEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: e.Decoder()} +} + +func (e FuncEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: e.Encoder()} +} + +// A RepertoireError indicates a rune is not in the repertoire of a destination +// encoding. It is associated with an encoding-specific suggested replacement +// byte. +type RepertoireError byte + +// Error implements the error interrface. +func (r RepertoireError) Error() string { + return "encoding: rune not supported by encoding." +} + +// Replacement returns the replacement string associated with this error. +func (r RepertoireError) Replacement() byte { return byte(r) } + +var ErrASCIIReplacement = RepertoireError(encoding.ASCIISub) diff --git a/vendor/golang.org/x/text/encoding/unicode/override.go b/vendor/golang.org/x/text/encoding/unicode/override.go new file mode 100644 index 000000000..35d62fcc9 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/unicode/override.go @@ -0,0 +1,82 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unicode + +import ( + "golang.org/x/text/transform" +) + +// BOMOverride returns a new decoder transformer that is identical to fallback, +// except that the presence of a Byte Order Mark at the start of the input +// causes it to switch to the corresponding Unicode decoding. It will only +// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE. +// +// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not +// just UTF-16 variants, and allowing falling back to any encoding scheme. +// +// This technique is recommended by the W3C for use in HTML 5: "For +// compatibility with deployed content, the byte order mark (also known as BOM) +// is considered more authoritative than anything else." +// http://www.w3.org/TR/encoding/#specification-hooks +// +// Using BOMOverride is mostly intended for use cases where the first characters +// of a fallback encoding are known to not be a BOM, for example, for valid HTML +// and most encodings. +func BOMOverride(fallback transform.Transformer) transform.Transformer { + // TODO: possibly allow a variadic argument of unicode encodings to allow + // specifying details of which fallbacks are supported as well as + // specifying the details of the implementations. This would also allow for + // support for UTF-32, which should not be supported by default. + return &bomOverride{fallback: fallback} +} + +type bomOverride struct { + fallback transform.Transformer + current transform.Transformer +} + +func (d *bomOverride) Reset() { + d.current = nil + d.fallback.Reset() +} + +var ( + // TODO: we could use decode functions here, instead of allocating a new + // decoder on every NewDecoder as IgnoreBOM decoders can be stateless. + utf16le = UTF16(LittleEndian, IgnoreBOM) + utf16be = UTF16(BigEndian, IgnoreBOM) +) + +const utf8BOM = "\ufeff" + +func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if d.current != nil { + return d.current.Transform(dst, src, atEOF) + } + if len(src) < 3 && !atEOF { + return 0, 0, transform.ErrShortSrc + } + d.current = d.fallback + bomSize := 0 + if len(src) >= 2 { + if src[0] == 0xFF && src[1] == 0xFE { + d.current = utf16le.NewDecoder() + bomSize = 2 + } else if src[0] == 0xFE && src[1] == 0xFF { + d.current = utf16be.NewDecoder() + bomSize = 2 + } else if len(src) >= 3 && + src[0] == utf8BOM[0] && + src[1] == utf8BOM[1] && + src[2] == utf8BOM[2] { + d.current = transform.Nop + bomSize = 3 + } + } + if bomSize < len(src) { + nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF) + } + return nDst, nSrc + bomSize, err +} diff --git a/vendor/golang.org/x/text/encoding/unicode/unicode.go b/vendor/golang.org/x/text/encoding/unicode/unicode.go new file mode 100644 index 000000000..579cadfb1 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/unicode/unicode.go @@ -0,0 +1,434 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package unicode provides Unicode encodings such as UTF-16. +package unicode // import "golang.org/x/text/encoding/unicode" + +import ( + "errors" + "unicode/utf16" + "unicode/utf8" + + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/internal" + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/internal/utf8internal" + "golang.org/x/text/runes" + "golang.org/x/text/transform" +) + +// TODO: I think the Transformers really should return errors on unmatched +// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781, +// which leaves it open, but is suggested by WhatWG. It will allow for all error +// modes as defined by WhatWG: fatal, HTML and Replacement. This would require +// the introduction of some kind of error type for conveying the erroneous code +// point. + +// UTF8 is the UTF-8 encoding. +var UTF8 encoding.Encoding = utf8enc + +var utf8enc = &internal.Encoding{ + &internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()}, + "UTF-8", + identifier.UTF8, +} + +type utf8Decoder struct{ transform.NopResetter } + +func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + var pSrc int // point from which to start copy in src + var accept utf8internal.AcceptRange + + // The decoder can only make the input larger, not smaller. + n := len(src) + if len(dst) < n { + err = transform.ErrShortDst + n = len(dst) + atEOF = false + } + for nSrc < n { + c := src[nSrc] + if c < utf8.RuneSelf { + nSrc++ + continue + } + first := utf8internal.First[c] + size := int(first & utf8internal.SizeMask) + if first == utf8internal.FirstInvalid { + goto handleInvalid // invalid starter byte + } + accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift] + if nSrc+size > n { + if !atEOF { + // We may stop earlier than necessary here if the short sequence + // has invalid bytes. Not checking for this simplifies the code + // and may avoid duplicate computations in certain conditions. + if err == nil { + err = transform.ErrShortSrc + } + break + } + // Determine the maximal subpart of an ill-formed subsequence. + switch { + case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]: + size = 1 + case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]: + size = 2 + default: + size = 3 // As we are short, the maximum is 3. + } + goto handleInvalid + } + if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c { + size = 1 + goto handleInvalid // invalid continuation byte + } else if size == 2 { + } else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c { + size = 2 + goto handleInvalid // invalid continuation byte + } else if size == 3 { + } else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c { + size = 3 + goto handleInvalid // invalid continuation byte + } + nSrc += size + continue + + handleInvalid: + // Copy the scanned input so far. + nDst += copy(dst[nDst:], src[pSrc:nSrc]) + + // Append RuneError to the destination. + const runeError = "\ufffd" + if nDst+len(runeError) > len(dst) { + return nDst, nSrc, transform.ErrShortDst + } + nDst += copy(dst[nDst:], runeError) + + // Skip the maximal subpart of an ill-formed subsequence according to + // the W3C standard way instead of the Go way. This Transform is + // probably the only place in the text repo where it is warranted. + nSrc += size + pSrc = nSrc + + // Recompute the maximum source length. + if sz := len(dst) - nDst; sz < len(src)-nSrc { + err = transform.ErrShortDst + n = nSrc + sz + atEOF = false + } + } + return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err +} + +// UTF16 returns a UTF-16 Encoding for the given default endianness and byte +// order mark (BOM) policy. +// +// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then +// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect +// the endianness used for decoding, and will instead be output as their +// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy +// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output. +// Instead, it overrides the default endianness e for the remainder of the +// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not +// affect the endianness used, and will instead be output as their standard +// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed +// with the default Endianness. For ExpectBOM, in that case, the transformation +// will return early with an ErrMissingBOM error. +// +// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of +// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not +// be inserted. The UTF-8 input does not need to contain a BOM. +// +// There is no concept of a 'native' endianness. If the UTF-16 data is produced +// and consumed in a greater context that implies a certain endianness, use +// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM. +// +// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM +// corresponds to "Where the precise type of the data stream is known... the +// BOM should not be used" and ExpectBOM corresponds to "A particular +// protocol... may require use of the BOM". +func UTF16(e Endianness, b BOMPolicy) encoding.Encoding { + return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]} +} + +// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that +// some configurations map to the same MIB identifier. RFC 2781 has requirements +// and recommendations. Some of the "configurations" are merely recommendations, +// so multiple configurations could match. +var mibValue = map[Endianness][numBOMValues]identifier.MIB{ + BigEndian: [numBOMValues]identifier.MIB{ + IgnoreBOM: identifier.UTF16BE, + UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781. + // TODO: acceptBOM | strictBOM would map to UTF16BE as well. + }, + LittleEndian: [numBOMValues]identifier.MIB{ + IgnoreBOM: identifier.UTF16LE, + UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows. + // TODO: acceptBOM | strictBOM would map to UTF16LE as well. + }, + // ExpectBOM is not widely used and has no valid MIB identifier. +} + +// All lists a configuration for each IANA-defined UTF-16 variant. +var All = []encoding.Encoding{ + UTF8, + UTF16(BigEndian, UseBOM), + UTF16(BigEndian, IgnoreBOM), + UTF16(LittleEndian, IgnoreBOM), +} + +// BOMPolicy is a UTF-16 encoding's byte order mark policy. +type BOMPolicy uint8 + +const ( + writeBOM BOMPolicy = 0x01 + acceptBOM BOMPolicy = 0x02 + requireBOM BOMPolicy = 0x04 + bomMask BOMPolicy = 0x07 + + // HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a + // map of an array of length 8 of a type that is also used as a key or value + // in another map). See golang.org/issue/11354. + // TODO: consider changing this value back to 8 if the use of 1.4.* has + // been minimized. + numBOMValues = 8 + 1 + + // IgnoreBOM means to ignore any byte order marks. + IgnoreBOM BOMPolicy = 0 + // Common and RFC 2781-compliant interpretation for UTF-16BE/LE. + + // UseBOM means that the UTF-16 form may start with a byte order mark, which + // will be used to override the default encoding. + UseBOM BOMPolicy = writeBOM | acceptBOM + // Common and RFC 2781-compliant interpretation for UTF-16. + + // ExpectBOM means that the UTF-16 form must start with a byte order mark, + // which will be used to override the default encoding. + ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM + // Used in Java as Unicode (not to be confused with Java's UTF-16) and + // ICU's UTF-16,version=1. Not compliant with RFC 2781. + + // TODO (maybe): strictBOM: BOM must match Endianness. This would allow: + // - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM + // (UnicodeBig and UnicodeLittle in Java) + // - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E: + // acceptBOM | strictBOM (e.g. assigned to CheckBOM). + // This addition would be consistent with supporting ExpectBOM. +) + +// Endianness is a UTF-16 encoding's default endianness. +type Endianness bool + +const ( + // BigEndian is UTF-16BE. + BigEndian Endianness = false + // LittleEndian is UTF-16LE. + LittleEndian Endianness = true +) + +// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a +// starting byte order mark. +var ErrMissingBOM = errors.New("encoding: missing byte order mark") + +type utf16Encoding struct { + config + mib identifier.MIB +} + +type config struct { + endianness Endianness + bomPolicy BOMPolicy +} + +func (u utf16Encoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: &utf16Decoder{ + initial: u.config, + current: u.config, + }} +} + +func (u utf16Encoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: &utf16Encoder{ + endianness: u.endianness, + initialBOMPolicy: u.bomPolicy, + currentBOMPolicy: u.bomPolicy, + }} +} + +func (u utf16Encoding) ID() (mib identifier.MIB, other string) { + return u.mib, "" +} + +func (u utf16Encoding) String() string { + e, b := "B", "" + if u.endianness == LittleEndian { + e = "L" + } + switch u.bomPolicy { + case ExpectBOM: + b = "Expect" + case UseBOM: + b = "Use" + case IgnoreBOM: + b = "Ignore" + } + return "UTF-16" + e + "E (" + b + " BOM)" +} + +type utf16Decoder struct { + initial config + current config +} + +func (u *utf16Decoder) Reset() { + u.current = u.initial +} + +func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if len(src) == 0 { + if atEOF && u.current.bomPolicy&requireBOM != 0 { + return 0, 0, ErrMissingBOM + } + return 0, 0, nil + } + if u.current.bomPolicy&acceptBOM != 0 { + if len(src) < 2 { + return 0, 0, transform.ErrShortSrc + } + switch { + case src[0] == 0xfe && src[1] == 0xff: + u.current.endianness = BigEndian + nSrc = 2 + case src[0] == 0xff && src[1] == 0xfe: + u.current.endianness = LittleEndian + nSrc = 2 + default: + if u.current.bomPolicy&requireBOM != 0 { + return 0, 0, ErrMissingBOM + } + } + u.current.bomPolicy = IgnoreBOM + } + + var r rune + var dSize, sSize int + for nSrc < len(src) { + if nSrc+1 < len(src) { + x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1]) + if u.current.endianness == LittleEndian { + x = x>>8 | x<<8 + } + r, sSize = rune(x), 2 + if utf16.IsSurrogate(r) { + if nSrc+3 < len(src) { + x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3]) + if u.current.endianness == LittleEndian { + x = x>>8 | x<<8 + } + // Save for next iteration if it is not a high surrogate. + if isHighSurrogate(rune(x)) { + r, sSize = utf16.DecodeRune(r, rune(x)), 4 + } + } else if !atEOF { + err = transform.ErrShortSrc + break + } + } + if dSize = utf8.RuneLen(r); dSize < 0 { + r, dSize = utf8.RuneError, 3 + } + } else if atEOF { + // Single trailing byte. + r, dSize, sSize = utf8.RuneError, 3, 1 + } else { + err = transform.ErrShortSrc + break + } + if nDst+dSize > len(dst) { + err = transform.ErrShortDst + break + } + nDst += utf8.EncodeRune(dst[nDst:], r) + nSrc += sSize + } + return nDst, nSrc, err +} + +func isHighSurrogate(r rune) bool { + return 0xDC00 <= r && r <= 0xDFFF +} + +type utf16Encoder struct { + endianness Endianness + initialBOMPolicy BOMPolicy + currentBOMPolicy BOMPolicy +} + +func (u *utf16Encoder) Reset() { + u.currentBOMPolicy = u.initialBOMPolicy +} + +func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if u.currentBOMPolicy&writeBOM != 0 { + if len(dst) < 2 { + return 0, 0, transform.ErrShortDst + } + dst[0], dst[1] = 0xfe, 0xff + u.currentBOMPolicy = IgnoreBOM + nDst = 2 + } + + r, size := rune(0), 0 + for nSrc < len(src) { + r = rune(src[nSrc]) + + // Decode a 1-byte rune. + if r < utf8.RuneSelf { + size = 1 + + } else { + // Decode a multi-byte rune. + r, size = utf8.DecodeRune(src[nSrc:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + } + } + + if r <= 0xffff { + if nDst+2 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = uint8(r >> 8) + dst[nDst+1] = uint8(r) + nDst += 2 + } else { + if nDst+4 > len(dst) { + err = transform.ErrShortDst + break + } + r1, r2 := utf16.EncodeRune(r) + dst[nDst+0] = uint8(r1 >> 8) + dst[nDst+1] = uint8(r1) + dst[nDst+2] = uint8(r2 >> 8) + dst[nDst+3] = uint8(r2) + nDst += 4 + } + nSrc += size + } + + if u.endianness == LittleEndian { + for i := 0; i < nDst; i += 2 { + dst[i], dst[i+1] = dst[i+1], dst[i] + } + } + return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/internal/gen/code.go b/vendor/golang.org/x/text/internal/gen/code.go new file mode 100644 index 000000000..2202c9f64 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/code.go @@ -0,0 +1,339 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gen + +import ( + "bytes" + "encoding/gob" + "fmt" + "hash" + "hash/fnv" + "io" + "log" + "os" + "reflect" + "strings" + "unicode" + "unicode/utf8" +) + +// This file contains utilities for generating code. + +// TODO: other write methods like: +// - slices, maps, types, etc. + +// CodeWriter is a utility for writing structured code. It computes the content +// hash and size of written content. It ensures there are newlines between +// written code blocks. +type CodeWriter struct { + buf bytes.Buffer + Size int + Hash hash.Hash32 // content hash + gob *gob.Encoder + // For comments we skip the usual one-line separator if they are followed by + // a code block. + skipSep bool +} + +func (w *CodeWriter) Write(p []byte) (n int, err error) { + return w.buf.Write(p) +} + +// NewCodeWriter returns a new CodeWriter. +func NewCodeWriter() *CodeWriter { + h := fnv.New32() + return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)} +} + +// WriteGoFile appends the buffer with the total size of all created structures +// and writes it as a Go file to the the given file with the given package name. +func (w *CodeWriter) WriteGoFile(filename, pkg string) { + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer f.Close() + if _, err = w.WriteGo(f, pkg); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo appends the buffer with the total size of all created structures and +// writes it as a Go file to the the given writer with the given package name. +func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) { + sz := w.Size + w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32()) + defer w.buf.Reset() + return WriteGo(out, pkg, w.buf.Bytes()) +} + +func (w *CodeWriter) printf(f string, x ...interface{}) { + fmt.Fprintf(w, f, x...) +} + +func (w *CodeWriter) insertSep() { + if w.skipSep { + w.skipSep = false + return + } + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n") +} + +// WriteComment writes a comment block. All line starts are prefixed with "//". +// Initial empty lines are gobbled. The indentation for the first line is +// stripped from consecutive lines. +func (w *CodeWriter) WriteComment(comment string, args ...interface{}) { + s := fmt.Sprintf(comment, args...) + s = strings.Trim(s, "\n") + + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n// ") + w.skipSep = true + + // strip first indent level. + sep := "\n" + for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] { + sep += s[:1] + } + + strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s) + + w.printf("\n") +} + +func (w *CodeWriter) writeSizeInfo(size int) { + w.printf("// Size: %d bytes\n", size) +} + +// WriteConst writes a constant of the given name and value. +func (w *CodeWriter) WriteConst(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + + switch v.Type().Kind() { + case reflect.String: + // See golang.org/issue/13145. + const arbitraryCutoff = 16 + if v.Len() > arbitraryCutoff { + w.printf("var %s %s = ", name, typeName(x)) + } else { + w.printf("const %s %s = ", name, typeName(x)) + } + w.WriteString(v.String()) + w.printf("\n") + default: + w.printf("const %s = %#v\n", name, x) + } +} + +// WriteVar writes a variable of the given name and value. +func (w *CodeWriter) WriteVar(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + oldSize := w.Size + sz := int(v.Type().Size()) + w.Size += sz + + switch v.Type().Kind() { + case reflect.String: + w.printf("var %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + case reflect.Struct: + w.gob.Encode(x) + fallthrough + case reflect.Slice, reflect.Array: + w.printf("var %s = ", name) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + default: + w.printf("var %s %s = ", name, typeName(x)) + w.gob.Encode(x) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + } + w.printf("\n") +} + +func (w *CodeWriter) writeValue(v reflect.Value) { + x := v.Interface() + switch v.Kind() { + case reflect.String: + w.WriteString(v.String()) + case reflect.Array: + // Don't double count: callers of WriteArray count on the size being + // added, so we need to discount it here. + w.Size -= int(v.Type().Size()) + w.writeSlice(x, true) + case reflect.Slice: + w.writeSlice(x, false) + case reflect.Struct: + w.printf("%s{\n", typeName(v.Interface())) + t := v.Type() + for i := 0; i < v.NumField(); i++ { + w.printf("%s: ", t.Field(i).Name) + w.writeValue(v.Field(i)) + w.printf(",\n") + } + w.printf("}") + default: + w.printf("%#v", x) + } +} + +// WriteString writes a string literal. +func (w *CodeWriter) WriteString(s string) { + s = strings.Replace(s, `\`, `\\`, -1) + io.WriteString(w.Hash, s) // content hash + w.Size += len(s) + + const maxInline = 40 + if len(s) <= maxInline { + w.printf("%q", s) + return + } + + // We will render the string as a multi-line string. + const maxWidth = 80 - 4 - len(`"`) - len(`" +`) + + // When starting on its own line, go fmt indents line 2+ an extra level. + n, max := maxWidth, maxWidth-4 + + // Print "" +\n, if a string does not start on its own line. + b := w.buf.Bytes() + if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' { + w.printf("\"\" + // Size: %d bytes\n", len(s)) + n, max = maxWidth, maxWidth + } + + w.printf(`"`) + + for sz, p := 0, 0; p < len(s); { + var r rune + r, sz = utf8.DecodeRuneInString(s[p:]) + out := s[p : p+sz] + chars := 1 + if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' { + switch sz { + case 1: + out = fmt.Sprintf("\\x%02x", s[p]) + case 2, 3: + out = fmt.Sprintf("\\u%04x", r) + case 4: + out = fmt.Sprintf("\\U%08x", r) + } + chars = len(out) + } + if n -= chars; n < 0 { + w.printf("\" +\n\"") + n = max - len(out) + } + w.printf("%s", out) + p += sz + } + w.printf(`"`) +} + +// WriteSlice writes a slice value. +func (w *CodeWriter) WriteSlice(x interface{}) { + w.writeSlice(x, false) +} + +// WriteArray writes an array value. +func (w *CodeWriter) WriteArray(x interface{}) { + w.writeSlice(x, true) +} + +func (w *CodeWriter) writeSlice(x interface{}, isArray bool) { + v := reflect.ValueOf(x) + w.gob.Encode(v.Len()) + w.Size += v.Len() * int(v.Type().Elem().Size()) + name := typeName(x) + if isArray { + name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:]) + } + if isArray { + w.printf("%s{\n", name) + } else { + w.printf("%s{ // %d elements\n", name, v.Len()) + } + + switch kind := v.Type().Elem().Kind(); kind { + case reflect.String: + for _, s := range x.([]string) { + w.WriteString(s) + w.printf(",\n") + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + // nLine and nBlock are the number of elements per line and block. + nLine, nBlock, format := 8, 64, "%d," + switch kind { + case reflect.Uint8: + format = "%#02x," + case reflect.Uint16: + format = "%#04x," + case reflect.Uint32: + nLine, nBlock, format = 4, 32, "%#08x," + case reflect.Uint, reflect.Uint64: + nLine, nBlock, format = 4, 32, "%#016x," + case reflect.Int8: + nLine = 16 + } + n := nLine + for i := 0; i < v.Len(); i++ { + if i%nBlock == 0 && v.Len() > nBlock { + w.printf("// Entry %X - %X\n", i, i+nBlock-1) + } + x := v.Index(i).Interface() + w.gob.Encode(x) + w.printf(format, x) + if n--; n == 0 { + n = nLine + w.printf("\n") + } + } + w.printf("\n") + case reflect.Struct: + zero := reflect.Zero(v.Type().Elem()).Interface() + for i := 0; i < v.Len(); i++ { + x := v.Index(i).Interface() + w.gob.EncodeValue(v) + if !reflect.DeepEqual(zero, x) { + line := fmt.Sprintf("%#v,\n", x) + line = line[strings.IndexByte(line, '{'):] + w.printf("%d: ", i) + w.printf(line) + } + } + case reflect.Array: + for i := 0; i < v.Len(); i++ { + w.printf("%d: %#v,\n", i, v.Index(i).Interface()) + } + default: + panic("gen: slice elem type not supported") + } + w.printf("}") +} + +// WriteType writes a definition of the type of the given value and returns the +// type name. +func (w *CodeWriter) WriteType(x interface{}) string { + t := reflect.TypeOf(x) + w.printf("type %s struct {\n", t.Name()) + for i := 0; i < t.NumField(); i++ { + w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type) + } + w.printf("}\n") + return t.Name() +} + +// typeName returns the name of the go type of x. +func typeName(x interface{}) string { + t := reflect.ValueOf(x).Type() + return strings.Replace(fmt.Sprint(t), "main.", "", 1) +} diff --git a/vendor/golang.org/x/text/internal/gen/gen.go b/vendor/golang.org/x/text/internal/gen/gen.go new file mode 100644 index 000000000..84c699faa --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/gen.go @@ -0,0 +1,281 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gen contains common code for the various code generation tools in the +// text repository. Its usage ensures consistency between tools. +// +// This package defines command line flags that are common to most generation +// tools. The flags allow for specifying specific Unicode and CLDR versions +// in the public Unicode data repository (http://www.unicode.org/Public). +// +// A local Unicode data mirror can be set through the flag -local or the +// environment variable UNICODE_DIR. The former takes precedence. The local +// directory should follow the same structure as the public repository. +// +// IANA data can also optionally be mirrored by putting it in the iana directory +// rooted at the top of the local mirror. Beware, though, that IANA data is not +// versioned. So it is up to the developer to use the right version. +package gen // import "golang.org/x/text/internal/gen" + +import ( + "bytes" + "flag" + "fmt" + "go/build" + "go/format" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "path" + "path/filepath" + "sync" + "unicode" + + "golang.org/x/text/unicode/cldr" +) + +var ( + url = flag.String("url", + "http://www.unicode.org/Public", + "URL of Unicode database directory") + iana = flag.String("iana", + "http://www.iana.org", + "URL of the IANA repository") + unicodeVersion = flag.String("unicode", + getEnv("UNICODE_VERSION", unicode.Version), + "unicode version to use") + cldrVersion = flag.String("cldr", + getEnv("CLDR_VERSION", cldr.Version), + "cldr version to use") +) + +func getEnv(name, def string) string { + if v := os.Getenv(name); v != "" { + return v + } + return def +} + +// Init performs common initialization for a gen command. It parses the flags +// and sets up the standard logging parameters. +func Init() { + log.SetPrefix("") + log.SetFlags(log.Lshortfile) + flag.Parse() +} + +const header = `// This file was generated by go generate; DO NOT EDIT + +package %s + +` + +// UnicodeVersion reports the requested Unicode version. +func UnicodeVersion() string { + return *unicodeVersion +} + +// UnicodeVersion reports the requested CLDR version. +func CLDRVersion() string { + return *cldrVersion +} + +// IsLocal reports whether data files are available locally. +func IsLocal() bool { + dir, err := localReadmeFile() + if err != nil { + return false + } + if _, err = os.Stat(dir); err != nil { + return false + } + return true +} + +// OpenUCDFile opens the requested UCD file. The file is specified relative to +// the public Unicode root directory. It will call log.Fatal if there are any +// errors. +func OpenUCDFile(file string) io.ReadCloser { + return openUnicode(path.Join(*unicodeVersion, "ucd", file)) +} + +// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there +// are any errors. +func OpenCLDRCoreZip() io.ReadCloser { + return OpenUnicodeFile("cldr", *cldrVersion, "core.zip") +} + +// OpenUnicodeFile opens the requested file of the requested category from the +// root of the Unicode data archive. The file is specified relative to the +// public Unicode root directory. If version is "", it will use the default +// Unicode version. It will call log.Fatal if there are any errors. +func OpenUnicodeFile(category, version, file string) io.ReadCloser { + if version == "" { + version = UnicodeVersion() + } + return openUnicode(path.Join(category, version, file)) +} + +// OpenIANAFile opens the requested IANA file. The file is specified relative +// to the IANA root, which is typically either http://www.iana.org or the +// iana directory in the local mirror. It will call log.Fatal if there are any +// errors. +func OpenIANAFile(path string) io.ReadCloser { + return Open(*iana, "iana", path) +} + +var ( + dirMutex sync.Mutex + localDir string +) + +const permissions = 0755 + +func localReadmeFile() (string, error) { + p, err := build.Import("golang.org/x/text", "", build.FindOnly) + if err != nil { + return "", fmt.Errorf("Could not locate package: %v", err) + } + return filepath.Join(p.Dir, "DATA", "README"), nil +} + +func getLocalDir() string { + dirMutex.Lock() + defer dirMutex.Unlock() + + readme, err := localReadmeFile() + if err != nil { + log.Fatal(err) + } + dir := filepath.Dir(readme) + if _, err := os.Stat(readme); err != nil { + if err := os.MkdirAll(dir, permissions); err != nil { + log.Fatalf("Could not create directory: %v", err) + } + ioutil.WriteFile(readme, []byte(readmeTxt), permissions) + } + return dir +} + +const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT. + +This directory contains downloaded files used to generate the various tables +in the golang.org/x/text subrepo. + +Note that the language subtag repo (iana/assignments/language-subtag-registry) +and all other times in the iana subdirectory are not versioned and will need +to be periodically manually updated. The easiest way to do this is to remove +the entire iana directory. This is mostly of concern when updating the language +package. +` + +// Open opens subdir/path if a local directory is specified and the file exists, +// where subdir is a directory relative to the local root, or fetches it from +// urlRoot/path otherwise. It will call log.Fatal if there are any errors. +func Open(urlRoot, subdir, path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path)) + return open(file, urlRoot, path) +} + +func openUnicode(path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), filepath.FromSlash(path)) + return open(file, *url, path) +} + +// TODO: automatically periodically update non-versioned files. + +func open(file, urlRoot, path string) io.ReadCloser { + if f, err := os.Open(file); err == nil { + return f + } + r := get(urlRoot, path) + defer r.Close() + b, err := ioutil.ReadAll(r) + if err != nil { + log.Fatalf("Could not download file: %v", err) + } + os.MkdirAll(filepath.Dir(file), permissions) + if err := ioutil.WriteFile(file, b, permissions); err != nil { + log.Fatalf("Could not create file: %v", err) + } + return ioutil.NopCloser(bytes.NewReader(b)) +} + +func get(root, path string) io.ReadCloser { + url := root + "/" + path + fmt.Printf("Fetching %s...", url) + defer fmt.Println(" done.") + resp, err := http.Get(url) + if err != nil { + log.Fatalf("HTTP GET: %v", err) + } + if resp.StatusCode != 200 { + log.Fatalf("Bad GET status for %q: %q", url, resp.Status) + } + return resp.Body +} + +// TODO: use Write*Version in all applicable packages. + +// WriteUnicodeVersion writes a constant for the Unicode version from which the +// tables are generated. +func WriteUnicodeVersion(w io.Writer) { + fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion()) +} + +// WriteCLDRVersion writes a constant for the CLDR version from which the +// tables are generated. +func WriteCLDRVersion(w io.Writer) { + fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion()) +} + +// WriteGoFile prepends a standard file comment and package statement to the +// given bytes, applies gofmt, and writes them to a file with the given name. +// It will call log.Fatal if there are any errors. +func WriteGoFile(filename, pkg string, b []byte) { + w, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer w.Close() + if _, err = WriteGo(w, pkg, b); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo prepends a standard file comment and package statement to the given +// bytes, applies gofmt, and writes them to w. +func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) { + src := []byte(fmt.Sprintf(header, pkg)) + src = append(src, b...) + formatted, err := format.Source(src) + if err != nil { + // Print the generated code even in case of an error so that the + // returned error can be meaningfully interpreted. + n, _ = w.Write(src) + return n, err + } + return w.Write(formatted) +} + +// Repackage rewrites a Go file from belonging to package main to belonging to +// the given package. +func Repackage(inFile, outFile, pkg string) { + src, err := ioutil.ReadFile(inFile) + if err != nil { + log.Fatalf("reading %s: %v", inFile, err) + } + const toDelete = "package main\n\n" + i := bytes.Index(src, []byte(toDelete)) + if i < 0 { + log.Fatalf("Could not find %q in %s.", toDelete, inFile) + } + w := &bytes.Buffer{} + w.Write(src[i+len(toDelete):]) + WriteGoFile(outFile, pkg, w.Bytes()) +} diff --git a/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go b/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go new file mode 100644 index 000000000..575cea870 --- /dev/null +++ b/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go @@ -0,0 +1,87 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package utf8internal contains low-level utf8-related constants, tables, etc. +// that are used internally by the text package. +package utf8internal + +// The default lowest and highest continuation byte. +const ( + LoCB = 0x80 // 1000 0000 + HiCB = 0xBF // 1011 1111 +) + +// Constants related to getting information of first bytes of UTF-8 sequences. +const ( + // ASCII identifies a UTF-8 byte as ASCII. + ASCII = as + + // FirstInvalid indicates a byte is invalid as a first byte of a UTF-8 + // sequence. + FirstInvalid = xx + + // SizeMask is a mask for the size bits. Use use x&SizeMask to get the size. + SizeMask = 7 + + // AcceptShift is the right-shift count for the first byte info byte to get + // the index into the AcceptRanges table. See AcceptRanges. + AcceptShift = 4 + + // The names of these constants are chosen to give nice alignment in the + // table below. The first nibble is an index into acceptRanges or F for + // special one-byte cases. The second nibble is the Rune length or the + // Status for the special one-byte case. + xx = 0xF1 // invalid: size 1 + as = 0xF0 // ASCII: size 1 + s1 = 0x02 // accept 0, size 2 + s2 = 0x13 // accept 1, size 3 + s3 = 0x03 // accept 0, size 3 + s4 = 0x23 // accept 2, size 3 + s5 = 0x34 // accept 3, size 4 + s6 = 0x04 // accept 0, size 4 + s7 = 0x44 // accept 4, size 4 +) + +// First is information about the first byte in a UTF-8 sequence. +var First = [256]uint8{ + // 1 2 3 4 5 6 7 8 9 A B C D E F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F + // 1 2 3 4 5 6 7 8 9 A B C D E F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF + xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF + s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF + s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF + s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF +} + +// AcceptRange gives the range of valid values for the second byte in a UTF-8 +// sequence for any value for First that is not ASCII or FirstInvalid. +type AcceptRange struct { + Lo uint8 // lowest value for second byte. + Hi uint8 // highest value for second byte. +} + +// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b +// +// AcceptRanges[First[b[0]]>>AcceptShift] +// +// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting +// at b[0]. +var AcceptRanges = [...]AcceptRange{ + 0: {LoCB, HiCB}, + 1: {0xA0, HiCB}, + 2: {LoCB, 0x9F}, + 3: {0x90, HiCB}, + 4: {LoCB, 0x8F}, +} diff --git a/vendor/golang.org/x/text/runes/cond.go b/vendor/golang.org/x/text/runes/cond.go new file mode 100644 index 000000000..df7aa02db --- /dev/null +++ b/vendor/golang.org/x/text/runes/cond.go @@ -0,0 +1,187 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runes + +import ( + "unicode/utf8" + + "golang.org/x/text/transform" +) + +// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is. +// This is done for various reasons: +// - To retain the semantics of the Nop transformer: if input is passed to a Nop +// one would expect it to be unchanged. +// - It would be very expensive to pass a converted RuneError to a transformer: +// a transformer might need more source bytes after RuneError, meaning that +// the only way to pass it safely is to create a new buffer and manage the +// intermingling of RuneErrors and normal input. +// - Many transformers leave ill-formed UTF-8 as is, so this is not +// inconsistent. Generally ill-formed UTF-8 is only replaced if it is a +// logical consequence of the operation (as for Map) or if it otherwise would +// pose security concerns (as for Remove). +// - An alternative would be to return an error on ill-formed UTF-8, but this +// would be inconsistent with other operations. + +// If returns a transformer that applies tIn to consecutive runes for which +// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset +// is called on tIn and tNotIn at the start of each run. A Nop transformer will +// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated +// to RuneError to determine which transformer to apply, but is passed as is to +// the respective transformer. +func If(s Set, tIn, tNotIn transform.Transformer) Transformer { + if tIn == nil && tNotIn == nil { + return Transformer{transform.Nop} + } + if tIn == nil { + tIn = transform.Nop + } + if tNotIn == nil { + tNotIn = transform.Nop + } + sIn, ok := tIn.(transform.SpanningTransformer) + if !ok { + sIn = dummySpan{tIn} + } + sNotIn, ok := tNotIn.(transform.SpanningTransformer) + if !ok { + sNotIn = dummySpan{tNotIn} + } + + a := &cond{ + tIn: sIn, + tNotIn: sNotIn, + f: s.Contains, + } + a.Reset() + return Transformer{a} +} + +type dummySpan struct{ transform.Transformer } + +func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) { + return 0, transform.ErrEndOfSpan +} + +type cond struct { + tIn, tNotIn transform.SpanningTransformer + f func(rune) bool + check func(rune) bool // current check to perform + t transform.SpanningTransformer // current transformer to use +} + +// Reset implements transform.Transformer. +func (t *cond) Reset() { + t.check = t.is + t.t = t.tIn + t.t.Reset() // notIn will be reset on first usage. +} + +func (t *cond) is(r rune) bool { + if t.f(r) { + return true + } + t.check = t.isNot + t.t = t.tNotIn + t.tNotIn.Reset() + return false +} + +func (t *cond) isNot(r rune) bool { + if !t.f(r) { + return true + } + t.check = t.is + t.t = t.tIn + t.tIn.Reset() + return false +} + +// This implementation of Span doesn't help all too much, but it needs to be +// there to satisfy this package's Transformer interface. +// TODO: there are certainly room for improvements, though. For example, if +// t.t == transform.Nop (which will a common occurrence) it will save a bundle +// to special-case that loop. +func (t *cond) Span(src []byte, atEOF bool) (n int, err error) { + p := 0 + for n < len(src) && err == nil { + // Don't process too much at a time as the Spanner that will be + // called on this block may terminate early. + const maxChunk = 4096 + max := len(src) + if v := n + maxChunk; v < max { + max = v + } + atEnd := false + size := 0 + current := t.t + for ; p < max; p += size { + r := rune(src[p]) + if r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[p:]); size == 1 { + if !atEOF && !utf8.FullRune(src[p:]) { + err = transform.ErrShortSrc + break + } + } + if !t.check(r) { + // The next rune will be the start of a new run. + atEnd = true + break + } + } + n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src))) + n += n2 + if err2 != nil { + return n, err2 + } + // At this point either err != nil or t.check will pass for the rune at p. + p = n + size + } + return n, err +} + +func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + p := 0 + for nSrc < len(src) && err == nil { + // Don't process too much at a time, as the work might be wasted if the + // destination buffer isn't large enough to hold the result or a + // transform returns an error early. + const maxChunk = 4096 + max := len(src) + if n := nSrc + maxChunk; n < len(src) { + max = n + } + atEnd := false + size := 0 + current := t.t + for ; p < max; p += size { + r := rune(src[p]) + if r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[p:]); size == 1 { + if !atEOF && !utf8.FullRune(src[p:]) { + err = transform.ErrShortSrc + break + } + } + if !t.check(r) { + // The next rune will be the start of a new run. + atEnd = true + break + } + } + nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src))) + nDst += nDst2 + nSrc += nSrc2 + if err2 != nil { + return nDst, nSrc, err2 + } + // At this point either err != nil or t.check will pass for the rune at p. + p = nSrc + size + } + return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/runes/runes.go b/vendor/golang.org/x/text/runes/runes.go new file mode 100644 index 000000000..71933696f --- /dev/null +++ b/vendor/golang.org/x/text/runes/runes.go @@ -0,0 +1,355 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package runes provide transforms for UTF-8 encoded text. +package runes // import "golang.org/x/text/runes" + +import ( + "unicode" + "unicode/utf8" + + "golang.org/x/text/transform" +) + +// A Set is a collection of runes. +type Set interface { + // Contains returns true if r is contained in the set. + Contains(r rune) bool +} + +type setFunc func(rune) bool + +func (s setFunc) Contains(r rune) bool { + return s(r) +} + +// Note: using funcs here instead of wrapping types result in cleaner +// documentation and a smaller API. + +// In creates a Set with a Contains method that returns true for all runes in +// the given RangeTable. +func In(rt *unicode.RangeTable) Set { + return setFunc(func(r rune) bool { return unicode.Is(rt, r) }) +} + +// In creates a Set with a Contains method that returns true for all runes not +// in the given RangeTable. +func NotIn(rt *unicode.RangeTable) Set { + return setFunc(func(r rune) bool { return !unicode.Is(rt, r) }) +} + +// Predicate creates a Set with a Contains method that returns f(r). +func Predicate(f func(rune) bool) Set { + return setFunc(f) +} + +// Transformer implements the transform.Transformer interface. +type Transformer struct { + t transform.SpanningTransformer +} + +func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + return t.t.Transform(dst, src, atEOF) +} + +func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) { + return t.t.Span(b, atEOF) +} + +func (t Transformer) Reset() { t.t.Reset() } + +// Bytes returns a new byte slice with the result of converting b using t. It +// calls Reset on t. It returns nil if any error was found. This can only happen +// if an error-producing Transformer is passed to If. +func (t Transformer) Bytes(b []byte) []byte { + b, _, err := transform.Bytes(t, b) + if err != nil { + return nil + } + return b +} + +// String returns a string with the result of converting s using t. It calls +// Reset on t. It returns the empty string if any error was found. This can only +// happen if an error-producing Transformer is passed to If. +func (t Transformer) String(s string) string { + s, _, err := transform.String(t, s) + if err != nil { + return "" + } + return s +} + +// TODO: +// - Copy: copying strings and bytes in whole-rune units. +// - Validation (maybe) +// - Well-formed-ness (maybe) + +const runeErrorString = string(utf8.RuneError) + +// Remove returns a Transformer that removes runes r for which s.Contains(r). +// Illegal input bytes are replaced by RuneError before being passed to f. +func Remove(s Set) Transformer { + if f, ok := s.(setFunc); ok { + // This little trick cuts the running time of BenchmarkRemove for sets + // created by Predicate roughly in half. + // TODO: special-case RangeTables as well. + return Transformer{remove(f)} + } + return Transformer{remove(s.Contains)} +} + +// TODO: remove transform.RemoveFunc. + +type remove func(r rune) bool + +func (remove) Reset() {} + +// Span implements transform.Spanner. +func (t remove) Span(src []byte, atEOF bool) (n int, err error) { + for r, size := rune(0), 0; n < len(src); { + if r = rune(src[n]); r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[n:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[n:]) { + err = transform.ErrShortSrc + } else { + err = transform.ErrEndOfSpan + } + break + } + if t(r) { + err = transform.ErrEndOfSpan + break + } + n += size + } + return +} + +// Transform implements transform.Transformer. +func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for r, size := rune(0), 0; nSrc < len(src); { + if r = rune(src[nSrc]); r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + // We replace illegal bytes with RuneError. Not doing so might + // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. + // The resulting byte sequence may subsequently contain runes + // for which t(r) is true that were passed unnoticed. + if !t(utf8.RuneError) { + if nDst+3 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = runeErrorString[0] + dst[nDst+1] = runeErrorString[1] + dst[nDst+2] = runeErrorString[2] + nDst += 3 + } + nSrc++ + continue + } + if t(r) { + nSrc += size + continue + } + if nDst+size > len(dst) { + err = transform.ErrShortDst + break + } + for i := 0; i < size; i++ { + dst[nDst] = src[nSrc] + nDst++ + nSrc++ + } + } + return +} + +// Map returns a Transformer that maps the runes in the input using the given +// mapping. Illegal bytes in the input are converted to utf8.RuneError before +// being passed to the mapping func. +func Map(mapping func(rune) rune) Transformer { + return Transformer{mapper(mapping)} +} + +type mapper func(rune) rune + +func (mapper) Reset() {} + +// Span implements transform.Spanner. +func (t mapper) Span(src []byte, atEOF bool) (n int, err error) { + for r, size := rune(0), 0; n < len(src); n += size { + if r = rune(src[n]); r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[n:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[n:]) { + err = transform.ErrShortSrc + } else { + err = transform.ErrEndOfSpan + } + break + } + if t(r) != r { + err = transform.ErrEndOfSpan + break + } + } + return n, err +} + +// Transform implements transform.Transformer. +func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + var replacement rune + var b [utf8.UTFMax]byte + + for r, size := rune(0), 0; nSrc < len(src); { + if r = rune(src[nSrc]); r < utf8.RuneSelf { + if replacement = t(r); replacement < utf8.RuneSelf { + if nDst == len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst] = byte(replacement) + nDst++ + nSrc++ + continue + } + size = 1 + } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + + if replacement = t(utf8.RuneError); replacement == utf8.RuneError { + if nDst+3 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = runeErrorString[0] + dst[nDst+1] = runeErrorString[1] + dst[nDst+2] = runeErrorString[2] + nDst += 3 + nSrc++ + continue + } + } else if replacement = t(r); replacement == r { + if nDst+size > len(dst) { + err = transform.ErrShortDst + break + } + for i := 0; i < size; i++ { + dst[nDst] = src[nSrc] + nDst++ + nSrc++ + } + continue + } + + n := utf8.EncodeRune(b[:], replacement) + + if nDst+n > len(dst) { + err = transform.ErrShortDst + break + } + for i := 0; i < n; i++ { + dst[nDst] = b[i] + nDst++ + } + nSrc += size + } + return +} + +// ReplaceIllFormed returns a transformer that replaces all input bytes that are +// not part of a well-formed UTF-8 code sequence with utf8.RuneError. +func ReplaceIllFormed() Transformer { + return Transformer{&replaceIllFormed{}} +} + +type replaceIllFormed struct{ transform.NopResetter } + +func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) { + for n < len(src) { + // ASCII fast path. + if src[n] < utf8.RuneSelf { + n++ + continue + } + + r, size := utf8.DecodeRune(src[n:]) + + // Look for a valid non-ASCII rune. + if r != utf8.RuneError || size != 1 { + n += size + continue + } + + // Look for short source data. + if !atEOF && !utf8.FullRune(src[n:]) { + err = transform.ErrShortSrc + break + } + + // We have an invalid rune. + err = transform.ErrEndOfSpan + break + } + return n, err +} + +func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for nSrc < len(src) { + // ASCII fast path. + if r := src[nSrc]; r < utf8.RuneSelf { + if nDst == len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst] = r + nDst++ + nSrc++ + continue + } + + // Look for a valid non-ASCII rune. + if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 { + if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { + err = transform.ErrShortDst + break + } + nDst += size + nSrc += size + continue + } + + // Look for short source data. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + + // We have an invalid rune. + if nDst+3 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = runeErrorString[0] + dst[nDst+1] = runeErrorString[1] + dst[nDst+2] = runeErrorString[2] + nDst += 3 + nSrc++ + } + return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/transform/transform.go b/vendor/golang.org/x/text/transform/transform.go new file mode 100644 index 000000000..fe47b9b35 --- /dev/null +++ b/vendor/golang.org/x/text/transform/transform.go @@ -0,0 +1,705 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package transform provides reader and writer wrappers that transform the +// bytes passing through as well as various transformations. Example +// transformations provided by other packages include normalization and +// conversion between character sets. +package transform // import "golang.org/x/text/transform" + +import ( + "bytes" + "errors" + "io" + "unicode/utf8" +) + +var ( + // ErrShortDst means that the destination buffer was too short to + // receive all of the transformed bytes. + ErrShortDst = errors.New("transform: short destination buffer") + + // ErrShortSrc means that the source buffer has insufficient data to + // complete the transformation. + ErrShortSrc = errors.New("transform: short source buffer") + + // ErrEndOfSpan means that the input and output (the transformed input) + // are not identical. + ErrEndOfSpan = errors.New("transform: input and output are not identical") + + // errInconsistentByteCount means that Transform returned success (nil + // error) but also returned nSrc inconsistent with the src argument. + errInconsistentByteCount = errors.New("transform: inconsistent byte count returned") + + // errShortInternal means that an internal buffer is not large enough + // to make progress and the Transform operation must be aborted. + errShortInternal = errors.New("transform: short internal buffer") +) + +// Transformer transforms bytes. +type Transformer interface { + // Transform writes to dst the transformed bytes read from src, and + // returns the number of dst bytes written and src bytes read. The + // atEOF argument tells whether src represents the last bytes of the + // input. + // + // Callers should always process the nDst bytes produced and account + // for the nSrc bytes consumed before considering the error err. + // + // A nil error means that all of the transformed bytes (whether freshly + // transformed from src or left over from previous Transform calls) + // were written to dst. A nil error can be returned regardless of + // whether atEOF is true. If err is nil then nSrc must equal len(src); + // the converse is not necessarily true. + // + // ErrShortDst means that dst was too short to receive all of the + // transformed bytes. ErrShortSrc means that src had insufficient data + // to complete the transformation. If both conditions apply, then + // either error may be returned. Other than the error conditions listed + // here, implementations are free to report other errors that arise. + Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) + + // Reset resets the state and allows a Transformer to be reused. + Reset() +} + +// SpanningTransformer extends the Transformer interface with a Span method +// that determines how much of the input already conforms to the Transformer. +type SpanningTransformer interface { + Transformer + + // Span returns a position in src such that transforming src[:n] results in + // identical output src[:n] for these bytes. It does not necessarily return + // the largest such n. The atEOF argument tells whether src represents the + // last bytes of the input. + // + // Callers should always account for the n bytes consumed before + // considering the error err. + // + // A nil error means that all input bytes are known to be identical to the + // output produced by the Transformer. A nil error can be be returned + // regardless of whether atEOF is true. If err is nil, then then n must + // equal len(src); the converse is not necessarily true. + // + // ErrEndOfSpan means that the Transformer output may differ from the + // input after n bytes. Note that n may be len(src), meaning that the output + // would contain additional bytes after otherwise identical output. + // ErrShortSrc means that src had insufficient data to determine whether the + // remaining bytes would change. Other than the error conditions listed + // here, implementations are free to report other errors that arise. + // + // Calling Span can modify the Transformer state as a side effect. In + // effect, it does the transformation just as calling Transform would, only + // without copying to a destination buffer and only up to a point it can + // determine the input and output bytes are the same. This is obviously more + // limited than calling Transform, but can be more efficient in terms of + // copying and allocating buffers. Calls to Span and Transform may be + // interleaved. + Span(src []byte, atEOF bool) (n int, err error) +} + +// NopResetter can be embedded by implementations of Transformer to add a nop +// Reset method. +type NopResetter struct{} + +// Reset implements the Reset method of the Transformer interface. +func (NopResetter) Reset() {} + +// Reader wraps another io.Reader by transforming the bytes read. +type Reader struct { + r io.Reader + t Transformer + err error + + // dst[dst0:dst1] contains bytes that have been transformed by t but + // not yet copied out via Read. + dst []byte + dst0, dst1 int + + // src[src0:src1] contains bytes that have been read from r but not + // yet transformed through t. + src []byte + src0, src1 int + + // transformComplete is whether the transformation is complete, + // regardless of whether or not it was successful. + transformComplete bool +} + +const defaultBufSize = 4096 + +// NewReader returns a new Reader that wraps r by transforming the bytes read +// via t. It calls Reset on t. +func NewReader(r io.Reader, t Transformer) *Reader { + t.Reset() + return &Reader{ + r: r, + t: t, + dst: make([]byte, defaultBufSize), + src: make([]byte, defaultBufSize), + } +} + +// Read implements the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + n, err := 0, error(nil) + for { + // Copy out any transformed bytes and return the final error if we are done. + if r.dst0 != r.dst1 { + n = copy(p, r.dst[r.dst0:r.dst1]) + r.dst0 += n + if r.dst0 == r.dst1 && r.transformComplete { + return n, r.err + } + return n, nil + } else if r.transformComplete { + return 0, r.err + } + + // Try to transform some source bytes, or to flush the transformer if we + // are out of source bytes. We do this even if r.r.Read returned an error. + // As the io.Reader documentation says, "process the n > 0 bytes returned + // before considering the error". + if r.src0 != r.src1 || r.err != nil { + r.dst0 = 0 + r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF) + r.src0 += n + + switch { + case err == nil: + if r.src0 != r.src1 { + r.err = errInconsistentByteCount + } + // The Transform call was successful; we are complete if we + // cannot read more bytes into src. + r.transformComplete = r.err != nil + continue + case err == ErrShortDst && (r.dst1 != 0 || n != 0): + // Make room in dst by copying out, and try again. + continue + case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil: + // Read more bytes into src via the code below, and try again. + default: + r.transformComplete = true + // The reader error (r.err) takes precedence over the + // transformer error (err) unless r.err is nil or io.EOF. + if r.err == nil || r.err == io.EOF { + r.err = err + } + continue + } + } + + // Move any untransformed source bytes to the start of the buffer + // and read more bytes. + if r.src0 != 0 { + r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1]) + } + n, r.err = r.r.Read(r.src[r.src1:]) + r.src1 += n + } +} + +// TODO: implement ReadByte (and ReadRune??). + +// Writer wraps another io.Writer by transforming the bytes read. +// The user needs to call Close to flush unwritten bytes that may +// be buffered. +type Writer struct { + w io.Writer + t Transformer + dst []byte + + // src[:n] contains bytes that have not yet passed through t. + src []byte + n int +} + +// NewWriter returns a new Writer that wraps w by transforming the bytes written +// via t. It calls Reset on t. +func NewWriter(w io.Writer, t Transformer) *Writer { + t.Reset() + return &Writer{ + w: w, + t: t, + dst: make([]byte, defaultBufSize), + src: make([]byte, defaultBufSize), + } +} + +// Write implements the io.Writer interface. If there are not enough +// bytes available to complete a Transform, the bytes will be buffered +// for the next write. Call Close to convert the remaining bytes. +func (w *Writer) Write(data []byte) (n int, err error) { + src := data + if w.n > 0 { + // Append bytes from data to the last remainder. + // TODO: limit the amount copied on first try. + n = copy(w.src[w.n:], data) + w.n += n + src = w.src[:w.n] + } + for { + nDst, nSrc, err := w.t.Transform(w.dst, src, false) + if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { + return n, werr + } + src = src[nSrc:] + if w.n == 0 { + n += nSrc + } else if len(src) <= n { + // Enough bytes from w.src have been consumed. We make src point + // to data instead to reduce the copying. + w.n = 0 + n -= len(src) + src = data[n:] + if n < len(data) && (err == nil || err == ErrShortSrc) { + continue + } + } + switch err { + case ErrShortDst: + // This error is okay as long as we are making progress. + if nDst > 0 || nSrc > 0 { + continue + } + case ErrShortSrc: + if len(src) < len(w.src) { + m := copy(w.src, src) + // If w.n > 0, bytes from data were already copied to w.src and n + // was already set to the number of bytes consumed. + if w.n == 0 { + n += m + } + w.n = m + err = nil + } else if nDst > 0 || nSrc > 0 { + // Not enough buffer to store the remainder. Keep processing as + // long as there is progress. Without this case, transforms that + // require a lookahead larger than the buffer may result in an + // error. This is not something one may expect to be common in + // practice, but it may occur when buffers are set to small + // sizes during testing. + continue + } + case nil: + if w.n > 0 { + err = errInconsistentByteCount + } + } + return n, err + } +} + +// Close implements the io.Closer interface. +func (w *Writer) Close() error { + src := w.src[:w.n] + for { + nDst, nSrc, err := w.t.Transform(w.dst, src, true) + if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { + return werr + } + if err != ErrShortDst { + return err + } + src = src[nSrc:] + } +} + +type nop struct{ NopResetter } + +func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + n := copy(dst, src) + if n < len(src) { + err = ErrShortDst + } + return n, n, err +} + +func (nop) Span(src []byte, atEOF bool) (n int, err error) { + return len(src), nil +} + +type discard struct{ NopResetter } + +func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + return 0, len(src), nil +} + +var ( + // Discard is a Transformer for which all Transform calls succeed + // by consuming all bytes and writing nothing. + Discard Transformer = discard{} + + // Nop is a SpanningTransformer that copies src to dst. + Nop SpanningTransformer = nop{} +) + +// chain is a sequence of links. A chain with N Transformers has N+1 links and +// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst +// buffers given to chain.Transform and the middle N-1 buffers are intermediate +// buffers owned by the chain. The i'th link transforms bytes from the i'th +// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer +// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N). +type chain struct { + link []link + err error + // errStart is the index at which the error occurred plus 1. Processing + // errStart at this level at the next call to Transform. As long as + // errStart > 0, chain will not consume any more source bytes. + errStart int +} + +func (c *chain) fatalError(errIndex int, err error) { + if i := errIndex + 1; i > c.errStart { + c.errStart = i + c.err = err + } +} + +type link struct { + t Transformer + // b[p:n] holds the bytes to be transformed by t. + b []byte + p int + n int +} + +func (l *link) src() []byte { + return l.b[l.p:l.n] +} + +func (l *link) dst() []byte { + return l.b[l.n:] +} + +// Chain returns a Transformer that applies t in sequence. +func Chain(t ...Transformer) Transformer { + if len(t) == 0 { + return nop{} + } + c := &chain{link: make([]link, len(t)+1)} + for i, tt := range t { + c.link[i].t = tt + } + // Allocate intermediate buffers. + b := make([][defaultBufSize]byte, len(t)-1) + for i := range b { + c.link[i+1].b = b[i][:] + } + return c +} + +// Reset resets the state of Chain. It calls Reset on all the Transformers. +func (c *chain) Reset() { + for i, l := range c.link { + if l.t != nil { + l.t.Reset() + } + c.link[i].p, c.link[i].n = 0, 0 + } +} + +// TODO: make chain use Span (is going to be fun to implement!) + +// Transform applies the transformers of c in sequence. +func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + // Set up src and dst in the chain. + srcL := &c.link[0] + dstL := &c.link[len(c.link)-1] + srcL.b, srcL.p, srcL.n = src, 0, len(src) + dstL.b, dstL.n = dst, 0 + var lastFull, needProgress bool // for detecting progress + + // i is the index of the next Transformer to apply, for i in [low, high]. + // low is the lowest index for which c.link[low] may still produce bytes. + // high is the highest index for which c.link[high] has a Transformer. + // The error returned by Transform determines whether to increase or + // decrease i. We try to completely fill a buffer before converting it. + for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; { + in, out := &c.link[i], &c.link[i+1] + nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i) + out.n += nDst + in.p += nSrc + if i > 0 && in.p == in.n { + in.p, in.n = 0, 0 + } + needProgress, lastFull = lastFull, false + switch err0 { + case ErrShortDst: + // Process the destination buffer next. Return if we are already + // at the high index. + if i == high { + return dstL.n, srcL.p, ErrShortDst + } + if out.n != 0 { + i++ + // If the Transformer at the next index is not able to process any + // source bytes there is nothing that can be done to make progress + // and the bytes will remain unprocessed. lastFull is used to + // detect this and break out of the loop with a fatal error. + lastFull = true + continue + } + // The destination buffer was too small, but is completely empty. + // Return a fatal error as this transformation can never complete. + c.fatalError(i, errShortInternal) + case ErrShortSrc: + if i == 0 { + // Save ErrShortSrc in err. All other errors take precedence. + err = ErrShortSrc + break + } + // Source bytes were depleted before filling up the destination buffer. + // Verify we made some progress, move the remaining bytes to the errStart + // and try to get more source bytes. + if needProgress && nSrc == 0 || in.n-in.p == len(in.b) { + // There were not enough source bytes to proceed while the source + // buffer cannot hold any more bytes. Return a fatal error as this + // transformation can never complete. + c.fatalError(i, errShortInternal) + break + } + // in.b is an internal buffer and we can make progress. + in.p, in.n = 0, copy(in.b, in.src()) + fallthrough + case nil: + // if i == low, we have depleted the bytes at index i or any lower levels. + // In that case we increase low and i. In all other cases we decrease i to + // fetch more bytes before proceeding to the next index. + if i > low { + i-- + continue + } + default: + c.fatalError(i, err0) + } + // Exhausted level low or fatal error: increase low and continue + // to process the bytes accepted so far. + i++ + low = i + } + + // If c.errStart > 0, this means we found a fatal error. We will clear + // all upstream buffers. At this point, no more progress can be made + // downstream, as Transform would have bailed while handling ErrShortDst. + if c.errStart > 0 { + for i := 1; i < c.errStart; i++ { + c.link[i].p, c.link[i].n = 0, 0 + } + err, c.errStart, c.err = c.err, 0, nil + } + return dstL.n, srcL.p, err +} + +// Deprecated: use runes.Remove instead. +func RemoveFunc(f func(r rune) bool) Transformer { + return removeF(f) +} + +type removeF func(r rune) bool + +func (removeF) Reset() {} + +// Transform implements the Transformer interface. +func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] { + + if r = rune(src[0]); r < utf8.RuneSelf { + sz = 1 + } else { + r, sz = utf8.DecodeRune(src) + + if sz == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src) { + err = ErrShortSrc + break + } + // We replace illegal bytes with RuneError. Not doing so might + // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. + // The resulting byte sequence may subsequently contain runes + // for which t(r) is true that were passed unnoticed. + if !t(r) { + if nDst+3 > len(dst) { + err = ErrShortDst + break + } + nDst += copy(dst[nDst:], "\uFFFD") + } + nSrc++ + continue + } + } + + if !t(r) { + if nDst+sz > len(dst) { + err = ErrShortDst + break + } + nDst += copy(dst[nDst:], src[:sz]) + } + nSrc += sz + } + return +} + +// grow returns a new []byte that is longer than b, and copies the first n bytes +// of b to the start of the new slice. +func grow(b []byte, n int) []byte { + m := len(b) + if m <= 32 { + m = 64 + } else if m <= 256 { + m *= 2 + } else { + m += m >> 1 + } + buf := make([]byte, m) + copy(buf, b[:n]) + return buf +} + +const initialBufSize = 128 + +// String returns a string with the result of converting s[:n] using t, where +// n <= len(s). If err == nil, n will be len(s). It calls Reset on t. +func String(t Transformer, s string) (result string, n int, err error) { + t.Reset() + if s == "" { + // Fast path for the common case for empty input. Results in about a + // 86% reduction of running time for BenchmarkStringLowerEmpty. + if _, _, err := t.Transform(nil, nil, true); err == nil { + return "", 0, nil + } + } + + // Allocate only once. Note that both dst and src escape when passed to + // Transform. + buf := [2 * initialBufSize]byte{} + dst := buf[:initialBufSize:initialBufSize] + src := buf[initialBufSize : 2*initialBufSize] + + // The input string s is transformed in multiple chunks (starting with a + // chunk size of initialBufSize). nDst and nSrc are per-chunk (or + // per-Transform-call) indexes, pDst and pSrc are overall indexes. + nDst, nSrc := 0, 0 + pDst, pSrc := 0, 0 + + // pPrefix is the length of a common prefix: the first pPrefix bytes of the + // result will equal the first pPrefix bytes of s. It is not guaranteed to + // be the largest such value, but if pPrefix, len(result) and len(s) are + // all equal after the final transform (i.e. calling Transform with atEOF + // being true returned nil error) then we don't need to allocate a new + // result string. + pPrefix := 0 + for { + // Invariant: pDst == pPrefix && pSrc == pPrefix. + + n := copy(src, s[pSrc:]) + nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s)) + pDst += nDst + pSrc += nSrc + + // TODO: let transformers implement an optional Spanner interface, akin + // to norm's QuickSpan. This would even allow us to avoid any allocation. + if !bytes.Equal(dst[:nDst], src[:nSrc]) { + break + } + pPrefix = pSrc + if err == ErrShortDst { + // A buffer can only be short if a transformer modifies its input. + break + } else if err == ErrShortSrc { + if nSrc == 0 { + // No progress was made. + break + } + // Equal so far and !atEOF, so continue checking. + } else if err != nil || pPrefix == len(s) { + return string(s[:pPrefix]), pPrefix, err + } + } + // Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc. + + // We have transformed the first pSrc bytes of the input s to become pDst + // transformed bytes. Those transformed bytes are discontiguous: the first + // pPrefix of them equal s[:pPrefix] and the last nDst of them equal + // dst[:nDst]. We copy them around, into a new dst buffer if necessary, so + // that they become one contiguous slice: dst[:pDst]. + if pPrefix != 0 { + newDst := dst + if pDst > len(newDst) { + newDst = make([]byte, len(s)+nDst-nSrc) + } + copy(newDst[pPrefix:pDst], dst[:nDst]) + copy(newDst[:pPrefix], s[:pPrefix]) + dst = newDst + } + + // Prevent duplicate Transform calls with atEOF being true at the end of + // the input. Also return if we have an unrecoverable error. + if (err == nil && pSrc == len(s)) || + (err != nil && err != ErrShortDst && err != ErrShortSrc) { + return string(dst[:pDst]), pSrc, err + } + + // Transform the remaining input, growing dst and src buffers as necessary. + for { + n := copy(src, s[pSrc:]) + nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s)) + pDst += nDst + pSrc += nSrc + + // If we got ErrShortDst or ErrShortSrc, do not grow as long as we can + // make progress. This may avoid excessive allocations. + if err == ErrShortDst { + if nDst == 0 { + dst = grow(dst, pDst) + } + } else if err == ErrShortSrc { + if nSrc == 0 { + src = grow(src, 0) + } + } else if err != nil || pSrc == len(s) { + return string(dst[:pDst]), pSrc, err + } + } +} + +// Bytes returns a new byte slice with the result of converting b[:n] using t, +// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t. +func Bytes(t Transformer, b []byte) (result []byte, n int, err error) { + return doAppend(t, 0, make([]byte, len(b)), b) +} + +// Append appends the result of converting src[:n] using t to dst, where +// n <= len(src), If err == nil, n will be len(src). It calls Reset on t. +func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) { + if len(dst) == cap(dst) { + n := len(src) + len(dst) // It is okay for this to be 0. + b := make([]byte, n) + dst = b[:copy(b, dst)] + } + return doAppend(t, len(dst), dst[:cap(dst)], src) +} + +func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) { + t.Reset() + pSrc := 0 + for { + nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true) + pDst += nDst + pSrc += nSrc + if err != ErrShortDst { + return dst[:pDst], pSrc, err + } + + // Grow the destination buffer, but do not grow as long as we can make + // progress. This may avoid excessive allocations. + if nDst == 0 { + dst = grow(dst, pDst) + } + } +} diff --git a/vendor/golang.org/x/text/unicode/cldr/base.go b/vendor/golang.org/x/text/unicode/cldr/base.go new file mode 100644 index 000000000..2382f4d6d --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/base.go @@ -0,0 +1,100 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "encoding/xml" + "regexp" + "strconv" +) + +// Elem is implemented by every XML element. +type Elem interface { + setEnclosing(Elem) + setName(string) + enclosing() Elem + + GetCommon() *Common +} + +type hidden struct { + CharData string `xml:",chardata"` + Alias *struct { + Common + Source string `xml:"source,attr"` + Path string `xml:"path,attr"` + } `xml:"alias"` + Def *struct { + Common + Choice string `xml:"choice,attr,omitempty"` + Type string `xml:"type,attr,omitempty"` + } `xml:"default"` +} + +// Common holds several of the most common attributes and sub elements +// of an XML element. +type Common struct { + XMLName xml.Name + name string + enclElem Elem + Type string `xml:"type,attr,omitempty"` + Reference string `xml:"reference,attr,omitempty"` + Alt string `xml:"alt,attr,omitempty"` + ValidSubLocales string `xml:"validSubLocales,attr,omitempty"` + Draft string `xml:"draft,attr,omitempty"` + hidden +} + +// Default returns the default type to select from the enclosed list +// or "" if no default value is specified. +func (e *Common) Default() string { + if e.Def == nil { + return "" + } + if e.Def.Choice != "" { + return e.Def.Choice + } else if e.Def.Type != "" { + // Type is still used by the default element in collation. + return e.Def.Type + } + return "" +} + +// GetCommon returns e. It is provided such that Common implements Elem. +func (e *Common) GetCommon() *Common { + return e +} + +// Data returns the character data accumulated for this element. +func (e *Common) Data() string { + e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode) + return e.CharData +} + +func (e *Common) setName(s string) { + e.name = s +} + +func (e *Common) enclosing() Elem { + return e.enclElem +} + +func (e *Common) setEnclosing(en Elem) { + e.enclElem = en +} + +// Escape characters that can be escaped without further escaping the string. +var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`) + +// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string. +// It assumes the input string is correctly formatted. +func replaceUnicode(s string) string { + if s[1] == '#' { + r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32) + return string(r) + } + r, _, _, _ := strconv.UnquoteChar(s, 0) + return string(r) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/cldr.go b/vendor/golang.org/x/text/unicode/cldr/cldr.go new file mode 100644 index 000000000..2197f8ac2 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/cldr.go @@ -0,0 +1,130 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run makexml.go -output xml.go + +// Package cldr provides a parser for LDML and related XML formats. +// This package is intended to be used by the table generation tools +// for the various internationalization-related packages. +// As the XML types are generated from the CLDR DTD, and as the CLDR standard +// is periodically amended, this package may change considerably over time. +// This mostly means that data may appear and disappear between versions. +// That is, old code should keep compiling for newer versions, but data +// may have moved or changed. +// CLDR version 22 is the first version supported by this package. +// Older versions may not work. +package cldr // import "golang.org/x/text/unicode/cldr" + +import ( + "fmt" + "sort" +) + +// CLDR provides access to parsed data of the Unicode Common Locale Data Repository. +type CLDR struct { + parent map[string][]string + locale map[string]*LDML + resolved map[string]*LDML + bcp47 *LDMLBCP47 + supp *SupplementalData +} + +func makeCLDR() *CLDR { + return &CLDR{ + parent: make(map[string][]string), + locale: make(map[string]*LDML), + resolved: make(map[string]*LDML), + bcp47: &LDMLBCP47{}, + supp: &SupplementalData{}, + } +} + +// BCP47 returns the parsed BCP47 LDML data. If no such data was parsed, nil is returned. +func (cldr *CLDR) BCP47() *LDMLBCP47 { + return nil +} + +// Draft indicates the draft level of an element. +type Draft int + +const ( + Approved Draft = iota + Contributed + Provisional + Unconfirmed +) + +var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""} + +// ParseDraft returns the Draft value corresponding to the given string. The +// empty string corresponds to Approved. +func ParseDraft(level string) (Draft, error) { + if level == "" { + return Approved, nil + } + for i, s := range drafts { + if level == s { + return Unconfirmed - Draft(i), nil + } + } + return Approved, fmt.Errorf("cldr: unknown draft level %q", level) +} + +func (d Draft) String() string { + return drafts[len(drafts)-1-int(d)] +} + +// SetDraftLevel sets which draft levels to include in the evaluated LDML. +// Any draft element for which the draft level is higher than lev will be excluded. +// If multiple draft levels are available for a single element, the one with the +// lowest draft level will be selected, unless preferDraft is true, in which case +// the highest draft will be chosen. +// It is assumed that the underlying LDML is canonicalized. +func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) { + // TODO: implement + cldr.resolved = make(map[string]*LDML) +} + +// RawLDML returns the LDML XML for id in unresolved form. +// id must be one of the strings returned by Locales. +func (cldr *CLDR) RawLDML(loc string) *LDML { + return cldr.locale[loc] +} + +// LDML returns the fully resolved LDML XML for loc, which must be one of +// the strings returned by Locales. +func (cldr *CLDR) LDML(loc string) (*LDML, error) { + return cldr.resolve(loc) +} + +// Supplemental returns the parsed supplemental data. If no such data was parsed, +// nil is returned. +func (cldr *CLDR) Supplemental() *SupplementalData { + return cldr.supp +} + +// Locales returns the locales for which there exist files. +// Valid sublocales for which there is no file are not included. +// The root locale is always sorted first. +func (cldr *CLDR) Locales() []string { + loc := []string{"root"} + hasRoot := false + for l, _ := range cldr.locale { + if l == "root" { + hasRoot = true + continue + } + loc = append(loc, l) + } + sort.Strings(loc[1:]) + if !hasRoot { + return loc[1:] + } + return loc +} + +// Get fills in the fields of x based on the XPath path. +func Get(e Elem, path string) (res Elem, err error) { + return walkXPath(e, path) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/collate.go b/vendor/golang.org/x/text/unicode/cldr/collate.go new file mode 100644 index 000000000..80ee28d79 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/collate.go @@ -0,0 +1,359 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "bufio" + "encoding/xml" + "errors" + "fmt" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// RuleProcessor can be passed to Collator's Process method, which +// parses the rules and calls the respective method for each rule found. +type RuleProcessor interface { + Reset(anchor string, before int) error + Insert(level int, str, context, extend string) error + Index(id string) +} + +const ( + // cldrIndex is a Unicode-reserved sentinel value used to mark the start + // of a grouping within an index. + // We ignore any rule that starts with this rune. + // See http://unicode.org/reports/tr35/#Collation_Elements for details. + cldrIndex = "\uFDD0" + + // specialAnchor is the format in which to represent logical reset positions, + // such as "first tertiary ignorable". + specialAnchor = "<%s/>" +) + +// Process parses the rules for the tailorings of this collation +// and calls the respective methods of p for each rule found. +func (c Collation) Process(p RuleProcessor) (err error) { + if len(c.Cr) > 0 { + if len(c.Cr) > 1 { + return fmt.Errorf("multiple cr elements, want 0 or 1") + } + return processRules(p, c.Cr[0].Data()) + } + if c.Rules.Any != nil { + return c.processXML(p) + } + return errors.New("no tailoring data") +} + +// processRules parses rules in the Collation Rule Syntax defined in +// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings. +func processRules(p RuleProcessor, s string) (err error) { + chk := func(s string, e error) string { + if err == nil { + err = e + } + return s + } + i := 0 // Save the line number for use after the loop. + scanner := bufio.NewScanner(strings.NewReader(s)) + for ; scanner.Scan() && err == nil; i++ { + for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) { + level := 5 + var ch byte + switch ch, s = s[0], s[1:]; ch { + case '&': // followed by or '[' ']' + if s = skipSpace(s); consume(&s, '[') { + s = chk(parseSpecialAnchor(p, s)) + } else { + s = chk(parseAnchor(p, 0, s)) + } + case '<': // sort relation '<'{1,4}, optionally followed by '*'. + for level = 1; consume(&s, '<'); level++ { + } + if level > 4 { + err = fmt.Errorf("level %d > 4", level) + } + fallthrough + case '=': // identity relation, optionally followed by *. + if consume(&s, '*') { + s = chk(parseSequence(p, level, s)) + } else { + s = chk(parseOrder(p, level, s)) + } + default: + chk("", fmt.Errorf("illegal operator %q", ch)) + break + } + } + } + if chk("", scanner.Err()); err != nil { + return fmt.Errorf("%d: %v", i, err) + } + return nil +} + +// parseSpecialAnchor parses the anchor syntax which is either of the form +// ['before' ] +// or +// [