packer-cn/vendor/github.com/klauspost/pgzip/gzip.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pgzip

import (
	"bytes"
	"errors"
	"fmt"
	"hash"
	"io"
	"sync"

	"github.com/klauspost/compress/flate"
	"github.com/klauspost/crc32"
)

const (
	defaultBlockSize = 250000
	tailSize         = 16384
	defaultBlocks    = 16
)

// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
	NoCompression       = flate.NoCompression
	BestSpeed           = flate.BestSpeed
	BestCompression     = flate.BestCompression
	DefaultCompression  = flate.DefaultCompression
	ConstantCompression = flate.ConstantCompression
)

// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
	Header
	w             io.Writer
	level         int
	wroteHeader   bool
	blockSize     int
	blocks        int
	currentBuffer []byte
	prevTail      []byte // last tailSize bytes of the previous block, used as a flate dictionary
	digest        hash.Hash32
	size          int
	closed        bool
	buf           [10]byte
	err           error
	pushedErr     chan error  // errors reported by internal goroutines via pushError
	results       chan result // per-block results, consumed in order by the writer goroutine
	dictFlatePool *sync.Pool
	dstPool       *sync.Pool
}

// result ties a compressed block to a notification that it has been
// written to the underlying writer.
type result struct {
	result        chan []byte
	notifyWritten chan struct{}
}

// SetConcurrency can be used to fine-tune the concurrency level if needed.
//
// With this you can control the approximate size of your blocks,
// as well as how many you want to be processing in parallel.
//
// The default is SetConcurrency(250000, 16),
// meaning blocks are split at 250000 bytes and up to 16 blocks
// can be compressing at once before the writer blocks.
func (z *Writer) SetConcurrency(blockSize, blocks int) error {
	if blockSize <= tailSize {
		return fmt.Errorf("gzip: block size cannot be less than or equal to %d", tailSize)
	}
	if blocks <= 0 {
		return errors.New("gzip: blocks cannot be zero or less")
	}
	z.blockSize = blockSize
	z.results = make(chan result, blocks)
	z.blocks = blocks
	return nil
}
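
// Illustrative usage sketch (not part of the original source): tuning block
// size and concurrency before the first Write. The destination writer dst,
// the data slice, and the use of runtime.GOMAXPROCS(0) as the block count are
// assumptions for the example only.
//
//	zw := pgzip.NewWriter(dst)
//	// 1 MB blocks, one in-flight block per available CPU.
//	if err := zw.SetConcurrency(1<<20, runtime.GOMAXPROCS(0)); err != nil {
//		log.Fatal(err)
//	}
//	if _, err := zw.Write(data); err != nil {
//		log.Fatal(err)
//	}
//	if err := zw.Close(); err != nil {
//		log.Fatal(err)
//	}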

// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write or Close. The Comment and Name header fields are
// UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO
// 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an
// error on Write.
func NewWriter(w io.Writer) *Writer {
	z, _ := NewWriterLevel(w, DefaultCompression)
	return z
}

// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, or any
// integer value between BestSpeed and BestCompression inclusive. The error
// returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
	if level < ConstantCompression || level > BestCompression {
		return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
	}
	z := new(Writer)
	z.SetConcurrency(defaultBlockSize, defaultBlocks)
	z.init(w, level)
	return z, nil
}
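
// Illustrative usage sketch (not part of the original source): compressing a
// stream with an explicit level and header metadata. The source reader src
// and destination writer dst are assumptions for the example only.
//
//	zw, err := pgzip.NewWriterLevel(dst, pgzip.BestSpeed)
//	if err != nil {
//		log.Fatal(err)
//	}
//	zw.Name = "data.txt" // header fields must be set before the first Write
//	if _, err := io.Copy(zw, src); err != nil {
//		log.Fatal(err)
//	}
//	if err := zw.Close(); err != nil {
//		log.Fatal(err)
//	}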

// pushError must be used by internal goroutines to set an error condition,
// since direct access to z.err is restricted to the caller's goroutine.
func (z *Writer) pushError(err error) {
	z.pushedErr <- err
	close(z.pushedErr)
}

func (z *Writer) init(w io.Writer, level int) {
	digest := z.digest
	if digest != nil {
		digest.Reset()
	} else {
		digest = crc32.NewIEEE()
	}
	*z = Writer{
		Header: Header{
			OS: 255, // unknown
		},
		w:         w,
		level:     level,
		digest:    digest,
		pushedErr: make(chan error, 1),
		results:   make(chan result, z.blocks),
		blockSize: z.blockSize,
		blocks:    z.blocks,
	}
	z.dictFlatePool = &sync.Pool{
		New: func() interface{} {
			f, _ := flate.NewWriterDict(w, level, nil)
			return f
		},
	}
	z.dstPool = &sync.Pool{New: func() interface{} { return make([]byte, 0, z.blockSize) }}
}

// Reset discards the Writer z's state and makes it equivalent to the
// result of its original state from NewWriter or NewWriterLevel, but
// writing to w instead. This permits reusing a Writer rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
	if z.results != nil && !z.closed {
		close(z.results)
	}
	z.SetConcurrency(defaultBlockSize, defaultBlocks)
	z.init(w, z.level)
}

// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
func put2(p []byte, v uint16) {
	p[0] = uint8(v >> 0)
	p[1] = uint8(v >> 8)
}

func put4(p []byte, v uint32) {
	p[0] = uint8(v >> 0)
	p[1] = uint8(v >> 8)
	p[2] = uint8(v >> 16)
	p[3] = uint8(v >> 24)
}

// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
	if len(b) > 0xffff {
		return errors.New("gzip.Write: Extra data is too large")
	}
	put2(z.buf[0:2], uint16(len(b)))
	_, err := z.w.Write(z.buf[0:2])
	if err != nil {
		return err
	}
	_, err = z.w.Write(b)
	return err
}

// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
	// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
	needconv := false
	for _, v := range s {
		if v == 0 || v > 0xff {
			return errors.New("gzip.Write: non-Latin-1 header string")
		}
		if v > 0x7f {
			needconv = true
		}
	}
	if needconv {
		b := make([]byte, 0, len(s))
		for _, v := range s {
			b = append(b, byte(v))
		}
		_, err = z.w.Write(b)
	} else {
		_, err = io.WriteString(z.w, s)
	}
	if err != nil {
		return err
	}
	// GZIP strings are NUL-terminated.
	z.buf[0] = 0
	_, err = z.w.Write(z.buf[0:1])
	return err
}

// compressCurrent will compress the data currently buffered.
// This should only be called from the main writer/flush/closer goroutine.
func (z *Writer) compressCurrent(flush bool) {
	r := result{}
	r.result = make(chan []byte, 1)
	r.notifyWritten = make(chan struct{})
	z.results <- r

	// If the buffered block is more than twice the block size, split it.
	c := z.currentBuffer
	if len(c) > z.blockSize*2 {
		c = c[:z.blockSize]
		go compressBlock(c, z.prevTail, *z, r)
		z.prevTail = c[len(c)-tailSize:]
		z.currentBuffer = z.currentBuffer[z.blockSize:]
		z.compressCurrent(flush)
		// The last (recursive) call flushes if needed.
		return
	}

	go compressBlock(c, z.prevTail, *z, r)
	if len(c) > tailSize {
		// Keep the tail of this block as the dictionary for the next one.
		z.prevTail = c[len(c)-tailSize:]
	} else {
		z.prevTail = nil
	}
	z.currentBuffer = make([]byte, 0, z.blockSize+(z.blockSize/4))

	// Wait if flushing
	if flush {
		<-r.notifyWritten
	}
}
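
// Illustrative note (not part of the original source): with the default
// blockSize of 250000, a 600000-byte buffer is split into one 250000-byte
// block followed by a single 350000-byte block, since the 350000-byte
// remainder is no longer more than twice the block size. Individual blocks
// handed to compressBlock can therefore hold up to roughly 2*blockSize of
// input.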

// checkError returns an error if one has been set, either directly or by an
// internal goroutine. It must not be called from those internal goroutines;
// they report errors via pushError instead.
func (z *Writer) checkError() error {
	if z.err != nil {
		return z.err
	}
	select {
	case err := <-z.pushedErr:
		z.err = err
	default:
	}
	return z.err
}

// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed to output until
// the Writer is closed or Flush() is called.
//
// The function will return quickly if there are unused buffers.
// The sent slice (p) is copied, and the caller is free to re-use the buffer
// when the function returns.
//
// Errors that occur during compression will be reported later, and a nil error
// does not signify that the compression succeeded (since it is most likely
// still running). That means that the call that returns an error may not be
// the call that caused it. Only the Flush and Close functions are guaranteed
// to return any errors up to that point.
func (z *Writer) Write(p []byte) (int, error) {
	if z.checkError() != nil {
		return 0, z.err
	}
	// Write the GZIP header lazily.
	if !z.wroteHeader {
		z.wroteHeader = true
		z.buf[0] = gzipID1
		z.buf[1] = gzipID2
		z.buf[2] = gzipDeflate
		z.buf[3] = 0 // FLG: FEXTRA/FNAME/FCOMMENT bits set below as needed
		if z.Extra != nil {
			z.buf[3] |= 0x04
		}
		if z.Name != "" {
			z.buf[3] |= 0x08
		}
		if z.Comment != "" {
			z.buf[3] |= 0x10
		}
		put4(z.buf[4:8], uint32(z.ModTime.Unix()))
		// XFL: 2 = best compression, 4 = fastest.
		if z.level == BestCompression {
			z.buf[8] = 2
		} else if z.level == BestSpeed {
			z.buf[8] = 4
		} else {
			z.buf[8] = 0
		}
		z.buf[9] = z.OS
		var n int
		n, z.err = z.w.Write(z.buf[0:10])
		if z.err != nil {
			return n, z.err
		}
		if z.Extra != nil {
			z.err = z.writeBytes(z.Extra)
			if z.err != nil {
				return n, z.err
			}
		}
		if z.Name != "" {
			z.err = z.writeString(z.Name)
			if z.err != nil {
				return n, z.err
			}
		}
		if z.Comment != "" {
			z.err = z.writeString(z.Comment)
			if z.err != nil {
				return n, z.err
			}
		}
		// Start receiving compressed blocks from the compressors and write
		// them to the underlying writer in order.
		go func() {
			listen := z.results
			for {
				r, ok := <-listen
				// If closed, we are finished.
				if !ok {
					return
				}
				buf := <-r.result
				n, err := z.w.Write(buf)
				if err != nil {
					z.pushError(err)
					close(r.notifyWritten)
					return
				}
				if n != len(buf) {
					z.pushError(fmt.Errorf("gzip: short write %d should be %d", n, len(buf)))
					close(r.notifyWritten)
					return
				}
				z.dstPool.Put(buf)
				close(r.notifyWritten)
			}
		}()
		z.currentBuffer = make([]byte, 0, z.blockSize+(z.blockSize/4))
	}
	// Handle very large writes in a loop.
	if len(p) > z.blockSize*z.blocks {
		q := p
		for len(q) > 0 {
			length := len(q)
			if length > z.blockSize {
				length = z.blockSize
			}
			z.digest.Write(q[:length])
			z.currentBuffer = append(z.currentBuffer, q[:length]...)
			if len(z.currentBuffer) >= z.blockSize {
				z.compressCurrent(false)
				if z.err != nil {
					// Report the bytes consumed so far, including this chunk,
					// which has already been added to the digest and buffer.
					return len(p) - len(q) + length, z.err
				}
			}
			z.size += length
			q = q[length:]
		}
		return len(p), z.err
	}
	z.size += len(p)
	z.digest.Write(p)
	z.currentBuffer = append(z.currentBuffer, p...)
	if len(z.currentBuffer) >= z.blockSize {
		z.compressCurrent(false)
	}
	return len(p), z.err
}
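
// Illustrative usage sketch (not part of the original source): because Write
// reports compression errors asynchronously, callers should still check the
// errors returned by Flush and Close. The writer dst and the chunks slice are
// assumptions for the example only.
//
//	zw := pgzip.NewWriter(dst)
//	for _, chunk := range chunks {
//		if _, err := zw.Write(chunk); err != nil {
//			log.Fatal(err) // may be an error from an earlier chunk
//		}
//	}
//	if err := zw.Close(); err != nil {
//		log.Fatal(err) // reports any remaining compression/write errors
//	}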

// compressBlock compresses a single block:
// Step 1: compress the block into a buffer.
// Step 2: send the compressed buffer to the result channel.
// Step 3: close the result channel to indicate we are done.
func compressBlock(p, prevTail []byte, z Writer, r result) {
	defer close(r.result)
	buf := z.dstPool.Get().([]byte)
	dest := bytes.NewBuffer(buf[:0])

	compressor := z.dictFlatePool.Get().(*flate.Writer)
	compressor.ResetDict(dest, prevTail)
	compressor.Write(p)

	err := compressor.Flush()
	if err != nil {
		z.pushError(err)
		return
	}
	if z.closed {
		err = compressor.Close()
		if err != nil {
			z.pushError(err)
			return
		}
	}
	z.dictFlatePool.Put(compressor)

	// Read back the compressed buffer and hand it to the writer goroutine.
	buf = dest.Bytes()
	r.result <- buf
}

// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
	if z.checkError() != nil {
		return z.err
	}
	if z.closed {
		return nil
	}
	if !z.wroteHeader {
		_, err := z.Write(nil)
		if err != nil {
			return err
		}
	}
	// Send the current block to compression and wait for it to be written.
	z.compressCurrent(true)
	if z.checkError() != nil {
		return z.err
	}
	return nil
}
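
// Illustrative usage sketch (not part of the original source): using Flush so
// that a remote reader can decode what has been sent so far. The net.Conn
// value conn and the messages slice are assumptions for the example only.
//
//	zw := pgzip.NewWriter(conn)
//	for _, msg := range messages {
//		if _, err := zw.Write(msg); err != nil {
//			log.Fatal(err)
//		}
//		if err := zw.Flush(); err != nil { // Z_SYNC_FLUSH semantics
//			log.Fatal(err)
//		}
//	}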

// UncompressedSize will return the number of bytes written.
// This is a pgzip extension; it is not part of the official gzip package.
func (z Writer) UncompressedSize() int {
	return z.size
}
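
// Illustrative usage sketch (not part of the original source): querying the
// number of uncompressed bytes accepted so far. The writer zw is assumed to
// have been created and written to as in the examples above.
//
//	fmt.Printf("compressed %d uncompressed bytes\n", zw.UncompressedSize())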

// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
	if z.checkError() != nil {
		return z.err
	}
	if z.closed {
		return nil
	}
	z.closed = true
	if !z.wroteHeader {
		z.Write(nil)
		if z.err != nil {
			return z.err
		}
	}
	z.compressCurrent(true)
	if z.checkError() != nil {
		return z.err
	}
	close(z.results)
	// Write the GZIP trailer: CRC-32 of the uncompressed data followed by
	// its length modulo 2^32 (ISIZE), both little-endian.
	put4(z.buf[0:4], z.digest.Sum32())
	put4(z.buf[4:8], uint32(z.size))
	_, z.err = z.w.Write(z.buf[0:8])
	return z.err
}
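
// Illustrative end-to-end sketch (not part of the original source): compress
// into a buffer and read it back with the matching reader. pgzip.NewReader is
// assumed to be available from this package's gunzip implementation.
//
//	var buf bytes.Buffer
//	zw := pgzip.NewWriter(&buf)
//	if _, err := zw.Write([]byte("hello pgzip")); err != nil {
//		log.Fatal(err)
//	}
//	if err := zw.Close(); err != nil {
//		log.Fatal(err)
//	}
//	zr, err := pgzip.NewReader(&buf)
//	if err != nil {
//		log.Fatal(err)
//	}
//	plain, err := ioutil.ReadAll(zr)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("%s\n", plain) // hello pgzip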