Merge pull request #6534 from ChrisLundquist/clundquist/xz
add support for xz compression #6523
This commit is contained in:
commit
10c4aca521
|
@ -105,6 +105,16 @@ func main() {
|
|||
c.Close()
|
||||
fmt.Printf("lz4:\twriter %s\treader %s\tsize %d\n", resw.T.String(), resr.T.String(), c.sw)
|
||||
|
||||
c, err = NewCompressor("/tmp/image.r", "/tmp/image.w")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
resw = testing.Benchmark(c.BenchmarkXZWriter)
|
||||
c.w.Seek(0, 0)
|
||||
resr = testing.Benchmark(c.BenchmarkXZReader)
|
||||
c.Close()
|
||||
fmt.Printf("xz:\twriter %s\treader %s\tsize %d\n", resw.T.String(), resr.T.String(), c.sw)
|
||||
|
||||
}
|
||||
|
||||
func (c *Compressor) BenchmarkGZIPWriter(b *testing.B) {
|
||||
|
@ -195,3 +205,25 @@ func (c *Compressor) BenchmarkLZ4Reader(b *testing.B) {
|
|||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Compressor) BenchmarkXZWriter(b *testing.B) {
|
||||
cw := xz.NewWriter(c.w)
|
||||
b.ResetTimer()
|
||||
|
||||
_, err := io.Copy(cw, c.r)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
cw.Close()
|
||||
c.w.Sync()
|
||||
}
|
||||
|
||||
func (c *Compressor) BenchmarkXZReader(b *testing.B) {
|
||||
cr := xz.NewReader(c.w)
|
||||
b.ResetTimer()
|
||||
|
||||
_, err := io.Copy(ioutil.Discard, cr)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"github.com/hashicorp/packer/template/interpolate"
|
||||
"github.com/klauspost/pgzip"
|
||||
"github.com/pierrec/lz4"
|
||||
"github.com/ulikunitz/xz"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -142,6 +143,11 @@ func (p *PostProcessor) PostProcess(ui packer.Ui, artifact packer.Artifact) (pac
|
|||
runtime.GOMAXPROCS(-1), target))
|
||||
output, err = makeLZ4Writer(outputFile, p.config.CompressionLevel)
|
||||
defer output.Close()
|
||||
case "xz":
|
||||
ui.Say(fmt.Sprintf("Using xz compression with 1 core for %s (library does not support MT)",
|
||||
target))
|
||||
output, err = makeXZWriter(outputFile, p.config.CompressionLevel)
|
||||
defer output.Close()
|
||||
case "pgzip":
|
||||
ui.Say(fmt.Sprintf("Using pgzip compression with %d cores for %s",
|
||||
runtime.GOMAXPROCS(-1), target))
|
||||
|
@ -209,6 +215,7 @@ func (config *Config) detectFromFilename() {
|
|||
"gz": "pgzip",
|
||||
"lz4": "lz4",
|
||||
"bgzf": "bgzf",
|
||||
"xz": "xz",
|
||||
}
|
||||
|
||||
if config.Format == "" {
|
||||
|
@ -273,6 +280,14 @@ func makeLZ4Writer(output io.WriteCloser, compressionLevel int) (io.WriteCloser,
|
|||
return lzwriter, nil
|
||||
}
|
||||
|
||||
func makeXZWriter(output io.WriteCloser, compressionLevel int) (io.WriteCloser, error) {
|
||||
xzwriter, err := xz.NewWriter(output)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return xzwriter, nil
|
||||
}
|
||||
|
||||
func makePgzipWriter(output io.WriteCloser, compressionLevel int) (io.WriteCloser, error) {
|
||||
gzipWriter, err := pgzip.NewWriterLevel(output, compressionLevel)
|
||||
if err != nil {
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
Copyright (c) 2014-2016 Ulrich Kunitz
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* My name, Ulrich Kunitz, may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,73 @@
|
|||
# Package xz
|
||||
|
||||
This Go language package supports the reading and writing of xz
|
||||
compressed streams. It also includes a gxz command for compressing and
|
||||
decompressing data. The package is completely written in Go and doesn't
|
||||
have any dependency on any C code.
|
||||
|
||||
The package is currently under development. There might be bugs and APIs
|
||||
are not considered stable. At this time the package cannot compete with
|
||||
the xz tool regarding compression speed and size. The algorithms there
|
||||
have been developed over a long time and are highly optimized. However
|
||||
there are a number of improvements planned and I'm very optimistic about
|
||||
parallel compression and decompression. Stay tuned!
|
||||
|
||||
## Using the API
|
||||
|
||||
The following example program shows how to use the API.
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/ulikunitz/xz"
|
||||
)
|
||||
|
||||
func main() {
|
||||
const text = "The quick brown fox jumps over the lazy dog.\n"
|
||||
var buf bytes.Buffer
|
||||
// compress text
|
||||
w, err := xz.NewWriter(&buf)
|
||||
if err != nil {
|
||||
log.Fatalf("xz.NewWriter error %s", err)
|
||||
}
|
||||
if _, err := io.WriteString(w, text); err != nil {
|
||||
log.Fatalf("WriteString error %s", err)
|
||||
}
|
||||
if err := w.Close(); err != nil {
|
||||
log.Fatalf("w.Close error %s", err)
|
||||
}
|
||||
// decompress buffer and write output to stdout
|
||||
r, err := xz.NewReader(&buf)
|
||||
if err != nil {
|
||||
log.Fatalf("NewReader error %s", err)
|
||||
}
|
||||
if _, err = io.Copy(os.Stdout, r); err != nil {
|
||||
log.Fatalf("io.Copy error %s", err)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Using the gxz compression tool
|
||||
|
||||
The package includes a gxz command line utility for compression and
|
||||
decompression.
|
||||
|
||||
Use the following command for installation:
|
||||
|
||||
$ go get github.com/ulikunitz/xz/cmd/gxz
|
||||
|
||||
To test it call the following command.
|
||||
|
||||
$ gxz bigfile
|
||||
|
||||
After some time a much smaller file bigfile.xz will replace bigfile.
|
||||
To decompress it use the following command.
|
||||
|
||||
$ gxz -d bigfile.xz
|
||||
|
|
@ -0,0 +1,315 @@
|
|||
# TODO list
|
||||
|
||||
## Release v0.6
|
||||
|
||||
1. Review encoder and check for lzma improvements under xz.
|
||||
2. Fix binary tree matcher.
|
||||
3. Compare compression ratio with xz tool using comparable parameters
|
||||
and optimize parameters
|
||||
4. Do some optimizations
|
||||
- rename operation action and make it a simple type of size 8
|
||||
- make maxMatches, wordSize parameters
|
||||
- stop searching after a certain length is found (parameter sweetLen)
|
||||
|
||||
## Release v0.7
|
||||
|
||||
1. Optimize code
|
||||
2. Do statistical analysis to get linear presets.
|
||||
3. Test sync.Pool compatibility for xz and lzma Writer and Reader
|
||||
4. Fuzz optimized code.
|
||||
|
||||
## Release v0.8
|
||||
|
||||
1. Support parallel go routines for writing and reading xz files.
|
||||
2. Support a ReaderAt interface for xz files with small block sizes.
|
||||
3. Improve compatibility between gxz and xz
|
||||
4. Provide manual page for gxz
|
||||
|
||||
## Release v0.9
|
||||
|
||||
1. Improve documentation
|
||||
2. Fuzz again
|
||||
|
||||
## Release v1.0
|
||||
|
||||
1. Full functioning gxz
|
||||
2. Add godoc URL to README.md (godoc.org)
|
||||
3. Resolve all issues.
|
||||
4. Define release candidates.
|
||||
5. Public announcement.
|
||||
|
||||
## Package lzma
|
||||
|
||||
### Release v0.6
|
||||
|
||||
- Rewrite Encoder into a simple greedy one-op-at-a-time encoder
|
||||
including
|
||||
+ simple scan at the dictionary head for the same byte
|
||||
+ use the killer byte (requiring matches to get longer, the first
|
||||
test should be the byte that would make the match longer)
|
||||
|
||||
|
||||
## Optimizations
|
||||
|
||||
- There may be a lot of false sharing in lzma.State; check whether this
|
||||
can be improved by reorganizing the internal structure of it.
|
||||
- Check whether batching encoding and decoding improves speed.
|
||||
|
||||
### DAG optimizations
|
||||
|
||||
- Use full buffer to create minimal bit-length above range encoder.
|
||||
- Might be too slow (see v0.4)
|
||||
|
||||
### Different match finders
|
||||
|
||||
- hashes with 2, 3 characters additional to 4 characters
|
||||
- binary trees with 2-7 characters (uint64 as key, use uint32 as
|
||||
pointers into an array)
|
||||
- rb-trees with 2-7 characters (uint64 as key, use uint32 as pointers
|
||||
into an array with bit-stealing for the colors)
|
||||
|
||||
## Release Procedure
|
||||
|
||||
- execute goch -l for all packages; probably with lower param like 0.5.
|
||||
- check orthography with gospell
|
||||
- Write release notes in doc/relnotes.
|
||||
- Update README.md
|
||||
- xb copyright . in xz directory to ensure all new files have Copyright
|
||||
header
|
||||
- VERSION=<version> go generate github.com/ulikunitz/xz/... to update
|
||||
version files
|
||||
- Execute test for Linux/amd64, Linux/x86 and Windows/amd64.
|
||||
- Update TODO.md - write short log entry
|
||||
- git checkout master && git merge dev
|
||||
- git tag -a <version>
|
||||
- git push
|
||||
|
||||
## Log
|
||||
|
||||
### 2017-06-05
|
||||
|
||||
Release v0.5.4 fixes issue #15, another problem with the padding size
|
||||
check for the xz block header. I removed the check completely.
|
||||
|
||||
### 2017-02-15
|
||||
|
||||
Release v0.5.3 fixes issue #12 regarding the decompression of an empty
|
||||
XZ stream. Many thanks to Tomasz Kłak, who reported the issue.
|
||||
|
||||
### 2016-12-02
|
||||
|
||||
Release v0.5.2 became necessary to allow the decoding of xz files with
|
||||
4-byte padding in the block header. Many thanks to Greg, who reported
|
||||
the issue.
|
||||
|
||||
### 2016-07-23
|
||||
|
||||
Release v0.5.1 became necessary to fix problems with 32-bit platforms.
|
||||
Many thanks to Bruno Brigas, who reported the issue.
|
||||
|
||||
### 2016-07-04
|
||||
|
||||
Release v0.5 provides improvements to the compressor and provides support for
|
||||
the decompression of xz files with multiple xz streams.
|
||||
|
||||
### 2016-01-31
|
||||
|
||||
Another compression rate increase by checking the byte at length of the
|
||||
best match first, before checking the whole prefix. This makes the
|
||||
compressor even faster. We have now a large time budget to beat the
|
||||
compression ratio of the xz tool. For enwik8 we have now over 40 seconds
|
||||
to reduce the compressed file size for another 7 MiB.
|
||||
|
||||
### 2016-01-30
|
||||
|
||||
I simplified the encoder. Speed and compression rate increased
|
||||
dramatically. A high compression rate affects also the decompression
|
||||
speed. The approach with the buffer and optimizing for operation
|
||||
compression rate has not been successful. Going for the maximum length
|
||||
appears to be the best approach.
|
||||
|
||||
### 2016-01-28
|
||||
|
||||
The release v0.4 is ready. It provides a working xz implementation,
|
||||
which is rather slow, but works and is interoperable with the xz tool.
|
||||
It is an important milestone.
|
||||
|
||||
### 2016-01-10
|
||||
|
||||
I have the first working implementation of an xz reader and writer. I'm
|
||||
happy about reaching this milestone.
|
||||
|
||||
### 2015-12-02
|
||||
|
||||
I'm now ready to implement xz because, I have a working LZMA2
|
||||
implementation. I decided today that v0.4 will use the slow encoder
|
||||
using the operations buffer to be able to go back, if I intend to do so.
|
||||
|
||||
### 2015-10-21
|
||||
|
||||
I have restarted the work on the library. While trying to implement
|
||||
LZMA2, I discovered that I need to resimplify the encoder and decoder
|
||||
functions. The option approach is too complicated. Using a limited byte
|
||||
writer and not caring for written bytes at all and not to try to handle
|
||||
uncompressed data simplifies the LZMA encoder and decoder much.
|
||||
Processing uncompressed data and handling limits is a feature of the
|
||||
LZMA2 format not of LZMA.
|
||||
|
||||
I learned an interesting method from the LZO format. If the last copy is
|
||||
too far away they are moving the head one 2 bytes and not 1 byte to
|
||||
reduce processing times.
|
||||
|
||||
### 2015-08-26
|
||||
|
||||
I have now reimplemented the lzma package. The code is reasonably fast,
|
||||
but can still be optimized. The next step is to implement LZMA2 and then
|
||||
xz.
|
||||
|
||||
### 2015-07-05
|
||||
|
||||
Created release v0.3. The version is the foundation for a full xz
|
||||
implementation that is the target of v0.4.
|
||||
|
||||
### 2015-06-11
|
||||
|
||||
The gflag package has been developed because I couldn't use flag and
|
||||
pflag for a fully compatible support of gzip's and lzma's options. It
|
||||
seems to work now quite nicely.
|
||||
|
||||
### 2015-06-05
|
||||
|
||||
The overflow issue was interesting to research, however Henry S. Warren
|
||||
Jr. Hacker's Delight book was very helpful as usual and had the issue
|
||||
explained perfectly. Fefe's information on his website was based on the
|
||||
C FAQ and quite bad, because it didn't address the issue of -MININT ==
|
||||
MININT.
|
||||
|
||||
### 2015-06-04
|
||||
|
||||
It has been a productive day. I improved the interface of lzma.Reader
|
||||
and lzma.Writer and fixed the error handling.
|
||||
|
||||
### 2015-06-01
|
||||
|
||||
By computing the bit length of the LZMA operations I was able to
|
||||
improve the greedy algorithm implementation. By using an 8 MByte buffer
|
||||
the compression rate was not as good as for xz but already better than
|
||||
gzip default.
|
||||
|
||||
Compression is currently slow, but this is something we will be able to
|
||||
improve over time.
|
||||
|
||||
### 2015-05-26
|
||||
|
||||
Checked the license of ogier/pflag. The binary lzmago binary should
|
||||
include the license terms for the pflag library.
|
||||
|
||||
I added the endorsement clause as used by Google for the Go sources the
|
||||
LICENSE file.
|
||||
|
||||
### 2015-05-22
|
||||
|
||||
The package lzb contains now the basic implementation for creating or
|
||||
reading LZMA byte streams. It allows the support for the implementation
|
||||
of the DAG-shortest-path algorithm for the compression function.
|
||||
|
||||
### 2015-04-23
|
||||
|
||||
Completed yesterday the lzbase classes. I'm a little bit concerned that
|
||||
using the components may require too much code, but on the other hand
|
||||
there is a lot of flexibility.
|
||||
|
||||
### 2015-04-22
|
||||
|
||||
Implemented Reader and Writer during the Bayern game against Porto. The
|
||||
second half gave me enough time.
|
||||
|
||||
### 2015-04-21
|
||||
|
||||
While showering today morning I discovered that the design for OpEncoder
|
||||
and OpDecoder doesn't work, because encoding/decoding might depend on
|
||||
the current status of the dictionary. This is not exactly the right way
|
||||
to start the day.
|
||||
|
||||
Therefore we need to keep the Reader and Writer design. This time around
|
||||
we simplify it by ignoring size limits. These can be added by wrappers
|
||||
around the Reader and Writer interfaces. The Parameters type isn't
|
||||
needed anymore.
|
||||
|
||||
However I will implement a ReaderState and WriterState type to use
|
||||
static typing to ensure the right State object is combined with the
|
||||
right lzbase.Reader and lzbase.Writer.
|
||||
|
||||
As a start I have implemented ReaderState and WriterState to ensure
|
||||
that the state for reading is only used by readers and WriterState only
|
||||
used by Writers.
|
||||
|
||||
### 2015-04-20
|
||||
|
||||
Today I implemented the OpDecoder and tested OpEncoder and OpDecoder.
|
||||
|
||||
### 2015-04-08
|
||||
|
||||
Came up with a new simplified design for lzbase. I implemented already
|
||||
the type State that replaces OpCodec.
|
||||
|
||||
### 2015-04-06
|
||||
|
||||
The new lzma package is now fully usable and lzmago is using it now. The
|
||||
old lzma package has been completely removed.
|
||||
|
||||
### 2015-04-05
|
||||
|
||||
Implemented lzma.Reader and tested it.
|
||||
|
||||
### 2015-04-04
|
||||
|
||||
Implemented baseReader by adapting code from lzma.Reader.
|
||||
|
||||
### 2015-04-03
|
||||
|
||||
The opCodec has been copied yesterday to lzma2. opCodec has a high
|
||||
number of dependencies on other files in lzma2. Therefore I had to copy
|
||||
almost all files from lzma.
|
||||
|
||||
### 2015-03-31
|
||||
|
||||
Removed only a TODO item.
|
||||
|
||||
However in Francesco Campoy's presentation "Go for Javaneros
|
||||
(Javaïstes?)" is the idea that using an embedded field E, all the
|
||||
methods of E will be defined on T. If E is an interface T satisfies E.
|
||||
|
||||
https://talks.golang.org/2014/go4java.slide#51
|
||||
|
||||
I have never used this, but it seems to be a cool idea.
|
||||
|
||||
### 2015-03-30
|
||||
|
||||
Finished the type writerDict and wrote a simple test.
|
||||
|
||||
### 2015-03-25
|
||||
|
||||
I started to implement the writerDict.
|
||||
|
||||
### 2015-03-24
|
||||
|
||||
After thinking long about the LZMA2 code and several false starts, I
|
||||
have now a plan to create a self-sufficient lzma2 package that supports
|
||||
the classic LZMA format as well as LZMA2. The core idea is to support a
|
||||
baseReader and baseWriter type that support the basic LZMA stream
|
||||
without any headers. Both types must support the reuse of dictionaries
|
||||
and the opCodec.
|
||||
|
||||
### 2015-01-10
|
||||
|
||||
1. Implemented simple lzmago tool
|
||||
2. Tested tool against large 4.4G file
|
||||
- compression worked correctly; tested decompression with lzma
|
||||
- decompression hits a full buffer condition
|
||||
3. Fixed a bug in the compressor and wrote a test for it
|
||||
4. Executed full cycle for 4.4 GB file; performance can be improved ;-)
|
||||
|
||||
### 2015-01-11
|
||||
|
||||
- Release v0.2 because of the working LZMA encoder and decoder
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xz
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// putUint32LE stores x in p[0] through p[3] in little-endian order, least
// significant byte first. It panics if p is shorter than four bytes.
func putUint32LE(p []byte, x uint32) {
	for i := 0; i < 4; i++ {
		p[i] = byte(x >> (8 * uint(i)))
	}
}
|
||||
|
||||
// putUint64LE stores x in p[0] through p[7] in little-endian order, least
// significant byte first. It panics if p is shorter than eight bytes.
func putUint64LE(p []byte, x uint64) {
	for i := 0; i < 8; i++ {
		p[i] = byte(x >> (8 * uint(i)))
	}
}
|
||||
|
||||
// uint32LE decodes the first four bytes of p as a little-endian uint32
// value. It panics if p is shorter than four bytes.
func uint32LE(p []byte) uint32 {
	var x uint32
	for i := 0; i < 4; i++ {
		x |= uint32(p[i]) << (8 * uint(i))
	}
	return x
}
|
||||
|
||||
// putUvarint writes x into p as a uvarint: seven payload bits per byte,
// least significant group first, with the high bit set on every byte except
// the last. It returns the number of bytes written.
func putUvarint(p []byte, x uint64) int {
	n := 0
	for ; x >= 0x80; x >>= 7 {
		p[n] = byte(x) | 0x80
		n++
	}
	p[n] = byte(x)
	return n + 1
}
|
||||
|
||||
// errOverflowU64 indicates an overflow of the 64-bit unsigned integer.
var errOverflowU64 = errors.New("xz: uvarint overflows 64-bit unsigned integer")

// readUvarint reads a uvarint from the given byte reader. It returns the
// decoded value x, the number n of bytes consumed from r and an error.
//
// An error from r.ReadByte is returned unchanged together with the bytes
// consumed so far. errOverflowU64 is returned if the encoding does not fit
// into 64 bits. At most eleven bytes are consumed, so malformed input that
// consists only of continuation bytes cannot make the function read without
// bound.
func readUvarint(r io.ByteReader) (x uint64, n int, err error) {
	// A 64-bit value needs at most ten groups of seven bits.
	const maxUvarintLen = 10

	var s uint
	i := 0
	for {
		b, err := r.ReadByte()
		if err != nil {
			return x, i, err
		}
		i++
		// Stop as soon as the encoding is too long instead of waiting for
		// a terminating byte that may never come.
		if i > maxUvarintLen {
			return x, i, errOverflowU64
		}
		if b < 0x80 {
			// The tenth byte may only contribute the single top bit.
			if i == maxUvarintLen && b > 1 {
				return x, i, errOverflowU64
			}
			return x | uint64(b)<<s, i, nil
		}
		x |= uint64(b&0x7f) << s
		s += 7
	}
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xz
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"hash/crc32"
|
||||
"hash/crc64"
|
||||
)
|
||||
|
||||
// crc32Hash implements the hash.Hash32 interface with Sum returning the
|
||||
// crc32 value in little-endian encoding.
|
||||
type crc32Hash struct {
|
||||
hash.Hash32
|
||||
}
|
||||
|
||||
// Sum returns the crc32 value as little endian.
|
||||
func (h crc32Hash) Sum(b []byte) []byte {
|
||||
p := make([]byte, 4)
|
||||
putUint32LE(p, h.Hash32.Sum32())
|
||||
b = append(b, p...)
|
||||
return b
|
||||
}
|
||||
|
||||
// newCRC32 returns a CRC-32 hash that returns the 64-bit value in
|
||||
// little-endian encoding using the IEEE polynomial.
|
||||
func newCRC32() hash.Hash {
|
||||
return crc32Hash{Hash32: crc32.NewIEEE()}
|
||||
}
|
||||
|
||||
// crc64Hash implements the Hash64 interface with Sum returning the
|
||||
// CRC-64 value in little-endian encoding.
|
||||
type crc64Hash struct {
|
||||
hash.Hash64
|
||||
}
|
||||
|
||||
// Sum returns the CRC-64 value in little-endian encoding.
|
||||
func (h crc64Hash) Sum(b []byte) []byte {
|
||||
p := make([]byte, 8)
|
||||
putUint64LE(p, h.Hash64.Sum64())
|
||||
b = append(b, p...)
|
||||
return b
|
||||
}
|
||||
|
||||
// crc64Table is used to create a CRC-64 hash.
|
||||
var crc64Table = crc64.MakeTable(crc64.ECMA)
|
||||
|
||||
// newCRC64 returns a CRC-64 hash that returns the 64-bit value in
|
||||
// little-endian encoding using the ECMA polynomial.
|
||||
func newCRC64() hash.Hash {
|
||||
return crc64Hash{Hash64: crc64.New(crc64Table)}
|
||||
}
|
|
@ -0,0 +1,728 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xz
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
|
||||
"github.com/ulikunitz/xz/lzma"
|
||||
)
|
||||
|
||||
// allZeros reports whether every byte of p is zero. An empty or nil slice
// yields true.
func allZeros(p []byte) bool {
	for i := 0; i < len(p); i++ {
		if p[i] != 0 {
			return false
		}
	}
	return true
}
|
||||
|
||||
// padLen returns the number of padding bytes that bring n up to the next
// multiple of four; it is zero when n is already a multiple of four.
func padLen(n int64) int {
	rem := int(n % 4)
	if rem <= 0 {
		return rem
	}
	return 4 - rem
}
|
||||
|
||||
/*** Header ***/
|
||||
|
||||
// headerMagic stores the magic bytes for the header
|
||||
var headerMagic = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00}
|
||||
|
||||
// HeaderLen provides the length of the xz file header.
|
||||
const HeaderLen = 12
|
||||
|
||||
// Constants for the checksum methods supported by xz.
|
||||
const (
|
||||
CRC32 byte = 0x1
|
||||
CRC64 = 0x4
|
||||
SHA256 = 0xa
|
||||
)
|
||||
|
||||
// errInvalidFlags indicates that flags are invalid.
|
||||
var errInvalidFlags = errors.New("xz: invalid flags")
|
||||
|
||||
// verifyFlags returns the error errInvalidFlags if the value is
|
||||
// invalid.
|
||||
func verifyFlags(flags byte) error {
|
||||
switch flags {
|
||||
case CRC32, CRC64, SHA256:
|
||||
return nil
|
||||
default:
|
||||
return errInvalidFlags
|
||||
}
|
||||
}
|
||||
|
||||
// flagstrings maps flag values to strings.
|
||||
var flagstrings = map[byte]string{
|
||||
CRC32: "CRC-32",
|
||||
CRC64: "CRC-64",
|
||||
SHA256: "SHA-256",
|
||||
}
|
||||
|
||||
// flagString returns the string representation for the given flags.
|
||||
func flagString(flags byte) string {
|
||||
s, ok := flagstrings[flags]
|
||||
if !ok {
|
||||
return "invalid"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// newHashFunc returns a function that creates hash instances for the
|
||||
// hash method encoded in flags.
|
||||
func newHashFunc(flags byte) (newHash func() hash.Hash, err error) {
|
||||
switch flags {
|
||||
case CRC32:
|
||||
newHash = newCRC32
|
||||
case CRC64:
|
||||
newHash = newCRC64
|
||||
case SHA256:
|
||||
newHash = sha256.New
|
||||
default:
|
||||
err = errInvalidFlags
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// header provides the actual content of the xz file header: the flags.
|
||||
type header struct {
|
||||
flags byte
|
||||
}
|
||||
|
||||
// Errors returned by readHeader.
|
||||
var errHeaderMagic = errors.New("xz: invalid header magic bytes")
|
||||
|
||||
// ValidHeader checks whether data is a correct xz file header. The
|
||||
// length of data must be HeaderLen.
|
||||
func ValidHeader(data []byte) bool {
|
||||
var h header
|
||||
err := h.UnmarshalBinary(data)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// String returns a string representation of the flags.
|
||||
func (h header) String() string {
|
||||
return flagString(h.flags)
|
||||
}
|
||||
|
||||
// UnmarshalBinary reads header from the provided data slice.
|
||||
func (h *header) UnmarshalBinary(data []byte) error {
|
||||
// header length
|
||||
if len(data) != HeaderLen {
|
||||
return errors.New("xz: wrong file header length")
|
||||
}
|
||||
|
||||
// magic header
|
||||
if !bytes.Equal(headerMagic, data[:6]) {
|
||||
return errHeaderMagic
|
||||
}
|
||||
|
||||
// checksum
|
||||
crc := crc32.NewIEEE()
|
||||
crc.Write(data[6:8])
|
||||
if uint32LE(data[8:]) != crc.Sum32() {
|
||||
return errors.New("xz: invalid checksum for file header")
|
||||
}
|
||||
|
||||
// stream flags
|
||||
if data[6] != 0 {
|
||||
return errInvalidFlags
|
||||
}
|
||||
flags := data[7]
|
||||
if err := verifyFlags(flags); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
h.flags = flags
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalBinary generates the xz file header.
|
||||
func (h *header) MarshalBinary() (data []byte, err error) {
|
||||
if err = verifyFlags(h.flags); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data = make([]byte, 12)
|
||||
copy(data, headerMagic)
|
||||
data[7] = h.flags
|
||||
|
||||
crc := crc32.NewIEEE()
|
||||
crc.Write(data[6:8])
|
||||
putUint32LE(data[8:], crc.Sum32())
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
/*** Footer ***/
|
||||
|
||||
// footerLen defines the length of the footer.
|
||||
const footerLen = 12
|
||||
|
||||
// footerMagic contains the footer magic bytes.
|
||||
var footerMagic = []byte{'Y', 'Z'}
|
||||
|
||||
// footer represents the content of the xz file footer.
|
||||
type footer struct {
|
||||
indexSize int64
|
||||
flags byte
|
||||
}
|
||||
|
||||
// String prints a string representation of the footer structure.
|
||||
func (f footer) String() string {
|
||||
return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize)
|
||||
}
|
||||
|
||||
// Minimum and maximum for the size of the index (backward size).
|
||||
const (
|
||||
minIndexSize = 4
|
||||
maxIndexSize = (1 << 32) * 4
|
||||
)
|
||||
|
||||
// MarshalBinary converts footer values into an xz file footer. Note
|
||||
// that the footer value is checked for correctness.
|
||||
func (f *footer) MarshalBinary() (data []byte, err error) {
|
||||
if err = verifyFlags(f.flags); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) {
|
||||
return nil, errors.New("xz: index size out of range")
|
||||
}
|
||||
if f.indexSize%4 != 0 {
|
||||
return nil, errors.New(
|
||||
"xz: index size not aligned to four bytes")
|
||||
}
|
||||
|
||||
data = make([]byte, footerLen)
|
||||
|
||||
// backward size (index size)
|
||||
s := (f.indexSize / 4) - 1
|
||||
putUint32LE(data[4:], uint32(s))
|
||||
// flags
|
||||
data[9] = f.flags
|
||||
// footer magic
|
||||
copy(data[10:], footerMagic)
|
||||
|
||||
// CRC-32
|
||||
crc := crc32.NewIEEE()
|
||||
crc.Write(data[4:10])
|
||||
putUint32LE(data, crc.Sum32())
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// UnmarshalBinary sets the footer value by unmarshalling an xz file
|
||||
// footer.
|
||||
func (f *footer) UnmarshalBinary(data []byte) error {
|
||||
if len(data) != footerLen {
|
||||
return errors.New("xz: wrong footer length")
|
||||
}
|
||||
|
||||
// magic bytes
|
||||
if !bytes.Equal(data[10:], footerMagic) {
|
||||
return errors.New("xz: footer magic invalid")
|
||||
}
|
||||
|
||||
// CRC-32
|
||||
crc := crc32.NewIEEE()
|
||||
crc.Write(data[4:10])
|
||||
if uint32LE(data) != crc.Sum32() {
|
||||
return errors.New("xz: footer checksum error")
|
||||
}
|
||||
|
||||
var g footer
|
||||
// backward size (index size)
|
||||
g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4
|
||||
|
||||
// flags
|
||||
if data[8] != 0 {
|
||||
return errInvalidFlags
|
||||
}
|
||||
g.flags = data[9]
|
||||
if err := verifyFlags(g.flags); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*f = g
|
||||
return nil
|
||||
}
|
||||
|
||||
/*** Block Header ***/
|
||||
|
||||
// blockHeader represents the content of an xz block header.
|
||||
type blockHeader struct {
|
||||
compressedSize int64
|
||||
uncompressedSize int64
|
||||
filters []filter
|
||||
}
|
||||
|
||||
// String converts the block header into a string.
|
||||
func (h blockHeader) String() string {
|
||||
var buf bytes.Buffer
|
||||
first := true
|
||||
if h.compressedSize >= 0 {
|
||||
fmt.Fprintf(&buf, "compressed size %d", h.compressedSize)
|
||||
first = false
|
||||
}
|
||||
if h.uncompressedSize >= 0 {
|
||||
if !first {
|
||||
buf.WriteString(" ")
|
||||
}
|
||||
fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize)
|
||||
first = false
|
||||
}
|
||||
for _, f := range h.filters {
|
||||
if !first {
|
||||
buf.WriteString(" ")
|
||||
}
|
||||
fmt.Fprintf(&buf, "filter %s", f)
|
||||
first = false
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// Masks for the block flags.
|
||||
const (
|
||||
filterCountMask = 0x03
|
||||
compressedSizePresent = 0x40
|
||||
uncompressedSizePresent = 0x80
|
||||
reservedBlockFlags = 0x3C
|
||||
)
|
||||
|
||||
// errIndexIndicator signals that an index indicator (0x00) has been found
|
||||
// instead of an expected block header indicator.
|
||||
var errIndexIndicator = errors.New("xz: found index indicator")
|
||||
|
||||
// readBlockHeader reads the block header.
|
||||
func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) {
|
||||
var buf bytes.Buffer
|
||||
buf.Grow(20)
|
||||
|
||||
// block header size
|
||||
z, err := io.CopyN(&buf, r, 1)
|
||||
n = int(z)
|
||||
if err != nil {
|
||||
return nil, n, err
|
||||
}
|
||||
s := buf.Bytes()[0]
|
||||
if s == 0 {
|
||||
return nil, n, errIndexIndicator
|
||||
}
|
||||
|
||||
// read complete header
|
||||
headerLen := (int(s) + 1) * 4
|
||||
buf.Grow(headerLen - 1)
|
||||
z, err = io.CopyN(&buf, r, int64(headerLen-1))
|
||||
n += int(z)
|
||||
if err != nil {
|
||||
return nil, n, err
|
||||
}
|
||||
|
||||
// unmarshal block header
|
||||
h = new(blockHeader)
|
||||
if err = h.UnmarshalBinary(buf.Bytes()); err != nil {
|
||||
return nil, n, err
|
||||
}
|
||||
|
||||
return h, n, nil
|
||||
}
|
||||
|
||||
// readSizeInBlockHeader reads the uncompressed or compressed size
|
||||
// fields in the block header. The present value informs the function
|
||||
// whether the respective field is actually present in the header.
|
||||
func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) {
|
||||
if !present {
|
||||
return -1, nil
|
||||
}
|
||||
x, _, err := readUvarint(r)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if x >= 1<<63 {
|
||||
return 0, errors.New("xz: size overflow in block header")
|
||||
}
|
||||
return int64(x), nil
|
||||
}
|
||||
|
||||
// UnmarshalBinary unmarshals the block header.
|
||||
func (h *blockHeader) UnmarshalBinary(data []byte) error {
|
||||
// Check header length
|
||||
s := data[0]
|
||||
if data[0] == 0 {
|
||||
return errIndexIndicator
|
||||
}
|
||||
headerLen := (int(s) + 1) * 4
|
||||
if len(data) != headerLen {
|
||||
return fmt.Errorf("xz: data length %d; want %d", len(data),
|
||||
headerLen)
|
||||
}
|
||||
n := headerLen - 4
|
||||
|
||||
// Check CRC-32
|
||||
crc := crc32.NewIEEE()
|
||||
crc.Write(data[:n])
|
||||
if crc.Sum32() != uint32LE(data[n:]) {
|
||||
return errors.New("xz: checksum error for block header")
|
||||
}
|
||||
|
||||
// Block header flags
|
||||
flags := data[1]
|
||||
if flags&reservedBlockFlags != 0 {
|
||||
return errors.New("xz: reserved block header flags set")
|
||||
}
|
||||
|
||||
r := bytes.NewReader(data[2:n])
|
||||
|
||||
// Compressed size
|
||||
var err error
|
||||
h.compressedSize, err = readSizeInBlockHeader(
|
||||
r, flags&compressedSizePresent != 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Uncompressed size
|
||||
h.uncompressedSize, err = readSizeInBlockHeader(
|
||||
r, flags&uncompressedSizePresent != 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
h.filters, err = readFilters(r, int(flags&filterCountMask)+1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check padding
|
||||
// Since headerLen is a multiple of 4 we don't need to check
|
||||
// alignment.
|
||||
k := r.Len()
|
||||
// The standard spec says that the padding should have not more
|
||||
// than 3 bytes. However we found paddings of 4 or 5 in the
|
||||
// wild. See https://github.com/ulikunitz/xz/pull/11 and
|
||||
// https://github.com/ulikunitz/xz/issues/15
|
||||
//
|
||||
// The only reasonable approach seems to be to ignore the
|
||||
// padding size. We still check that all padding bytes are zero.
|
||||
if !allZeros(data[n-k : n]) {
|
||||
return errPadding
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalBinary marshals the binary header.
|
||||
func (h *blockHeader) MarshalBinary() (data []byte, err error) {
|
||||
if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) {
|
||||
return nil, errors.New("xz: filter count wrong")
|
||||
}
|
||||
for i, f := range h.filters {
|
||||
if i < len(h.filters)-1 {
|
||||
if f.id() == lzmaFilterID {
|
||||
return nil, errors.New(
|
||||
"xz: LZMA2 filter is not the last")
|
||||
}
|
||||
} else {
|
||||
// last filter
|
||||
if f.id() != lzmaFilterID {
|
||||
return nil, errors.New("xz: " +
|
||||
"last filter must be the LZMA2 filter")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
// header size must set at the end
|
||||
buf.WriteByte(0)
|
||||
|
||||
// flags
|
||||
flags := byte(len(h.filters) - 1)
|
||||
if h.compressedSize >= 0 {
|
||||
flags |= compressedSizePresent
|
||||
}
|
||||
if h.uncompressedSize >= 0 {
|
||||
flags |= uncompressedSizePresent
|
||||
}
|
||||
buf.WriteByte(flags)
|
||||
|
||||
p := make([]byte, 10)
|
||||
if h.compressedSize >= 0 {
|
||||
k := putUvarint(p, uint64(h.compressedSize))
|
||||
buf.Write(p[:k])
|
||||
}
|
||||
if h.uncompressedSize >= 0 {
|
||||
k := putUvarint(p, uint64(h.uncompressedSize))
|
||||
buf.Write(p[:k])
|
||||
}
|
||||
|
||||
for _, f := range h.filters {
|
||||
fp, err := f.MarshalBinary()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
buf.Write(fp)
|
||||
}
|
||||
|
||||
// padding
|
||||
for i := padLen(int64(buf.Len())); i > 0; i-- {
|
||||
buf.WriteByte(0)
|
||||
}
|
||||
|
||||
// crc place holder
|
||||
buf.Write(p[:4])
|
||||
|
||||
data = buf.Bytes()
|
||||
if len(data)%4 != 0 {
|
||||
panic("data length not aligned")
|
||||
}
|
||||
s := len(data)/4 - 1
|
||||
if !(1 < s && s <= 255) {
|
||||
panic("wrong block header size")
|
||||
}
|
||||
data[0] = byte(s)
|
||||
|
||||
crc := crc32.NewIEEE()
|
||||
crc.Write(data[:len(data)-4])
|
||||
putUint32LE(data[len(data)-4:], crc.Sum32())
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Limits that apply to the filters stored in an xz block header.
const (
	// minFilters and maxFilters bound the number of filters a block
	// header may carry when it is marshalled.
	minFilters = 1
	maxFilters = 4
	// minReservedID is the smallest filter ID that readFilter reports as
	// reserved.
	minReservedID = 0x4000000000000000
)
|
||||
|
||||
// filter represents a filter in the block header.
|
||||
type filter interface {
|
||||
id() uint64
|
||||
UnmarshalBinary(data []byte) error
|
||||
MarshalBinary() (data []byte, err error)
|
||||
reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error)
|
||||
writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error)
|
||||
// filter must be last filter
|
||||
last() bool
|
||||
}
|
||||
|
||||
// readFilter reads a block filter from the block header. At this point
|
||||
// in time only the LZMA2 filter is supported.
|
||||
func readFilter(r io.Reader) (f filter, err error) {
|
||||
br := lzma.ByteReader(r)
|
||||
|
||||
// index
|
||||
id, _, err := readUvarint(br)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var data []byte
|
||||
switch id {
|
||||
case lzmaFilterID:
|
||||
data = make([]byte, lzmaFilterLen)
|
||||
data[0] = lzmaFilterID
|
||||
if _, err = io.ReadFull(r, data[1:]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f = new(lzmaFilter)
|
||||
default:
|
||||
if id >= minReservedID {
|
||||
return nil, errors.New(
|
||||
"xz: reserved filter id in block stream header")
|
||||
}
|
||||
return nil, errors.New("xz: invalid filter id")
|
||||
}
|
||||
if err = f.UnmarshalBinary(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f, err
|
||||
}
|
||||
|
||||
// readFilters reads count filters. At this point in time only the count
|
||||
// 1 is supported.
|
||||
func readFilters(r io.Reader, count int) (filters []filter, err error) {
|
||||
if count != 1 {
|
||||
return nil, errors.New("xz: unsupported filter count")
|
||||
}
|
||||
f, err := readFilter(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return []filter{f}, err
|
||||
}
|
||||
|
||||
// writeFilters writes the filters.
|
||||
func writeFilters(w io.Writer, filters []filter) (n int, err error) {
|
||||
for _, f := range filters {
|
||||
p, err := f.MarshalBinary()
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
k, err := w.Write(p)
|
||||
n += k
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
/*** Index ***/
|
||||
|
||||
// record is one entry of the xz index; it stores the two sizes the index
// keeps for a block.
type record struct {
	unpaddedSize, uncompressedSize int64
}
|
||||
|
||||
// readRecord reads an index record.
|
||||
func readRecord(r io.ByteReader) (rec record, n int, err error) {
|
||||
u, k, err := readUvarint(r)
|
||||
n += k
|
||||
if err != nil {
|
||||
return rec, n, err
|
||||
}
|
||||
rec.unpaddedSize = int64(u)
|
||||
if rec.unpaddedSize < 0 {
|
||||
return rec, n, errors.New("xz: unpadded size negative")
|
||||
}
|
||||
|
||||
u, k, err = readUvarint(r)
|
||||
n += k
|
||||
if err != nil {
|
||||
return rec, n, err
|
||||
}
|
||||
rec.uncompressedSize = int64(u)
|
||||
if rec.uncompressedSize < 0 {
|
||||
return rec, n, errors.New("xz: uncompressed size negative")
|
||||
}
|
||||
|
||||
return rec, n, nil
|
||||
}
|
||||
|
||||
// MarshalBinary converts an index record in its binary encoding.
|
||||
func (rec *record) MarshalBinary() (data []byte, err error) {
|
||||
// maximum length of a uvarint is 10
|
||||
p := make([]byte, 20)
|
||||
n := putUvarint(p, uint64(rec.unpaddedSize))
|
||||
n += putUvarint(p[n:], uint64(rec.uncompressedSize))
|
||||
return p[:n], nil
|
||||
}
|
||||
|
||||
// writeIndex writes the index, a sequence of records.
|
||||
func writeIndex(w io.Writer, index []record) (n int64, err error) {
|
||||
crc := crc32.NewIEEE()
|
||||
mw := io.MultiWriter(w, crc)
|
||||
|
||||
// index indicator
|
||||
k, err := mw.Write([]byte{0})
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
|
||||
// number of records
|
||||
p := make([]byte, 10)
|
||||
k = putUvarint(p, uint64(len(index)))
|
||||
k, err = mw.Write(p[:k])
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
|
||||
// list of records
|
||||
for _, rec := range index {
|
||||
p, err := rec.MarshalBinary()
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
k, err = mw.Write(p)
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
|
||||
// index padding
|
||||
k, err = mw.Write(make([]byte, padLen(int64(n))))
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
|
||||
// crc32 checksum
|
||||
putUint32LE(p, crc.Sum32())
|
||||
k, err = w.Write(p[:4])
|
||||
n += int64(k)
|
||||
|
||||
return n, err
|
||||
}
|
||||
|
||||
// readIndexBody reads the index from the reader. It assumes that the
|
||||
// index indicator has already been read.
|
||||
func readIndexBody(r io.Reader) (records []record, n int64, err error) {
|
||||
crc := crc32.NewIEEE()
|
||||
// index indicator
|
||||
crc.Write([]byte{0})
|
||||
|
||||
br := lzma.ByteReader(io.TeeReader(r, crc))
|
||||
|
||||
// number of records
|
||||
u, k, err := readUvarint(br)
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return nil, n, err
|
||||
}
|
||||
recLen := int(u)
|
||||
if recLen < 0 || uint64(recLen) != u {
|
||||
return nil, n, errors.New("xz: record number overflow")
|
||||
}
|
||||
|
||||
// list of records
|
||||
records = make([]record, recLen)
|
||||
for i := range records {
|
||||
records[i], k, err = readRecord(br)
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return nil, n, err
|
||||
}
|
||||
}
|
||||
|
||||
p := make([]byte, padLen(int64(n+1)), 4)
|
||||
k, err = io.ReadFull(br.(io.Reader), p)
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return nil, n, err
|
||||
}
|
||||
if !allZeros(p) {
|
||||
return nil, n, errors.New("xz: non-zero byte in index padding")
|
||||
}
|
||||
|
||||
// crc32
|
||||
s := crc.Sum32()
|
||||
p = p[:4]
|
||||
k, err = io.ReadFull(br.(io.Reader), p)
|
||||
n += int64(k)
|
||||
if err != nil {
|
||||
return records, n, err
|
||||
}
|
||||
if uint32LE(p) != s {
|
||||
return nil, n, errors.New("xz: wrong checksum for index")
|
||||
}
|
||||
|
||||
return records, n, nil
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,181 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package hash
|
||||
|
||||
// CyclicPoly implements a cyclic polynomial rolling hash over a window
// of fixed length.
type CyclicPoly struct {
	// h is the current hash value.
	h uint64
	// p stores the per-byte hash values of the bytes in the window; its
	// capacity is the window length.
	p []uint64
	// i is the index in p of the oldest entry once the window is full.
	i int
}
|
||||
|
||||
// ror rotates the unsigned 64-bit integer x to the right by s bits. The
// rotation count is reduced modulo 64, so any value of s is accepted;
// rotating by a multiple of 64 returns x unchanged.
func ror(x uint64, s uint) uint64 {
	// Without the reduction a count above 64 would shift all bits out
	// on both sides and yield 0 instead of a rotation.
	s %= 64
	return (x >> s) | (x << (64 - s))
}
|
||||
|
||||
// NewCyclicPoly creates a new instance of the CyclicPoly structure. The
|
||||
// argument n gives the number of bytes for which a hash will be executed.
|
||||
// This number must be positive; the method panics if this isn't the case.
|
||||
func NewCyclicPoly(n int) *CyclicPoly {
|
||||
if n < 1 {
|
||||
panic("argument n must be positive")
|
||||
}
|
||||
return &CyclicPoly{p: make([]uint64, 0, n)}
|
||||
}
|
||||
|
||||
// Len returns the length of the byte sequence for which a hash is generated.
|
||||
func (r *CyclicPoly) Len() int {
|
||||
return cap(r.p)
|
||||
}
|
||||
|
||||
// RollByte hashes the next byte and returns a hash value. The complete becomes
|
||||
// available after at least Len() bytes have been hashed.
|
||||
func (r *CyclicPoly) RollByte(x byte) uint64 {
|
||||
y := hash[x]
|
||||
if len(r.p) < cap(r.p) {
|
||||
r.h = ror(r.h, 1) ^ y
|
||||
r.p = append(r.p, y)
|
||||
} else {
|
||||
r.h ^= ror(r.p[r.i], uint(cap(r.p)-1))
|
||||
r.h = ror(r.h, 1) ^ y
|
||||
r.p[r.i] = y
|
||||
r.i = (r.i + 1) % cap(r.p)
|
||||
}
|
||||
return r.h
|
||||
}
|
||||
|
||||
// hash stores the fixed 64-bit hash value for each possible byte value.
|
||||
var hash = [256]uint64{
|
||||
0x2e4fc3f904065142, 0xc790984cfbc99527,
|
||||
0x879f95eb8c62f187, 0x3b61be86b5021ef2,
|
||||
0x65a896a04196f0a5, 0xc5b307b80470b59e,
|
||||
0xd3bff376a70df14b, 0xc332f04f0b3f1701,
|
||||
0x753b5f0e9abf3e0d, 0xb41538fdfe66ef53,
|
||||
0x1906a10c2c1c0208, 0xfb0c712a03421c0d,
|
||||
0x38be311a65c9552b, 0xfee7ee4ca6445c7e,
|
||||
0x71aadeded184f21e, 0xd73426fccda23b2d,
|
||||
0x29773fb5fb9600b5, 0xce410261cd32981a,
|
||||
0xfe2848b3c62dbc2d, 0x459eaaff6e43e11c,
|
||||
0xc13e35fc9c73a887, 0xf30ed5c201e76dbc,
|
||||
0xa5f10b3910482cea, 0x2945d59be02dfaad,
|
||||
0x06ee334ff70571b5, 0xbabf9d8070f44380,
|
||||
0xee3e2e9912ffd27c, 0x2a7118d1ea6b8ea7,
|
||||
0x26183cb9f7b1664c, 0xea71dac7da068f21,
|
||||
0xea92eca5bd1d0bb7, 0x415595862defcd75,
|
||||
0x248a386023c60648, 0x9cf021ab284b3c8a,
|
||||
0xfc9372df02870f6c, 0x2b92d693eeb3b3fc,
|
||||
0x73e799d139dc6975, 0x7b15ae312486363c,
|
||||
0xb70e5454a2239c80, 0x208e3fb31d3b2263,
|
||||
0x01f563cabb930f44, 0x2ac4533d2a3240d8,
|
||||
0x84231ed1064f6f7c, 0xa9f020977c2a6d19,
|
||||
0x213c227271c20122, 0x09fe8a9a0a03d07a,
|
||||
0x4236dc75bcaf910c, 0x460a8b2bead8f17e,
|
||||
0xd9b27be1aa07055f, 0xd202d5dc4b11c33e,
|
||||
0x70adb010543bea12, 0xcdae938f7ea6f579,
|
||||
0x3f3d870208672f4d, 0x8e6ccbce9d349536,
|
||||
0xe4c0871a389095ae, 0xf5f2a49152bca080,
|
||||
0x9a43f9b97269934e, 0xc17b3753cb6f475c,
|
||||
0xd56d941e8e206bd4, 0xac0a4f3e525eda00,
|
||||
0xa06d5a011912a550, 0x5537ed19537ad1df,
|
||||
0xa32fe713d611449d, 0x2a1d05b47c3b579f,
|
||||
0x991d02dbd30a2a52, 0x39e91e7e28f93eb0,
|
||||
0x40d06adb3e92c9ac, 0x9b9d3afde1c77c97,
|
||||
0x9a3f3f41c02c616f, 0x22ecd4ba00f60c44,
|
||||
0x0b63d5d801708420, 0x8f227ca8f37ffaec,
|
||||
0x0256278670887c24, 0x107e14877dbf540b,
|
||||
0x32c19f2786ac1c05, 0x1df5b12bb4bc9c61,
|
||||
0xc0cac129d0d4c4e2, 0x9fdb52ee9800b001,
|
||||
0x31f601d5d31c48c4, 0x72ff3c0928bcaec7,
|
||||
0xd99264421147eb03, 0x535a2d6d38aefcfe,
|
||||
0x6ba8b4454a916237, 0xfa39366eaae4719c,
|
||||
0x10f00fd7bbb24b6f, 0x5bd23185c76c84d4,
|
||||
0xb22c3d7e1b00d33f, 0x3efc20aa6bc830a8,
|
||||
0xd61c2503fe639144, 0x30ce625441eb92d3,
|
||||
0xe5d34cf359e93100, 0xa8e5aa13f2b9f7a5,
|
||||
0x5c2b8d851ca254a6, 0x68fb6c5e8b0d5fdf,
|
||||
0xc7ea4872c96b83ae, 0x6dd5d376f4392382,
|
||||
0x1be88681aaa9792f, 0xfef465ee1b6c10d9,
|
||||
0x1f98b65ed43fcb2e, 0x4d1ca11eb6e9a9c9,
|
||||
0x7808e902b3857d0b, 0x171c9c4ea4607972,
|
||||
0x58d66274850146df, 0x42b311c10d3981d1,
|
||||
0x647fa8c621c41a4c, 0xf472771c66ddfedc,
|
||||
0x338d27e3f847b46b, 0x6402ce3da97545ce,
|
||||
0x5162db616fc38638, 0x9c83be97bc22a50e,
|
||||
0x2d3d7478a78d5e72, 0xe621a9b938fd5397,
|
||||
0x9454614eb0f81c45, 0x395fb6e742ed39b6,
|
||||
0x77dd9179d06037bf, 0xc478d0fee4d2656d,
|
||||
0x35d9d6cb772007af, 0x83a56e92c883f0f6,
|
||||
0x27937453250c00a1, 0x27bd6ebc3a46a97d,
|
||||
0x9f543bf784342d51, 0xd158f38c48b0ed52,
|
||||
0x8dd8537c045f66b4, 0x846a57230226f6d5,
|
||||
0x6b13939e0c4e7cdf, 0xfca25425d8176758,
|
||||
0x92e5fc6cd52788e6, 0x9992e13d7a739170,
|
||||
0x518246f7a199e8ea, 0xf104c2a71b9979c7,
|
||||
0x86b3ffaabea4768f, 0x6388061cf3e351ad,
|
||||
0x09d9b5295de5bbb5, 0x38bf1638c2599e92,
|
||||
0x1d759846499e148d, 0x4c0ff015e5f96ef4,
|
||||
0xa41a94cfa270f565, 0x42d76f9cb2326c0b,
|
||||
0x0cf385dd3c9c23ba, 0x0508a6c7508d6e7a,
|
||||
0x337523aabbe6cf8d, 0x646bb14001d42b12,
|
||||
0xc178729d138adc74, 0xf900ef4491f24086,
|
||||
0xee1a90d334bb5ac4, 0x9755c92247301a50,
|
||||
0xb999bf7c4ff1b610, 0x6aeeb2f3b21e8fc9,
|
||||
0x0fa8084cf91ac6ff, 0x10d226cf136e6189,
|
||||
0xd302057a07d4fb21, 0x5f03800e20a0fcc3,
|
||||
0x80118d4ae46bd210, 0x58ab61a522843733,
|
||||
0x51edd575c5432a4b, 0x94ee6ff67f9197f7,
|
||||
0x765669e0e5e8157b, 0xa5347830737132f0,
|
||||
0x3ba485a69f01510c, 0x0b247d7b957a01c3,
|
||||
0x1b3d63449fd807dc, 0x0fdc4721c30ad743,
|
||||
0x8b535ed3829b2b14, 0xee41d0cad65d232c,
|
||||
0xe6a99ed97a6a982f, 0x65ac6194c202003d,
|
||||
0x692accf3a70573eb, 0xcc3c02c3e200d5af,
|
||||
0x0d419e8b325914a3, 0x320f160f42c25e40,
|
||||
0x00710d647a51fe7a, 0x3c947692330aed60,
|
||||
0x9288aa280d355a7a, 0xa1806a9b791d1696,
|
||||
0x5d60e38496763da1, 0x6c69e22e613fd0f4,
|
||||
0x977fc2a5aadffb17, 0xfb7bd063fc5a94ba,
|
||||
0x460c17992cbaece1, 0xf7822c5444d3297f,
|
||||
0x344a9790c69b74aa, 0xb80a42e6cae09dce,
|
||||
0x1b1361eaf2b1e757, 0xd84c1e758e236f01,
|
||||
0x88e0b7be347627cc, 0x45246009b7a99490,
|
||||
0x8011c6dd3fe50472, 0xc341d682bffb99d7,
|
||||
0x2511be93808e2d15, 0xd5bc13d7fd739840,
|
||||
0x2a3cd030679ae1ec, 0x8ad9898a4b9ee157,
|
||||
0x3245fef0a8eaf521, 0x3d6d8dbbb427d2b0,
|
||||
0x1ed146d8968b3981, 0x0c6a28bf7d45f3fc,
|
||||
0x4a1fd3dbcee3c561, 0x4210ff6a476bf67e,
|
||||
0xa559cce0d9199aac, 0xde39d47ef3723380,
|
||||
0xe5b69d848ce42e35, 0xefa24296f8e79f52,
|
||||
0x70190b59db9a5afc, 0x26f166cdb211e7bf,
|
||||
0x4deaf2df3c6b8ef5, 0xf171dbdd670f1017,
|
||||
0xb9059b05e9420d90, 0x2f0da855c9388754,
|
||||
0x611d5e9ab77949cc, 0x2912038ac01163f4,
|
||||
0x0231df50402b2fba, 0x45660fc4f3245f58,
|
||||
0xb91cc97c7c8dac50, 0xb72d2aafe4953427,
|
||||
0xfa6463f87e813d6b, 0x4515f7ee95d5c6a2,
|
||||
0x1310e1c1a48d21c3, 0xad48a7810cdd8544,
|
||||
0x4d5bdfefd5c9e631, 0xa43ed43f1fdcb7de,
|
||||
0xe70cfc8fe1ee9626, 0xef4711b0d8dda442,
|
||||
0xb80dd9bd4dab6c93, 0xa23be08d31ba4d93,
|
||||
0x9b37db9d0335a39c, 0x494b6f870f5cfebc,
|
||||
0x6d1b3c1149dda943, 0x372c943a518c1093,
|
||||
0xad27af45e77c09c4, 0x3b6f92b646044604,
|
||||
0xac2917909f5fcf4f, 0x2069a60e977e5557,
|
||||
0x353a469e71014de5, 0x24be356281f55c15,
|
||||
0x2b6d710ba8e9adea, 0x404ad1751c749c29,
|
||||
0xed7311bf23d7f185, 0xba4f6976b4acc43e,
|
||||
0x32d7198d2bc39000, 0xee667019014d6e01,
|
||||
0x494ef3e128d14c83, 0x1f95a152baecd6be,
|
||||
0x201648dff1f483a5, 0x68c28550c8384af6,
|
||||
0x5fc834a6824a7f48, 0x7cd06cb7365eaf28,
|
||||
0xd82bbd95e9b30909, 0x234f0d1694c53f6d,
|
||||
0xd2fb7f4a96d83f4a, 0xff0d5da83acac05e,
|
||||
0xf8f6b97f5585080a, 0x74236084be57b95b,
|
||||
0xa25e40c03bbc36ad, 0x6b6e5c14ce88465b,
|
||||
0x4378ffe93e1528c5, 0x94ca92a17118e2d2,
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package hash provides rolling hashes.
|
||||
|
||||
Rolling hashes have to be used for maintaining the positions of n-byte
|
||||
sequences in the dictionary buffer.
|
||||
|
||||
The package currently provides the Rabin-Karp rolling hash and a Cyclic
Polynomial hash. Both implement the Roller interface and can therefore be
passed to the Hashes function.
|
||||
*/
|
||||
package hash
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package hash
|
||||
|
||||
// A is the default multiplier of the Rabin-Karp rolling hash; it is used
// by NewRabinKarp. It is meant to be a random prime.
const A = 0x97b548add41d5da1
|
||||
|
||||
// RabinKarp computes a Rabin-Karp rolling hash over a window of fixed
// length.
type RabinKarp struct {
	// A is the multiplier of the hash.
	A uint64
	// aOldest is A^n for the window length n; it is the factor the
	// oldest byte of a full window has accumulated.
	aOldest uint64
	// h is the current hash value.
	h uint64
	// p stores the bytes of the window; its capacity is the window
	// length.
	p []byte
	// i is the index in p of the oldest byte once the window is full.
	i int
}
|
||||
|
||||
// NewRabinKarp creates a new RabinKarp value. The argument n defines the
|
||||
// length of the byte sequence to be hashed. The default constant will will be
|
||||
// used.
|
||||
func NewRabinKarp(n int) *RabinKarp {
|
||||
return NewRabinKarpConst(n, A)
|
||||
}
|
||||
|
||||
// NewRabinKarpConst creates a new RabinKarp value. The argument n defines the
|
||||
// length of the byte sequence to be hashed. The argument a provides the
|
||||
// constant used to compute the hash.
|
||||
func NewRabinKarpConst(n int, a uint64) *RabinKarp {
|
||||
if n <= 0 {
|
||||
panic("number of bytes n must be positive")
|
||||
}
|
||||
aOldest := uint64(1)
|
||||
// There are faster methods. For the small n required by the LZMA
|
||||
// compressor O(n) is sufficient.
|
||||
for i := 0; i < n; i++ {
|
||||
aOldest *= a
|
||||
}
|
||||
return &RabinKarp{
|
||||
A: a, aOldest: aOldest,
|
||||
p: make([]byte, 0, n),
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the length of the byte sequence.
|
||||
func (r *RabinKarp) Len() int {
|
||||
return cap(r.p)
|
||||
}
|
||||
|
||||
// RollByte computes the hash after x has been added.
|
||||
func (r *RabinKarp) RollByte(x byte) uint64 {
|
||||
if len(r.p) < cap(r.p) {
|
||||
r.h += uint64(x)
|
||||
r.h *= r.A
|
||||
r.p = append(r.p, x)
|
||||
} else {
|
||||
r.h -= uint64(r.p[r.i]) * r.aOldest
|
||||
r.h += uint64(x)
|
||||
r.h *= r.A
|
||||
r.p[r.i] = x
|
||||
r.i = (r.i + 1) % cap(r.p)
|
||||
}
|
||||
return r.h
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package hash
|
||||
|
||||
// Roller is the interface implemented by rolling hashes. The value
// returned by RollByte covers a complete window only after RollByte has
// been called Len times.
type Roller interface {
	// RollByte adds x to the hash and returns the new hash value.
	RollByte(x byte) uint64
	// Len returns the number of bytes a hash value covers.
	Len() int
}
|
||||
|
||||
// Hashes computes all hash values for the array p. Note that the state of the
|
||||
// roller is changed.
|
||||
func Hashes(r Roller, p []byte) []uint64 {
|
||||
n := r.Len()
|
||||
if len(p) < n {
|
||||
return nil
|
||||
}
|
||||
h := make([]uint64, len(p)-n+1)
|
||||
for i := 0; i < n-1; i++ {
|
||||
r.RollByte(p[i])
|
||||
}
|
||||
for i := range h {
|
||||
h[i] = r.RollByte(p[i+n-1])
|
||||
}
|
||||
return h
|
||||
}
|
|
@ -0,0 +1,457 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package xlog provides a simple logging package that allows to disable
// certain message categories. It defines a type, Logger, with multiple
// methods for formatting output. The package also has a predefined
// 'standard' Logger accessible through the helper functions Print[f|ln],
// Fatal[f|ln], Panic[f|ln], Warn[f|ln] and Debug[f|ln], which are easier
// to use than creating a Logger manually. That logger writes to standard
// error and prints the date and time of each logged message, which can be
// configured using the function SetFlags.
|
||||
//
|
||||
// The Fatal functions call os.Exit(1) after writing the log message; the
// flag Lnofatal suppresses only the message, not the exit. The Panic
// functions call panic after writing the log message; the flag Lnopanic
// suppresses only the message, not the panic.
|
||||
package xlog
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// The flags define which information is prefixed to each log entry
// generated by a Logger; the Lno* flags suppress the output of a message
// category instead. The bits are or'ed together. There is no control over
// the order in which the items appear or over their format. The full
// format is:
//
//   2009-01-23 01:23:23.123123 /a/b/c/d.go:23: message
//
const (
	// Ldate adds the date: 2009-01-23
	Ldate = 1 << iota
	// Ltime adds the time: 01:23:23
	Ltime
	// Lmicroseconds adds microsecond resolution: 01:23:23.123123
	Lmicroseconds
	// Llongfile adds the full file name and line number: /a/b/c/d.go:23
	Llongfile
	// Lshortfile adds the final file name element and line number:
	// d.go:23
	Lshortfile
	// Lnopanic suppresses output from Panic[f|ln] but not the panic call
	Lnopanic
	// Lnofatal suppresses output from Fatal[f|ln] but not the exit
	Lnofatal
	// Lnowarn suppresses output from Warn[f|ln]
	Lnowarn
	// Lnoprint suppresses output from Print[f|ln]
	Lnoprint
	// Lnodebug suppresses output from Debug[f|ln]
	Lnodebug
)

// Lstdflags holds the initial flags of the standard logger.
const Lstdflags = Ldate | Ltime | Lnodebug
|
||||
|
||||
// Logger is an active logging object that writes lines of output to an
// io.Writer. Every logging operation that is not suppressed results in a
// single call of the writer's Write method. A Logger can be used by
// multiple goroutines at the same time; access to the writer is
// serialized.
type Logger struct {
	// mu makes writes atomic and guards all fields below.
	mu sync.Mutex
	// prefix is written at the beginning of each line.
	prefix string
	// flag holds the L* properties of the logger.
	flag int
	// out is the destination of the output.
	out io.Writer
	// buf accumulates the text of the line to be written.
	buf []byte
}
|
||||
|
||||
// New creates a new Logger. The out argument sets the destination to
|
||||
// which the log output will be written. The prefix appears at the
|
||||
// beginning of each log line. The flag argument defines the logging
|
||||
// properties.
|
||||
func New(out io.Writer, prefix string, flag int) *Logger {
|
||||
return &Logger{out: out, prefix: prefix, flag: flag}
|
||||
}
|
||||
|
||||
// std is the standard logger used by the package scope functions.
|
||||
var std = New(os.Stderr, "", Lstdflags)
|
||||
|
||||
// itoa appends the decimal representation of i to *buf, padded with
// leading zeros to at least wid digits. A negative wid avoids the
// padding. Only non-negative integers are supported.
func itoa(buf *[]byte, i int, wid int) {
	rest := uint(i)
	if rest == 0 && wid <= 1 {
		*buf = append(*buf, '0')
		return
	}
	// The digits are produced from the least significant one, so the
	// scratch array is filled from its end.
	var digits [32]byte
	pos := len(digits)
	for rest > 0 || wid > 0 {
		pos--
		wid--
		digits[pos] = '0' + byte(rest%10)
		rest /= 10
	}
	*buf = append(*buf, digits[pos:]...)
}
|
||||
|
||||
// formatHeader puts the header into the buf field of the buffer.
|
||||
func (l *Logger) formatHeader(t time.Time, file string, line int) {
|
||||
l.buf = append(l.buf, l.prefix...)
|
||||
if l.flag&(Ldate|Ltime|Lmicroseconds) != 0 {
|
||||
if l.flag&Ldate != 0 {
|
||||
year, month, day := t.Date()
|
||||
itoa(&l.buf, year, 4)
|
||||
l.buf = append(l.buf, '-')
|
||||
itoa(&l.buf, int(month), 2)
|
||||
l.buf = append(l.buf, '-')
|
||||
itoa(&l.buf, day, 2)
|
||||
l.buf = append(l.buf, ' ')
|
||||
}
|
||||
if l.flag&(Ltime|Lmicroseconds) != 0 {
|
||||
hour, min, sec := t.Clock()
|
||||
itoa(&l.buf, hour, 2)
|
||||
l.buf = append(l.buf, ':')
|
||||
itoa(&l.buf, min, 2)
|
||||
l.buf = append(l.buf, ':')
|
||||
itoa(&l.buf, sec, 2)
|
||||
if l.flag&Lmicroseconds != 0 {
|
||||
l.buf = append(l.buf, '.')
|
||||
itoa(&l.buf, t.Nanosecond()/1e3, 6)
|
||||
}
|
||||
l.buf = append(l.buf, ' ')
|
||||
}
|
||||
}
|
||||
if l.flag&(Lshortfile|Llongfile) != 0 {
|
||||
if l.flag&Lshortfile != 0 {
|
||||
short := file
|
||||
for i := len(file) - 1; i > 0; i-- {
|
||||
if file[i] == '/' {
|
||||
short = file[i+1:]
|
||||
break
|
||||
}
|
||||
}
|
||||
file = short
|
||||
}
|
||||
l.buf = append(l.buf, file...)
|
||||
l.buf = append(l.buf, ':')
|
||||
itoa(&l.buf, line, -1)
|
||||
l.buf = append(l.buf, ": "...)
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Logger) output(calldepth int, now time.Time, s string) error {
|
||||
var file string
|
||||
var line int
|
||||
if l.flag&(Lshortfile|Llongfile) != 0 {
|
||||
l.mu.Unlock()
|
||||
var ok bool
|
||||
_, file, line, ok = runtime.Caller(calldepth)
|
||||
if !ok {
|
||||
file = "???"
|
||||
line = 0
|
||||
}
|
||||
l.mu.Lock()
|
||||
}
|
||||
l.buf = l.buf[:0]
|
||||
l.formatHeader(now, file, line)
|
||||
l.buf = append(l.buf, s...)
|
||||
if len(s) == 0 || s[len(s)-1] != '\n' {
|
||||
l.buf = append(l.buf, '\n')
|
||||
}
|
||||
_, err := l.out.Write(l.buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// Output writes the string s with the header controlled by the flags to
|
||||
// the l.out writer. A newline will be appended if s doesn't end in a
|
||||
// newline. Calldepth is used to recover the PC, although all current
|
||||
// calls of Output use the call depth 2. Access to the function is serialized.
|
||||
func (l *Logger) Output(calldepth, noflag int, v ...interface{}) error {
|
||||
now := time.Now()
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
if l.flag&noflag != 0 {
|
||||
return nil
|
||||
}
|
||||
s := fmt.Sprint(v...)
|
||||
return l.output(calldepth+1, now, s)
|
||||
}
|
||||
|
||||
// Outputf works like output but formats the output like Printf.
|
||||
func (l *Logger) Outputf(calldepth int, noflag int, format string, v ...interface{}) error {
|
||||
now := time.Now()
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
if l.flag&noflag != 0 {
|
||||
return nil
|
||||
}
|
||||
s := fmt.Sprintf(format, v...)
|
||||
return l.output(calldepth+1, now, s)
|
||||
}
|
||||
|
||||
// Outputln works like output but formats the output like Println.
|
||||
func (l *Logger) Outputln(calldepth int, noflag int, v ...interface{}) error {
|
||||
now := time.Now()
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
if l.flag&noflag != 0 {
|
||||
return nil
|
||||
}
|
||||
s := fmt.Sprintln(v...)
|
||||
return l.output(calldepth+1, now, s)
|
||||
}
|
||||
|
||||
// Panic prints the message like Print and calls panic. The printing
|
||||
// might be suppressed by the flag Lnopanic.
|
||||
func (l *Logger) Panic(v ...interface{}) {
|
||||
l.Output(2, Lnopanic, v...)
|
||||
s := fmt.Sprint(v...)
|
||||
panic(s)
|
||||
}
|
||||
|
||||
// Panic prints the message like Print and calls panic. The printing
|
||||
// might be suppressed by the flag Lnopanic.
|
||||
func Panic(v ...interface{}) {
|
||||
std.Output(2, Lnopanic, v...)
|
||||
s := fmt.Sprint(v...)
|
||||
panic(s)
|
||||
}
|
||||
|
||||
// Panicf prints the message like Printf and calls panic. The printing
|
||||
// might be suppressed by the flag Lnopanic.
|
||||
func (l *Logger) Panicf(format string, v ...interface{}) {
|
||||
l.Outputf(2, Lnopanic, format, v...)
|
||||
s := fmt.Sprintf(format, v...)
|
||||
panic(s)
|
||||
}
|
||||
|
||||
// Panicf prints the message like Printf and calls panic. The printing
|
||||
// might be suppressed by the flag Lnopanic.
|
||||
func Panicf(format string, v ...interface{}) {
|
||||
std.Outputf(2, Lnopanic, format, v...)
|
||||
s := fmt.Sprintf(format, v...)
|
||||
panic(s)
|
||||
}
|
||||
|
||||
// Panicln prints the message like Println and calls panic. The printing
|
||||
// might be suppressed by the flag Lnopanic.
|
||||
func (l *Logger) Panicln(v ...interface{}) {
|
||||
l.Outputln(2, Lnopanic, v...)
|
||||
s := fmt.Sprintln(v...)
|
||||
panic(s)
|
||||
}
|
||||
|
||||
// Panicln prints the message like Println and calls panic. The printing
|
||||
// might be suppressed by the flag Lnopanic.
|
||||
func Panicln(v ...interface{}) {
|
||||
std.Outputln(2, Lnopanic, v...)
|
||||
s := fmt.Sprintln(v...)
|
||||
panic(s)
|
||||
}
|
||||
|
||||
// Fatal prints the message like Print and calls os.Exit(1). The
|
||||
// printing might be suppressed by the flag Lnofatal.
|
||||
func (l *Logger) Fatal(v ...interface{}) {
|
||||
l.Output(2, Lnofatal, v...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Fatal prints the message like Print and calls os.Exit(1). The
|
||||
// printing might be suppressed by the flag Lnofatal.
|
||||
func Fatal(v ...interface{}) {
|
||||
std.Output(2, Lnofatal, v...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Fatalf prints the message like Printf and calls os.Exit(1). The
|
||||
// printing might be suppressed by the flag Lnofatal.
|
||||
func (l *Logger) Fatalf(format string, v ...interface{}) {
|
||||
l.Outputf(2, Lnofatal, format, v...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Fatalf prints the message like Printf and calls os.Exit(1). The
|
||||
// printing might be suppressed by the flag Lnofatal.
|
||||
func Fatalf(format string, v ...interface{}) {
|
||||
std.Outputf(2, Lnofatal, format, v...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Fatalln prints the message like Println and calls os.Exit(1). The
|
||||
// printing might be suppressed by the flag Lnofatal.
|
||||
func (l *Logger) Fatalln(format string, v ...interface{}) {
|
||||
l.Outputln(2, Lnofatal, v...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Fatalln prints the message like Println and calls os.Exit(1). The
|
||||
// printing might be suppressed by the flag Lnofatal.
|
||||
func Fatalln(format string, v ...interface{}) {
|
||||
std.Outputln(2, Lnofatal, v...)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Warn prints the message like Print. The printing might be suppressed
|
||||
// by the flag Lnowarn.
|
||||
func (l *Logger) Warn(v ...interface{}) {
|
||||
l.Output(2, Lnowarn, v...)
|
||||
}
|
||||
|
||||
// Warn prints the message like Print. The printing might be suppressed
|
||||
// by the flag Lnowarn.
|
||||
func Warn(v ...interface{}) {
|
||||
std.Output(2, Lnowarn, v...)
|
||||
}
|
||||
|
||||
// Warnf prints the message like Printf. The printing might be suppressed
|
||||
// by the flag Lnowarn.
|
||||
func (l *Logger) Warnf(format string, v ...interface{}) {
|
||||
l.Outputf(2, Lnowarn, format, v...)
|
||||
}
|
||||
|
||||
// Warnf prints the message like Printf. The printing might be suppressed
|
||||
// by the flag Lnowarn.
|
||||
func Warnf(format string, v ...interface{}) {
|
||||
std.Outputf(2, Lnowarn, format, v...)
|
||||
}
|
||||
|
||||
// Warnln prints the message like Println. The printing might be suppressed
|
||||
// by the flag Lnowarn.
|
||||
func (l *Logger) Warnln(v ...interface{}) {
|
||||
l.Outputln(2, Lnowarn, v...)
|
||||
}
|
||||
|
||||
// Warnln prints the message like Println. The printing might be suppressed
|
||||
// by the flag Lnowarn.
|
||||
func Warnln(v ...interface{}) {
|
||||
std.Outputln(2, Lnowarn, v...)
|
||||
}
|
||||
|
||||
// Print prints the message like fmt.Print. The printing might be suppressed
|
||||
// by the flag Lnoprint.
|
||||
func (l *Logger) Print(v ...interface{}) {
|
||||
l.Output(2, Lnoprint, v...)
|
||||
}
|
||||
|
||||
// Print prints the message like fmt.Print. The printing might be suppressed
|
||||
// by the flag Lnoprint.
|
||||
func Print(v ...interface{}) {
|
||||
std.Output(2, Lnoprint, v...)
|
||||
}
|
||||
|
||||
// Printf prints the message like fmt.Printf. The printing might be suppressed
|
||||
// by the flag Lnoprint.
|
||||
func (l *Logger) Printf(format string, v ...interface{}) {
|
||||
l.Outputf(2, Lnoprint, format, v...)
|
||||
}
|
||||
|
||||
// Printf prints the message like fmt.Printf. The printing might be suppressed
|
||||
// by the flag Lnoprint.
|
||||
func Printf(format string, v ...interface{}) {
|
||||
std.Outputf(2, Lnoprint, format, v...)
|
||||
}
|
||||
|
||||
// Println prints the message like fmt.Println. The printing might be
|
||||
// suppressed by the flag Lnoprint.
|
||||
func (l *Logger) Println(v ...interface{}) {
|
||||
l.Outputln(2, Lnoprint, v...)
|
||||
}
|
||||
|
||||
// Println prints the message like fmt.Println. The printing might be
|
||||
// suppressed by the flag Lnoprint.
|
||||
func Println(v ...interface{}) {
|
||||
std.Outputln(2, Lnoprint, v...)
|
||||
}
|
||||
|
||||
// Debug prints the message like Print. The printing might be suppressed
|
||||
// by the flag Lnodebug.
|
||||
func (l *Logger) Debug(v ...interface{}) {
|
||||
l.Output(2, Lnodebug, v...)
|
||||
}
|
||||
|
||||
// Debug prints the message like Print. The printing might be suppressed
|
||||
// by the flag Lnodebug.
|
||||
func Debug(v ...interface{}) {
|
||||
std.Output(2, Lnodebug, v...)
|
||||
}
|
||||
|
||||
// Debugf prints the message like Printf. The printing might be suppressed
|
||||
// by the flag Lnodebug.
|
||||
func (l *Logger) Debugf(format string, v ...interface{}) {
|
||||
l.Outputf(2, Lnodebug, format, v...)
|
||||
}
|
||||
|
||||
// Debugf prints the message like Printf. The printing might be suppressed
|
||||
// by the flag Lnodebug.
|
||||
func Debugf(format string, v ...interface{}) {
|
||||
std.Outputf(2, Lnodebug, format, v...)
|
||||
}
|
||||
|
||||
// Debugln prints the message like Println. The printing might be suppressed
|
||||
// by the flag Lnodebug.
|
||||
func (l *Logger) Debugln(v ...interface{}) {
|
||||
l.Outputln(2, Lnodebug, v...)
|
||||
}
|
||||
|
||||
// Debugln prints the message like Println. The printing might be suppressed
|
||||
// by the flag Lnodebug.
|
||||
func Debugln(v ...interface{}) {
|
||||
std.Outputln(2, Lnodebug, v...)
|
||||
}
|
||||
|
||||
// Flags returns the current flags used by the logger.
|
||||
func (l *Logger) Flags() int {
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
return l.flag
|
||||
}
|
||||
|
||||
// Flags returns the current flags used by the standard logger.
|
||||
func Flags() int {
|
||||
return std.Flags()
|
||||
}
|
||||
|
||||
// SetFlags sets the flags of the logger.
|
||||
func (l *Logger) SetFlags(flag int) {
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
l.flag = flag
|
||||
}
|
||||
|
||||
// SetFlags sets the flags for the standard logger.
|
||||
func SetFlags(flag int) {
|
||||
std.SetFlags(flag)
|
||||
}
|
||||
|
||||
// Prefix returns the prefix used by the logger.
|
||||
func (l *Logger) Prefix() string {
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
return l.prefix
|
||||
}
|
||||
|
||||
// Prefix returns the prefix used by the standard logger of the package.
|
||||
func Prefix() string {
|
||||
return std.Prefix()
|
||||
}
|
||||
|
||||
// SetPrefix sets the prefix for the logger.
|
||||
func (l *Logger) SetPrefix(prefix string) {
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
l.prefix = prefix
|
||||
}
|
||||
|
||||
// SetPrefix sets the prefix of the standard logger of the package.
|
||||
func SetPrefix(prefix string) {
|
||||
std.SetPrefix(prefix)
|
||||
}
|
||||
|
||||
// SetOutput sets the output of the logger.
|
||||
func (l *Logger) SetOutput(w io.Writer) {
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
l.out = w
|
||||
}
|
||||
|
||||
// SetOutput sets the output for the standard logger of the package.
|
||||
func SetOutput(w io.Writer) {
|
||||
std.SetOutput(w)
|
||||
}
|
|
@ -0,0 +1,523 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// node is a single element of the binary tree. The links p, l and r are
// node indices; a missing link is stored as the null index.
type node struct {
	// x is the search value
	x uint32
	// p is the parent, l the left child and r the right child
	p, l, r uint32
}
|
||||
|
||||
// wordLen is the number of bytes represented by the x field of a node.
|
||||
const wordLen = 4
|
||||
|
||||
// binTree supports the identification of the next operation based on a
|
||||
// binary tree.
|
||||
//
|
||||
// Nodes will be identified by their index into the ring buffer.
|
||||
type binTree struct {
|
||||
dict *encoderDict
|
||||
// ring buffer of nodes
|
||||
node []node
|
||||
// absolute offset of the entry for the next node. Position 4
|
||||
// byte larger.
|
||||
hoff int64
|
||||
// front position in the node ring buffer
|
||||
front uint32
|
||||
// index of the root node
|
||||
root uint32
|
||||
// current x value
|
||||
x uint32
|
||||
// preallocated array
|
||||
data []byte
|
||||
}
|
||||
|
||||
// null is the index value that stands for "no node". Zero cannot serve
// that purpose because index zero refers to a real node; using it would
// force every stored reference to be shifted by one.
const null = ^uint32(0)
|
||||
|
||||
// newBinTree initializes the binTree structure. The capacity defines
|
||||
// the size of the buffer and defines the maximum distance for which
|
||||
// matches will be found.
|
||||
func newBinTree(capacity int) (t *binTree, err error) {
|
||||
if capacity < 1 {
|
||||
return nil, errors.New(
|
||||
"newBinTree: capacity must be larger than zero")
|
||||
}
|
||||
if int64(capacity) >= int64(null) {
|
||||
return nil, errors.New(
|
||||
"newBinTree: capacity must less 2^{32}-1")
|
||||
}
|
||||
t = &binTree{
|
||||
node: make([]node, capacity),
|
||||
hoff: -int64(wordLen),
|
||||
root: null,
|
||||
data: make([]byte, maxMatchLen),
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// SetDict sets the encoder dictionary the tree searches for matches.
func (t *binTree) SetDict(d *encoderDict) {
	t.dict = d
}
|
||||
|
||||
// WriteByte writes a single byte into the binary tree.
|
||||
func (t *binTree) WriteByte(c byte) error {
|
||||
t.x = (t.x << 8) | uint32(c)
|
||||
t.hoff++
|
||||
if t.hoff < 0 {
|
||||
return nil
|
||||
}
|
||||
v := t.front
|
||||
if int64(v) < t.hoff {
|
||||
// We are overwriting old nodes stored in the tree.
|
||||
t.remove(v)
|
||||
}
|
||||
t.node[v].x = t.x
|
||||
t.add(v)
|
||||
t.front++
|
||||
if int64(t.front) >= int64(len(t.node)) {
|
||||
t.front = 0
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Writes writes a sequence of bytes into the binTree structure.
|
||||
func (t *binTree) Write(p []byte) (n int, err error) {
|
||||
for _, c := range p {
|
||||
t.WriteByte(c)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
// add puts the node v into the tree. The node must not be part of the
|
||||
// tree before.
|
||||
func (t *binTree) add(v uint32) {
|
||||
vn := &t.node[v]
|
||||
// Set left and right to null indices.
|
||||
vn.l, vn.r = null, null
|
||||
// If the binary tree is empty make v the root.
|
||||
if t.root == null {
|
||||
t.root = v
|
||||
vn.p = null
|
||||
return
|
||||
}
|
||||
x := vn.x
|
||||
p := t.root
|
||||
// Search for the right leave link and add the new node.
|
||||
for {
|
||||
pn := &t.node[p]
|
||||
if x <= pn.x {
|
||||
if pn.l == null {
|
||||
pn.l = v
|
||||
vn.p = p
|
||||
return
|
||||
}
|
||||
p = pn.l
|
||||
} else {
|
||||
if pn.r == null {
|
||||
pn.r = v
|
||||
vn.p = p
|
||||
return
|
||||
}
|
||||
p = pn.r
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parent returns the parent node index of v and the pointer to v value
|
||||
// in the parent.
|
||||
func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) {
|
||||
if t.root == v {
|
||||
return null, &t.root
|
||||
}
|
||||
p = t.node[v].p
|
||||
if t.node[p].l == v {
|
||||
ptr = &t.node[p].l
|
||||
} else {
|
||||
ptr = &t.node[p].r
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Remove node v.
|
||||
func (t *binTree) remove(v uint32) {
|
||||
vn := &t.node[v]
|
||||
p, ptr := t.parent(v)
|
||||
l, r := vn.l, vn.r
|
||||
if l == null {
|
||||
// Move the right child up.
|
||||
*ptr = r
|
||||
if r != null {
|
||||
t.node[r].p = p
|
||||
}
|
||||
return
|
||||
}
|
||||
if r == null {
|
||||
// Move the left child up.
|
||||
*ptr = l
|
||||
t.node[l].p = p
|
||||
return
|
||||
}
|
||||
|
||||
// Search the in-order predecessor u.
|
||||
un := &t.node[l]
|
||||
ur := un.r
|
||||
if ur == null {
|
||||
// In order predecessor is l. Move it up.
|
||||
un.r = r
|
||||
t.node[r].p = l
|
||||
un.p = p
|
||||
*ptr = l
|
||||
return
|
||||
}
|
||||
var u uint32
|
||||
for {
|
||||
// Look for the max value in the tree where l is root.
|
||||
u = ur
|
||||
ur = t.node[u].r
|
||||
if ur == null {
|
||||
break
|
||||
}
|
||||
}
|
||||
// replace u with ul
|
||||
un = &t.node[u]
|
||||
ul := un.l
|
||||
up := un.p
|
||||
t.node[up].r = ul
|
||||
if ul != null {
|
||||
t.node[ul].p = up
|
||||
}
|
||||
|
||||
// replace v by u
|
||||
un.l, un.r = l, r
|
||||
t.node[l].p = u
|
||||
t.node[r].p = u
|
||||
*ptr = u
|
||||
un.p = p
|
||||
}
|
||||
|
||||
// search looks for the node that have the value x or for the nodes that
|
||||
// brace it. The node highest in the tree with the value x will be
|
||||
// returned. All other nodes with the same value live in left subtree of
|
||||
// the returned node.
|
||||
func (t *binTree) search(v uint32, x uint32) (a, b uint32) {
|
||||
a, b = null, null
|
||||
if v == null {
|
||||
return
|
||||
}
|
||||
for {
|
||||
vn := &t.node[v]
|
||||
if x <= vn.x {
|
||||
if x == vn.x {
|
||||
return v, v
|
||||
}
|
||||
b = v
|
||||
if vn.l == null {
|
||||
return
|
||||
}
|
||||
v = vn.l
|
||||
} else {
|
||||
a = v
|
||||
if vn.r == null {
|
||||
return
|
||||
}
|
||||
v = vn.r
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// max returns the node with maximum value in the subtree with v as
|
||||
// root.
|
||||
func (t *binTree) max(v uint32) uint32 {
|
||||
if v == null {
|
||||
return null
|
||||
}
|
||||
for {
|
||||
r := t.node[v].r
|
||||
if r == null {
|
||||
return v
|
||||
}
|
||||
v = r
|
||||
}
|
||||
}
|
||||
|
||||
// min returns the node with the minimum value in the subtree with v as
|
||||
// root.
|
||||
func (t *binTree) min(v uint32) uint32 {
|
||||
if v == null {
|
||||
return null
|
||||
}
|
||||
for {
|
||||
l := t.node[v].l
|
||||
if l == null {
|
||||
return v
|
||||
}
|
||||
v = l
|
||||
}
|
||||
}
|
||||
|
||||
// pred returns the in-order predecessor of node v.
|
||||
func (t *binTree) pred(v uint32) uint32 {
|
||||
if v == null {
|
||||
return null
|
||||
}
|
||||
u := t.max(t.node[v].l)
|
||||
if u != null {
|
||||
return u
|
||||
}
|
||||
for {
|
||||
p := t.node[v].p
|
||||
if p == null {
|
||||
return null
|
||||
}
|
||||
if t.node[p].r == v {
|
||||
return p
|
||||
}
|
||||
v = p
|
||||
}
|
||||
}
|
||||
|
||||
// succ returns the in-order successor of node v.
|
||||
func (t *binTree) succ(v uint32) uint32 {
|
||||
if v == null {
|
||||
return null
|
||||
}
|
||||
u := t.min(t.node[v].r)
|
||||
if u != null {
|
||||
return u
|
||||
}
|
||||
for {
|
||||
p := t.node[v].p
|
||||
if p == null {
|
||||
return null
|
||||
}
|
||||
if t.node[p].l == v {
|
||||
return p
|
||||
}
|
||||
v = p
|
||||
}
|
||||
}
|
||||
|
||||
// xval packs up to the first four bytes of a into a 32-bit unsigned
// integer in big-endian order. If a has fewer than four bytes, the
// missing low-order bytes are zero.
func xval(a []byte) uint32 {
	n := len(a)
	if n > 4 {
		n = 4
	}
	var x uint32
	for i := 0; i < n; i++ {
		x |= uint32(a[i]) << uint(24-8*i)
	}
	return x
}
|
||||
|
||||
// dumpX renders the four bytes of x, most significant byte first, as a
// four-byte string. Bytes for which unicode.IsGraphic reports false are
// shown as '.'.
func dumpX(x uint32) string {
	var a [4]byte
	for i := range a {
		c := byte(x >> uint((3-i)*8))
		if !unicode.IsGraphic(rune(c)) {
			c = '.'
		}
		a[i] = c
	}
	return string(a[:])
}
|
||||
|
||||
// dumpNode writes a representation of the node v into the io.Writer.
|
||||
func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) {
|
||||
if v == null {
|
||||
return
|
||||
}
|
||||
|
||||
vn := &t.node[v]
|
||||
|
||||
t.dumpNode(w, vn.r, indent+2)
|
||||
|
||||
for i := 0; i < indent; i++ {
|
||||
fmt.Fprint(w, " ")
|
||||
}
|
||||
if vn.p == null {
|
||||
fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x))
|
||||
} else {
|
||||
fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p)
|
||||
}
|
||||
|
||||
t.dumpNode(w, vn.l, indent+2)
|
||||
}
|
||||
|
||||
// dump prints a representation of the binary tree into the writer.
|
||||
func (t *binTree) dump(w io.Writer) error {
|
||||
bw := bufio.NewWriter(w)
|
||||
t.dumpNode(bw, t.root, 0)
|
||||
return bw.Flush()
|
||||
}
|
||||
|
||||
func (t *binTree) distance(v uint32) int {
|
||||
dist := int(t.front) - int(v)
|
||||
if dist <= 0 {
|
||||
dist += len(t.node)
|
||||
}
|
||||
return dist
|
||||
}
|
||||
|
||||
// matchParams bundles the parameters that steer binTree.match.
type matchParams struct {
	// rep values; match compares rep[0] for matches of length one
	rep [4]uint32
	// length when match will be accepted
	nAccept int
	// nodes to check
	check int
	// finish if length get shorter
	stopShorter bool
}
|
||||
|
||||
func (t *binTree) match(m match, distIter func() (int, bool), p matchParams,
|
||||
) (r match, checked int, accepted bool) {
|
||||
buf := &t.dict.buf
|
||||
for {
|
||||
if checked >= p.check {
|
||||
return m, checked, true
|
||||
}
|
||||
dist, ok := distIter()
|
||||
if !ok {
|
||||
return m, checked, false
|
||||
}
|
||||
checked++
|
||||
if m.n > 0 {
|
||||
i := buf.rear - dist + m.n - 1
|
||||
if i < 0 {
|
||||
i += len(buf.data)
|
||||
} else if i >= len(buf.data) {
|
||||
i -= len(buf.data)
|
||||
}
|
||||
if buf.data[i] != t.data[m.n-1] {
|
||||
if p.stopShorter {
|
||||
return m, checked, false
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
n := buf.matchLen(dist, t.data)
|
||||
switch n {
|
||||
case 0:
|
||||
if p.stopShorter {
|
||||
return m, checked, false
|
||||
}
|
||||
continue
|
||||
case 1:
|
||||
if uint32(dist-minDistance) != p.rep[0] {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if n < m.n || (n == m.n && int64(dist) >= m.distance) {
|
||||
continue
|
||||
}
|
||||
m = match{int64(dist), n}
|
||||
if n >= p.nAccept {
|
||||
return m, checked, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t *binTree) NextOp(rep [4]uint32) operation {
|
||||
// retrieve maxMatchLen data
|
||||
n, _ := t.dict.buf.Peek(t.data[:maxMatchLen])
|
||||
if n == 0 {
|
||||
panic("no data in buffer")
|
||||
}
|
||||
t.data = t.data[:n]
|
||||
|
||||
var (
|
||||
m match
|
||||
x, u, v uint32
|
||||
iterPred, iterSucc func() (int, bool)
|
||||
)
|
||||
p := matchParams{
|
||||
rep: rep,
|
||||
nAccept: maxMatchLen,
|
||||
check: 32,
|
||||
}
|
||||
i := 4
|
||||
iterSmall := func() (dist int, ok bool) {
|
||||
i--
|
||||
if i <= 0 {
|
||||
return 0, false
|
||||
}
|
||||
return i, true
|
||||
}
|
||||
m, checked, accepted := t.match(m, iterSmall, p)
|
||||
if accepted {
|
||||
goto end
|
||||
}
|
||||
p.check -= checked
|
||||
x = xval(t.data)
|
||||
u, v = t.search(t.root, x)
|
||||
if u == v && len(t.data) == 4 {
|
||||
iter := func() (dist int, ok bool) {
|
||||
if u == null {
|
||||
return 0, false
|
||||
}
|
||||
dist = t.distance(u)
|
||||
u, v = t.search(t.node[u].l, x)
|
||||
if u != v {
|
||||
u = null
|
||||
}
|
||||
return dist, true
|
||||
}
|
||||
m, _, _ = t.match(m, iter, p)
|
||||
goto end
|
||||
}
|
||||
p.stopShorter = true
|
||||
iterSucc = func() (dist int, ok bool) {
|
||||
if v == null {
|
||||
return 0, false
|
||||
}
|
||||
dist = t.distance(v)
|
||||
v = t.succ(v)
|
||||
return dist, true
|
||||
}
|
||||
m, checked, accepted = t.match(m, iterSucc, p)
|
||||
if accepted {
|
||||
goto end
|
||||
}
|
||||
p.check -= checked
|
||||
iterPred = func() (dist int, ok bool) {
|
||||
if u == null {
|
||||
return 0, false
|
||||
}
|
||||
dist = t.distance(u)
|
||||
u = t.pred(u)
|
||||
return dist, true
|
||||
}
|
||||
m, _, _ = t.match(m, iterPred, p)
|
||||
end:
|
||||
if m.n == 0 {
|
||||
return lit{t.data[0]}
|
||||
}
|
||||
return m
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
/* Naming conventions follow the CodeReviewComments in the Go Wiki. */
|
||||
|
||||
// ntz32Const is the de Bruijn multiplier used by ntz32 and nlz32.
const ntz32Const = 0x04d7651f

// ntz32Table is a helper table for de Bruijn algorithm by Danny Dubé.
// See Henry S. Warren, Jr. "Hacker's Delight" section 5-1 figure 5-26.
var ntz32Table = [32]int8{
	0, 1, 2, 24, 3, 19, 6, 25,
	22, 4, 20, 10, 16, 7, 12, 26,
	31, 23, 18, 5, 21, 9, 15, 11,
	30, 17, 8, 14, 29, 13, 28, 27,
}

// ntz32 returns the number of trailing zero bits of x. The result for
// zero is 32.
func ntz32(x uint32) int {
	if x == 0 {
		return 32
	}
	// Isolate the lowest set bit; the top five bits of the product
	// select the table entry.
	lowest := x & -x
	return int(ntz32Table[(lowest*ntz32Const)>>27])
}

// nlz32 returns the number of leading zero bits of x. The result for
// zero is 32.
func nlz32(x uint32) int {
	// Smear the leftmost set bit to the right.
	for s := uint(1); s <= 16; s <<= 1 {
		x |= x >> s
	}
	// x+1 is now a single bit one position above the leftmost set bit,
	// or zero after overflow if bit 31 was set.
	x++
	if x == 0 {
		return 0
	}
	return 32 - int(ntz32Table[(x*ntz32Const)>>27])
}
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// breader adds a ReadByte method to an io.Reader. Every ReadByte call
// issues a single one-byte Read on the wrapped reader, so no data is
// consumed beyond what has been asked for.
type breader struct {
	io.Reader
	// one-byte scratch slice, kept to save allocations
	p []byte
}

// ByteReader converts an io.Reader into an io.ByteReader. A reader that
// already implements io.ByteReader is returned unchanged; any other
// reader is wrapped.
func ByteReader(r io.Reader) io.ByteReader {
	if br, ok := r.(io.ByteReader); ok {
		return br
	}
	return &breader{Reader: r, p: make([]byte, 1)}
}

// ReadByte reads a single byte from the wrapped reader. If the Read
// call delivers no byte, its error is returned; if it reported no error
// either, an error is created.
func (r *breader) ReadByte() (c byte, err error) {
	n, err := r.Reader.Read(r.p)
	if n >= 1 {
		return r.p[0], nil
	}
	if err == nil {
		err = errors.New("breader.ReadByte: no data")
	}
	return 0, err
}
|
|
@ -0,0 +1,171 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// buffer provides a circular buffer of bytes. Data is written at the
// front index and read at the rear index. The buffer is empty when
// front equals rear, so a full buffer must keep the two indices apart:
// it holds one byte less than the length of the data slice.
type buffer struct {
	data  []byte
	front int
	rear  int
}

// newBuffer creates a buffer that can hold size bytes.
func newBuffer(size int) *buffer {
	b := new(buffer)
	// One extra byte tells a full buffer from an empty one.
	b.data = make([]byte, size+1)
	return b
}

// Cap returns the capacity of the buffer.
func (b *buffer) Cap() int { return len(b.data) - 1 }

// Reset empties the buffer by setting the front and rear index to zero.
func (b *buffer) Reset() { b.front, b.rear = 0, 0 }

// Buffered returns the number of bytes buffered.
func (b *buffer) Buffered() int {
	used := b.front - b.rear
	if used >= 0 {
		return used
	}
	return used + len(b.data)
}

// Available returns the number of bytes available for writing.
func (b *buffer) Available() int {
	free := b.rear - 1 - b.front
	if free >= 0 {
		return free
	}
	return free + len(b.data)
}

// addIndex adds the non-negative integer n to the index i and returns
// the resulting index wrapped into the data slice.
func (b *buffer) addIndex(i int, n int) int {
	size := len(b.data)
	// Subtracting the size before adding it back prevents overflow.
	i += n - size
	if i < 0 {
		i += size
	}
	return i
}

// Read reads bytes from the buffer into p and returns the number of
// bytes read. The function never returns an error but might return
// less data than requested.
func (b *buffer) Read(p []byte) (n int, err error) {
	n, err = b.Peek(p)
	b.rear = b.addIndex(b.rear, n)
	return n, err
}

// Peek reads bytes from the buffer into p without changing the buffer.
// Peek will never return an error but might return less data than
// requested.
func (b *buffer) Peek(p []byte) (n int, err error) {
	if have := b.Buffered(); have < len(p) {
		p = p[:have]
	}
	n = len(p)
	// Copy the stretch up to the end of the slice, then wrap around.
	if k := copy(p, b.data[b.rear:]); k < n {
		copy(p[k:], b.data)
	}
	return n, nil
}

// Discard skips the next n bytes to read from the buffer and returns
// the number of bytes discarded.
//
// An error is returned if n is negative or if fewer than n bytes could
// be skipped.
func (b *buffer) Discard(n int) (discarded int, err error) {
	if n < 0 {
		return 0, errors.New("buffer.Discard: negative argument")
	}
	if have := b.Buffered(); have < n {
		n = have
		err = errors.New(
			"buffer.Discard: discarded less bytes then requested")
	}
	b.rear = b.addIndex(b.rear, n)
	return n, err
}

// ErrNoSpace indicates that there is insufficient space for the Write
// operation.
var ErrNoSpace = errors.New("insufficient space")

// Write puts data into the buffer. If fewer bytes are written than
// requested, ErrNoSpace is returned.
func (b *buffer) Write(p []byte) (n int, err error) {
	if free := b.Available(); free < len(p) {
		p = p[:free]
		err = ErrNoSpace
	}
	n = len(p)
	// Copy up to the end of the slice, then wrap around.
	if k := copy(b.data[b.front:], p); k < n {
		copy(b.data, p[k:])
	}
	b.front = b.addIndex(b.front, n)
	return n, err
}

// WriteByte writes a single byte into the buffer. ErrNoSpace is
// returned if the buffer has no room for it.
func (b *buffer) WriteByte(c byte) error {
	if b.Available() < 1 {
		return ErrNoSpace
	}
	b.data[b.front] = c
	b.front = b.addIndex(b.front, 1)
	return nil
}
|
||||
|
||||
// prefixLen returns the length of the common prefix of a and b.
func prefixLen(a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return n
}
|
||||
|
||||
// matchLen returns the length of the common prefix for the given
|
||||
// distance from the rear and the byte slice p.
|
||||
func (b *buffer) matchLen(distance int, p []byte) int {
|
||||
var n int
|
||||
i := b.rear - distance
|
||||
if i < 0 {
|
||||
if n = prefixLen(p, b.data[len(b.data)+i:]); n < -i {
|
||||
return n
|
||||
}
|
||||
p = p[n:]
|
||||
i = 0
|
||||
}
|
||||
n += prefixLen(p, b.data[i:])
|
||||
return n
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// ErrLimit indicates that the limit of the LimitedByteWriter has been
// reached.
var ErrLimit = errors.New("limit reached")

// LimitedByteWriter is a byte writer that accepts bytes until a limit
// is reached. BW is the underlying writer and N the number of bytes
// that may still be written.
type LimitedByteWriter struct {
	BW io.ByteWriter
	N  int64
}

// WriteByte writes a single byte to the underlying byte writer. It
// returns ErrLimit without writing if no bytes remain. An error of the
// underlying writer is passed through and leaves N unchanged; after a
// successful write N is decremented by one.
func (l *LimitedByteWriter) WriteByte(c byte) error {
	if l.N < 1 {
		return ErrLimit
	}
	err := l.BW.WriteByte(c)
	if err == nil {
		l.N--
	}
	return err
}
|
|
@ -0,0 +1,277 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// decoder decodes a raw LZMA stream without any header.
|
||||
type decoder struct {
|
||||
// dictionary; the rear pointer of the buffer will be used for
|
||||
// reading the data.
|
||||
Dict *decoderDict
|
||||
// decoder state
|
||||
State *state
|
||||
// range decoder
|
||||
rd *rangeDecoder
|
||||
// start stores the head value of the dictionary for the LZMA
|
||||
// stream
|
||||
start int64
|
||||
// size of uncompressed data
|
||||
size int64
|
||||
// end-of-stream encountered
|
||||
eos bool
|
||||
// EOS marker found
|
||||
eosMarker bool
|
||||
}
|
||||
|
||||
// newDecoder creates a new decoder instance. The parameter size provides
|
||||
// the expected byte size of the decompressed data. If the size is
|
||||
// unknown use a negative value. In that case the decoder will look for
|
||||
// a terminating end-of-stream marker.
|
||||
func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
|
||||
rd, err := newRangeDecoder(br)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d = &decoder{
|
||||
State: state,
|
||||
Dict: dict,
|
||||
rd: rd,
|
||||
size: size,
|
||||
start: dict.pos(),
|
||||
}
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Reopen restarts the decoder with a new byte reader and a new size. Reopen
|
||||
// resets the Decompressed counter to zero.
|
||||
func (d *decoder) Reopen(br io.ByteReader, size int64) error {
|
||||
var err error
|
||||
if d.rd, err = newRangeDecoder(br); err != nil {
|
||||
return err
|
||||
}
|
||||
d.start = d.Dict.pos()
|
||||
d.size = size
|
||||
d.eos = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeLiteral decodes a single literal from the LZMA stream.
|
||||
func (d *decoder) decodeLiteral() (op operation, err error) {
|
||||
litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
|
||||
match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
|
||||
s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return lit{s}, nil
|
||||
}
|
||||
|
||||
// errEOS indicates that an EOS marker has been found.
|
||||
var errEOS = errors.New("EOS marker found")
|
||||
|
||||
// readOp decodes the next operation from the compressed stream. It
|
||||
// returns the operation. If an explicit end of stream marker is
|
||||
// identified the eos error is returned.
|
||||
func (d *decoder) readOp() (op operation, err error) {
|
||||
// Value of the end of stream (EOS) marker
|
||||
const eosDist = 1<<32 - 1
|
||||
|
||||
state, state2, posState := d.State.states(d.Dict.head)
|
||||
|
||||
b, err := d.State.isMatch[state2].Decode(d.rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b == 0 {
|
||||
// literal
|
||||
op, err := d.decodeLiteral()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d.State.updateStateLiteral()
|
||||
return op, nil
|
||||
}
|
||||
b, err = d.State.isRep[state].Decode(d.rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b == 0 {
|
||||
// simple match
|
||||
d.State.rep[3], d.State.rep[2], d.State.rep[1] =
|
||||
d.State.rep[2], d.State.rep[1], d.State.rep[0]
|
||||
|
||||
d.State.updateStateMatch()
|
||||
// The length decoder returns the length offset.
|
||||
n, err := d.State.lenCodec.Decode(d.rd, posState)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// The dist decoder returns the distance offset. The actual
|
||||
// distance is 1 higher.
|
||||
d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if d.State.rep[0] == eosDist {
|
||||
d.eosMarker = true
|
||||
return nil, errEOS
|
||||
}
|
||||
op = match{n: int(n) + minMatchLen,
|
||||
distance: int64(d.State.rep[0]) + minDistance}
|
||||
return op, nil
|
||||
}
|
||||
b, err = d.State.isRepG0[state].Decode(d.rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dist := d.State.rep[0]
|
||||
if b == 0 {
|
||||
// rep match 0
|
||||
b, err = d.State.isRepG0Long[state2].Decode(d.rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b == 0 {
|
||||
d.State.updateStateShortRep()
|
||||
op = match{n: 1, distance: int64(dist) + minDistance}
|
||||
return op, nil
|
||||
}
|
||||
} else {
|
||||
b, err = d.State.isRepG1[state].Decode(d.rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b == 0 {
|
||||
dist = d.State.rep[1]
|
||||
} else {
|
||||
b, err = d.State.isRepG2[state].Decode(d.rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b == 0 {
|
||||
dist = d.State.rep[2]
|
||||
} else {
|
||||
dist = d.State.rep[3]
|
||||
d.State.rep[3] = d.State.rep[2]
|
||||
}
|
||||
d.State.rep[2] = d.State.rep[1]
|
||||
}
|
||||
d.State.rep[1] = d.State.rep[0]
|
||||
d.State.rep[0] = dist
|
||||
}
|
||||
n, err := d.State.repLenCodec.Decode(d.rd, posState)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d.State.updateStateRep()
|
||||
op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
|
||||
return op, nil
|
||||
}
|
||||
|
||||
// apply takes the operation and transforms the decoder dictionary accordingly.
|
||||
func (d *decoder) apply(op operation) error {
|
||||
var err error
|
||||
switch x := op.(type) {
|
||||
case match:
|
||||
err = d.Dict.writeMatch(x.distance, x.n)
|
||||
case lit:
|
||||
err = d.Dict.WriteByte(x.b)
|
||||
default:
|
||||
panic("op is neither a match nor a literal")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// decompress fills the dictionary unless no space for new data is
|
||||
// available. If the end of the LZMA stream has been reached io.EOF will
|
||||
// be returned.
|
||||
func (d *decoder) decompress() error {
|
||||
if d.eos {
|
||||
return io.EOF
|
||||
}
|
||||
for d.Dict.Available() >= maxMatchLen {
|
||||
op, err := d.readOp()
|
||||
switch err {
|
||||
case nil:
|
||||
break
|
||||
case errEOS:
|
||||
d.eos = true
|
||||
if !d.rd.possiblyAtEnd() {
|
||||
return errDataAfterEOS
|
||||
}
|
||||
if d.size >= 0 && d.size != d.Decompressed() {
|
||||
return errSize
|
||||
}
|
||||
return io.EOF
|
||||
case io.EOF:
|
||||
d.eos = true
|
||||
return io.ErrUnexpectedEOF
|
||||
default:
|
||||
return err
|
||||
}
|
||||
if err = d.apply(op); err != nil {
|
||||
return err
|
||||
}
|
||||
if d.size >= 0 && d.Decompressed() >= d.size {
|
||||
d.eos = true
|
||||
if d.Decompressed() > d.size {
|
||||
return errSize
|
||||
}
|
||||
if !d.rd.possiblyAtEnd() {
|
||||
switch _, err = d.readOp(); err {
|
||||
case nil:
|
||||
return errSize
|
||||
case io.EOF:
|
||||
return io.ErrUnexpectedEOF
|
||||
case errEOS:
|
||||
break
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
return io.EOF
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Errors reported by the decoder for malformed streams.

// errDataAfterEOS is returned when input follows the end-of-stream marker.
var errDataAfterEOS = errors.New("lzma: data after end of stream marker")

// errSize is returned when the amount of decompressed data does not
// match the expected uncompressed size.
var errSize = errors.New("lzma: wrong uncompressed data size")
|
||||
|
||||
// Read reads data from the buffer. If no more data is available io.EOF is
|
||||
// returned.
|
||||
func (d *decoder) Read(p []byte) (n int, err error) {
|
||||
var k int
|
||||
for {
|
||||
// Read of decoder dict never returns an error.
|
||||
k, err = d.Dict.Read(p[n:])
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("dictionary read error %s", err))
|
||||
}
|
||||
if k == 0 && d.eos {
|
||||
return n, io.EOF
|
||||
}
|
||||
n += k
|
||||
if n >= len(p) {
|
||||
return n, nil
|
||||
}
|
||||
if err = d.decompress(); err != nil && err != io.EOF {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Decompressed returns the number of bytes decompressed by the decoder.
|
||||
func (d *decoder) Decompressed() int64 {
|
||||
return d.Dict.pos() - d.start
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// decoderDict provides the dictionary for the decoder. The whole
|
||||
// dictionary is used as reader buffer.
|
||||
type decoderDict struct {
|
||||
buf buffer
|
||||
head int64
|
||||
}
|
||||
|
||||
// newDecoderDict creates a new decoder dictionary. The whole dictionary
|
||||
// will be used as reader buffer.
|
||||
func newDecoderDict(dictCap int) (d *decoderDict, err error) {
|
||||
// lower limit supports easy test cases
|
||||
if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) {
|
||||
return nil, errors.New("lzma: dictCap out of range")
|
||||
}
|
||||
d = &decoderDict{buf: *newBuffer(dictCap)}
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Reset clears the dictionary. The read buffer is not changed, so the
|
||||
// buffered data can still be read.
|
||||
func (d *decoderDict) Reset() {
|
||||
d.head = 0
|
||||
}
|
||||
|
||||
// WriteByte writes a single byte into the dictionary. It is used to
|
||||
// write literals into the dictionary.
|
||||
func (d *decoderDict) WriteByte(c byte) error {
|
||||
if err := d.buf.WriteByte(c); err != nil {
|
||||
return err
|
||||
}
|
||||
d.head++
|
||||
return nil
|
||||
}
|
||||
|
||||
// pos returns the position of the dictionary head.
|
||||
func (d *decoderDict) pos() int64 { return d.head }
|
||||
|
||||
// dictLen returns the actual length of the dictionary.
|
||||
func (d *decoderDict) dictLen() int {
|
||||
capacity := d.buf.Cap()
|
||||
if d.head >= int64(capacity) {
|
||||
return capacity
|
||||
}
|
||||
return int(d.head)
|
||||
}
|
||||
|
||||
// byteAt returns a byte stored in the dictionary. If the distance is
|
||||
// non-positive or exceeds the current length of the dictionary the zero
|
||||
// byte is returned.
|
||||
func (d *decoderDict) byteAt(dist int) byte {
|
||||
if !(0 < dist && dist <= d.dictLen()) {
|
||||
return 0
|
||||
}
|
||||
i := d.buf.front - dist
|
||||
if i < 0 {
|
||||
i += len(d.buf.data)
|
||||
}
|
||||
return d.buf.data[i]
|
||||
}
|
||||
|
||||
// writeMatch writes the match at the top of the dictionary. The given
|
||||
// distance must point in the current dictionary and the length must not
|
||||
// exceed the maximum length 273 supported in LZMA.
|
||||
//
|
||||
// The error value ErrNoSpace indicates that no space is available in
|
||||
// the dictionary for writing. You need to read from the dictionary
|
||||
// first.
|
||||
func (d *decoderDict) writeMatch(dist int64, length int) error {
|
||||
if !(0 < dist && dist <= int64(d.dictLen())) {
|
||||
return errors.New("writeMatch: distance out of range")
|
||||
}
|
||||
if !(0 < length && length <= maxMatchLen) {
|
||||
return errors.New("writeMatch: length out of range")
|
||||
}
|
||||
if length > d.buf.Available() {
|
||||
return ErrNoSpace
|
||||
}
|
||||
d.head += int64(length)
|
||||
|
||||
i := d.buf.front - int(dist)
|
||||
if i < 0 {
|
||||
i += len(d.buf.data)
|
||||
}
|
||||
for length > 0 {
|
||||
var p []byte
|
||||
if i >= d.buf.front {
|
||||
p = d.buf.data[i:]
|
||||
i = 0
|
||||
} else {
|
||||
p = d.buf.data[i:d.buf.front]
|
||||
i = d.buf.front
|
||||
}
|
||||
if len(p) > length {
|
||||
p = p[:length]
|
||||
}
|
||||
if _, err := d.buf.Write(p); err != nil {
|
||||
panic(fmt.Errorf("d.buf.Write returned error %s", err))
|
||||
}
|
||||
length -= len(p)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write writes the given bytes into the dictionary and advances the
|
||||
// head.
|
||||
func (d *decoderDict) Write(p []byte) (n int, err error) {
|
||||
n, err = d.buf.Write(p)
|
||||
d.head += int64(n)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Available returns the number of available bytes for writing into the
|
||||
// decoder dictionary.
|
||||
func (d *decoderDict) Available() int { return d.buf.Available() }
|
||||
|
||||
// Read reads data from the buffer contained in the decoder dictionary.
|
||||
func (d *decoderDict) Read(p []byte) (n int, err error) { return d.buf.Read(p) }
|
||||
|
||||
// Buffered returns the number of bytes currently buffered in the
|
||||
// decoder dictionary.
|
||||
func (d *decoderDict) buffered() int { return d.buf.Buffered() }
|
||||
|
||||
// Peek gets data from the buffer without advancing the rear index.
|
||||
func (d *decoderDict) peek(p []byte) (n int, err error) { return d.buf.Peek(p) }
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import "fmt"
|
||||
|
||||
// directCodec encodes and decodes values that occupy a fixed number of
// bits. The value of the type is that bit count, which must lie in the
// range [1,32].
type directCodec byte
|
||||
|
||||
// makeDirectCodec creates a directCodec. The function panics if the number of
|
||||
// bits is not in the range [1,32].
|
||||
func makeDirectCodec(bits int) directCodec {
|
||||
if !(1 <= bits && bits <= 32) {
|
||||
panic(fmt.Errorf("bits=%d out of range", bits))
|
||||
}
|
||||
return directCodec(bits)
|
||||
}
|
||||
|
||||
// Bits returns the number of bits supported by this codec.
|
||||
func (dc directCodec) Bits() int {
|
||||
return int(dc)
|
||||
}
|
||||
|
||||
// Encode uses the range encoder to encode a value with the fixed number of
|
||||
// bits. The most-significant bit is encoded first.
|
||||
func (dc directCodec) Encode(e *rangeEncoder, v uint32) error {
|
||||
for i := int(dc) - 1; i >= 0; i-- {
|
||||
if err := e.DirectEncodeBit(v >> uint(i)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decode uses the range decoder to decode a value with the given number of
|
||||
// given bits. The most-significant bit is decoded first.
|
||||
func (dc directCodec) Decode(d *rangeDecoder) (v uint32, err error) {
|
||||
for i := int(dc) - 1; i >= 0; i-- {
|
||||
x, err := d.DirectDecodeBit()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
v = (v << 1) | x
|
||||
}
|
||||
return v, nil
|
||||
}
|
|
@ -0,0 +1,156 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
// Parameters of the distance codec.
const (
	// minDistance is the smallest supported match distance.
	minDistance = 1
	// maxDistance is the largest supported distance; this value is used
	// for the eos marker.
	maxDistance = 1 << 32
	// lenStates is the number of supported len states.
	lenStates = 4
	// startPosModel is the first position slot handled by the position
	// models.
	startPosModel = 4
	// endPosModel is the first position slot with align bits support.
	endPosModel = 14
	// posSlotBits is the number of bits of a position slot.
	posSlotBits = 6
	// alignBits is the number of align bits.
	alignBits = 4
	// maxPosSlot is the largest position slot.
	maxPosSlot = 63
)
|
||||
|
||||
// distCodec provides encoding and decoding of distance values.
|
||||
type distCodec struct {
|
||||
posSlotCodecs [lenStates]treeCodec
|
||||
posModel [endPosModel - startPosModel]treeReverseCodec
|
||||
alignCodec treeReverseCodec
|
||||
}
|
||||
|
||||
// deepcopy initializes dc as deep copy of the source.
|
||||
func (dc *distCodec) deepcopy(src *distCodec) {
|
||||
if dc == src {
|
||||
return
|
||||
}
|
||||
for i := range dc.posSlotCodecs {
|
||||
dc.posSlotCodecs[i].deepcopy(&src.posSlotCodecs[i])
|
||||
}
|
||||
for i := range dc.posModel {
|
||||
dc.posModel[i].deepcopy(&src.posModel[i])
|
||||
}
|
||||
dc.alignCodec.deepcopy(&src.alignCodec)
|
||||
}
|
||||
|
||||
// distBits returns the number of bits required to encode dist.
|
||||
func distBits(dist uint32) int {
|
||||
if dist < startPosModel {
|
||||
return 6
|
||||
}
|
||||
// slot s > 3, dist d
|
||||
// s = 2(bits(d)-1) + bit(d, bits(d)-2)
|
||||
// s>>1 = bits(d)-1
|
||||
// bits(d) = 32-nlz32(d)
|
||||
// s>>1=31-nlz32(d)
|
||||
// n = 5 + (s>>1) = 36 - nlz32(d)
|
||||
return 36 - nlz32(dist)
|
||||
}
|
||||
|
||||
// newDistCodec creates a new distance codec.
|
||||
func (dc *distCodec) init() {
|
||||
for i := range dc.posSlotCodecs {
|
||||
dc.posSlotCodecs[i] = makeTreeCodec(posSlotBits)
|
||||
}
|
||||
for i := range dc.posModel {
|
||||
posSlot := startPosModel + i
|
||||
bits := (posSlot >> 1) - 1
|
||||
dc.posModel[i] = makeTreeReverseCodec(bits)
|
||||
}
|
||||
dc.alignCodec = makeTreeReverseCodec(alignBits)
|
||||
}
|
||||
|
||||
// lenState converts the value l to a supported lenState value.
|
||||
func lenState(l uint32) uint32 {
|
||||
if l >= lenStates {
|
||||
l = lenStates - 1
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
// Encode encodes the distance using the parameter l. Dist can have values from
|
||||
// the full range of uint32 values. To get the distance offset the actual match
|
||||
// distance has to be decreased by 1. A distance offset of 0xffffffff (eos)
|
||||
// indicates the end of the stream.
|
||||
func (dc *distCodec) Encode(e *rangeEncoder, dist uint32, l uint32) (err error) {
|
||||
// Compute the posSlot using nlz32
|
||||
var posSlot uint32
|
||||
var bits uint32
|
||||
if dist < startPosModel {
|
||||
posSlot = dist
|
||||
} else {
|
||||
bits = uint32(30 - nlz32(dist))
|
||||
posSlot = startPosModel - 2 + (bits << 1)
|
||||
posSlot += (dist >> uint(bits)) & 1
|
||||
}
|
||||
|
||||
if err = dc.posSlotCodecs[lenState(l)].Encode(e, posSlot); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
switch {
|
||||
case posSlot < startPosModel:
|
||||
return nil
|
||||
case posSlot < endPosModel:
|
||||
tc := &dc.posModel[posSlot-startPosModel]
|
||||
return tc.Encode(dist, e)
|
||||
}
|
||||
dic := directCodec(bits - alignBits)
|
||||
if err = dic.Encode(e, dist>>alignBits); err != nil {
|
||||
return
|
||||
}
|
||||
return dc.alignCodec.Encode(dist, e)
|
||||
}
|
||||
|
||||
// Decode decodes the distance offset using the parameter l. The dist value
|
||||
// 0xffffffff (eos) indicates the end of the stream. Add one to the distance
|
||||
// offset to get the actual match distance.
|
||||
func (dc *distCodec) Decode(d *rangeDecoder, l uint32) (dist uint32, err error) {
|
||||
posSlot, err := dc.posSlotCodecs[lenState(l)].Decode(d)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// posSlot equals distance
|
||||
if posSlot < startPosModel {
|
||||
return posSlot, nil
|
||||
}
|
||||
|
||||
// posSlot uses the individual models
|
||||
bits := (posSlot >> 1) - 1
|
||||
dist = (2 | (posSlot & 1)) << bits
|
||||
var u uint32
|
||||
if posSlot < endPosModel {
|
||||
tc := &dc.posModel[posSlot-startPosModel]
|
||||
if u, err = tc.Decode(d); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dist += u
|
||||
return dist, nil
|
||||
}
|
||||
|
||||
// posSlots use direct encoding and a single model for the four align
|
||||
// bits.
|
||||
dic := directCodec(bits - alignBits)
|
||||
if u, err = dic.Decode(d); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dist += u << alignBits
|
||||
if u, err = dc.alignCodec.Decode(d); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dist += u
|
||||
return dist, nil
|
||||
}
|
|
@ -0,0 +1,268 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// opLenMargin is the upper limit of the number of bytes required to
// encode a single operation.
const (
	opLenMargin = 10
)
|
||||
|
||||
// compressFlags is a bit set that controls the compression process.
type compressFlags uint32

// Values for compressFlags.
const (
	// all requests that all data be compressed, even if the resulting
	// compression is not optimal.
	all compressFlags = 1 << iota
)
|
||||
|
||||
// encoderFlags is the bit set of options accepted by newEncoder.
type encoderFlags uint32

// Flags for the encoder.
const (
	// eosMarker requests that an EOS marker be written.
	eosMarker encoderFlags = 1 << iota
)
|
||||
|
||||
// Encoder compresses data buffered in the encoder dictionary and writes
|
||||
// it into a byte writer.
|
||||
type encoder struct {
|
||||
dict *encoderDict
|
||||
state *state
|
||||
re *rangeEncoder
|
||||
start int64
|
||||
// generate eos marker
|
||||
marker bool
|
||||
limit bool
|
||||
margin int
|
||||
}
|
||||
|
||||
// newEncoder creates a new encoder. If the byte writer must be
|
||||
// limited use LimitedByteWriter provided by this package. The flags
|
||||
// argument supports the eosMarker flag, controlling whether a
|
||||
// terminating end-of-stream marker must be written.
|
||||
func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict,
|
||||
flags encoderFlags) (e *encoder, err error) {
|
||||
|
||||
re, err := newRangeEncoder(bw)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
e = &encoder{
|
||||
dict: dict,
|
||||
state: state,
|
||||
re: re,
|
||||
marker: flags&eosMarker != 0,
|
||||
start: dict.Pos(),
|
||||
margin: opLenMargin,
|
||||
}
|
||||
if e.marker {
|
||||
e.margin += 5
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Write writes the bytes from p into the dictionary. If not enough
|
||||
// space is available the data in the dictionary buffer will be
|
||||
// compressed to make additional space available. If the limit of the
|
||||
// underlying writer has been reached ErrLimit will be returned.
|
||||
func (e *encoder) Write(p []byte) (n int, err error) {
|
||||
for {
|
||||
k, err := e.dict.Write(p[n:])
|
||||
n += k
|
||||
if err == ErrNoSpace {
|
||||
if err = e.compress(0); err != nil {
|
||||
return n, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
|
||||
// Reopen reopens the encoder with a new byte writer.
|
||||
func (e *encoder) Reopen(bw io.ByteWriter) error {
|
||||
var err error
|
||||
if e.re, err = newRangeEncoder(bw); err != nil {
|
||||
return err
|
||||
}
|
||||
e.start = e.dict.Pos()
|
||||
e.limit = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeLiteral writes a literal into the LZMA stream
|
||||
func (e *encoder) writeLiteral(l lit) error {
|
||||
var err error
|
||||
state, state2, _ := e.state.states(e.dict.Pos())
|
||||
if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos())
|
||||
match := e.dict.ByteAt(int(e.state.rep[0]) + 1)
|
||||
err = e.state.litCodec.Encode(e.re, l.b, state, match, litState)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.state.updateStateLiteral()
|
||||
return nil
|
||||
}
|
||||
|
||||
// iverson implements the Iverson operator as proposed by Donald Knuth in
// his book Concrete Mathematics: it returns 1 if ok is true and 0
// otherwise.
func iverson(ok bool) uint32 {
	var v uint32
	if ok {
		v = 1
	}
	return v
}
|
||||
|
||||
// writeMatch writes a repetition operation into the operation stream
|
||||
func (e *encoder) writeMatch(m match) error {
|
||||
var err error
|
||||
if !(minDistance <= m.distance && m.distance <= maxDistance) {
|
||||
panic(fmt.Errorf("match distance %d out of range", m.distance))
|
||||
}
|
||||
dist := uint32(m.distance - minDistance)
|
||||
if !(minMatchLen <= m.n && m.n <= maxMatchLen) &&
|
||||
!(dist == e.state.rep[0] && m.n == 1) {
|
||||
panic(fmt.Errorf(
|
||||
"match length %d out of range; dist %d rep[0] %d",
|
||||
m.n, dist, e.state.rep[0]))
|
||||
}
|
||||
state, state2, posState := e.state.states(e.dict.Pos())
|
||||
if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil {
|
||||
return err
|
||||
}
|
||||
g := 0
|
||||
for ; g < 4; g++ {
|
||||
if e.state.rep[g] == dist {
|
||||
break
|
||||
}
|
||||
}
|
||||
b := iverson(g < 4)
|
||||
if err = e.state.isRep[state].Encode(e.re, b); err != nil {
|
||||
return err
|
||||
}
|
||||
n := uint32(m.n - minMatchLen)
|
||||
if b == 0 {
|
||||
// simple match
|
||||
e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] =
|
||||
e.state.rep[2], e.state.rep[1], e.state.rep[0], dist
|
||||
e.state.updateStateMatch()
|
||||
if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil {
|
||||
return err
|
||||
}
|
||||
return e.state.distCodec.Encode(e.re, dist, n)
|
||||
}
|
||||
b = iverson(g != 0)
|
||||
if err = e.state.isRepG0[state].Encode(e.re, b); err != nil {
|
||||
return err
|
||||
}
|
||||
if b == 0 {
|
||||
// g == 0
|
||||
b = iverson(m.n != 1)
|
||||
if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil {
|
||||
return err
|
||||
}
|
||||
if b == 0 {
|
||||
e.state.updateStateShortRep()
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
// g in {1,2,3}
|
||||
b = iverson(g != 1)
|
||||
if err = e.state.isRepG1[state].Encode(e.re, b); err != nil {
|
||||
return err
|
||||
}
|
||||
if b == 1 {
|
||||
// g in {2,3}
|
||||
b = iverson(g != 2)
|
||||
err = e.state.isRepG2[state].Encode(e.re, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if b == 1 {
|
||||
e.state.rep[3] = e.state.rep[2]
|
||||
}
|
||||
e.state.rep[2] = e.state.rep[1]
|
||||
}
|
||||
e.state.rep[1] = e.state.rep[0]
|
||||
e.state.rep[0] = dist
|
||||
}
|
||||
e.state.updateStateRep()
|
||||
return e.state.repLenCodec.Encode(e.re, n, posState)
|
||||
}
|
||||
|
||||
// writeOp writes a single operation to the range encoder. The function
|
||||
// checks whether there is enough space available to close the LZMA
|
||||
// stream.
|
||||
func (e *encoder) writeOp(op operation) error {
|
||||
if e.re.Available() < int64(e.margin) {
|
||||
return ErrLimit
|
||||
}
|
||||
switch x := op.(type) {
|
||||
case lit:
|
||||
return e.writeLiteral(x)
|
||||
case match:
|
||||
return e.writeMatch(x)
|
||||
default:
|
||||
panic("unexpected operation")
|
||||
}
|
||||
}
|
||||
|
||||
// compress compressed data from the dictionary buffer. If the flag all
|
||||
// is set, all data in the dictionary buffer will be compressed. The
|
||||
// function returns ErrLimit if the underlying writer has reached its
|
||||
// limit.
|
||||
func (e *encoder) compress(flags compressFlags) error {
|
||||
n := 0
|
||||
if flags&all == 0 {
|
||||
n = maxMatchLen - 1
|
||||
}
|
||||
d := e.dict
|
||||
m := d.m
|
||||
for d.Buffered() > n {
|
||||
op := m.NextOp(e.state.rep)
|
||||
if err := e.writeOp(op); err != nil {
|
||||
return err
|
||||
}
|
||||
d.Discard(op.Len())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// eosMatch is a pseudo operation that indicates the end of the stream.
|
||||
var eosMatch = match{distance: maxDistance, n: minMatchLen}
|
||||
|
||||
// Close terminates the LZMA stream. If requested the end-of-stream
|
||||
// marker will be written. If the byte writer limit has been or will be
|
||||
// reached during compression of the remaining data in the buffer the
|
||||
// LZMA stream will be closed and data will remain in the buffer.
|
||||
func (e *encoder) Close() error {
|
||||
err := e.compress(all)
|
||||
if err != nil && err != ErrLimit {
|
||||
return err
|
||||
}
|
||||
if e.marker {
|
||||
if err := e.writeMatch(eosMatch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
err = e.re.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
// Compressed returns the number bytes of the input data that been
|
||||
// compressed.
|
||||
func (e *encoder) Compressed() int64 {
|
||||
return e.dict.Pos() - e.start
|
||||
}
|
|
@ -0,0 +1,149 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// matcher is an interface that supports the identification of the next
|
||||
// operation.
|
||||
type matcher interface {
|
||||
io.Writer
|
||||
SetDict(d *encoderDict)
|
||||
NextOp(rep [4]uint32) operation
|
||||
}
|
||||
|
||||
// encoderDict provides the dictionary of the encoder. It includes an
|
||||
// addtional buffer atop of the actual dictionary.
|
||||
type encoderDict struct {
|
||||
buf buffer
|
||||
m matcher
|
||||
head int64
|
||||
capacity int
|
||||
// preallocated array
|
||||
data [maxMatchLen]byte
|
||||
}
|
||||
|
||||
// newEncoderDict creates the encoder dictionary. The argument bufSize
|
||||
// defines the size of the additional buffer.
|
||||
func newEncoderDict(dictCap, bufSize int, m matcher) (d *encoderDict, err error) {
|
||||
if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) {
|
||||
return nil, errors.New(
|
||||
"lzma: dictionary capacity out of range")
|
||||
}
|
||||
if bufSize < 1 {
|
||||
return nil, errors.New(
|
||||
"lzma: buffer size must be larger than zero")
|
||||
}
|
||||
d = &encoderDict{
|
||||
buf: *newBuffer(dictCap + bufSize),
|
||||
capacity: dictCap,
|
||||
m: m,
|
||||
}
|
||||
m.SetDict(d)
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Discard discards n bytes. Note that n must not be larger than
|
||||
// MaxMatchLen.
|
||||
func (d *encoderDict) Discard(n int) {
|
||||
p := d.data[:n]
|
||||
k, _ := d.buf.Read(p)
|
||||
if k < n {
|
||||
panic(fmt.Errorf("lzma: can't discard %d bytes", n))
|
||||
}
|
||||
d.head += int64(n)
|
||||
d.m.Write(p)
|
||||
}
|
||||
|
||||
// Len returns the data available in the encoder dictionary.
|
||||
func (d *encoderDict) Len() int {
|
||||
n := d.buf.Available()
|
||||
if int64(n) > d.head {
|
||||
return int(d.head)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// DictLen returns the actual length of data in the dictionary.
|
||||
func (d *encoderDict) DictLen() int {
|
||||
if d.head < int64(d.capacity) {
|
||||
return int(d.head)
|
||||
}
|
||||
return d.capacity
|
||||
}
|
||||
|
||||
// Available returns the number of bytes that can be written by a
|
||||
// following Write call.
|
||||
func (d *encoderDict) Available() int {
|
||||
return d.buf.Available() - d.DictLen()
|
||||
}
|
||||
|
||||
// Write writes data into the dictionary buffer. Note that the position
|
||||
// of the dictionary head will not be moved. If there is not enough
|
||||
// space in the buffer ErrNoSpace will be returned.
|
||||
func (d *encoderDict) Write(p []byte) (n int, err error) {
|
||||
m := d.Available()
|
||||
if len(p) > m {
|
||||
p = p[:m]
|
||||
err = ErrNoSpace
|
||||
}
|
||||
var e error
|
||||
if n, e = d.buf.Write(p); e != nil {
|
||||
err = e
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Pos returns the position of the head.
|
||||
func (d *encoderDict) Pos() int64 { return d.head }
|
||||
|
||||
// ByteAt returns the byte at the given distance.
|
||||
func (d *encoderDict) ByteAt(distance int) byte {
|
||||
if !(0 < distance && distance <= d.Len()) {
|
||||
return 0
|
||||
}
|
||||
i := d.buf.rear - distance
|
||||
if i < 0 {
|
||||
i += len(d.buf.data)
|
||||
}
|
||||
return d.buf.data[i]
|
||||
}
|
||||
|
||||
// CopyN copies the last n bytes from the dictionary into the provided
|
||||
// writer. This is used for copying uncompressed data into an
|
||||
// uncompressed segment.
|
||||
func (d *encoderDict) CopyN(w io.Writer, n int) (written int, err error) {
|
||||
if n <= 0 {
|
||||
return 0, nil
|
||||
}
|
||||
m := d.Len()
|
||||
if n > m {
|
||||
n = m
|
||||
err = ErrNoSpace
|
||||
}
|
||||
i := d.buf.rear - n
|
||||
var e error
|
||||
if i < 0 {
|
||||
i += len(d.buf.data)
|
||||
if written, e = w.Write(d.buf.data[i:]); e != nil {
|
||||
return written, e
|
||||
}
|
||||
i = 0
|
||||
}
|
||||
var k int
|
||||
k, e = w.Write(d.buf.data[i:d.buf.rear])
|
||||
written += k
|
||||
if e != nil {
|
||||
err = e
|
||||
}
|
||||
return written, err
|
||||
}
|
||||
|
||||
// Buffered returns the number of bytes in the buffer.
|
||||
func (d *encoderDict) Buffered() int { return d.buf.Buffered() }
|
Binary file not shown.
|
@ -0,0 +1,309 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/ulikunitz/xz/internal/hash"
|
||||
)
|
||||
|
||||
/* For compression we need to find byte sequences that match the byte
|
||||
* sequence at the dictionary head. A hash table is a simple method to
|
||||
* provide this capability.
|
||||
*/
|
||||
|
||||
// maxMatches limits the number of matches requested from the Matches
// function. This controls the speed of the overall encoding.
const (
	maxMatches = 16
)
|
||||
|
||||
// shortDists is the number of short distances supported by the
// implementation.
const (
	shortDists = 8
)
|
||||
|
||||
// Bounds for the hash table exponent. The minimum is somewhat arbitrary,
// but the maximum is limited by the memory requirements of the hash
// table.
const (
	minTableExponent = 9
	maxTableExponent = 20
)
|
||||
|
||||
// newRoller contains the function used to create an instance of the
|
||||
// hash.Roller.
|
||||
var newRoller = func(n int) hash.Roller { return hash.NewCyclicPoly(n) }
|
||||
|
||||
// hashTable stores the hash table including the rolling hash method.
|
||||
//
|
||||
// We implement chained hashing into a circular buffer. Each entry in
|
||||
// the circular buffer stores the delta distance to the next position with a
|
||||
// word that has the same hash value.
|
||||
type hashTable struct {
|
||||
dict *encoderDict
|
||||
// actual hash table
|
||||
t []int64
|
||||
// circular list data with the offset to the next word
|
||||
data []uint32
|
||||
front int
|
||||
// mask for computing the index for the hash table
|
||||
mask uint64
|
||||
// hash offset; initial value is -int64(wordLen)
|
||||
hoff int64
|
||||
// length of the hashed word
|
||||
wordLen int
|
||||
// hash roller for computing the hash values for the Write
|
||||
// method
|
||||
wr hash.Roller
|
||||
// hash roller for computing arbitrary hashes
|
||||
hr hash.Roller
|
||||
// preallocated slices
|
||||
p [maxMatches]int64
|
||||
distances [maxMatches + shortDists]int
|
||||
}
|
||||
|
||||
// hashTableExponent derives the hash table exponent from the dictionary
|
||||
// capacity.
|
||||
func hashTableExponent(n uint32) int {
|
||||
e := 30 - nlz32(n)
|
||||
switch {
|
||||
case e < minTableExponent:
|
||||
e = minTableExponent
|
||||
case e > maxTableExponent:
|
||||
e = maxTableExponent
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// newHashTable creates a new hash table for words of length wordLen
|
||||
func newHashTable(capacity int, wordLen int) (t *hashTable, err error) {
|
||||
if !(0 < capacity) {
|
||||
return nil, errors.New(
|
||||
"newHashTable: capacity must not be negative")
|
||||
}
|
||||
exp := hashTableExponent(uint32(capacity))
|
||||
if !(1 <= wordLen && wordLen <= 4) {
|
||||
return nil, errors.New("newHashTable: " +
|
||||
"argument wordLen out of range")
|
||||
}
|
||||
n := 1 << uint(exp)
|
||||
if n <= 0 {
|
||||
panic("newHashTable: exponent is too large")
|
||||
}
|
||||
t = &hashTable{
|
||||
t: make([]int64, n),
|
||||
data: make([]uint32, capacity),
|
||||
mask: (uint64(1) << uint(exp)) - 1,
|
||||
hoff: -int64(wordLen),
|
||||
wordLen: wordLen,
|
||||
wr: newRoller(wordLen),
|
||||
hr: newRoller(wordLen),
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// SetDict sets the encoder dictionary that NextOp operates on.
func (t *hashTable) SetDict(d *encoderDict) {
	t.dict = d
}
|
||||
|
||||
// buffered returns the number of bytes that are currently hashed.
|
||||
func (t *hashTable) buffered() int {
|
||||
n := t.hoff + 1
|
||||
switch {
|
||||
case n <= 0:
|
||||
return 0
|
||||
case n >= int64(len(t.data)):
|
||||
return len(t.data)
|
||||
}
|
||||
return int(n)
|
||||
}
|
||||
|
||||
// addIndex adds n to an index ensuring that is stays inside the
|
||||
// circular buffer for the hash chain.
|
||||
func (t *hashTable) addIndex(i, n int) int {
|
||||
i += n - len(t.data)
|
||||
if i < 0 {
|
||||
i += len(t.data)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// putDelta puts the delta instance at the current front of the circular
|
||||
// chain buffer.
|
||||
func (t *hashTable) putDelta(delta uint32) {
|
||||
t.data[t.front] = delta
|
||||
t.front = t.addIndex(t.front, 1)
|
||||
}
|
||||
|
||||
// putEntry puts a new entry into the hash table. If there is already a
|
||||
// value stored it is moved into the circular chain buffer.
|
||||
func (t *hashTable) putEntry(h uint64, pos int64) {
|
||||
if pos < 0 {
|
||||
return
|
||||
}
|
||||
i := h & t.mask
|
||||
old := t.t[i] - 1
|
||||
t.t[i] = pos + 1
|
||||
var delta int64
|
||||
if old >= 0 {
|
||||
delta = pos - old
|
||||
if delta > 1<<32-1 || delta > int64(t.buffered()) {
|
||||
delta = 0
|
||||
}
|
||||
}
|
||||
t.putDelta(uint32(delta))
|
||||
}
|
||||
|
||||
// WriteByte converts a single byte into a hash and puts them into the hash
|
||||
// table.
|
||||
func (t *hashTable) WriteByte(b byte) error {
|
||||
h := t.wr.RollByte(b)
|
||||
t.hoff++
|
||||
t.putEntry(h, t.hoff)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write converts the bytes provided into hash tables and stores the
|
||||
// abbreviated offsets into the hash table. The method will never return an
|
||||
// error.
|
||||
func (t *hashTable) Write(p []byte) (n int, err error) {
|
||||
for _, b := range p {
|
||||
// WriteByte doesn't generate an error.
|
||||
t.WriteByte(b)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
// getMatches the matches for a specific hash. The functions returns the
|
||||
// number of positions found.
|
||||
//
|
||||
// TODO: Make a getDistances because that we are actually interested in.
|
||||
func (t *hashTable) getMatches(h uint64, positions []int64) (n int) {
|
||||
if t.hoff < 0 || len(positions) == 0 {
|
||||
return 0
|
||||
}
|
||||
buffered := t.buffered()
|
||||
tailPos := t.hoff + 1 - int64(buffered)
|
||||
rear := t.front - buffered
|
||||
if rear >= 0 {
|
||||
rear -= len(t.data)
|
||||
}
|
||||
// get the slot for the hash
|
||||
pos := t.t[h&t.mask] - 1
|
||||
delta := pos - tailPos
|
||||
for {
|
||||
if delta < 0 {
|
||||
return n
|
||||
}
|
||||
positions[n] = tailPos + delta
|
||||
n++
|
||||
if n >= len(positions) {
|
||||
return n
|
||||
}
|
||||
i := rear + int(delta)
|
||||
if i < 0 {
|
||||
i += len(t.data)
|
||||
}
|
||||
u := t.data[i]
|
||||
if u == 0 {
|
||||
return n
|
||||
}
|
||||
delta -= int64(u)
|
||||
}
|
||||
}
|
||||
|
||||
// hash computes the rolling hash for the word stored in p. For correct
|
||||
// results its length must be equal to t.wordLen.
|
||||
func (t *hashTable) hash(p []byte) uint64 {
|
||||
var h uint64
|
||||
for _, b := range p {
|
||||
h = t.hr.RollByte(b)
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// Matches fills the positions slice with potential matches. The
|
||||
// functions returns the number of positions filled into positions. The
|
||||
// byte slice p must have word length of the hash table.
|
||||
func (t *hashTable) Matches(p []byte, positions []int64) int {
|
||||
if len(p) != t.wordLen {
|
||||
panic(fmt.Errorf(
|
||||
"byte slice must have length %d", t.wordLen))
|
||||
}
|
||||
h := t.hash(p)
|
||||
return t.getMatches(h, positions)
|
||||
}
|
||||
|
||||
// NextOp identifies the next operation using the hash table.
|
||||
//
|
||||
// TODO: Use all repetitions to find matches.
|
||||
func (t *hashTable) NextOp(rep [4]uint32) operation {
|
||||
// get positions
|
||||
data := t.dict.data[:maxMatchLen]
|
||||
n, _ := t.dict.buf.Peek(data)
|
||||
data = data[:n]
|
||||
var p []int64
|
||||
if n < t.wordLen {
|
||||
p = t.p[:0]
|
||||
} else {
|
||||
p = t.p[:maxMatches]
|
||||
n = t.Matches(data[:t.wordLen], p)
|
||||
p = p[:n]
|
||||
}
|
||||
|
||||
// convert positions in potential distances
|
||||
head := t.dict.head
|
||||
dists := append(t.distances[:0], 1, 2, 3, 4, 5, 6, 7, 8)
|
||||
for _, pos := range p {
|
||||
dis := int(head - pos)
|
||||
if dis > shortDists {
|
||||
dists = append(dists, dis)
|
||||
}
|
||||
}
|
||||
|
||||
// check distances
|
||||
var m match
|
||||
dictLen := t.dict.DictLen()
|
||||
for _, dist := range dists {
|
||||
if dist > dictLen {
|
||||
continue
|
||||
}
|
||||
|
||||
// Here comes a trick. We are only interested in matches
|
||||
// that are longer than the matches we have been found
|
||||
// before. So before we test the whole byte sequence at
|
||||
// the given distance, we test the first byte that would
|
||||
// make the match longer. If it doesn't match the byte
|
||||
// to match, we don't to care any longer.
|
||||
i := t.dict.buf.rear - dist + m.n
|
||||
if i < 0 {
|
||||
i += len(t.dict.buf.data)
|
||||
}
|
||||
if t.dict.buf.data[i] != data[m.n] {
|
||||
// We can't get a longer match. Jump to the next
|
||||
// distance.
|
||||
continue
|
||||
}
|
||||
|
||||
n := t.dict.buf.matchLen(dist, data)
|
||||
switch n {
|
||||
case 0:
|
||||
continue
|
||||
case 1:
|
||||
if uint32(dist-minDistance) != rep[0] {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if n > m.n {
|
||||
m = match{int64(dist), n}
|
||||
if n == len(data) {
|
||||
// No better match will be found.
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if m.n == 0 {
|
||||
return lit{data[0]}
|
||||
}
|
||||
return m
|
||||
}
|
|
@ -0,0 +1,167 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// uint32LE decodes the first four bytes of b as a little-endian uint32
// value. The slice must have at least four bytes.
func uint32LE(b []byte) uint32 {
	_ = b[3] // panics early if the slice is too short
	var x uint32
	for i := 3; i >= 0; i-- {
		x = x<<8 | uint32(b[i])
	}
	return x
}
|
||||
|
||||
// uint64LE decodes the first eight bytes of b as a little-endian uint64
// value. The slice must have at least eight bytes.
func uint64LE(b []byte) uint64 {
	_ = b[7] // panics early if the slice is too short
	var x uint64
	for i := 7; i >= 0; i-- {
		x = x<<8 | uint64(b[i])
	}
	return x
}
|
||||
|
||||
// putUint32LE stores x in little-endian byte order in the first four
// bytes of b. The slice must have a length of at least 4 bytes.
func putUint32LE(b []byte, x uint32) {
	for i := 0; i < 4; i++ {
		b[i] = byte(x >> (8 * uint(i)))
	}
}
|
||||
|
||||
// putUint64LE stores x in little-endian byte order in the first eight
// bytes of b. The slice must provide room for at least 8 bytes.
func putUint64LE(b []byte, x uint64) {
	for i := 0; i < 8; i++ {
		b[i] = byte(x >> (8 * uint(i)))
	}
}
|
||||
|
||||
const (
	// noHeaderSize is the value of the size field in the LZMA header
	// that stands for "no uncompressed size given".
	noHeaderSize uint64 = 1<<64 - 1

	// HeaderLen provides the length of the LZMA file header.
	HeaderLen = 13
)
|
||||
|
||||
// header represents the header of an LZMA file.
|
||||
type header struct {
|
||||
properties Properties
|
||||
dictCap int
|
||||
// uncompressed size; negative value if no size is given
|
||||
size int64
|
||||
}
|
||||
|
||||
// marshalBinary marshals the header.
|
||||
func (h *header) marshalBinary() (data []byte, err error) {
|
||||
if err = h.properties.verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) {
|
||||
return nil, fmt.Errorf("lzma: DictCap %d out of range",
|
||||
h.dictCap)
|
||||
}
|
||||
|
||||
data = make([]byte, 13)
|
||||
|
||||
// property byte
|
||||
data[0] = h.properties.Code()
|
||||
|
||||
// dictionary capacity
|
||||
putUint32LE(data[1:5], uint32(h.dictCap))
|
||||
|
||||
// uncompressed size
|
||||
var s uint64
|
||||
if h.size > 0 {
|
||||
s = uint64(h.size)
|
||||
} else {
|
||||
s = noHeaderSize
|
||||
}
|
||||
putUint64LE(data[5:], s)
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// unmarshalBinary unmarshals the header.
|
||||
func (h *header) unmarshalBinary(data []byte) error {
|
||||
if len(data) != HeaderLen {
|
||||
return errors.New("lzma.unmarshalBinary: data has wrong length")
|
||||
}
|
||||
|
||||
// properties
|
||||
var err error
|
||||
if h.properties, err = PropertiesForCode(data[0]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// dictionary capacity
|
||||
h.dictCap = int(uint32LE(data[1:]))
|
||||
if h.dictCap < 0 {
|
||||
return errors.New(
|
||||
"LZMA header: dictionary capacity exceeds maximum " +
|
||||
"integer")
|
||||
}
|
||||
|
||||
// uncompressed size
|
||||
s := uint64LE(data[5:])
|
||||
if s == noHeaderSize {
|
||||
h.size = -1
|
||||
} else {
|
||||
h.size = int64(s)
|
||||
if h.size < 0 {
|
||||
return errors.New(
|
||||
"LZMA header: uncompressed size " +
|
||||
"out of int64 range")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validDictCap checks whether the dictionary capacity is correct. This
|
||||
// is used to weed out wrong file headers.
|
||||
func validDictCap(dictcap int) bool {
|
||||
if int64(dictcap) == MaxDictCap {
|
||||
return true
|
||||
}
|
||||
for n := uint(10); n < 32; n++ {
|
||||
if dictcap == 1<<n {
|
||||
return true
|
||||
}
|
||||
if dictcap == 1<<n+1<<(n-1) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ValidHeader checks for a valid LZMA file header. It allows only
|
||||
// dictionary sizes of 2^n or 2^n+2^(n-1) with n >= 10 or 2^32-1. If
|
||||
// there is an explicit size it must not exceed 256 GiB. The length of
|
||||
// the data argument must be HeaderLen.
|
||||
func ValidHeader(data []byte) bool {
|
||||
var h header
|
||||
if err := h.unmarshalBinary(data); err != nil {
|
||||
return false
|
||||
}
|
||||
if !validDictCap(h.dictCap) {
|
||||
return false
|
||||
}
|
||||
return h.size < 0 || h.size <= 1<<38
|
||||
}
|
|
@ -0,0 +1,398 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Size limits for the data of a single chunk.
const (
	// maxCompressed is the maximum size of the compressed data
	maxCompressed = 1 << 16
	// maxUncompressed is the maximum size of the uncompressed data
	maxUncompressed = 1 << 21
)
|
||||
|
||||
// chunkType identifies the type of an LZMA2 chunk. The values are an
// internal representation and are not the encoding used in the LZMA2
// chunk header.
type chunkType byte

// Possible values for the chunk type.
const (
	cEOS  chunkType = iota // end of stream
	cUD                    // uncompressed; reset dictionary
	cU                     // uncompressed; no reset of dictionary
	cL                     // LZMA compressed; no reset
	cLR                    // LZMA compressed; reset state
	cLRN                   // LZMA compressed; reset state; new property value
	cLRND                  // LZMA compressed; reset state; new property value; reset dictionary
)

// chunkTypeStrings maps each chunk type to its string representation.
var chunkTypeStrings = [...]string{
	cEOS:  "EOS",
	cUD:   "UD",
	cU:    "U",
	cL:    "L",
	cLR:   "LR",
	cLRN:  "LRN",
	cLRND: "LRND",
}

// String returns the name of the chunk type or "unknown" for values
// that are not a valid chunk type.
func (c chunkType) String() string {
	if int(c) >= len(chunkTypeStrings) {
		return "unknown"
	}
	return chunkTypeStrings[c]
}
|
||||
|
||||
// Encodings of the chunk types in the chunk header byte. For the LZMA
// chunk types the lower bits of the header byte additionally carry the
// high bits of the uncompressed size.
const (
	hEOS  = 0
	hUD   = 1
	hU    = 2
	hL    = 1 << 7
	hLR   = hL | 1<<5
	hLRN  = hL | 1<<6
	hLRND = hL | 1<<6 | 1<<5
)
|
||||
|
||||
// errHeaderByte indicates an unsupported value for the chunk header
|
||||
// byte. These bytes starts the variable-length chunk header.
|
||||
var errHeaderByte = errors.New("lzma: unsupported chunk header byte")
|
||||
|
||||
// headerChunkType converts the header byte into a chunk type. It
|
||||
// ignores the uncompressed size bits in the chunk header byte.
|
||||
func headerChunkType(h byte) (c chunkType, err error) {
|
||||
if h&hL == 0 {
|
||||
// no compression
|
||||
switch h {
|
||||
case hEOS:
|
||||
c = cEOS
|
||||
case hUD:
|
||||
c = cUD
|
||||
case hU:
|
||||
c = cU
|
||||
default:
|
||||
return 0, errHeaderByte
|
||||
}
|
||||
return
|
||||
}
|
||||
switch h & hLRND {
|
||||
case hL:
|
||||
c = cL
|
||||
case hLR:
|
||||
c = cLR
|
||||
case hLRN:
|
||||
c = cLRN
|
||||
case hLRND:
|
||||
c = cLRND
|
||||
default:
|
||||
return 0, errHeaderByte
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// uncompressedHeaderLen provides the length of an uncompressed header
|
||||
const uncompressedHeaderLen = 3
|
||||
|
||||
// headerLen returns the length of the LZMA2 header for a given chunk
|
||||
// type.
|
||||
func headerLen(c chunkType) int {
|
||||
switch c {
|
||||
case cEOS:
|
||||
return 1
|
||||
case cU, cUD:
|
||||
return uncompressedHeaderLen
|
||||
case cL, cLR:
|
||||
return 5
|
||||
case cLRN, cLRND:
|
||||
return 6
|
||||
}
|
||||
panic(fmt.Errorf("unsupported chunk type %d", c))
|
||||
}
|
||||
|
||||
// chunkHeader represents the contents of a chunk header.
|
||||
type chunkHeader struct {
|
||||
ctype chunkType
|
||||
uncompressed uint32
|
||||
compressed uint16
|
||||
props Properties
|
||||
}
|
||||
|
||||
// String returns a string representation of the chunk header.
|
||||
func (h *chunkHeader) String() string {
|
||||
return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
|
||||
h.compressed, &h.props)
|
||||
}
|
||||
|
||||
// UnmarshalBinary reads the content of the chunk header from the data
|
||||
// slice. The slice must have the correct length.
|
||||
func (h *chunkHeader) UnmarshalBinary(data []byte) error {
|
||||
if len(data) == 0 {
|
||||
return errors.New("no data")
|
||||
}
|
||||
c, err := headerChunkType(data[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n := headerLen(c)
|
||||
if len(data) < n {
|
||||
return errors.New("incomplete data")
|
||||
}
|
||||
if len(data) > n {
|
||||
return errors.New("invalid data length")
|
||||
}
|
||||
|
||||
*h = chunkHeader{ctype: c}
|
||||
if c == cEOS {
|
||||
return nil
|
||||
}
|
||||
|
||||
h.uncompressed = uint32(uint16BE(data[1:3]))
|
||||
if c <= cU {
|
||||
return nil
|
||||
}
|
||||
h.uncompressed |= uint32(data[0]&^hLRND) << 16
|
||||
|
||||
h.compressed = uint16BE(data[3:5])
|
||||
if c <= cLR {
|
||||
return nil
|
||||
}
|
||||
|
||||
h.props, err = PropertiesForCode(data[5])
|
||||
return err
|
||||
}
|
||||
|
||||
// MarshalBinary encodes the chunk header value. The function checks
|
||||
// whether the content of the chunk header is correct.
|
||||
func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
|
||||
if h.ctype > cLRND {
|
||||
return nil, errors.New("invalid chunk type")
|
||||
}
|
||||
if err = h.props.verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data = make([]byte, headerLen(h.ctype))
|
||||
|
||||
switch h.ctype {
|
||||
case cEOS:
|
||||
return data, nil
|
||||
case cUD:
|
||||
data[0] = hUD
|
||||
case cU:
|
||||
data[0] = hU
|
||||
case cL:
|
||||
data[0] = hL
|
||||
case cLR:
|
||||
data[0] = hLR
|
||||
case cLRN:
|
||||
data[0] = hLRN
|
||||
case cLRND:
|
||||
data[0] = hLRND
|
||||
}
|
||||
|
||||
putUint16BE(data[1:3], uint16(h.uncompressed))
|
||||
if h.ctype <= cU {
|
||||
return data, nil
|
||||
}
|
||||
data[0] |= byte(h.uncompressed>>16) &^ hLRND
|
||||
|
||||
putUint16BE(data[3:5], h.compressed)
|
||||
if h.ctype <= cLR {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
data[5] = h.props.Code()
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// readChunkHeader reads the chunk header from the IO reader.
|
||||
func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
|
||||
p := make([]byte, 1, 6)
|
||||
if _, err = io.ReadFull(r, p); err != nil {
|
||||
return
|
||||
}
|
||||
c, err := headerChunkType(p[0])
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
p = p[:headerLen(c)]
|
||||
if _, err = io.ReadFull(r, p[1:]); err != nil {
|
||||
return
|
||||
}
|
||||
h = new(chunkHeader)
|
||||
if err = h.UnmarshalBinary(p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// uint16BE decodes the first two bytes of p as a big-endian uint16
// value.
func uint16BE(p []byte) uint16 {
	hi, lo := uint16(p[0]), uint16(p[1])
	return hi<<8 | lo
}
|
||||
|
||||
// putUint16BE stores x in big-endian byte order in the first two bytes
// of p.
func putUint16BE(p []byte, x uint16) {
	hi, lo := byte(x>>8), byte(x)
	p[0] = hi
	p[1] = lo
}
|
||||
|
||||
// chunkState tracks the state of a sequence of chunks. The states are
// identified by letters.
type chunkState byte

// start is the initial and stop the terminating state of the chunk
// state. Note that stop is declared without a type and therefore is an
// untyped constant.
const (
	start chunkState = 'S'
	stop             = 'T'
)
|
||||
|
||||
// errors for the chunk state handling
|
||||
var (
|
||||
errChunkType = errors.New("lzma: unexpected chunk type")
|
||||
errState = errors.New("lzma: wrong chunk state")
|
||||
)
|
||||
|
||||
// next transitions state based on chunk type input
|
||||
func (c *chunkState) next(ctype chunkType) error {
|
||||
switch *c {
|
||||
// start state
|
||||
case 'S':
|
||||
switch ctype {
|
||||
case cEOS:
|
||||
*c = 'T'
|
||||
case cUD:
|
||||
*c = 'R'
|
||||
case cLRND:
|
||||
*c = 'L'
|
||||
default:
|
||||
return errChunkType
|
||||
}
|
||||
// normal LZMA mode
|
||||
case 'L':
|
||||
switch ctype {
|
||||
case cEOS:
|
||||
*c = 'T'
|
||||
case cUD:
|
||||
*c = 'R'
|
||||
case cU:
|
||||
*c = 'U'
|
||||
case cL, cLR, cLRN, cLRND:
|
||||
break
|
||||
default:
|
||||
return errChunkType
|
||||
}
|
||||
// reset required
|
||||
case 'R':
|
||||
switch ctype {
|
||||
case cEOS:
|
||||
*c = 'T'
|
||||
case cUD, cU:
|
||||
break
|
||||
case cLRN, cLRND:
|
||||
*c = 'L'
|
||||
default:
|
||||
return errChunkType
|
||||
}
|
||||
// uncompressed
|
||||
case 'U':
|
||||
switch ctype {
|
||||
case cEOS:
|
||||
*c = 'T'
|
||||
case cUD:
|
||||
*c = 'R'
|
||||
case cU:
|
||||
break
|
||||
case cL, cLR, cLRN, cLRND:
|
||||
*c = 'L'
|
||||
default:
|
||||
return errChunkType
|
||||
}
|
||||
// terminal state
|
||||
case 'T':
|
||||
return errChunkType
|
||||
default:
|
||||
return errState
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// defaultChunkType returns the default chunk type for each chunk state.
|
||||
func (c chunkState) defaultChunkType() chunkType {
|
||||
switch c {
|
||||
case 'S':
|
||||
return cLRND
|
||||
case 'L', 'U':
|
||||
return cL
|
||||
case 'R':
|
||||
return cLRN
|
||||
default:
|
||||
// no error
|
||||
return cEOS
|
||||
}
|
||||
}
|
||||
|
||||
const (
	// maxDictCap is the maximum dictionary capacity supported by the
	// LZMA2 dictionary capacity encoding.
	maxDictCap = 1<<32 - 1

	// maxDictCapCode is the maximum dictionary capacity code.
	maxDictCapCode = 40
)
|
||||
|
||||
// decodeDictCap decodes the dictionary capacity byte. Bit 0 of c
// selects the mantissa 2 or 3 and the bits above it give the exponent
// that is added to 11. The function doesn't check whether c is in the
// supported range.
func decodeDictCap(c byte) int64 {
	mantissa := int64(2) | int64(c&1)
	exponent := uint(11) + uint((c>>1)&0x1f)
	return mantissa << exponent
}
|
||||
|
||||
// DecodeDictCap decodes the encoded dictionary capacity. The function
|
||||
// returns an error if the code is out of range.
|
||||
func DecodeDictCap(c byte) (n int64, err error) {
|
||||
if c >= maxDictCapCode {
|
||||
if c == maxDictCapCode {
|
||||
return maxDictCap, nil
|
||||
}
|
||||
return 0, errors.New("lzma: invalid dictionary size code")
|
||||
}
|
||||
return decodeDictCap(c), nil
|
||||
}
|
||||
|
||||
// EncodeDictCap encodes a dictionary capacity. The function returns the
|
||||
// code for the capacity that is greater or equal n. If n exceeds the
|
||||
// maximum support dictionary capacity, the maximum value is returned.
|
||||
func EncodeDictCap(n int64) byte {
|
||||
a, b := byte(0), byte(40)
|
||||
for a < b {
|
||||
c := a + (b-a)>>1
|
||||
m := decodeDictCap(c)
|
||||
if n <= m {
|
||||
if n == m {
|
||||
return c
|
||||
}
|
||||
b = c
|
||||
} else {
|
||||
a = c + 1
|
||||
}
|
||||
}
|
||||
return a
|
||||
}
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import "errors"
|
||||
|
||||
// maxPosBits defines the number of bits of the position value that are
// used to compute the posState value. The value is used to select the
// tree codec for length encoding and decoding.
const maxPosBits = 4

// minMatchLen and maxMatchLen are the minimum and maximum of the length
// values that can be encoded and decoded. minMatchLen is also the base
// for the encoded length values.
const (
	minMatchLen = 2
	maxMatchLen = minMatchLen + 16 + 256 - 1
)
|
||||
|
||||
// lengthCodec support the encoding of the length value.
|
||||
type lengthCodec struct {
|
||||
choice [2]prob
|
||||
low [1 << maxPosBits]treeCodec
|
||||
mid [1 << maxPosBits]treeCodec
|
||||
high treeCodec
|
||||
}
|
||||
|
||||
// deepcopy initializes the lc value as deep copy of the source value.
|
||||
func (lc *lengthCodec) deepcopy(src *lengthCodec) {
|
||||
if lc == src {
|
||||
return
|
||||
}
|
||||
lc.choice = src.choice
|
||||
for i := range lc.low {
|
||||
lc.low[i].deepcopy(&src.low[i])
|
||||
}
|
||||
for i := range lc.mid {
|
||||
lc.mid[i].deepcopy(&src.mid[i])
|
||||
}
|
||||
lc.high.deepcopy(&src.high)
|
||||
}
|
||||
|
||||
// init initializes a new length codec.
|
||||
func (lc *lengthCodec) init() {
|
||||
for i := range lc.choice {
|
||||
lc.choice[i] = probInit
|
||||
}
|
||||
for i := range lc.low {
|
||||
lc.low[i] = makeTreeCodec(3)
|
||||
}
|
||||
for i := range lc.mid {
|
||||
lc.mid[i] = makeTreeCodec(3)
|
||||
}
|
||||
lc.high = makeTreeCodec(8)
|
||||
}
|
||||
|
||||
// lBits returns the number of bits used for the encoding of the l value
// provided to the range encoder: 4 for values below 8, 5 for values
// below 16 and 10 for all other values.
func lBits(l uint32) int {
	if l < 8 {
		return 4
	}
	if l < 16 {
		return 5
	}
	return 10
}
|
||||
|
||||
// Encode encodes the length offset. The length offset l can be compute by
|
||||
// subtracting minMatchLen (2) from the actual length.
|
||||
//
|
||||
// l = length - minMatchLen
|
||||
//
|
||||
func (lc *lengthCodec) Encode(e *rangeEncoder, l uint32, posState uint32,
|
||||
) (err error) {
|
||||
if l > maxMatchLen-minMatchLen {
|
||||
return errors.New("lengthCodec.Encode: l out of range")
|
||||
}
|
||||
if l < 8 {
|
||||
if err = lc.choice[0].Encode(e, 0); err != nil {
|
||||
return
|
||||
}
|
||||
return lc.low[posState].Encode(e, l)
|
||||
}
|
||||
if err = lc.choice[0].Encode(e, 1); err != nil {
|
||||
return
|
||||
}
|
||||
if l < 16 {
|
||||
if err = lc.choice[1].Encode(e, 0); err != nil {
|
||||
return
|
||||
}
|
||||
return lc.mid[posState].Encode(e, l-8)
|
||||
}
|
||||
if err = lc.choice[1].Encode(e, 1); err != nil {
|
||||
return
|
||||
}
|
||||
if err = lc.high.Encode(e, l-16); err != nil {
|
||||
return
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decode reads the length offset. Add minMatchLen to compute the actual length
|
||||
// to the length offset l.
|
||||
func (lc *lengthCodec) Decode(d *rangeDecoder, posState uint32,
|
||||
) (l uint32, err error) {
|
||||
var b uint32
|
||||
if b, err = lc.choice[0].Decode(d); err != nil {
|
||||
return
|
||||
}
|
||||
if b == 0 {
|
||||
l, err = lc.low[posState].Decode(d)
|
||||
return
|
||||
}
|
||||
if b, err = lc.choice[1].Decode(d); err != nil {
|
||||
return
|
||||
}
|
||||
if b == 0 {
|
||||
l, err = lc.mid[posState].Decode(d)
|
||||
l += 8
|
||||
return
|
||||
}
|
||||
l, err = lc.high.Decode(d)
|
||||
l += 16
|
||||
return
|
||||
}
|
|
@ -0,0 +1,132 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
// literalCodec supports the encoding of literal. It provides 768 probability
|
||||
// values per literal state. The upper 512 probabilities are used with the
|
||||
// context of a match bit.
|
||||
type literalCodec struct {
|
||||
probs []prob
|
||||
}
|
||||
|
||||
// deepcopy initializes literal codec c as a deep copy of the source.
|
||||
func (c *literalCodec) deepcopy(src *literalCodec) {
|
||||
if c == src {
|
||||
return
|
||||
}
|
||||
c.probs = make([]prob, len(src.probs))
|
||||
copy(c.probs, src.probs)
|
||||
}
|
||||
|
||||
// init initializes the literal codec.
|
||||
func (c *literalCodec) init(lc, lp int) {
|
||||
switch {
|
||||
case !(minLC <= lc && lc <= maxLC):
|
||||
panic("lc out of range")
|
||||
case !(minLP <= lp && lp <= maxLP):
|
||||
panic("lp out of range")
|
||||
}
|
||||
c.probs = make([]prob, 0x300<<uint(lc+lp))
|
||||
for i := range c.probs {
|
||||
c.probs[i] = probInit
|
||||
}
|
||||
}
|
||||
|
||||
// Encode encodes the byte s using a range encoder as well as the current LZMA
|
||||
// encoder state, a match byte and the literal state.
|
||||
func (c *literalCodec) Encode(e *rangeEncoder, s byte,
|
||||
state uint32, match byte, litState uint32,
|
||||
) (err error) {
|
||||
k := litState * 0x300
|
||||
probs := c.probs[k : k+0x300]
|
||||
symbol := uint32(1)
|
||||
r := uint32(s)
|
||||
if state >= 7 {
|
||||
m := uint32(match)
|
||||
for {
|
||||
matchBit := (m >> 7) & 1
|
||||
m <<= 1
|
||||
bit := (r >> 7) & 1
|
||||
r <<= 1
|
||||
i := ((1 + matchBit) << 8) | symbol
|
||||
if err = probs[i].Encode(e, bit); err != nil {
|
||||
return
|
||||
}
|
||||
symbol = (symbol << 1) | bit
|
||||
if matchBit != bit {
|
||||
break
|
||||
}
|
||||
if symbol >= 0x100 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
for symbol < 0x100 {
|
||||
bit := (r >> 7) & 1
|
||||
r <<= 1
|
||||
if err = probs[symbol].Encode(e, bit); err != nil {
|
||||
return
|
||||
}
|
||||
symbol = (symbol << 1) | bit
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decode decodes a literal byte using the range decoder as well as the LZMA
|
||||
// state, a match byte, and the literal state.
|
||||
func (c *literalCodec) Decode(d *rangeDecoder,
|
||||
state uint32, match byte, litState uint32,
|
||||
) (s byte, err error) {
|
||||
k := litState * 0x300
|
||||
probs := c.probs[k : k+0x300]
|
||||
symbol := uint32(1)
|
||||
if state >= 7 {
|
||||
m := uint32(match)
|
||||
for {
|
||||
matchBit := (m >> 7) & 1
|
||||
m <<= 1
|
||||
i := ((1 + matchBit) << 8) | symbol
|
||||
bit, err := d.DecodeBit(&probs[i])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
symbol = (symbol << 1) | bit
|
||||
if matchBit != bit {
|
||||
break
|
||||
}
|
||||
if symbol >= 0x100 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
for symbol < 0x100 {
|
||||
bit, err := d.DecodeBit(&probs[symbol])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
symbol = (symbol << 1) | bit
|
||||
}
|
||||
s = byte(symbol - 0x100)
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// minLC and maxLC define the range for LC values.
const (
	minLC = 0
	maxLC = 8
)

// minLP and maxLP define the range for LP values.
const (
	minLP = 0
	maxLP = 4
)

// minState and maxState define the range for the state values stored
// in the State values.
const (
	minState = 0
	maxState = 11
)
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import "errors"
|
||||
|
||||
// MatchAlgorithm identifies an algorithm to find matches in the
// dictionary.
type MatchAlgorithm byte

// Supported matcher algorithms.
const (
	HashTable4 MatchAlgorithm = iota
	BinaryTree
)

// maStrings provides the names of the supported match algorithms. It is
// used by the String and verify methods.
var maStrings = map[MatchAlgorithm]string{
	HashTable4: "HashTable4",
	BinaryTree: "BinaryTree",
}

// String returns the name of the match algorithm or "unknown" if the
// value is not supported.
func (a MatchAlgorithm) String() string {
	name, ok := maStrings[a]
	if !ok {
		return "unknown"
	}
	return name
}

// errUnsupportedMatchAlgorithm is returned for values that are not a
// supported match algorithm.
var errUnsupportedMatchAlgorithm = errors.New(
	"lzma: unsupported match algorithm value")

// verify checks whether the match algorithm value is supported.
func (a MatchAlgorithm) verify() error {
	if _, known := maStrings[a]; known {
		return nil
	}
	return errUnsupportedMatchAlgorithm
}
|
||||
|
||||
func (a MatchAlgorithm) new(dictCap int) (m matcher, err error) {
|
||||
switch a {
|
||||
case HashTable4:
|
||||
return newHashTable(dictCap, 4)
|
||||
case BinaryTree:
|
||||
return newBinTree(dictCap)
|
||||
}
|
||||
return nil, errUnsupportedMatchAlgorithm
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// operation is an operation on the dictionary during encoding or
// decoding. Len reports the length of the operation; match and lit
// implement the interface.
type operation interface {
	Len() int
}
|
||||
|
||||
// rep represents a repetition at the given distance and the given length
|
||||
type match struct {
|
||||
// supports all possible distance values, including the eos marker
|
||||
distance int64
|
||||
// length
|
||||
n int
|
||||
}
|
||||
|
||||
// verify checks whether the match is valid. If that is not the case an
|
||||
// error is returned.
|
||||
func (m match) verify() error {
|
||||
if !(minDistance <= m.distance && m.distance <= maxDistance) {
|
||||
return errors.New("distance out of range")
|
||||
}
|
||||
if !(1 <= m.n && m.n <= maxMatchLen) {
|
||||
return errors.New("length out of range")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// l return the l-value for the match, which is the difference of length
|
||||
// n and 2.
|
||||
func (m match) l() uint32 {
|
||||
return uint32(m.n - minMatchLen)
|
||||
}
|
||||
|
||||
// dist returns the dist value for the match, which is one less of the
|
||||
// distance stored in the match.
|
||||
func (m match) dist() uint32 {
|
||||
return uint32(m.distance - minDistance)
|
||||
}
|
||||
|
||||
// Len returns the number of bytes matched.
|
||||
func (m match) Len() int {
|
||||
return m.n
|
||||
}
|
||||
|
||||
// String returns a string representation for the repetition.
|
||||
func (m match) String() string {
|
||||
return fmt.Sprintf("M{%d,%d}", m.distance, m.n)
|
||||
}
|
||||
|
||||
// lit represents a single byte literal.
type lit struct {
	// b is the literal byte itself.
	b byte
}

// Len returns 1, because a literal always covers exactly one byte.
func (l lit) Len() int {
	return 1
}

// String formats the literal as "L{c/xx}", where c is the byte as a
// character (or '.' if it is not printable) and xx its two-digit hex value.
func (l lit) String() string {
	shown := byte('.')
	if unicode.IsPrint(rune(l.b)) {
		shown = l.b
	}
	return fmt.Sprintf("L{%c/%02x}", shown, l.b)
}
|
|
@ -0,0 +1,53 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
const (
	// movebits defines the number of bits used for the updates of
	// probability values.
	movebits = 5

	// probbits defines the number of bits of a probability value.
	probbits = 11
)

// probInit defines 0.5 as initial value for prob values.
const probInit prob = 1 << (probbits - 1)

// prob represents probabilities. The type can also be used to encode and
// decode single bits.
type prob uint16

// dec decreases the probability. The decrease is proportional to the
// probability value itself.
func (p *prob) dec() {
	cur := *p
	*p = cur - (cur >> movebits)
}

// inc increases the probability. The increase is proportional to the
// difference of 1 (represented as 1<<probbits) and the probability value.
func (p *prob) inc() {
	cur := *p
	*p = cur + (((1 << probbits) - cur) >> movebits)
}

// bound computes the new bound for the range r using the probability value.
func (p prob) bound(r uint32) uint32 {
	scaled := r >> probbits
	return scaled * uint32(p)
}

// Bits returns 1, the number of bits that can be encoded or decoded with a
// single prob value.
func (p prob) Bits() int {
	return 1
}
|
||||
|
||||
// Encode encodes the least-significant bit of v. Note that the p value will be
|
||||
// changed.
|
||||
func (p *prob) Encode(e *rangeEncoder, v uint32) error {
|
||||
return e.EncodeBit(v, p)
|
||||
}
|
||||
|
||||
// Decode decodes a single bit. Note that the p value will change.
|
||||
func (p *prob) Decode(d *rangeDecoder) (v uint32, err error) {
|
||||
return d.DecodeBit(p)
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// maximum and minimum values for the LZMA properties.
|
||||
const (
|
||||
minPB = 0
|
||||
maxPB = 4
|
||||
)
|
||||
|
||||
// maxPropertyCode is the possible maximum of a properties code byte.
|
||||
const maxPropertyCode = (maxPB+1)*(maxLP+1)*(maxLC+1) - 1
|
||||
|
||||
// Properties contains the parameters LC, LP and PB of an LZMA stream.
type Properties struct {
	// LC is the number of literal context bits.
	LC int
	// LP is the number of literal position bits.
	LP int
	// PB is the number of position bits.
	PB int
}

// String returns the properties in the form "LC x LP y PB z".
func (p *Properties) String() string {
	lc, lp, pb := p.LC, p.LP, p.PB
	return fmt.Sprintf("LC %d LP %d PB %d", lc, lp, pb)
}
|
||||
|
||||
// PropertiesForCode converts a properties code byte into a Properties value.
|
||||
func PropertiesForCode(code byte) (p Properties, err error) {
|
||||
if code > maxPropertyCode {
|
||||
return p, errors.New("lzma: invalid properties code")
|
||||
}
|
||||
p.LC = int(code % 9)
|
||||
code /= 9
|
||||
p.LP = int(code % 5)
|
||||
code /= 5
|
||||
p.PB = int(code % 5)
|
||||
return p, err
|
||||
}
|
||||
|
||||
// verify checks the properties for correctness.
|
||||
func (p *Properties) verify() error {
|
||||
if p == nil {
|
||||
return errors.New("lzma: properties are nil")
|
||||
}
|
||||
if !(minLC <= p.LC && p.LC <= maxLC) {
|
||||
return errors.New("lzma: lc out of range")
|
||||
}
|
||||
if !(minLP <= p.LP && p.LP <= maxLP) {
|
||||
return errors.New("lzma: lp out of range")
|
||||
}
|
||||
if !(minPB <= p.PB && p.PB <= maxPB) {
|
||||
return errors.New("lzma: pb out of range")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Code converts the properties to a byte. The function assumes that
|
||||
// the properties components are all in range.
|
||||
func (p Properties) Code() byte {
|
||||
return byte((p.PB*5+p.LP)*9 + p.LC)
|
||||
}
|
|
@ -0,0 +1,248 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// rangeEncoder implements range encoding of single bits. The low value can
// overflow therefore we need uint64. The cache value is used to handle
// overflows.
type rangeEncoder struct {
	// lbw receives the encoded bytes and carries the remaining byte budget N.
	lbw *LimitedByteWriter
	// nrange is the width of the current coding range; newRangeEncoder
	// starts it at 0xffffffff.
	nrange uint32
	// low is the lower end of the current range; bits above bit 31 hold a
	// carry that shiftLow adds to the bytes it writes.
	low uint64
	// cacheLen counts the bytes held back by shiftLow: the cache byte and
	// the bytes after it, which are written as 0xff plus carry.
	cacheLen int64
	// cache is the first held-back byte.
	cache byte
}
|
||||
|
||||
// maxInt64 provides the maximal value of the int64 type
|
||||
const maxInt64 = 1<<63 - 1
|
||||
|
||||
// newRangeEncoder creates a new range encoder.
|
||||
func newRangeEncoder(bw io.ByteWriter) (re *rangeEncoder, err error) {
|
||||
lbw, ok := bw.(*LimitedByteWriter)
|
||||
if !ok {
|
||||
lbw = &LimitedByteWriter{BW: bw, N: maxInt64}
|
||||
}
|
||||
return &rangeEncoder{
|
||||
lbw: lbw,
|
||||
nrange: 0xffffffff,
|
||||
cacheLen: 1}, nil
|
||||
}
|
||||
|
||||
// Available returns the number of bytes that still can be written. The
|
||||
// method takes the bytes that will be currently written by Close into
|
||||
// account.
|
||||
func (e *rangeEncoder) Available() int64 {
|
||||
return e.lbw.N - (e.cacheLen + 4)
|
||||
}
|
||||
|
||||
// writeByte writes a single byte to the underlying writer. An error is
|
||||
// returned if the limit is reached. The written byte will be counted if
|
||||
// the underlying writer doesn't return an error.
|
||||
func (e *rangeEncoder) writeByte(c byte) error {
|
||||
if e.Available() < 1 {
|
||||
return ErrLimit
|
||||
}
|
||||
return e.lbw.WriteByte(c)
|
||||
}
|
||||
|
||||
// DirectEncodeBit encodes the least-significant bit of b with probability 1/2.
|
||||
func (e *rangeEncoder) DirectEncodeBit(b uint32) error {
|
||||
e.nrange >>= 1
|
||||
e.low += uint64(e.nrange) & (0 - (uint64(b) & 1))
|
||||
|
||||
// normalize
|
||||
const top = 1 << 24
|
||||
if e.nrange >= top {
|
||||
return nil
|
||||
}
|
||||
e.nrange <<= 8
|
||||
return e.shiftLow()
|
||||
}
|
||||
|
||||
// EncodeBit encodes the least significant bit of b. The p value will be
|
||||
// updated by the function depending on the bit encoded.
|
||||
func (e *rangeEncoder) EncodeBit(b uint32, p *prob) error {
|
||||
bound := p.bound(e.nrange)
|
||||
if b&1 == 0 {
|
||||
e.nrange = bound
|
||||
p.inc()
|
||||
} else {
|
||||
e.low += uint64(bound)
|
||||
e.nrange -= bound
|
||||
p.dec()
|
||||
}
|
||||
|
||||
// normalize
|
||||
const top = 1 << 24
|
||||
if e.nrange >= top {
|
||||
return nil
|
||||
}
|
||||
e.nrange <<= 8
|
||||
return e.shiftLow()
|
||||
}
|
||||
|
||||
// Close writes a complete copy of the low value.
|
||||
func (e *rangeEncoder) Close() error {
|
||||
for i := 0; i < 5; i++ {
|
||||
if err := e.shiftLow(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// shiftLow shifts the low value for 8 bit. The shifted byte is written into
// the byte writer. The cache value is used to handle overflows.
//
// Bytes are held back while the top byte of the lower 32 bits is 0xff and no
// carry has occurred; in that case only cacheLen is increased.
func (e *rangeEncoder) shiftLow() error {
	// Flush when the lower 32 bits are below 0xff000000 or a carry sits
	// above bit 31.
	if uint32(e.low) < 0xff000000 || (e.low>>32) != 0 {
		tmp := e.cache
		for {
			// byte(e.low>>32) is the carry; it is added to every held-back
			// byte that is written.
			err := e.writeByte(tmp + byte(e.low>>32))
			if err != nil {
				return err
			}
			// every held-back byte after the cache byte is 0xff
			tmp = 0xff
			e.cacheLen--
			if e.cacheLen <= 0 {
				if e.cacheLen < 0 {
					panic("negative cacheLen")
				}
				break
			}
		}
		// the top byte of the lower 32 bits becomes the new cache byte
		e.cache = byte(uint32(e.low) >> 24)
	}
	e.cacheLen++
	// drop the carry and the shifted-out byte, keeping 32 bits in low
	e.low = uint64(uint32(e.low) << 8)
	return nil
}
|
||||
|
||||
// rangeDecoder decodes single bits of the range encoding stream.
type rangeDecoder struct {
	// br supplies the encoded bytes.
	br io.ByteReader
	// nrange is the width of the current coding range; init sets it to
	// 0xffffffff.
	nrange uint32
	// code holds the most recently read bytes of the stream; updateCode
	// shifts one more byte into it.
	code uint32
}
|
||||
|
||||
// init initializes the range decoder, by reading from the byte reader.
|
||||
func (d *rangeDecoder) init() error {
|
||||
d.nrange = 0xffffffff
|
||||
d.code = 0
|
||||
|
||||
b, err := d.br.ReadByte()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if b != 0 {
|
||||
return errors.New("newRangeDecoder: first byte not zero")
|
||||
}
|
||||
|
||||
for i := 0; i < 4; i++ {
|
||||
if err = d.updateCode(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if d.code >= d.nrange {
|
||||
return errors.New("newRangeDecoder: d.code >= d.nrange")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// newRangeDecoder initializes a range decoder. It reads five bytes from the
|
||||
// reader and therefore may return an error.
|
||||
func newRangeDecoder(br io.ByteReader) (d *rangeDecoder, err error) {
|
||||
d = &rangeDecoder{br: br, nrange: 0xffffffff}
|
||||
|
||||
b, err := d.br.ReadByte()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b != 0 {
|
||||
return nil, errors.New("newRangeDecoder: first byte not zero")
|
||||
}
|
||||
|
||||
for i := 0; i < 4; i++ {
|
||||
if err = d.updateCode(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if d.code >= d.nrange {
|
||||
return nil, errors.New("newRangeDecoder: d.code >= d.nrange")
|
||||
}
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// possiblyAtEnd checks whether the decoder may be at the end of the stream.
|
||||
func (d *rangeDecoder) possiblyAtEnd() bool {
|
||||
return d.code == 0
|
||||
}
|
||||
|
||||
// DirectDecodeBit decodes a bit with probability 1/2. The return value b will
// contain the bit at the least-significant position. All other bits will be
// zero.
func (d *rangeDecoder) DirectDecodeBit() (b uint32, err error) {
	d.nrange >>= 1
	// Subtract the halved range; if code was smaller the subtraction wraps
	// around and sets bit 31.
	d.code -= d.nrange
	// t is all ones if bit 31 of code is set and zero otherwise.
	t := 0 - (d.code >> 31)
	// Undo the subtraction when it wrapped around.
	d.code += d.nrange & t
	// b is 0 when the subtraction wrapped around and 1 otherwise.
	b = (t + 1) & 1

	// d.code will stay less than d.nrange

	// normalize
	// assume d.code < d.nrange
	const top = 1 << 24
	if d.nrange >= top {
		return b, nil
	}
	d.nrange <<= 8
	// d.code < d.nrange will be maintained
	return b, d.updateCode()
}
|
||||
|
||||
// decodeBit decodes a single bit. The bit will be returned at the
|
||||
// least-significant position. All other bits will be zero. The probability
|
||||
// value will be updated.
|
||||
func (d *rangeDecoder) DecodeBit(p *prob) (b uint32, err error) {
|
||||
bound := p.bound(d.nrange)
|
||||
if d.code < bound {
|
||||
d.nrange = bound
|
||||
p.inc()
|
||||
b = 0
|
||||
} else {
|
||||
d.code -= bound
|
||||
d.nrange -= bound
|
||||
p.dec()
|
||||
b = 1
|
||||
}
|
||||
// normalize
|
||||
// assume d.code < d.nrange
|
||||
const top = 1 << 24
|
||||
if d.nrange >= top {
|
||||
return b, nil
|
||||
}
|
||||
d.nrange <<= 8
|
||||
// d.code < d.nrange will be maintained
|
||||
return b, d.updateCode()
|
||||
}
|
||||
|
||||
// updateCode reads a new byte into the code.
|
||||
func (d *rangeDecoder) updateCode() error {
|
||||
b, err := d.br.ReadByte()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.code = (d.code << 8) | uint32(b)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package lzma supports the decoding and encoding of LZMA streams.
|
||||
// Reader and Writer support the classic LZMA format. Reader2 and
|
||||
// Writer2 support the decoding and encoding of LZMA2 streams.
|
||||
//
|
||||
// The package is written completely in Go and doesn't rely on any external
|
||||
// library.
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// ReaderConfig stores the parameters for the reader of the classic LZMA
// format.
type ReaderConfig struct {
	// DictCap is the dictionary capacity; zero selects the default of
	// 8 MiB.
	DictCap int
}

// fill converts the zero values of the configuration to the default values.
func (c *ReaderConfig) fill() {
	if c.DictCap != 0 {
		return
	}
	c.DictCap = 8 << 20 // 8 MiB
}
|
||||
|
||||
// Verify checks the reader configuration for errors. Zero values will
|
||||
// be replaced by default values.
|
||||
func (c *ReaderConfig) Verify() error {
|
||||
c.fill()
|
||||
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
|
||||
return errors.New("lzma: dictionary capacity is out of range")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reader provides a reader for LZMA files or streams.
type Reader struct {
	// lzma is the underlying reader supplying the LZMA stream.
	lzma io.Reader
	// h is the stream header parsed by NewReader.
	h header
	// d is the decoder; Read and EOSMarker delegate to it.
	d *decoder
}
|
||||
|
||||
// NewReader creates a new reader for an LZMA stream using the classic
|
||||
// format. NewReader reads and checks the header of the LZMA stream.
|
||||
func NewReader(lzma io.Reader) (r *Reader, err error) {
|
||||
return ReaderConfig{}.NewReader(lzma)
|
||||
}
|
||||
|
||||
// NewReader creates a new reader for an LZMA stream in the classic
|
||||
// format. The function reads and verifies the the header of the LZMA
|
||||
// stream.
|
||||
func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
data := make([]byte, HeaderLen)
|
||||
if _, err := io.ReadFull(lzma, data); err != nil {
|
||||
if err == io.EOF {
|
||||
return nil, errors.New("lzma: unexpected EOF")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
r = &Reader{lzma: lzma}
|
||||
if err = r.h.unmarshalBinary(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if r.h.dictCap < MinDictCap {
|
||||
return nil, errors.New("lzma: dictionary capacity too small")
|
||||
}
|
||||
dictCap := r.h.dictCap
|
||||
if c.DictCap > dictCap {
|
||||
dictCap = c.DictCap
|
||||
}
|
||||
|
||||
state := newState(r.h.properties)
|
||||
dict, err := newDecoderDict(dictCap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// EOSMarker indicates that an EOS marker has been encountered.
|
||||
func (r *Reader) EOSMarker() bool {
|
||||
return r.d.eosMarker
|
||||
}
|
||||
|
||||
// Read returns uncompressed data.
|
||||
func (r *Reader) Read(p []byte) (n int, err error) {
|
||||
return r.d.Read(p)
|
||||
}
|
|
@ -0,0 +1,232 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"github.com/ulikunitz/xz/internal/xlog"
|
||||
)
|
||||
|
||||
// Reader2Config stores the parameters for the LZMA2 reader.
type Reader2Config struct {
	// DictCap is the dictionary capacity; zero selects the default of
	// 8 MiB.
	DictCap int
}

// fill converts the zero values of the configuration to the default values.
func (c *Reader2Config) fill() {
	if c.DictCap != 0 {
		return
	}
	c.DictCap = 8 << 20 // 8 MiB
}
|
||||
|
||||
// Verify checks the reader configuration for errors. Zero configuration values
|
||||
// will be replaced by default values.
|
||||
func (c *Reader2Config) Verify() error {
|
||||
c.fill()
|
||||
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
|
||||
return errors.New("lzma: dictionary capacity is out of range")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reader2 supports the reading of LZMA2 chunk sequences. Note that the
// first chunk should have a dictionary reset and the first compressed
// chunk a properties reset. The chunk sequence may not be terminated by
// an end-of-stream chunk.
type Reader2 struct {
	// r is the underlying reader supplying the chunk sequence.
	r io.Reader
	// err is a sticky error; once set, Read returns it immediately.
	err error

	// dict is the dictionary shared by the uncompressed reader and the
	// decoder.
	dict *decoderDict
	// ur reads uncompressed chunks; startChunk creates it on first use.
	ur *uncompressedReader
	// decoder reads compressed chunks; startChunk creates it on first use.
	decoder *decoder
	// chunkReader is the reader for the current chunk, either ur or
	// decoder.
	chunkReader io.Reader

	// cstate is the chunk state, advanced by startChunk for every chunk
	// header.
	cstate chunkState
	ctype  chunkType
}
|
||||
|
||||
// NewReader2 creates a reader for an LZMA2 chunk sequence.
|
||||
func NewReader2(lzma2 io.Reader) (r *Reader2, err error) {
|
||||
return Reader2Config{}.NewReader2(lzma2)
|
||||
}
|
||||
|
||||
// NewReader2 creates an LZMA2 reader using the given configuration.
|
||||
func (c Reader2Config) NewReader2(lzma2 io.Reader) (r *Reader2, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r = &Reader2{r: lzma2, cstate: start}
|
||||
r.dict, err = newDecoderDict(c.DictCap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = r.startChunk(); err != nil {
|
||||
r.err = err
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// uncompressed tests whether the chunk type specifies an uncompressed
|
||||
// chunk.
|
||||
func uncompressed(ctype chunkType) bool {
|
||||
return ctype == cU || ctype == cUD
|
||||
}
|
||||
|
||||
// startChunk parses a new chunk. It reads the next chunk header, advances
// the chunk state and points chunkReader at the reader for the chunk data.
// It returns io.EOF when the chunk state reaches stop and
// io.ErrUnexpectedEOF when the input ends where a chunk header is expected.
func (r *Reader2) startChunk() error {
	// chunkReader stays nil if this function fails.
	r.chunkReader = nil
	header, err := readChunkHeader(r.r)
	if err != nil {
		if err == io.EOF {
			// the input ended where a chunk header was expected
			err = io.ErrUnexpectedEOF
		}
		return err
	}
	xlog.Debugf("chunk header %v", header)
	if err = r.cstate.next(header.ctype); err != nil {
		return err
	}
	if r.cstate == stop {
		return io.EOF
	}
	// cUD and cLRND are the chunk types that reset the dictionary.
	if header.ctype == cUD || header.ctype == cLRND {
		r.dict.Reset()
	}
	// the header stores the size reduced by one
	size := int64(header.uncompressed) + 1
	if uncompressed(header.ctype) {
		// reuse the uncompressed reader if it exists already
		if r.ur != nil {
			r.ur.Reopen(r.r, size)
		} else {
			r.ur = newUncompressedReader(r.r, r.dict, size)
		}
		r.chunkReader = r.ur
		return nil
	}
	// The decoder must not read beyond the compressed data of this chunk;
	// the header stores that size reduced by one as well.
	br := ByteReader(io.LimitReader(r.r, int64(header.compressed)+1))
	if r.decoder == nil {
		// first compressed chunk: create the decoder
		state := newState(header.props)
		r.decoder, err = newDecoder(br, state, r.dict, size)
		if err != nil {
			return err
		}
		r.chunkReader = r.decoder
		return nil
	}
	switch header.ctype {
	case cLR:
		// reset the state but keep its properties
		r.decoder.State.Reset()
	case cLRN, cLRND:
		// new state using the properties from the chunk header
		r.decoder.State = newState(header.props)
	}
	err = r.decoder.Reopen(br, size)
	if err != nil {
		return err
	}
	r.chunkReader = r.decoder
	return nil
}
|
||||
|
||||
// Read reads data from the LZMA2 chunk sequence.
|
||||
func (r *Reader2) Read(p []byte) (n int, err error) {
|
||||
if r.err != nil {
|
||||
return 0, r.err
|
||||
}
|
||||
for n < len(p) {
|
||||
var k int
|
||||
k, err = r.chunkReader.Read(p[n:])
|
||||
n += k
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
err = r.startChunk()
|
||||
if err == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
r.err = err
|
||||
return n, err
|
||||
}
|
||||
if k == 0 {
|
||||
r.err = errors.New("lzma: Reader2 doesn't get data")
|
||||
return n, r.err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// EOS returns whether the LZMA2 stream has been terminated by an
|
||||
// end-of-stream chunk.
|
||||
func (r *Reader2) EOS() bool {
|
||||
return r.cstate == stop
|
||||
}
|
||||
|
||||
// uncompressedReader is used to read uncompressed chunks.
type uncompressedReader struct {
	// lr limits reading to the size of the current chunk.
	lr io.LimitedReader
	// Dict is the dictionary the chunk data is copied into and read from.
	Dict *decoderDict
	// eof is set by fill once the limited reader has reported io.EOF.
	eof bool
	// err is a sticky error; once set, Read returns it immediately.
	err error
}
|
||||
|
||||
// newUncompressedReader initializes a new uncompressedReader.
|
||||
func newUncompressedReader(r io.Reader, dict *decoderDict, size int64) *uncompressedReader {
|
||||
ur := &uncompressedReader{
|
||||
lr: io.LimitedReader{R: r, N: size},
|
||||
Dict: dict,
|
||||
}
|
||||
return ur
|
||||
}
|
||||
|
||||
// Reopen reinitializes an uncompressed reader.
|
||||
func (ur *uncompressedReader) Reopen(r io.Reader, size int64) {
|
||||
ur.err = nil
|
||||
ur.eof = false
|
||||
ur.lr = io.LimitedReader{R: r, N: size}
|
||||
}
|
||||
|
||||
// fill reads uncompressed data into the dictionary. It returns nil if data
// may have been added, io.EOF once the chunk has been consumed completely
// and io.ErrUnexpectedEOF if the input ended before the chunk was complete.
func (ur *uncompressedReader) fill() error {
	if !ur.eof {
		// copy as much as the dictionary can currently take
		n, err := io.CopyN(ur.Dict, &ur.lr, int64(ur.Dict.Available()))
		if err != io.EOF {
			// nil (everything requested was copied) or a real error
			return err
		}
		ur.eof = true
		if n > 0 {
			// report the end only on the next call, so that the data
			// copied just now can be read first
			return nil
		}
	}
	// a remaining limit means the chunk ended early
	if ur.lr.N != 0 {
		return io.ErrUnexpectedEOF
	}
	return io.EOF
}
|
||||
|
||||
// Read reads uncompressed data from the limited reader. The data passes
// through the dictionary: Read alternates between draining the dictionary
// into p and refilling it with fill. Errors are sticky.
func (ur *uncompressedReader) Read(p []byte) (n int, err error) {
	if ur.err != nil {
		return 0, ur.err
	}
	for {
		var k int
		k, err = ur.Dict.Read(p[n:])
		n += k
		// a completely filled p is a success, whatever err is
		if n >= len(p) {
			return n, nil
		}
		if err != nil {
			break
		}
		err = ur.fill()
		if err != nil {
			break
		}
	}
	ur.err = err
	return n, err
}
|
|
@ -0,0 +1,151 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
// states defines the overall state count
const states = 12

// state maintains the full state of the operation encoding or decoding
// process.
type state struct {
	rep [4]uint32
	// probabilities indexed by state and position state (see states)
	isMatch     [states << maxPosBits]prob
	isRepG0Long [states << maxPosBits]prob
	// probabilities indexed by state only
	isRep       [states]prob
	isRepG0     [states]prob
	isRepG1     [states]prob
	isRepG2     [states]prob
	litCodec    literalCodec
	lenCodec    lengthCodec
	repLenCodec lengthCodec
	distCodec   distCodec
	// state is the current state number; the updateState* methods keep it
	// below states.
	state uint32
	// posBitMask is (1 << Properties.PB) - 1, set by Reset.
	posBitMask uint32
	Properties Properties
}
|
||||
|
||||
// initProbSlice initializes a slice of probabilities.
|
||||
func initProbSlice(p []prob) {
|
||||
for i := range p {
|
||||
p[i] = probInit
|
||||
}
|
||||
}
|
||||
|
||||
// Reset sets all state information to the original values.
|
||||
func (s *state) Reset() {
|
||||
p := s.Properties
|
||||
*s = state{
|
||||
Properties: p,
|
||||
// dict: s.dict,
|
||||
posBitMask: (uint32(1) << uint(p.PB)) - 1,
|
||||
}
|
||||
initProbSlice(s.isMatch[:])
|
||||
initProbSlice(s.isRep[:])
|
||||
initProbSlice(s.isRepG0[:])
|
||||
initProbSlice(s.isRepG1[:])
|
||||
initProbSlice(s.isRepG2[:])
|
||||
initProbSlice(s.isRepG0Long[:])
|
||||
s.litCodec.init(p.LC, p.LP)
|
||||
s.lenCodec.init()
|
||||
s.repLenCodec.init()
|
||||
s.distCodec.init()
|
||||
}
|
||||
|
||||
// initState initializes the state.
|
||||
func initState(s *state, p Properties) {
|
||||
*s = state{Properties: p}
|
||||
s.Reset()
|
||||
}
|
||||
|
||||
// newState creates a new state from the give Properties.
|
||||
func newState(p Properties) *state {
|
||||
s := &state{Properties: p}
|
||||
s.Reset()
|
||||
return s
|
||||
}
|
||||
|
||||
// deepcopy initializes s as a deep copy of the source.
|
||||
func (s *state) deepcopy(src *state) {
|
||||
if s == src {
|
||||
return
|
||||
}
|
||||
s.rep = src.rep
|
||||
s.isMatch = src.isMatch
|
||||
s.isRepG0Long = src.isRepG0Long
|
||||
s.isRep = src.isRep
|
||||
s.isRepG0 = src.isRepG0
|
||||
s.isRepG1 = src.isRepG1
|
||||
s.isRepG2 = src.isRepG2
|
||||
s.litCodec.deepcopy(&src.litCodec)
|
||||
s.lenCodec.deepcopy(&src.lenCodec)
|
||||
s.repLenCodec.deepcopy(&src.repLenCodec)
|
||||
s.distCodec.deepcopy(&src.distCodec)
|
||||
s.state = src.state
|
||||
s.posBitMask = src.posBitMask
|
||||
s.Properties = src.Properties
|
||||
}
|
||||
|
||||
// cloneState creates a new clone of the give state.
|
||||
func cloneState(src *state) *state {
|
||||
s := new(state)
|
||||
s.deepcopy(src)
|
||||
return s
|
||||
}
|
||||
|
||||
// updateStateLiteral updates the state for a literal.
|
||||
func (s *state) updateStateLiteral() {
|
||||
switch {
|
||||
case s.state < 4:
|
||||
s.state = 0
|
||||
return
|
||||
case s.state < 10:
|
||||
s.state -= 3
|
||||
return
|
||||
}
|
||||
s.state -= 6
|
||||
}
|
||||
|
||||
// updateStateMatch updates the state for a match.
|
||||
func (s *state) updateStateMatch() {
|
||||
if s.state < 7 {
|
||||
s.state = 7
|
||||
} else {
|
||||
s.state = 10
|
||||
}
|
||||
}
|
||||
|
||||
// updateStateRep updates the state for a repetition.
|
||||
func (s *state) updateStateRep() {
|
||||
if s.state < 7 {
|
||||
s.state = 8
|
||||
} else {
|
||||
s.state = 11
|
||||
}
|
||||
}
|
||||
|
||||
// updateStateShortRep updates the state for a short repetition.
|
||||
func (s *state) updateStateShortRep() {
|
||||
if s.state < 7 {
|
||||
s.state = 9
|
||||
} else {
|
||||
s.state = 11
|
||||
}
|
||||
}
|
||||
|
||||
// states computes the states of the operation codec.
|
||||
func (s *state) states(dictHead int64) (state1, state2, posState uint32) {
|
||||
state1 = s.state
|
||||
posState = uint32(dictHead) & s.posBitMask
|
||||
state2 = (s.state << maxPosBits) | posState
|
||||
return
|
||||
}
|
||||
|
||||
// litState computes the literal state.
|
||||
func (s *state) litState(prev byte, dictHead int64) uint32 {
|
||||
lp, lc := uint(s.Properties.LP), uint(s.Properties.LC)
|
||||
litState := ((uint32(dictHead) & ((1 << lp) - 1)) << lc) |
|
||||
(uint32(prev) >> (8 - lc))
|
||||
return litState
|
||||
}
|
|
@ -0,0 +1,133 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
// treeCodec encodes or decodes values with a fixed bit size. It is using a
|
||||
// tree of probability value. The root of the tree is the most-significant bit.
|
||||
type treeCodec struct {
|
||||
probTree
|
||||
}
|
||||
|
||||
// makeTreeCodec makes a tree codec. The bits value must be inside the range
|
||||
// [1,32].
|
||||
func makeTreeCodec(bits int) treeCodec {
|
||||
return treeCodec{makeProbTree(bits)}
|
||||
}
|
||||
|
||||
// deepcopy initializes tc as a deep copy of the source.
|
||||
func (tc *treeCodec) deepcopy(src *treeCodec) {
|
||||
tc.probTree.deepcopy(&src.probTree)
|
||||
}
|
||||
|
||||
// Encode uses the range encoder to encode a fixed-bit-size value.
|
||||
func (tc *treeCodec) Encode(e *rangeEncoder, v uint32) (err error) {
|
||||
m := uint32(1)
|
||||
for i := int(tc.bits) - 1; i >= 0; i-- {
|
||||
b := (v >> uint(i)) & 1
|
||||
if err := e.EncodeBit(b, &tc.probs[m]); err != nil {
|
||||
return err
|
||||
}
|
||||
m = (m << 1) | b
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decodes uses the range decoder to decode a fixed-bit-size value. Errors may
|
||||
// be caused by the range decoder.
|
||||
func (tc *treeCodec) Decode(d *rangeDecoder) (v uint32, err error) {
|
||||
m := uint32(1)
|
||||
for j := 0; j < int(tc.bits); j++ {
|
||||
b, err := d.DecodeBit(&tc.probs[m])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
m = (m << 1) | b
|
||||
}
|
||||
return m - (1 << uint(tc.bits)), nil
|
||||
}
|
||||
|
||||
// treeReverseCodec is another tree codec, where the least-significant bit is
|
||||
// the start of the probability tree.
|
||||
type treeReverseCodec struct {
|
||||
probTree
|
||||
}
|
||||
|
||||
// deepcopy initializes the treeReverseCodec as a deep copy of the
|
||||
// source.
|
||||
func (tc *treeReverseCodec) deepcopy(src *treeReverseCodec) {
|
||||
tc.probTree.deepcopy(&src.probTree)
|
||||
}
|
||||
|
||||
// makeTreeReverseCodec creates treeReverseCodec value. The bits argument must
|
||||
// be in the range [1,32].
|
||||
func makeTreeReverseCodec(bits int) treeReverseCodec {
|
||||
return treeReverseCodec{makeProbTree(bits)}
|
||||
}
|
||||
|
||||
// Encode uses range encoder to encode a fixed-bit-size value. The range
|
||||
// encoder may cause errors.
|
||||
func (tc *treeReverseCodec) Encode(v uint32, e *rangeEncoder) (err error) {
|
||||
m := uint32(1)
|
||||
for i := uint(0); i < uint(tc.bits); i++ {
|
||||
b := (v >> i) & 1
|
||||
if err := e.EncodeBit(b, &tc.probs[m]); err != nil {
|
||||
return err
|
||||
}
|
||||
m = (m << 1) | b
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decodes uses the range decoder to decode a fixed-bit-size value. Errors
|
||||
// returned by the range decoder will be returned.
|
||||
func (tc *treeReverseCodec) Decode(d *rangeDecoder) (v uint32, err error) {
|
||||
m := uint32(1)
|
||||
for j := uint(0); j < uint(tc.bits); j++ {
|
||||
b, err := d.DecodeBit(&tc.probs[m])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
m = (m << 1) | b
|
||||
v |= b << j
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// probTree stores enough probability values to be used by the treeEncode and
|
||||
// treeDecode methods of the range coder types.
|
||||
type probTree struct {
|
||||
probs []prob
|
||||
bits byte
|
||||
}
|
||||
|
||||
// deepcopy initializes the probTree value as a deep copy of the source.
|
||||
func (t *probTree) deepcopy(src *probTree) {
|
||||
if t == src {
|
||||
return
|
||||
}
|
||||
t.probs = make([]prob, len(src.probs))
|
||||
copy(t.probs, src.probs)
|
||||
t.bits = src.bits
|
||||
}
|
||||
|
||||
// makeProbTree initializes a probTree structure.
|
||||
func makeProbTree(bits int) probTree {
|
||||
if !(1 <= bits && bits <= 32) {
|
||||
panic("bits outside of range [1,32]")
|
||||
}
|
||||
t := probTree{
|
||||
bits: byte(bits),
|
||||
probs: make([]prob, 1<<uint(bits)),
|
||||
}
|
||||
for i := range t.probs {
|
||||
t.probs[i] = probInit
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// Bits provides the number of bits for the values to de- or encode.
|
||||
func (t *probTree) Bits() int {
|
||||
return int(t.bits)
|
||||
}
|
|
@ -0,0 +1,209 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// MinDictCap and MaxDictCap provide the range of supported dictionary
// capacities.
const (
	// MinDictCap is the smallest supported capacity, 4 KiB.
	MinDictCap = 1 << 12
	// MaxDictCap is the largest supported capacity, 4 GiB - 1. The value
	// doesn't fit into a 32-bit int, which is why the Verify methods
	// convert the capacity to int64 before comparing.
	MaxDictCap = 1<<32 - 1
)
|
||||
|
||||
// WriterConfig defines the configuration parameters for a writer.
type WriterConfig struct {
	// Properties for the encoding. If it is nil the value
	// {LC: 3, LP: 0, PB: 2} will be chosen.
	Properties *Properties
	// The capacity of the dictionary. If DictCap is zero, the value
	// 8 MiB will be chosen.
	DictCap int
	// Size of the lookahead buffer; value 0 indicates default size
	// 4096
	BufSize int
	// Match algorithm
	Matcher MatchAlgorithm
	// SizeInHeader indicates that the header will contain an
	// explicit size.
	SizeInHeader bool
	// Size of the data to be encoded. A positive value will imply
	// that an explicit size will be set in the header.
	Size int64
	// EOSMarker requests whether the EOSMarker needs to be written.
	// If no explicit size has been given the EOSMarker will be
	// set automatically.
	EOSMarker bool
}
|
||||
|
||||
// fill converts zero-value fields to their explicit default values.
|
||||
func (c *WriterConfig) fill() {
|
||||
if c.Properties == nil {
|
||||
c.Properties = &Properties{LC: 3, LP: 0, PB: 2}
|
||||
}
|
||||
if c.DictCap == 0 {
|
||||
c.DictCap = 8 * 1024 * 1024
|
||||
}
|
||||
if c.BufSize == 0 {
|
||||
c.BufSize = 4096
|
||||
}
|
||||
if c.Size > 0 {
|
||||
c.SizeInHeader = true
|
||||
}
|
||||
if !c.SizeInHeader {
|
||||
c.EOSMarker = true
|
||||
}
|
||||
}
|
||||
|
||||
// Verify checks WriterConfig for errors. Verify will replace zero
|
||||
// values with default values.
|
||||
func (c *WriterConfig) Verify() error {
|
||||
c.fill()
|
||||
var err error
|
||||
if c == nil {
|
||||
return errors.New("lzma: WriterConfig is nil")
|
||||
}
|
||||
if c.Properties == nil {
|
||||
return errors.New("lzma: WriterConfig has no Properties set")
|
||||
}
|
||||
if err = c.Properties.verify(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
|
||||
return errors.New("lzma: dictionary capacity is out of range")
|
||||
}
|
||||
if !(maxMatchLen <= c.BufSize) {
|
||||
return errors.New("lzma: lookahead buffer size too small")
|
||||
}
|
||||
if c.SizeInHeader {
|
||||
if c.Size < 0 {
|
||||
return errors.New("lzma: negative size not supported")
|
||||
}
|
||||
} else if !c.EOSMarker {
|
||||
return errors.New("lzma: EOS marker is required")
|
||||
}
|
||||
if err = c.Matcher.verify(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// header returns the header structure for this configuration.
|
||||
func (c *WriterConfig) header() header {
|
||||
h := header{
|
||||
properties: *c.Properties,
|
||||
dictCap: c.DictCap,
|
||||
size: -1,
|
||||
}
|
||||
if c.SizeInHeader {
|
||||
h.size = c.Size
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// Writer writes an LZMA stream in the classic format.
|
||||
type Writer struct {
|
||||
h header
|
||||
bw io.ByteWriter
|
||||
buf *bufio.Writer
|
||||
e *encoder
|
||||
}
|
||||
|
||||
// NewWriter creates a new LZMA writer for the classic format. The
|
||||
// method will write the header to the underlying stream.
|
||||
func (c WriterConfig) NewWriter(lzma io.Writer) (w *Writer, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
w = &Writer{h: c.header()}
|
||||
|
||||
var ok bool
|
||||
w.bw, ok = lzma.(io.ByteWriter)
|
||||
if !ok {
|
||||
w.buf = bufio.NewWriter(lzma)
|
||||
w.bw = w.buf
|
||||
}
|
||||
state := newState(w.h.properties)
|
||||
m, err := c.Matcher.new(w.h.dictCap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dict, err := newEncoderDict(w.h.dictCap, c.BufSize, m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var flags encoderFlags
|
||||
if c.EOSMarker {
|
||||
flags = eosMarker
|
||||
}
|
||||
if w.e, err = newEncoder(w.bw, state, dict, flags); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = w.writeHeader(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return w, nil
|
||||
}
|
||||
|
||||
// NewWriter creates a new LZMA writer using the classic format. The
|
||||
// function writes the header to the underlying stream.
|
||||
func NewWriter(lzma io.Writer) (w *Writer, err error) {
|
||||
return WriterConfig{}.NewWriter(lzma)
|
||||
}
|
||||
|
||||
// writeHeader writes the LZMA header into the stream.
|
||||
func (w *Writer) writeHeader() error {
|
||||
data, err := w.h.marshalBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.bw.(io.Writer).Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// Write puts data into the Writer.
|
||||
func (w *Writer) Write(p []byte) (n int, err error) {
|
||||
if w.h.size >= 0 {
|
||||
m := w.h.size
|
||||
m -= w.e.Compressed() + int64(w.e.dict.Buffered())
|
||||
if m < 0 {
|
||||
m = 0
|
||||
}
|
||||
if m < int64(len(p)) {
|
||||
p = p[:m]
|
||||
err = ErrNoSpace
|
||||
}
|
||||
}
|
||||
var werr error
|
||||
if n, werr = w.e.Write(p); werr != nil {
|
||||
err = werr
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Close closes the writer stream. It ensures that all data from the
|
||||
// buffer will be compressed and the LZMA stream will be finished.
|
||||
func (w *Writer) Close() error {
|
||||
if w.h.size >= 0 {
|
||||
n := w.e.Compressed() + int64(w.e.dict.Buffered())
|
||||
if n != w.h.size {
|
||||
return errSize
|
||||
}
|
||||
}
|
||||
err := w.e.Close()
|
||||
if w.buf != nil {
|
||||
ferr := w.buf.Flush()
|
||||
if err == nil {
|
||||
err = ferr
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,305 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzma
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Writer2Config is used to create a Writer2 using parameters.
|
||||
type Writer2Config struct {
|
||||
// The properties for the encoding. If the it is nil the value
|
||||
// {LC: 3, LP: 0, PB: 2} will be chosen.
|
||||
Properties *Properties
|
||||
// The capacity of the dictionary. If DictCap is zero, the value
|
||||
// 8 MiB will be chosen.
|
||||
DictCap int
|
||||
// Size of the lookahead buffer; value 0 indicates default size
|
||||
// 4096
|
||||
BufSize int
|
||||
// Match algorithm
|
||||
Matcher MatchAlgorithm
|
||||
}
|
||||
|
||||
// fill replaces zero values with default values.
|
||||
func (c *Writer2Config) fill() {
|
||||
if c.Properties == nil {
|
||||
c.Properties = &Properties{LC: 3, LP: 0, PB: 2}
|
||||
}
|
||||
if c.DictCap == 0 {
|
||||
c.DictCap = 8 * 1024 * 1024
|
||||
}
|
||||
if c.BufSize == 0 {
|
||||
c.BufSize = 4096
|
||||
}
|
||||
}
|
||||
|
||||
// Verify checks the Writer2Config for correctness. Zero values will be
|
||||
// replaced by default values.
|
||||
func (c *Writer2Config) Verify() error {
|
||||
c.fill()
|
||||
var err error
|
||||
if c == nil {
|
||||
return errors.New("lzma: WriterConfig is nil")
|
||||
}
|
||||
if c.Properties == nil {
|
||||
return errors.New("lzma: WriterConfig has no Properties set")
|
||||
}
|
||||
if err = c.Properties.verify(); err != nil {
|
||||
return err
|
||||
}
|
||||
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
|
||||
return errors.New("lzma: dictionary capacity is out of range")
|
||||
}
|
||||
if !(maxMatchLen <= c.BufSize) {
|
||||
return errors.New("lzma: lookahead buffer size too small")
|
||||
}
|
||||
if c.Properties.LC+c.Properties.LP > 4 {
|
||||
return errors.New("lzma: sum of lc and lp exceeds 4")
|
||||
}
|
||||
if err = c.Matcher.verify(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Writer2 supports the creation of an LZMA2 stream. But note that
|
||||
// written data is buffered, so call Flush or Close to write data to the
|
||||
// underlying writer. The Close method writes the end-of-stream marker
|
||||
// to the stream. So you may be able to concatenate the output of two
|
||||
// writers as long the output of the first writer has only been flushed
|
||||
// but not closed.
|
||||
//
|
||||
// Any change to the fields Properties, DictCap must be done before the
|
||||
// first call to Write, Flush or Close.
|
||||
type Writer2 struct {
|
||||
w io.Writer
|
||||
|
||||
start *state
|
||||
encoder *encoder
|
||||
|
||||
cstate chunkState
|
||||
ctype chunkType
|
||||
|
||||
buf bytes.Buffer
|
||||
lbw LimitedByteWriter
|
||||
}
|
||||
|
||||
// NewWriter2 creates an LZMA2 chunk sequence writer with the default
|
||||
// parameters and options.
|
||||
func NewWriter2(lzma2 io.Writer) (w *Writer2, err error) {
|
||||
return Writer2Config{}.NewWriter2(lzma2)
|
||||
}
|
||||
|
||||
// NewWriter2 creates a new LZMA2 writer using the given configuration.
|
||||
func (c Writer2Config) NewWriter2(lzma2 io.Writer) (w *Writer2, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
w = &Writer2{
|
||||
w: lzma2,
|
||||
start: newState(*c.Properties),
|
||||
cstate: start,
|
||||
ctype: start.defaultChunkType(),
|
||||
}
|
||||
w.buf.Grow(maxCompressed)
|
||||
w.lbw = LimitedByteWriter{BW: &w.buf, N: maxCompressed}
|
||||
m, err := c.Matcher.new(c.DictCap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := newEncoderDict(c.DictCap, c.BufSize, m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
w.encoder, err = newEncoder(&w.lbw, cloneState(w.start), d, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return w, nil
|
||||
}
|
||||
|
||||
// written returns the number of bytes written to the current chunk
|
||||
func (w *Writer2) written() int {
|
||||
if w.encoder == nil {
|
||||
return 0
|
||||
}
|
||||
return int(w.encoder.Compressed()) + w.encoder.dict.Buffered()
|
||||
}
|
||||
|
||||
// errClosed is returned by operations on a writer that has already been
// closed.
var errClosed = errors.New("lzma: writer closed")
|
||||
|
||||
// Writes data to LZMA2 stream. Note that written data will be buffered.
|
||||
// Use Flush or Close to ensure that data is written to the underlying
|
||||
// writer.
|
||||
func (w *Writer2) Write(p []byte) (n int, err error) {
|
||||
if w.cstate == stop {
|
||||
return 0, errClosed
|
||||
}
|
||||
for n < len(p) {
|
||||
m := maxUncompressed - w.written()
|
||||
if m <= 0 {
|
||||
panic("lzma: maxUncompressed reached")
|
||||
}
|
||||
var q []byte
|
||||
if n+m < len(p) {
|
||||
q = p[n : n+m]
|
||||
} else {
|
||||
q = p[n:]
|
||||
}
|
||||
k, err := w.encoder.Write(q)
|
||||
n += k
|
||||
if err != nil && err != ErrLimit {
|
||||
return n, err
|
||||
}
|
||||
if err == ErrLimit || k == m {
|
||||
if err = w.flushChunk(); err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// writeUncompressedChunk writes an uncompressed chunk to the LZMA2
|
||||
// stream.
|
||||
func (w *Writer2) writeUncompressedChunk() error {
|
||||
u := w.encoder.Compressed()
|
||||
if u <= 0 {
|
||||
return errors.New("lzma: can't write empty uncompressed chunk")
|
||||
}
|
||||
if u > maxUncompressed {
|
||||
panic("overrun of uncompressed data limit")
|
||||
}
|
||||
switch w.ctype {
|
||||
case cLRND:
|
||||
w.ctype = cUD
|
||||
default:
|
||||
w.ctype = cU
|
||||
}
|
||||
w.encoder.state = w.start
|
||||
|
||||
header := chunkHeader{
|
||||
ctype: w.ctype,
|
||||
uncompressed: uint32(u - 1),
|
||||
}
|
||||
hdata, err := header.MarshalBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err = w.w.Write(hdata); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.encoder.dict.CopyN(w.w, int(u))
|
||||
return err
|
||||
}
|
||||
|
||||
// writeCompressedChunk writes a compressed chunk to the underlying
|
||||
// writer.
|
||||
func (w *Writer2) writeCompressedChunk() error {
|
||||
if w.ctype == cU || w.ctype == cUD {
|
||||
panic("chunk type uncompressed")
|
||||
}
|
||||
|
||||
u := w.encoder.Compressed()
|
||||
if u <= 0 {
|
||||
return errors.New("writeCompressedChunk: empty chunk")
|
||||
}
|
||||
if u > maxUncompressed {
|
||||
panic("overrun of uncompressed data limit")
|
||||
}
|
||||
c := w.buf.Len()
|
||||
if c <= 0 {
|
||||
panic("no compressed data")
|
||||
}
|
||||
if c > maxCompressed {
|
||||
panic("overrun of compressed data limit")
|
||||
}
|
||||
header := chunkHeader{
|
||||
ctype: w.ctype,
|
||||
uncompressed: uint32(u - 1),
|
||||
compressed: uint16(c - 1),
|
||||
props: w.encoder.state.Properties,
|
||||
}
|
||||
hdata, err := header.MarshalBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err = w.w.Write(hdata); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = io.Copy(w.w, &w.buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// writes a single chunk to the underlying writer.
|
||||
func (w *Writer2) writeChunk() error {
|
||||
u := int(uncompressedHeaderLen + w.encoder.Compressed())
|
||||
c := headerLen(w.ctype) + w.buf.Len()
|
||||
if u < c {
|
||||
return w.writeUncompressedChunk()
|
||||
}
|
||||
return w.writeCompressedChunk()
|
||||
}
|
||||
|
||||
// flushChunk terminates the current chunk. The encoder will be reset
|
||||
// to support the next chunk.
|
||||
func (w *Writer2) flushChunk() error {
|
||||
if w.written() == 0 {
|
||||
return nil
|
||||
}
|
||||
var err error
|
||||
if err = w.encoder.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err = w.writeChunk(); err != nil {
|
||||
return err
|
||||
}
|
||||
w.buf.Reset()
|
||||
w.lbw.N = maxCompressed
|
||||
if err = w.encoder.Reopen(&w.lbw); err != nil {
|
||||
return err
|
||||
}
|
||||
if err = w.cstate.next(w.ctype); err != nil {
|
||||
return err
|
||||
}
|
||||
w.ctype = w.cstate.defaultChunkType()
|
||||
w.start = cloneState(w.encoder.state)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush writes all buffered data out to the underlying stream. This
|
||||
// could result in multiple chunks to be created.
|
||||
func (w *Writer2) Flush() error {
|
||||
if w.cstate == stop {
|
||||
return errClosed
|
||||
}
|
||||
for w.written() > 0 {
|
||||
if err := w.flushChunk(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close terminates the LZMA2 stream with an EOS chunk.
|
||||
func (w *Writer2) Close() error {
|
||||
if w.cstate == stop {
|
||||
return errClosed
|
||||
}
|
||||
if err := w.Flush(); err != nil {
|
||||
return nil
|
||||
}
|
||||
// write zero byte EOS chunk
|
||||
_, err := w.w.Write([]byte{0})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
w.cstate = stop
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xz
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/ulikunitz/xz/lzma"
|
||||
)
|
||||
|
||||
// Constants describing the encoded LZMA2 filter record.
const (
	// lzmaFilterID is the first byte of the encoded filter.
	lzmaFilterID = 0x21
	// lzmaFilterLen is the length in bytes of the encoded filter.
	lzmaFilterLen = 3
)
|
||||
|
||||
// lzmaFilter carries the LZMA2 filter information of an xz block
// header, which is the dictionary capacity.
type lzmaFilter struct {
	dictCap int64
}

// String formats the filter for display; the dictionary capacity is
// printed in hexadecimal with a 0x prefix.
func (f lzmaFilter) String() string {
	const layout = "LZMA dict cap %#x"
	return fmt.Sprintf(layout, f.dictCap)
}
|
||||
|
||||
// id returns the ID for the LZMA2 filter.
|
||||
func (f lzmaFilter) id() uint64 { return lzmaFilterID }
|
||||
|
||||
// MarshalBinary converts the lzmaFilter in its encoded representation.
|
||||
func (f lzmaFilter) MarshalBinary() (data []byte, err error) {
|
||||
c := lzma.EncodeDictCap(f.dictCap)
|
||||
return []byte{lzmaFilterID, 1, c}, nil
|
||||
}
|
||||
|
||||
// UnmarshalBinary unmarshals the given data representation of the LZMA2
|
||||
// filter.
|
||||
func (f *lzmaFilter) UnmarshalBinary(data []byte) error {
|
||||
if len(data) != lzmaFilterLen {
|
||||
return errors.New("xz: data for LZMA2 filter has wrong length")
|
||||
}
|
||||
if data[0] != lzmaFilterID {
|
||||
return errors.New("xz: wrong LZMA2 filter id")
|
||||
}
|
||||
if data[1] != 1 {
|
||||
return errors.New("xz: wrong LZMA2 filter size")
|
||||
}
|
||||
dc, err := lzma.DecodeDictCap(data[2])
|
||||
if err != nil {
|
||||
return errors.New("xz: wrong LZMA2 dictionary size property")
|
||||
}
|
||||
|
||||
f.dictCap = dc
|
||||
return nil
|
||||
}
|
||||
|
||||
// reader creates a new reader for the LZMA2 filter.
|
||||
func (f lzmaFilter) reader(r io.Reader, c *ReaderConfig) (fr io.Reader,
|
||||
err error) {
|
||||
|
||||
config := new(lzma.Reader2Config)
|
||||
if c != nil {
|
||||
config.DictCap = c.DictCap
|
||||
}
|
||||
dc := int(f.dictCap)
|
||||
if dc < 1 {
|
||||
return nil, errors.New("xz: LZMA2 filter parameter " +
|
||||
"dictionary capacity overflow")
|
||||
}
|
||||
if dc > config.DictCap {
|
||||
config.DictCap = dc
|
||||
}
|
||||
|
||||
fr, err = config.NewReader2(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fr, nil
|
||||
}
|
||||
|
||||
// writeCloser creates a io.WriteCloser for the LZMA2 filter.
|
||||
func (f lzmaFilter) writeCloser(w io.WriteCloser, c *WriterConfig,
|
||||
) (fw io.WriteCloser, err error) {
|
||||
config := new(lzma.Writer2Config)
|
||||
if c != nil {
|
||||
*config = lzma.Writer2Config{
|
||||
Properties: c.Properties,
|
||||
DictCap: c.DictCap,
|
||||
BufSize: c.BufSize,
|
||||
Matcher: c.Matcher,
|
||||
}
|
||||
}
|
||||
|
||||
dc := int(f.dictCap)
|
||||
if dc < 1 {
|
||||
return nil, errors.New("xz: LZMA2 filter parameter " +
|
||||
"dictionary capacity overflow")
|
||||
}
|
||||
if dc > config.DictCap {
|
||||
config.DictCap = dc
|
||||
}
|
||||
|
||||
fw, err = config.NewWriter2(w)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fw, nil
|
||||
}
|
||||
|
||||
// last returns true, because an LZMA2 filter must be the last filter in
|
||||
// the filter list.
|
||||
func (f lzmaFilter) last() bool { return true }
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/sh

# Render the Markdown documents README.md and TODO.md to standalone
# HTML5 pages with pandoc, tracing every command that is run.
set -x
for doc in README TODO; do
	pandoc -t html5 -f markdown -s --css=doc/md.css -o "$doc.html" "$doc.md"
done
|
|
@ -0,0 +1,373 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package xz supports the compression and decompression of xz files. It
|
||||
// supports version 1.0.4 of the specification without the non-LZMA2
|
||||
// filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt
|
||||
package xz
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
|
||||
"github.com/ulikunitz/xz/internal/xlog"
|
||||
"github.com/ulikunitz/xz/lzma"
|
||||
)
|
||||
|
||||
// ReaderConfig holds the parameters of the xz reader.
type ReaderConfig struct {
	// DictCap is the dictionary capacity requested for decoding.
	DictCap int
	// SingleStream tells the reader to assume that the underlying
	// data contains exactly one xz stream.
	SingleStream bool
}

// fill assigns the default dictionary capacity of 8 MiB when DictCap is
// zero.
func (c *ReaderConfig) fill() {
	if c.DictCap != 0 {
		return
	}
	c.DictCap = 8 << 20
}
|
||||
|
||||
// Verify checks the reader parameters for Validity. Zero values will be
|
||||
// replaced by default values.
|
||||
func (c *ReaderConfig) Verify() error {
|
||||
if c == nil {
|
||||
return errors.New("xz: reader parameters are nil")
|
||||
}
|
||||
lc := lzma.Reader2Config{DictCap: c.DictCap}
|
||||
if err := lc.Verify(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reader supports the reading of one or multiple xz streams.
|
||||
type Reader struct {
|
||||
ReaderConfig
|
||||
|
||||
xz io.Reader
|
||||
sr *streamReader
|
||||
}
|
||||
|
||||
// streamReader decodes a single xz stream
|
||||
type streamReader struct {
|
||||
ReaderConfig
|
||||
|
||||
xz io.Reader
|
||||
br *blockReader
|
||||
newHash func() hash.Hash
|
||||
h header
|
||||
index []record
|
||||
}
|
||||
|
||||
// NewReader creates a new xz reader using the default parameters.
|
||||
// The function reads and checks the header of the first XZ stream. The
|
||||
// reader will process multiple streams including padding.
|
||||
func NewReader(xz io.Reader) (r *Reader, err error) {
|
||||
return ReaderConfig{}.NewReader(xz)
|
||||
}
|
||||
|
||||
// NewReader creates an xz stream reader. The created reader will be
|
||||
// able to process multiple streams and padding unless a SingleStream
|
||||
// has been set in the reader configuration c.
|
||||
func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r = &Reader{
|
||||
ReaderConfig: c,
|
||||
xz: xz,
|
||||
}
|
||||
if r.sr, err = c.newStreamReader(xz); err != nil {
|
||||
if err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// errUnexpectedData reports data found after the end of a stream.
var errUnexpectedData = errors.New("xz: unexpected data after stream")
|
||||
|
||||
// Read reads uncompressed data from the stream.
|
||||
func (r *Reader) Read(p []byte) (n int, err error) {
|
||||
for n < len(p) {
|
||||
if r.sr == nil {
|
||||
if r.SingleStream {
|
||||
data := make([]byte, 1)
|
||||
_, err = io.ReadFull(r.xz, data)
|
||||
if err != io.EOF {
|
||||
return n, errUnexpectedData
|
||||
}
|
||||
return n, io.EOF
|
||||
}
|
||||
for {
|
||||
r.sr, err = r.ReaderConfig.newStreamReader(r.xz)
|
||||
if err != errPadding {
|
||||
break
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
k, err := r.sr.Read(p[n:])
|
||||
n += k
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
r.sr = nil
|
||||
continue
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// errPadding reports that four zero bytes were read where a stream
// header was expected.
var errPadding = errors.New("xz: padding (4 zero bytes) encountered")
|
||||
|
||||
// newStreamReader creates a new xz stream reader using the given configuration
|
||||
// parameters. NewReader reads and checks the header of the xz stream.
|
||||
func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
data := make([]byte, HeaderLen)
|
||||
if _, err := io.ReadFull(xz, data[:4]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) {
|
||||
return nil, errPadding
|
||||
}
|
||||
if _, err = io.ReadFull(xz, data[4:]); err != nil {
|
||||
if err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
r = &streamReader{
|
||||
ReaderConfig: c,
|
||||
xz: xz,
|
||||
index: make([]record, 0, 4),
|
||||
}
|
||||
if err = r.h.UnmarshalBinary(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
xlog.Debugf("xz header %s", r.h)
|
||||
if r.newHash, err = newHashFunc(r.h.flags); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// errIndex reports a problem with the index of an xz file.
var errIndex = errors.New("xz: error in xz file index")
|
||||
|
||||
// readTail reads the index body and the xz footer.
|
||||
func (r *streamReader) readTail() error {
|
||||
index, n, err := readIndexBody(r.xz)
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
}
|
||||
if len(index) != len(r.index) {
|
||||
return fmt.Errorf("xz: index length is %d; want %d",
|
||||
len(index), len(r.index))
|
||||
}
|
||||
for i, rec := range r.index {
|
||||
if rec != index[i] {
|
||||
return fmt.Errorf("xz: record %d is %v; want %v",
|
||||
i, rec, index[i])
|
||||
}
|
||||
}
|
||||
|
||||
p := make([]byte, footerLen)
|
||||
if _, err = io.ReadFull(r.xz, p); err != nil {
|
||||
if err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
}
|
||||
var f footer
|
||||
if err = f.UnmarshalBinary(p); err != nil {
|
||||
return err
|
||||
}
|
||||
xlog.Debugf("xz footer %s", f)
|
||||
if f.flags != r.h.flags {
|
||||
return errors.New("xz: footer flags incorrect")
|
||||
}
|
||||
if f.indexSize != int64(n)+1 {
|
||||
return errors.New("xz: index size in footer wrong")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read reads actual data from the xz stream.
|
||||
func (r *streamReader) Read(p []byte) (n int, err error) {
|
||||
for n < len(p) {
|
||||
if r.br == nil {
|
||||
bh, hlen, err := readBlockHeader(r.xz)
|
||||
if err != nil {
|
||||
if err == errIndexIndicator {
|
||||
if err = r.readTail(); err != nil {
|
||||
return n, err
|
||||
}
|
||||
return n, io.EOF
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
xlog.Debugf("block %v", *bh)
|
||||
r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh,
|
||||
hlen, r.newHash())
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
k, err := r.br.Read(p[n:])
|
||||
n += k
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
r.index = append(r.index, r.br.record())
|
||||
r.br = nil
|
||||
} else {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// countingReader wraps a reader and keeps track of the number of bytes
// read through it.
type countingReader struct {
	r io.Reader
	n int64
}

// Read forwards to the wrapped reader and adds the number of bytes it
// returned to the counter, whether or not an error occurred.
func (lr *countingReader) Read(p []byte) (n int, err error) {
	got, rerr := lr.r.Read(p)
	lr.n += int64(got)
	return got, rerr
}
|
||||
|
||||
// blockReader supports the reading of a block.
|
||||
type blockReader struct {
|
||||
lxz countingReader
|
||||
header *blockHeader
|
||||
headerLen int
|
||||
n int64
|
||||
hash hash.Hash
|
||||
r io.Reader
|
||||
err error
|
||||
}
|
||||
|
||||
// newBlockReader creates a new block reader.
|
||||
func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader,
|
||||
hlen int, hash hash.Hash) (br *blockReader, err error) {
|
||||
|
||||
br = &blockReader{
|
||||
lxz: countingReader{r: xz},
|
||||
header: h,
|
||||
headerLen: hlen,
|
||||
hash: hash,
|
||||
}
|
||||
|
||||
fr, err := c.newFilterReader(&br.lxz, h.filters)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
br.r = io.TeeReader(fr, br.hash)
|
||||
|
||||
return br, nil
|
||||
}
|
||||
|
||||
// uncompressedSize returns the uncompressed size of the block.
|
||||
func (br *blockReader) uncompressedSize() int64 {
|
||||
return br.n
|
||||
}
|
||||
|
||||
// compressedSize returns the compressed size of the block.
|
||||
func (br *blockReader) compressedSize() int64 {
|
||||
return br.lxz.n
|
||||
}
|
||||
|
||||
// unpaddedSize computes the unpadded size for the block.
|
||||
func (br *blockReader) unpaddedSize() int64 {
|
||||
n := int64(br.headerLen)
|
||||
n += br.compressedSize()
|
||||
n += int64(br.hash.Size())
|
||||
return n
|
||||
}
|
||||
|
||||
// record returns the index record for the current block.
|
||||
func (br *blockReader) record() record {
|
||||
return record{br.unpaddedSize(), br.uncompressedSize()}
|
||||
}
|
||||
|
||||
// errBlockSize reports that the uncompressed size stated in the block
// header does not match the data.
var errBlockSize = errors.New("xz: wrong uncompressed size for block")
|
||||
|
||||
// Read reads data from the block.
|
||||
func (br *blockReader) Read(p []byte) (n int, err error) {
|
||||
n, err = br.r.Read(p)
|
||||
br.n += int64(n)
|
||||
|
||||
u := br.header.uncompressedSize
|
||||
if u >= 0 && br.uncompressedSize() > u {
|
||||
return n, errors.New("xz: wrong uncompressed size for block")
|
||||
}
|
||||
c := br.header.compressedSize
|
||||
if c >= 0 && br.compressedSize() > c {
|
||||
return n, errors.New("xz: wrong compressed size for block")
|
||||
}
|
||||
if err != io.EOF {
|
||||
return n, err
|
||||
}
|
||||
if br.uncompressedSize() < u || br.compressedSize() < c {
|
||||
return n, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
s := br.hash.Size()
|
||||
k := padLen(br.lxz.n)
|
||||
q := make([]byte, k+s, k+2*s)
|
||||
if _, err = io.ReadFull(br.lxz.r, q); err != nil {
|
||||
if err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
if !allZeros(q[:k]) {
|
||||
return n, errors.New("xz: non-zero block padding")
|
||||
}
|
||||
checkSum := q[k:]
|
||||
computedSum := br.hash.Sum(checkSum[s:])
|
||||
if !bytes.Equal(checkSum, computedSum) {
|
||||
return n, errors.New("xz: checksum error for block")
|
||||
}
|
||||
return n, io.EOF
|
||||
}
|
||||
|
||||
func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader,
|
||||
err error) {
|
||||
|
||||
if err = verifyFilters(f); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fr = r
|
||||
for i := len(f) - 1; i >= 0; i-- {
|
||||
fr, err = f[i].reader(fr, c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return fr, nil
|
||||
}
|
|
@ -0,0 +1,386 @@
|
|||
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xz
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"hash"
|
||||
"io"
|
||||
|
||||
"github.com/ulikunitz/xz/lzma"
|
||||
)
|
||||
|
||||
// WriterConfig describe the parameters for an xz writer.
|
||||
type WriterConfig struct {
|
||||
Properties *lzma.Properties
|
||||
DictCap int
|
||||
BufSize int
|
||||
BlockSize int64
|
||||
// checksum method: CRC32, CRC64 or SHA256
|
||||
CheckSum byte
|
||||
// match algorithm
|
||||
Matcher lzma.MatchAlgorithm
|
||||
}
|
||||
|
||||
// fill replaces zero values with default values.
|
||||
func (c *WriterConfig) fill() {
|
||||
if c.Properties == nil {
|
||||
c.Properties = &lzma.Properties{LC: 3, LP: 0, PB: 2}
|
||||
}
|
||||
if c.DictCap == 0 {
|
||||
c.DictCap = 8 * 1024 * 1024
|
||||
}
|
||||
if c.BufSize == 0 {
|
||||
c.BufSize = 4096
|
||||
}
|
||||
if c.BlockSize == 0 {
|
||||
c.BlockSize = maxInt64
|
||||
}
|
||||
if c.CheckSum == 0 {
|
||||
c.CheckSum = CRC64
|
||||
}
|
||||
}
|
||||
|
||||
// Verify checks the configuration for errors. Zero values will be
|
||||
// replaced by default values.
|
||||
func (c *WriterConfig) Verify() error {
|
||||
if c == nil {
|
||||
return errors.New("xz: writer configuration is nil")
|
||||
}
|
||||
c.fill()
|
||||
lc := lzma.Writer2Config{
|
||||
Properties: c.Properties,
|
||||
DictCap: c.DictCap,
|
||||
BufSize: c.BufSize,
|
||||
Matcher: c.Matcher,
|
||||
}
|
||||
if err := lc.Verify(); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.BlockSize <= 0 {
|
||||
return errors.New("xz: block size out of range")
|
||||
}
|
||||
if err := verifyFlags(c.CheckSum); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// filters creates the filter list for the given parameters.
|
||||
func (c *WriterConfig) filters() []filter {
|
||||
return []filter{&lzmaFilter{int64(c.DictCap)}}
|
||||
}
|
||||
|
||||
// maxInt64 is the largest value a signed 64-bit integer can hold.
const maxInt64 = 1<<63 - 1
|
||||
|
||||
// verifyFilters checks the filter list for the length and the right
|
||||
// sequence of filters.
|
||||
func verifyFilters(f []filter) error {
|
||||
if len(f) == 0 {
|
||||
return errors.New("xz: no filters")
|
||||
}
|
||||
if len(f) > 4 {
|
||||
return errors.New("xz: more than four filters")
|
||||
}
|
||||
for _, g := range f[:len(f)-1] {
|
||||
if g.last() {
|
||||
return errors.New("xz: last filter is not last")
|
||||
}
|
||||
}
|
||||
if !f[len(f)-1].last() {
|
||||
return errors.New("xz: wrong last filter")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newFilterWriteCloser converts a filter list into a WriteCloser that
|
||||
// can be used by a blockWriter.
|
||||
func (c *WriterConfig) newFilterWriteCloser(w io.Writer, f []filter) (fw io.WriteCloser, err error) {
|
||||
if err = verifyFilters(f); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fw = nopWriteCloser(w)
|
||||
for i := len(f) - 1; i >= 0; i-- {
|
||||
fw, err = f[i].writeCloser(fw, c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return fw, nil
|
||||
}
|
||||
|
||||
// nopWCloser turns a Writer into a WriteCloser whose Close method has
// no effect.
type nopWCloser struct{ io.Writer }

// Close does nothing and always returns nil.
func (nopWCloser) Close() error { return nil }

// nopWriteCloser wraps w in a WriteCloser whose Close method does
// nothing besides returning nil.
func nopWriteCloser(w io.Writer) io.WriteCloser {
	return nopWCloser{Writer: w}
}
|
||||
|
||||
// Writer compresses data written to it. It is an io.WriteCloser.
|
||||
type Writer struct {
|
||||
WriterConfig
|
||||
|
||||
xz io.Writer
|
||||
bw *blockWriter
|
||||
newHash func() hash.Hash
|
||||
h header
|
||||
index []record
|
||||
closed bool
|
||||
}
|
||||
|
||||
// newBlockWriter creates a new block writer writes the header out.
|
||||
func (w *Writer) newBlockWriter() error {
|
||||
var err error
|
||||
w.bw, err = w.WriterConfig.newBlockWriter(w.xz, w.newHash())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err = w.bw.writeHeader(w.xz); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// closeBlockWriter closes a block writer and records the sizes in the
|
||||
// index.
|
||||
func (w *Writer) closeBlockWriter() error {
|
||||
var err error
|
||||
if err = w.bw.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
w.index = append(w.index, w.bw.record())
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewWriter creates a new xz writer using default parameters.
|
||||
func NewWriter(xz io.Writer) (w *Writer, err error) {
|
||||
return WriterConfig{}.NewWriter(xz)
|
||||
}
|
||||
|
||||
// NewWriter creates a new Writer using the given configuration parameters.
|
||||
func (c WriterConfig) NewWriter(xz io.Writer) (w *Writer, err error) {
|
||||
if err = c.Verify(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
w = &Writer{
|
||||
WriterConfig: c,
|
||||
xz: xz,
|
||||
h: header{c.CheckSum},
|
||||
index: make([]record, 0, 4),
|
||||
}
|
||||
if w.newHash, err = newHashFunc(c.CheckSum); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
data, err := w.h.MarshalBinary()
|
||||
if _, err = xz.Write(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = w.newBlockWriter(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return w, nil
|
||||
|
||||
}
|
||||
|
||||
// Write compresses the uncompressed data provided.
|
||||
func (w *Writer) Write(p []byte) (n int, err error) {
|
||||
if w.closed {
|
||||
return 0, errClosed
|
||||
}
|
||||
for {
|
||||
k, err := w.bw.Write(p[n:])
|
||||
n += k
|
||||
if err != errNoSpace {
|
||||
return n, err
|
||||
}
|
||||
if err = w.closeBlockWriter(); err != nil {
|
||||
return n, err
|
||||
}
|
||||
if err = w.newBlockWriter(); err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close closes the writer and adds the footer to the Writer. Close
|
||||
// doesn't close the underlying writer.
|
||||
func (w *Writer) Close() error {
|
||||
if w.closed {
|
||||
return errClosed
|
||||
}
|
||||
w.closed = true
|
||||
var err error
|
||||
if err = w.closeBlockWriter(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f := footer{flags: w.h.flags}
|
||||
if f.indexSize, err = writeIndex(w.xz, w.index); err != nil {
|
||||
return err
|
||||
}
|
||||
data, err := f.MarshalBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err = w.xz.Write(data); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// countingWriter forwards writes to w and keeps a running total of the
// bytes written in n.
type countingWriter struct {
	w io.Writer
	n int64
}

// Write passes p to the wrapped writer and adds the number of bytes
// written to the counter. If the wrapped writer succeeded but the
// counter wrapped around to a negative value, an overflow error is
// returned instead.
func (cw *countingWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.n += int64(n)
	if err != nil || cw.n >= 0 {
		return n, err
	}
	return n, errors.New("xz: counter overflow")
}
|
||||
|
||||
// blockWriter is writes a single block.
|
||||
type blockWriter struct {
|
||||
cxz countingWriter
|
||||
// mw combines io.WriteCloser w and the hash.
|
||||
mw io.Writer
|
||||
w io.WriteCloser
|
||||
n int64
|
||||
blockSize int64
|
||||
closed bool
|
||||
headerLen int
|
||||
|
||||
filters []filter
|
||||
hash hash.Hash
|
||||
}
|
||||
|
||||
// newBlockWriter creates a new block writer.
|
||||
func (c *WriterConfig) newBlockWriter(xz io.Writer, hash hash.Hash) (bw *blockWriter, err error) {
|
||||
bw = &blockWriter{
|
||||
cxz: countingWriter{w: xz},
|
||||
blockSize: c.BlockSize,
|
||||
filters: c.filters(),
|
||||
hash: hash,
|
||||
}
|
||||
bw.w, err = c.newFilterWriteCloser(&bw.cxz, bw.filters)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bw.mw = io.MultiWriter(bw.w, bw.hash)
|
||||
return bw, nil
|
||||
}
|
||||
|
||||
// writeHeader writes the header. If the function is called after Close
|
||||
// the commpressedSize and uncompressedSize fields will be filled.
|
||||
func (bw *blockWriter) writeHeader(w io.Writer) error {
|
||||
h := blockHeader{
|
||||
compressedSize: -1,
|
||||
uncompressedSize: -1,
|
||||
filters: bw.filters,
|
||||
}
|
||||
if bw.closed {
|
||||
h.compressedSize = bw.compressedSize()
|
||||
h.uncompressedSize = bw.uncompressedSize()
|
||||
}
|
||||
data, err := h.MarshalBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err = w.Write(data); err != nil {
|
||||
return err
|
||||
}
|
||||
bw.headerLen = len(data)
|
||||
return nil
|
||||
}
|
||||
|
||||
// compressed size returns the amount of data written to the underlying
|
||||
// stream.
|
||||
func (bw *blockWriter) compressedSize() int64 {
|
||||
return bw.cxz.n
|
||||
}
|
||||
|
||||
// uncompressedSize returns the number of data written to the
|
||||
// blockWriter
|
||||
func (bw *blockWriter) uncompressedSize() int64 {
|
||||
return bw.n
|
||||
}
|
||||
|
||||
// unpaddedSize returns the sum of the header length, the uncompressed
|
||||
// size of the block and the hash size.
|
||||
func (bw *blockWriter) unpaddedSize() int64 {
|
||||
if bw.headerLen <= 0 {
|
||||
panic("xz: block header not written")
|
||||
}
|
||||
n := int64(bw.headerLen)
|
||||
n += bw.compressedSize()
|
||||
n += int64(bw.hash.Size())
|
||||
return n
|
||||
}
|
||||
|
||||
// record returns the record for the current stream. Call Close before
|
||||
// calling this method.
|
||||
func (bw *blockWriter) record() record {
|
||||
return record{bw.unpaddedSize(), bw.uncompressedSize()}
|
||||
}
|
||||
|
||||
var (
	// errClosed is returned when a writer is used after it was closed.
	errClosed = errors.New("xz: writer already closed")

	// errNoSpace is returned by a block writer that cannot accept all
	// of the data because the block size limit has been reached.
	errNoSpace = errors.New("xz: no space")
)
|
||||
|
||||
// Write writes uncompressed data to the block writer.
|
||||
func (bw *blockWriter) Write(p []byte) (n int, err error) {
|
||||
if bw.closed {
|
||||
return 0, errClosed
|
||||
}
|
||||
|
||||
t := bw.blockSize - bw.n
|
||||
if int64(len(p)) > t {
|
||||
err = errNoSpace
|
||||
p = p[:t]
|
||||
}
|
||||
|
||||
var werr error
|
||||
n, werr = bw.mw.Write(p)
|
||||
bw.n += int64(n)
|
||||
if werr != nil {
|
||||
return n, werr
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Close closes the writer.
|
||||
func (bw *blockWriter) Close() error {
|
||||
if bw.closed {
|
||||
return errClosed
|
||||
}
|
||||
bw.closed = true
|
||||
if err := bw.w.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
s := bw.hash.Size()
|
||||
k := padLen(bw.cxz.n)
|
||||
p := make([]byte, k+s)
|
||||
bw.hash.Sum(p[k:k])
|
||||
if _, err := bw.cxz.w.Write(p); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1299,7 +1299,7 @@
|
|||
"revisionTime": "2018-03-05T22:44:21Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "hU3ibLi5mZBayj0HPIVpcMSvRgU=",
|
||||
"checksumSHA1": "ByFN6xh/YGP/D3DM9c8p0D9D1XM=",
|
||||
"path": "github.com/sirupsen/logrus",
|
||||
"revision": "90150a8ed11b6ce285e77e8af2b0109559ce4777",
|
||||
"revisionTime": "2018-03-15T01:07:03Z"
|
||||
|
@ -1336,6 +1336,30 @@
|
|||
"path": "github.com/ugorji/go/codec/codecgen",
|
||||
"revision": "646ae4a518c1c3be0739df898118d9bccf993858"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "amJo1E3F0QxcfxSt+FxDiQbR1wU=",
|
||||
"path": "github.com/ulikunitz/xz",
|
||||
"revision": "636d36a76670e6c700f22fd5f4588679ff2896c4",
|
||||
"revisionTime": "2018-07-03T11:21:13Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "vjnTkzNrMs5Xj6so/fq0mQ6dT1c=",
|
||||
"path": "github.com/ulikunitz/xz/internal/hash",
|
||||
"revision": "636d36a76670e6c700f22fd5f4588679ff2896c4",
|
||||
"revisionTime": "2018-07-03T11:21:13Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "m0pm57ASBK/CTdmC0ppRHO17mBs=",
|
||||
"path": "github.com/ulikunitz/xz/internal/xlog",
|
||||
"revision": "636d36a76670e6c700f22fd5f4588679ff2896c4",
|
||||
"revisionTime": "2018-07-03T11:21:13Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "2vZw6zc8xuNlyVz2QKvdlNSZQ1U=",
|
||||
"path": "github.com/ulikunitz/xz/lzma",
|
||||
"revision": "636d36a76670e6c700f22fd5f4588679ff2896c4",
|
||||
"revisionTime": "2018-07-03T11:21:13Z"
|
||||
},
|
||||
{
|
||||
"checksumSHA1": "AGBqaHqBx5wJdVRsQt4GeawjbXA=",
|
||||
"path": "github.com/vmware/govmomi",
|
||||
|
|
Loading…
Reference in New Issue