Adds bzip2 support to post-processor (#10867)
* compress post processor: add bzip2 + tests * post-processor/compress/post-processor_test.go: refactor tests and add tests for bzip2 * post-processor_test.go: test write/read for all compression algos * check artifact.Destroy() errors * close archive before deleting it Co-authored-by: Adrien Delorme <azr@users.noreply.github.com>
This commit is contained in:
parent
c17f236e85
commit
a6c5958c67
3
go.mod
3
go.mod
|
@ -25,6 +25,7 @@ require (
|
|||
github.com/dgrijalva/jwt-go v3.2.0+incompatible
|
||||
github.com/digitalocean/go-qemu v0.0.0-20201211181942-d361e7b4965f
|
||||
github.com/digitalocean/godo v1.11.1
|
||||
github.com/dsnet/compress v0.0.1
|
||||
github.com/exoscale/packer-plugin-exoscale v0.1.1
|
||||
github.com/fatih/camelcase v1.0.0
|
||||
github.com/fatih/structtag v1.0.0
|
||||
|
@ -81,7 +82,7 @@ require (
|
|||
github.com/tencentcloud/tencentcloud-sdk-go v3.0.222+incompatible
|
||||
github.com/ucloud/ucloud-sdk-go v0.16.3
|
||||
github.com/ufilesdk-dev/ufile-gosdk v0.0.0-20190830075812-b4dbc4ef43a6
|
||||
github.com/ulikunitz/xz v0.5.5
|
||||
github.com/ulikunitz/xz v0.5.6
|
||||
github.com/vmware/govmomi v0.23.1
|
||||
github.com/xanzy/go-cloudstack v0.0.0-20190526095453-42f262b63ed0
|
||||
github.com/yandex-cloud/go-genproto v0.0.0-20200915125933-33de72a328bd
|
||||
|
|
14
go.sum
14
go.sum
|
@ -200,6 +200,9 @@ github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TR
|
|||
github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
|
||||
github.com/dnaeon/go-vcr v1.0.1 h1:r8L/HqC0Hje5AXMu1ooW8oyQyOFv4GxqpL0nRP7SLLY=
|
||||
github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E=
|
||||
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
||||
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||
github.com/dylanmei/iso8601 v0.1.0 h1:812NGQDBcqquTfH5Yeo7lwR0nzx/cKdsmf3qMjPURUI=
|
||||
github.com/dylanmei/iso8601 v0.1.0/go.mod h1:w9KhXSgIyROl1DefbMYIE7UVSIvELTbMrCfx+QkYnoQ=
|
||||
github.com/dylanmei/winrmtest v0.0.0-20170819153634-c2fbb09e6c08 h1:0bp6/GrNOrTDtSXe9YYGCwf8jp5Fb/b+4a6MTRm4qzY=
|
||||
|
@ -445,12 +448,6 @@ github.com/hashicorp/packer-plugin-sdk v0.0.14/go.mod h1:tNb3XzJPnjMl3QuUdKmF47B
|
|||
github.com/hashicorp/packer-plugin-sdk v0.1.0/go.mod h1:CFsC20uZjtER/EnTn/CSMKD0kEdkqOVev8mtOmfnZiI=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.1/go.mod h1:1d3nqB9LUsXMQaNUiL67Q+WYEtjsVcLNTX8ikVlpBrc=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.2/go.mod h1:KRjczE1/c9NV5Re+PXt3myJsVTI/FxEHpZjRjOH0Fug=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407090040-d1eff9fe99e8 h1:pkB+Y15/ck/NRUBFF9DrdPYQwmnHsEvnNwmgMfl/8hA=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407090040-d1eff9fe99e8/go.mod h1:xePpgQgQYv/bamiypx3hH9ukidxDdcN8q0R0wLi8IEQ=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407130359-85b84b1d6060 h1:uRrDQYiP3pFn5W17Bvj9If2taHB/DqIP7uuPQGnLDFM=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407130359-85b84b1d6060/go.mod h1:xePpgQgQYv/bamiypx3hH9ukidxDdcN8q0R0wLi8IEQ=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407130906-826d4f395a10 h1:VlcHJEpR99eeZi7uujdQKFOIK8rE5ditXGqpBWiGjc4=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407130906-826d4f395a10/go.mod h1:xePpgQgQYv/bamiypx3hH9ukidxDdcN8q0R0wLi8IEQ=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407132324-af39c7839daf h1:0DBlIExTDefzbfkOl213QtgJsVJXWdgW/aIQIvYUMzs=
|
||||
github.com/hashicorp/packer-plugin-sdk v0.1.3-0.20210407132324-af39c7839daf/go.mod h1:xePpgQgQYv/bamiypx3hH9ukidxDdcN8q0R0wLi8IEQ=
|
||||
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
|
||||
|
@ -496,10 +493,12 @@ github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfV
|
|||
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v0.0.0-20160131094358-f86d2e6d8a77/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/compress v1.11.6/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||
github.com/klauspost/compress v1.11.7 h1:0hzRabrMN4tSTvMfnL3SCv1ZGeAP23ynzodBgaHeMeg=
|
||||
github.com/klauspost/compress v1.11.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||
github.com/klauspost/cpuid v0.0.0-20160106104451-349c67577817/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/crc32 v0.0.0-20160114101742-999f3125931f/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
|
||||
github.com/klauspost/crc32 v1.2.0 h1:0VuyqOCruD33/lJ/ojXNvzVyl8Zr5zdTmj9l9qLZ86I=
|
||||
github.com/klauspost/crc32 v1.2.0/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
|
||||
|
@ -686,8 +685,9 @@ github.com/ugorji/go v1.2.4 h1:cTciPbZ/VSOzCLKclmssnfQ/jyoVyOcJ3aoJyUV1Urc=
|
|||
github.com/ugorji/go v1.2.4/go.mod h1:EuaSCk8iZMdIspsu6HXH7X2UGKw1ezO4wCfGszGmmo4=
|
||||
github.com/ugorji/go/codec v1.2.4 h1:C5VurWRRCKjuENsbM6GYVw8W++WVW9rSxoACKIvxzz8=
|
||||
github.com/ugorji/go/codec v1.2.4/go.mod h1:bWBu1+kIRWcF8uMklKaJrR6fTWQOwAlrIzX22pHwryA=
|
||||
github.com/ulikunitz/xz v0.5.5 h1:pFrO0lVpTBXLpYw+pnLj6TbvHuyjXMfjGeCwSqCVwok=
|
||||
github.com/ulikunitz/xz v0.5.5/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||
github.com/ulikunitz/xz v0.5.6 h1:jGHAfXawEGZQ3blwU5wnWKQJvAraT7Ftq9EXjnXYgt8=
|
||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
|
||||
github.com/valyala/fasttemplate v1.1.0/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"runtime"
|
||||
|
||||
"github.com/biogo/hts/bgzf"
|
||||
"github.com/dsnet/compress/bzip2"
|
||||
"github.com/hashicorp/hcl/v2/hcldec"
|
||||
"github.com/hashicorp/packer-plugin-sdk/common"
|
||||
packersdk "github.com/hashicorp/packer-plugin-sdk/packer"
|
||||
|
@ -160,6 +161,14 @@ func (p *PostProcessor) PostProcess(
|
|||
return nil, false, false, fmt.Errorf(errTmpl, p.config.Algorithm, err)
|
||||
}
|
||||
defer output.Close()
|
||||
case "bzip2":
|
||||
ui.Say(fmt.Sprintf("Using bzip2 compression with 1 core for %s (library does not support MT)",
|
||||
target))
|
||||
output, err = makeBZIP2Writer(outputFile, p.config.CompressionLevel)
|
||||
if err != nil {
|
||||
return nil, false, false, fmt.Errorf(errTmpl, p.config.Algorithm, err)
|
||||
}
|
||||
defer output.Close()
|
||||
case "lz4":
|
||||
ui.Say(fmt.Sprintf("Using lz4 compression with %d cores for %s",
|
||||
runtime.GOMAXPROCS(-1), target))
|
||||
|
@ -242,12 +251,13 @@ func (config *Config) detectFromFilename() {
|
|||
var result [][]string
|
||||
|
||||
extensions := map[string]string{
|
||||
"tar": "tar",
|
||||
"zip": "zip",
|
||||
"gz": "pgzip",
|
||||
"lz4": "lz4",
|
||||
"bgzf": "bgzf",
|
||||
"xz": "xz",
|
||||
"tar": "tar",
|
||||
"zip": "zip",
|
||||
"gz": "pgzip",
|
||||
"lz4": "lz4",
|
||||
"bgzf": "bgzf",
|
||||
"xz": "xz",
|
||||
"bzip2": "bzip2",
|
||||
}
|
||||
|
||||
if config.Format == "" {
|
||||
|
@ -304,6 +314,20 @@ func makeBGZFWriter(output io.WriteCloser, compressionLevel int) (io.WriteCloser
|
|||
return bgzfWriter, nil
|
||||
}
|
||||
|
||||
func makeBZIP2Writer(output io.Writer, compressionLevel int) (io.WriteCloser, error) {
|
||||
// Set the default to highest level compression
|
||||
bzipCFG := &bzip2.WriterConfig{Level: 9}
|
||||
// Override our set defaults
|
||||
if compressionLevel > 0 {
|
||||
bzipCFG.Level = compressionLevel
|
||||
}
|
||||
bzipWriter, err := bzip2.NewWriter(output, bzipCFG)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return bzipWriter, nil
|
||||
}
|
||||
|
||||
func makeLZ4Writer(output io.WriteCloser, compressionLevel int) (io.WriteCloser, error) {
|
||||
lzwriter := lz4.NewWriter(output)
|
||||
if compressionLevel > 0 {
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package compress
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"archive/zip"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"fmt"
|
||||
|
@ -9,9 +11,11 @@ import (
|
|||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/dsnet/compress/bzip2"
|
||||
packersdk "github.com/hashicorp/packer-plugin-sdk/packer"
|
||||
"github.com/hashicorp/packer-plugin-sdk/template"
|
||||
"github.com/hashicorp/packer/builder/file"
|
||||
"github.com/pierrec/lz4"
|
||||
)
|
||||
|
||||
func TestDetectFilename(t *testing.T) {
|
||||
|
@ -58,73 +62,6 @@ func TestDetectFilename(t *testing.T) {
|
|||
|
||||
const expectedFileContents = "Hello world!"
|
||||
|
||||
func TestSimpleCompress(t *testing.T) {
|
||||
const config = `
|
||||
{
|
||||
"post-processors": [
|
||||
{
|
||||
"type": "compress",
|
||||
"output": "package.tar.gz"
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
artifact := testArchive(t, config)
|
||||
defer artifact.Destroy()
|
||||
|
||||
fi, err := os.Stat("package.tar.gz")
|
||||
if err != nil {
|
||||
t.Errorf("Unable to read archive: %s", err)
|
||||
}
|
||||
if fi.IsDir() {
|
||||
t.Error("Archive should not be a directory")
|
||||
}
|
||||
}
|
||||
|
||||
func TestZipArchive(t *testing.T) {
|
||||
const config = `
|
||||
{
|
||||
"post-processors": [
|
||||
{
|
||||
"type": "compress",
|
||||
"output": "package.zip"
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
artifact := testArchive(t, config)
|
||||
defer artifact.Destroy()
|
||||
|
||||
// Verify things look good
|
||||
_, err := os.Stat("package.zip")
|
||||
if err != nil {
|
||||
t.Errorf("Unable to read archive: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTarArchive(t *testing.T) {
|
||||
const config = `
|
||||
{
|
||||
"post-processors": [
|
||||
{
|
||||
"type": "compress",
|
||||
"output": "package.tar"
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
artifact := testArchive(t, config)
|
||||
defer artifact.Destroy()
|
||||
|
||||
// Verify things look good
|
||||
_, err := os.Stat("package.tar")
|
||||
if err != nil {
|
||||
t.Errorf("Unable to read archive: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompressOptions(t *testing.T) {
|
||||
const config = `
|
||||
{
|
||||
|
@ -246,3 +183,105 @@ func testArchive(t *testing.T, config string) packersdk.Artifact {
|
|||
|
||||
return artifactOut
|
||||
}
|
||||
|
||||
func TestArchive(t *testing.T) {
|
||||
tc := map[string]func(*os.File) ([]byte, error){
|
||||
"bzip2": func(archive *os.File) ([]byte, error) {
|
||||
bzipReader, err := bzip2.NewReader(archive, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ioutil.ReadAll(bzipReader)
|
||||
},
|
||||
"zip": func(archive *os.File) ([]byte, error) {
|
||||
fi, _ := archive.Stat()
|
||||
zipReader, err := zip.NewReader(archive, fi.Size())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctt, err := zipReader.File[0].Open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ioutil.ReadAll(ctt)
|
||||
},
|
||||
"tar": func(archive *os.File) ([]byte, error) {
|
||||
tarReader := tar.NewReader(archive)
|
||||
_, err := tarReader.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ioutil.ReadAll(tarReader)
|
||||
},
|
||||
"tar.gz": func(archive *os.File) ([]byte, error) {
|
||||
gzipReader, err := gzip.NewReader(archive)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tarReader := tar.NewReader(gzipReader)
|
||||
_, err = tarReader.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ioutil.ReadAll(tarReader)
|
||||
},
|
||||
"gz": func(archive *os.File) ([]byte, error) {
|
||||
gzipReader, _ := gzip.NewReader(archive)
|
||||
return ioutil.ReadAll(gzipReader)
|
||||
},
|
||||
"lz4": func(archive *os.File) ([]byte, error) {
|
||||
lz4Reader := lz4.NewReader(archive)
|
||||
return ioutil.ReadAll(lz4Reader)
|
||||
},
|
||||
}
|
||||
|
||||
for format, unzip := range tc {
|
||||
t.Run(format, func(t *testing.T) {
|
||||
config := fmt.Sprintf(`
|
||||
{
|
||||
"post-processors": [
|
||||
{
|
||||
"type": "compress",
|
||||
"output": "package.%s"
|
||||
}
|
||||
]
|
||||
}
|
||||
`, format)
|
||||
|
||||
artifact := testArchive(t, config)
|
||||
defer func() {
|
||||
err := artifact.Destroy()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
filename := fmt.Sprintf("package.%s", format)
|
||||
// Verify things look good
|
||||
_, err := os.Stat(filename)
|
||||
if err != nil {
|
||||
t.Errorf("Unable to read archive: %s", err)
|
||||
}
|
||||
|
||||
archive, err := os.Open(filename)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := archive.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
found, err := unzip(archive)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(found) != expectedFileContents {
|
||||
t.Errorf("Expected:\n%s\nFound:\n%s\n", expectedFileContents, string(found))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
sudo: false
|
||||
language: go
|
||||
before_install:
|
||||
- curl -L https://github.com/google/brotli/archive/v1.0.2.tar.gz | tar -zxv
|
||||
- (cd brotli-1.0.2 && mkdir out && cd out && ../configure-cmake && make && sudo make install)
|
||||
- rm -rf brotli-1.0.2
|
||||
- curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv
|
||||
- (cd zstd-1.3.2 && sudo make install)
|
||||
- rm -rf zstd-1.3.2
|
||||
- sudo ldconfig
|
||||
- mkdir /tmp/go1.12
|
||||
- curl -L -s https://dl.google.com/go/go1.12.linux-amd64.tar.gz | tar -zxf - -C /tmp/go1.12 --strip-components 1
|
||||
- unset GOROOT
|
||||
- (GO111MODULE=on /tmp/go1.12/bin/go mod vendor)
|
||||
- (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get golang.org/x/lint/golint@8f45f776aaf18cebc8d65861cc70c33c60471952)
|
||||
- (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get honnef.co/go/tools/cmd/staticcheck@2019.1)
|
||||
matrix:
|
||||
include:
|
||||
- go: 1.9.x
|
||||
script:
|
||||
- go test -v -race ./...
|
||||
- go: 1.10.x
|
||||
script:
|
||||
- go test -v -race ./...
|
||||
- go: 1.11.x
|
||||
script:
|
||||
- go test -v -race ./...
|
||||
- go: 1.12.x
|
||||
script:
|
||||
- ./ztest.sh
|
||||
- go: master
|
||||
script:
|
||||
- go test -v -race ./...
|
||||
allow_failures:
|
||||
- go: master
|
||||
fast_finish: true
|
|
@ -0,0 +1,24 @@
|
|||
Copyright © 2015, Joe Tsai and The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
* Neither the copyright holder nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,75 @@
|
|||
# Collection of compression libraries for Go #
|
||||
|
||||
[![GoDoc](https://godoc.org/github.com/dsnet/compress/cmp?status.svg)](https://godoc.org/github.com/dsnet/compress)
|
||||
[![Build Status](https://travis-ci.org/dsnet/compress.svg?branch=master)](https://travis-ci.org/dsnet/compress)
|
||||
[![Report Card](https://goreportcard.com/badge/github.com/dsnet/compress)](https://goreportcard.com/report/github.com/dsnet/compress)
|
||||
|
||||
## Introduction ##
|
||||
|
||||
**NOTE: This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason.**
|
||||
|
||||
This repository hosts a collection of compression related libraries. The goal of this project is to provide pure Go implementations for popular compression algorithms beyond what the Go standard library provides. The goals for these packages are as follows:
|
||||
* Maintainable: That the code remains well documented, well tested, readable, easy to maintain, and easy to verify that it conforms to the specification for the format being implemented.
|
||||
* Performant: To be able to compress and decompress within at least 80% of the rates that the C implementations are able to achieve.
|
||||
* Flexible: That the code provides low-level and fine granularity control over the compression streams similar to what the C APIs would provide.
|
||||
|
||||
Of these three, the first objective is often at odds with the other two objectives and provides interesting challenges. Higher performance can often be achieved by muddling abstraction layers or using non-intuitive low-level primitives. Also, more features and functionality, while useful in some situations, often complicates the API. Thus, this package will attempt to satisfy all the goals, but will defer to favoring maintainability when the performance or flexibility benefits are not significant enough.
|
||||
|
||||
|
||||
## Library Status ##
|
||||
|
||||
For the packages available, only some features are currently implemented:
|
||||
|
||||
| Package | Reader | Writer |
|
||||
| ------- | :----: | :----: |
|
||||
| brotli | :white_check_mark: | |
|
||||
| bzip2 | :white_check_mark: | :white_check_mark: |
|
||||
| flate | :white_check_mark: | |
|
||||
| xflate | :white_check_mark: | :white_check_mark: |
|
||||
|
||||
This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason. When the library becomes more mature, it is planned to eventually conform to some strict versioning scheme like [Semantic Versioning](http://semver.org/).
|
||||
|
||||
However, in the meanwhile, this library does provide some basic API guarantees. For the types defined below, the method signatures are guaranteed to not change. Note that the author still reserves the right to change the fields within each ```Reader``` and ```Writer``` structs.
|
||||
```go
|
||||
type ReaderConfig struct { ... }
|
||||
type Reader struct { ... }
|
||||
func NewReader(io.Reader, *ReaderConfig) (*Reader, error) { ... }
|
||||
func (*Reader) Read([]byte) (int, error) { ... }
|
||||
func (*Reader) Close() error { ... }
|
||||
|
||||
type WriterConfig struct { ... }
|
||||
type Writer struct { ... }
|
||||
func NewWriter(io.Writer, *WriterConfig) (*Writer, error) { ... }
|
||||
func (*Writer) Write([]byte) (int, error) { ... }
|
||||
func (*Writer) Close() error { ... }
|
||||
```
|
||||
|
||||
To see what work still remains, see the [Task List](https://github.com/dsnet/compress/wiki/Task-List).
|
||||
|
||||
## Performance ##
|
||||
|
||||
See [Performance Metrics](https://github.com/dsnet/compress/wiki/Performance-Metrics).
|
||||
|
||||
|
||||
## Frequently Asked Questions ##
|
||||
|
||||
See [Frequently Asked Questions](https://github.com/dsnet/compress/wiki/Frequently-Asked-Questions).
|
||||
|
||||
|
||||
## Installation ##
|
||||
|
||||
Run the command:
|
||||
|
||||
```go get -u github.com/dsnet/compress```
|
||||
|
||||
This library requires `Go1.9` or higher in order to build.
|
||||
|
||||
|
||||
## Packages ##
|
||||
|
||||
| Package | Description |
|
||||
| :------ | :---------- |
|
||||
| [brotli](http://godoc.org/github.com/dsnet/compress/brotli) | Package brotli implements the Brotli format, described in RFC 7932. |
|
||||
| [bzip2](http://godoc.org/github.com/dsnet/compress/bzip2) | Package bzip2 implements the BZip2 compressed data format. |
|
||||
| [flate](http://godoc.org/github.com/dsnet/compress/flate) | Package flate implements the DEFLATE format, described in RFC 1951. |
|
||||
| [xflate](http://godoc.org/github.com/dsnet/compress/xflate) | Package xflate implements the XFLATE format, an random-access extension to DEFLATE. |
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package compress is a collection of compression libraries.
|
||||
package compress
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
)
|
||||
|
||||
// The Error interface identifies all compression related errors.
|
||||
type Error interface {
|
||||
error
|
||||
CompressError()
|
||||
|
||||
// IsDeprecated reports the use of a deprecated and unsupported feature.
|
||||
IsDeprecated() bool
|
||||
|
||||
// IsCorrupted reports whether the input stream was corrupted.
|
||||
IsCorrupted() bool
|
||||
}
|
||||
|
||||
var _ Error = errors.Error{}
|
||||
|
||||
// ByteReader is an interface accepted by all decompression Readers.
|
||||
// It guarantees that the decompressor never reads more data than is necessary
|
||||
// from the underlying io.Reader.
|
||||
type ByteReader interface {
|
||||
io.Reader
|
||||
io.ByteReader
|
||||
}
|
||||
|
||||
var _ ByteReader = (*bufio.Reader)(nil)
|
||||
|
||||
// BufferedReader is an interface accepted by all decompression Readers.
|
||||
// It guarantees that the decompressor never reads more data than is necessary
|
||||
// from the underlying io.Reader. Since BufferedReader allows a decompressor
|
||||
// to peek at bytes further along in the stream without advancing the read
|
||||
// pointer, decompression can experience a significant performance gain when
|
||||
// provided a reader that satisfies this interface. Thus, a decompressor will
|
||||
// prefer this interface over ByteReader for performance reasons.
|
||||
//
|
||||
// The bufio.Reader satisfies this interface.
|
||||
type BufferedReader interface {
|
||||
io.Reader
|
||||
|
||||
// Buffered returns the number of bytes currently buffered.
|
||||
//
|
||||
// This value becomes invalid following the next Read/Discard operation.
|
||||
Buffered() int
|
||||
|
||||
// Peek returns the next n bytes without advancing the reader.
|
||||
//
|
||||
// If Peek returns fewer than n bytes, it also returns an error explaining
|
||||
// why the peek is short. Peek must support peeking of at least 8 bytes.
|
||||
// If 0 <= n <= Buffered(), Peek is guaranteed to succeed without reading
|
||||
// from the underlying io.Reader.
|
||||
//
|
||||
// This result becomes invalid following the next Read/Discard operation.
|
||||
Peek(n int) ([]byte, error)
|
||||
|
||||
// Discard skips the next n bytes, returning the number of bytes discarded.
|
||||
//
|
||||
// If Discard skips fewer than n bytes, it also returns an error.
|
||||
// If 0 <= n <= Buffered(), Discard is guaranteed to succeed without reading
|
||||
// from the underlying io.Reader.
|
||||
Discard(n int) (int, error)
|
||||
}
|
||||
|
||||
var _ BufferedReader = (*bufio.Reader)(nil)
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import "github.com/dsnet/compress/bzip2/internal/sais"
|
||||
|
||||
// The Burrows-Wheeler Transform implementation used here is based on the
|
||||
// Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan.
|
||||
// This implementation uses the sais algorithm originally written by Yuta Mori.
|
||||
//
|
||||
// The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a
|
||||
// mathematical relationship between Suffix Arrays and the Burrows-Wheeler
|
||||
// Transform, such that a SA can be converted to a BWT in O(n) time.
|
||||
//
|
||||
// References:
|
||||
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
|
||||
// https://github.com/cscott/compressjs/blob/master/lib/BWT.js
|
||||
// https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space
|
||||
type burrowsWheelerTransform struct {
|
||||
buf []byte
|
||||
sa []int
|
||||
perm []uint32
|
||||
}
|
||||
|
||||
func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) {
|
||||
if len(buf) == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
// TODO(dsnet): Find a way to avoid the duplicate input string method.
|
||||
// We only need to do this because suffix arrays (by definition) only
|
||||
// operate non-wrapped suffixes of a string. On the other hand,
|
||||
// the BWT specifically used in bzip2 operate on a strings that wrap-around
|
||||
// when being sorted.
|
||||
|
||||
// Step 1: Concatenate the input string to itself so that we can use the
|
||||
// suffix array algorithm for bzip2's variant of BWT.
|
||||
n := len(buf)
|
||||
bwt.buf = append(append(bwt.buf[:0], buf...), buf...)
|
||||
if cap(bwt.sa) < 2*n {
|
||||
bwt.sa = make([]int, 2*n)
|
||||
}
|
||||
t := bwt.buf[:2*n]
|
||||
sa := bwt.sa[:2*n]
|
||||
|
||||
// Step 2: Compute the suffix array (SA). The input string, t, will not be
|
||||
// modified, while the results will be written to the output, sa.
|
||||
sais.ComputeSA(t, sa)
|
||||
|
||||
// Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the
|
||||
// input, we have two copies of the input; in buf and buf2. Thus, we write
|
||||
// the transformation to buf, while using buf2.
|
||||
var j int
|
||||
buf2 := t[n:]
|
||||
for _, i := range sa {
|
||||
if i < n {
|
||||
if i == 0 {
|
||||
ptr = j
|
||||
i = n
|
||||
}
|
||||
buf[j] = buf2[i-1]
|
||||
j++
|
||||
}
|
||||
}
|
||||
return ptr
|
||||
}
|
||||
|
||||
func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) {
|
||||
if len(buf) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Step 1: Compute cumm, where cumm[ch] reports the total number of
|
||||
// characters that precede the character ch in the alphabet.
|
||||
var cumm [256]int
|
||||
for _, v := range buf {
|
||||
cumm[v]++
|
||||
}
|
||||
var sum int
|
||||
for i, v := range cumm {
|
||||
cumm[i] = sum
|
||||
sum += v
|
||||
}
|
||||
|
||||
// Step 2: Compute perm, where perm[ptr] contains a pointer to the next
|
||||
// byte in buf and the next pointer in perm itself.
|
||||
if cap(bwt.perm) < len(buf) {
|
||||
bwt.perm = make([]uint32, len(buf))
|
||||
}
|
||||
perm := bwt.perm[:len(buf)]
|
||||
for i, b := range buf {
|
||||
perm[cumm[b]] = uint32(i)
|
||||
cumm[b]++
|
||||
}
|
||||
|
||||
// Step 3: Follow each pointer in perm to the next byte, starting with the
|
||||
// origin pointer.
|
||||
if cap(bwt.buf) < len(buf) {
|
||||
bwt.buf = make([]byte, len(buf))
|
||||
}
|
||||
buf2 := bwt.buf[:len(buf)]
|
||||
i := perm[ptr]
|
||||
for j := range buf2 {
|
||||
buf2[j] = buf[i]
|
||||
i = perm[i]
|
||||
}
|
||||
copy(buf, buf2)
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package bzip2 implements the BZip2 compressed data format.
|
||||
//
|
||||
// Canonical C implementation:
|
||||
// http://bzip.org
|
||||
//
|
||||
// Unofficial format specification:
|
||||
// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
)
|
||||
|
||||
// There does not exist a formal specification of the BZip2 format. As such,
|
||||
// much of this work is derived by either reverse engineering the original C
|
||||
// source code or using secondary sources.
|
||||
//
|
||||
// Significant amounts of fuzz testing is done to ensure that outputs from
|
||||
// this package is properly decoded by the C library. Furthermore, we test that
|
||||
// both this package and the C library agree about what inputs are invalid.
|
||||
//
|
||||
// Compression stack:
|
||||
// Run-length encoding 1 (RLE1)
|
||||
// Burrows-Wheeler transform (BWT)
|
||||
// Move-to-front transform (MTF)
|
||||
// Run-length encoding 2 (RLE2)
|
||||
// Prefix encoding (PE)
|
||||
//
|
||||
// References:
|
||||
// http://bzip.org/
|
||||
// https://en.wikipedia.org/wiki/Bzip2
|
||||
// https://code.google.com/p/jbzip2/
|
||||
|
||||
const (
|
||||
BestSpeed = 1
|
||||
BestCompression = 9
|
||||
DefaultCompression = 6
|
||||
)
|
||||
|
||||
const (
|
||||
hdrMagic = 0x425a // Hex of "BZ"
|
||||
blkMagic = 0x314159265359 // BCD of PI
|
||||
endMagic = 0x177245385090 // BCD of sqrt(PI)
|
||||
|
||||
blockSize = 100000
|
||||
)
|
||||
|
||||
func errorf(c int, f string, a ...interface{}) error {
|
||||
return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
|
||||
}
|
||||
|
||||
func panicf(c int, f string, a ...interface{}) {
|
||||
errors.Panic(errorf(c, f, a...))
|
||||
}
|
||||
|
||||
// errWrap converts a lower-level errors.Error to be one from this package.
|
||||
// The replaceCode passed in will be used to replace the code for any errors
|
||||
// with the errors.Invalid code.
|
||||
//
|
||||
// For the Reader, set this to errors.Corrupted.
|
||||
// For the Writer, set this to errors.Internal.
|
||||
func errWrap(err error, replaceCode int) error {
|
||||
if cerr, ok := err.(errors.Error); ok {
|
||||
if errors.IsInvalid(cerr) {
|
||||
cerr.Code = replaceCode
|
||||
}
|
||||
err = errorf(cerr.Code, "%s", cerr.Msg)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var errClosed = errorf(errors.Closed, "")
|
||||
|
||||
// crc computes the CRC-32 used by BZip2.
|
||||
//
|
||||
// The CRC-32 computation in bzip2 treats bytes as having bits in big-endian
|
||||
// order. That is, the MSB is read before the LSB. Thus, we can use the
|
||||
// standard library version of CRC-32 IEEE with some minor adjustments.
|
||||
//
|
||||
// The byte array is used as an intermediate buffer to swap the bits of every
|
||||
// byte of the input.
|
||||
type crc struct {
|
||||
val uint32
|
||||
buf [256]byte
|
||||
}
|
||||
|
||||
// update computes the CRC-32 of appending buf to c.
|
||||
func (c *crc) update(buf []byte) {
|
||||
cval := internal.ReverseUint32(c.val)
|
||||
for len(buf) > 0 {
|
||||
n := len(buf)
|
||||
if n > len(c.buf) {
|
||||
n = len(c.buf)
|
||||
}
|
||||
for i, b := range buf[:n] {
|
||||
c.buf[i] = internal.ReverseLUT[b]
|
||||
}
|
||||
cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n])
|
||||
buf = buf[n:]
|
||||
}
|
||||
c.val = internal.ReverseUint32(cval)
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2016, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build !gofuzz
|
||||
|
||||
// This file exists to suppress fuzzing details from release builds.
|
||||
|
||||
package bzip2
|
||||
|
||||
type fuzzReader struct{}
|
||||
|
||||
func (*fuzzReader) updateChecksum(int64, uint32) {}
|
|
@ -0,0 +1,77 @@
|
|||
// Copyright 2016, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build gofuzz
|
||||
|
||||
// This file exists to export internal implementation details for fuzz testing.
|
||||
|
||||
package bzip2
|
||||
|
||||
func ForwardBWT(buf []byte) (ptr int) {
|
||||
var bwt burrowsWheelerTransform
|
||||
return bwt.Encode(buf)
|
||||
}
|
||||
|
||||
func ReverseBWT(buf []byte, ptr int) {
|
||||
var bwt burrowsWheelerTransform
|
||||
bwt.Decode(buf, ptr)
|
||||
}
|
||||
|
||||
type fuzzReader struct {
|
||||
Checksums Checksums
|
||||
}
|
||||
|
||||
// updateChecksum updates Checksums.
|
||||
//
|
||||
// If a valid pos is provided, it appends the (pos, val) pair to the slice.
|
||||
// Otherwise, it will update the last record with the new value.
|
||||
func (fr *fuzzReader) updateChecksum(pos int64, val uint32) {
|
||||
if pos >= 0 {
|
||||
fr.Checksums = append(fr.Checksums, Checksum{pos, val})
|
||||
} else {
|
||||
fr.Checksums[len(fr.Checksums)-1].Value = val
|
||||
}
|
||||
}
|
||||
|
||||
type Checksum struct {
|
||||
Offset int64 // Bit offset of the checksum
|
||||
Value uint32 // Checksum value
|
||||
}
|
||||
|
||||
type Checksums []Checksum
|
||||
|
||||
// Apply overwrites all checksum fields in d with the ones in cs.
|
||||
func (cs Checksums) Apply(d []byte) []byte {
|
||||
d = append([]byte(nil), d...)
|
||||
for _, c := range cs {
|
||||
setU32(d, c.Offset, c.Value)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func setU32(d []byte, pos int64, val uint32) {
|
||||
for i := uint(0); i < 32; i++ {
|
||||
bpos := uint64(pos) + uint64(i)
|
||||
d[bpos/8] &= ^byte(1 << (7 - bpos%8))
|
||||
d[bpos/8] |= byte(val>>(31-i)) << (7 - bpos%8)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify checks that all checksum fields in d matches those in cs.
|
||||
func (cs Checksums) Verify(d []byte) bool {
|
||||
for _, c := range cs {
|
||||
if getU32(d, c.Offset) != c.Value {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func getU32(d []byte, pos int64) (val uint32) {
|
||||
for i := uint(0); i < 32; i++ {
|
||||
bpos := uint64(pos) + uint64(i)
|
||||
val |= (uint32(d[bpos/8] >> (7 - bpos%8))) << (31 - i)
|
||||
}
|
||||
return val
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package sais implements a linear time suffix array algorithm.
|
||||
package sais
|
||||
|
||||
//go:generate go run sais_gen.go byte sais_byte.go
|
||||
//go:generate go run sais_gen.go int sais_int.go
|
||||
|
||||
// This package ports the C sais implementation by Yuta Mori. The ports are
|
||||
// located in sais_byte.go and sais_int.go, which are identical to each other
|
||||
// except for the types. Since Go does not support generics, we use generators to
|
||||
// create the two files.
|
||||
//
|
||||
// References:
|
||||
// https://sites.google.com/site/yuta256/sais
|
||||
// https://www.researchgate.net/publication/221313676_Linear_Time_Suffix_Array_Construction_Using_D-Critical_Substrings
|
||||
// https://www.researchgate.net/publication/224176324_Two_Efficient_Algorithms_for_Linear_Time_Suffix_Array_Construction
|
||||
|
||||
// ComputeSA computes the suffix array of t and places the result in sa.
|
||||
// Both t and sa must be the same length.
|
||||
func ComputeSA(t []byte, sa []int) {
|
||||
if len(sa) != len(t) {
|
||||
panic("mismatching sizes")
|
||||
}
|
||||
computeSA_byte(t, sa, 0, len(t), 256)
|
||||
}
|
661
vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_byte.go
generated
vendored
Normal file
661
vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_byte.go
generated
vendored
Normal file
|
@ -0,0 +1,661 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Code generated by sais_gen.go. DO NOT EDIT.
|
||||
|
||||
// ====================================================
|
||||
// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
// ====================================================
|
||||
|
||||
package sais
|
||||
|
||||
func getCounts_byte(T []byte, C []int, n, k int) {
|
||||
var i int
|
||||
for i = 0; i < k; i++ {
|
||||
C[i] = 0
|
||||
}
|
||||
for i = 0; i < n; i++ {
|
||||
C[T[i]]++
|
||||
}
|
||||
}
|
||||
|
||||
func getBuckets_byte(C, B []int, k int, end bool) {
|
||||
var i, sum int
|
||||
if end {
|
||||
for i = 0; i < k; i++ {
|
||||
sum += C[i]
|
||||
B[i] = sum
|
||||
}
|
||||
} else {
|
||||
for i = 0; i < k; i++ {
|
||||
sum += C[i]
|
||||
B[i] = sum - C[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sortLMS1_byte(T []byte, SA, C, B []int, n, k int) {
|
||||
var b, i, j int
|
||||
var c0, c1 int
|
||||
|
||||
// Compute SAl.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_byte(T, C, n, k)
|
||||
}
|
||||
getBuckets_byte(C, B, k, false) // Find starts of buckets
|
||||
j = n - 1
|
||||
c1 = int(T[j])
|
||||
b = B[c1]
|
||||
j--
|
||||
if int(T[j]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
for i = 0; i < n; i++ {
|
||||
if j = SA[i]; j > 0 {
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
if int(T[j]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
SA[i] = 0
|
||||
} else if j < 0 {
|
||||
SA[i] = ^j
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SAs.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_byte(T, C, n, k)
|
||||
}
|
||||
getBuckets_byte(C, B, k, true) // Find ends of buckets
|
||||
c1 = 0
|
||||
b = B[c1]
|
||||
for i = n - 1; i >= 0; i-- {
|
||||
if j = SA[i]; j > 0 {
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
b--
|
||||
if int(T[j]) > c1 {
|
||||
SA[b] = ^(j + 1)
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
SA[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func postProcLMS1_byte(T []byte, SA []int, n, m int) int {
|
||||
var i, j, p, q, plen, qlen, name int
|
||||
var c0, c1 int
|
||||
var diff bool
|
||||
|
||||
// Compact all the sorted substrings into the first m items of SA.
|
||||
// 2*m must be not larger than n (provable).
|
||||
for i = 0; SA[i] < 0; i++ {
|
||||
SA[i] = ^SA[i]
|
||||
}
|
||||
if i < m {
|
||||
for j, i = i, i+1; ; i++ {
|
||||
if p = SA[i]; p < 0 {
|
||||
SA[j] = ^p
|
||||
j++
|
||||
SA[i] = 0
|
||||
if j == m {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store the length of all substrings.
|
||||
i = n - 1
|
||||
j = n - 1
|
||||
c0 = int(T[n-1])
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for i >= 0 {
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 > c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i >= 0 {
|
||||
SA[m+((i+1)>>1)] = j - i
|
||||
j = i + 1
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the lexicographic names of all substrings.
|
||||
name = 0
|
||||
qlen = 0
|
||||
for i, q = 0, n; i < m; i++ {
|
||||
p = SA[i]
|
||||
plen = SA[m+(p>>1)]
|
||||
diff = true
|
||||
if (plen == qlen) && ((q + plen) < n) {
|
||||
for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
|
||||
}
|
||||
if j == plen {
|
||||
diff = false
|
||||
}
|
||||
}
|
||||
if diff {
|
||||
name++
|
||||
q = p
|
||||
qlen = plen
|
||||
}
|
||||
SA[m+(p>>1)] = name
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
func sortLMS2_byte(T []byte, SA, C, B, D []int, n, k int) {
|
||||
var b, i, j, t, d int
|
||||
var c0, c1 int
|
||||
|
||||
// Compute SAl.
|
||||
getBuckets_byte(C, B, k, false) // Find starts of buckets
|
||||
j = n - 1
|
||||
c1 = int(T[j])
|
||||
b = B[c1]
|
||||
j--
|
||||
if int(T[j]) < c1 {
|
||||
t = 1
|
||||
} else {
|
||||
t = 0
|
||||
}
|
||||
j += n
|
||||
if t&1 > 0 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
for i, d = 0, 0; i < n; i++ {
|
||||
if j = SA[i]; j > 0 {
|
||||
if n <= j {
|
||||
d += 1
|
||||
j -= n
|
||||
}
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
t = int(c0) << 1
|
||||
if int(T[j]) < c1 {
|
||||
t |= 1
|
||||
}
|
||||
if D[t] != d {
|
||||
j += n
|
||||
D[t] = d
|
||||
}
|
||||
if t&1 > 0 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
SA[i] = 0
|
||||
} else if j < 0 {
|
||||
SA[i] = ^j
|
||||
}
|
||||
}
|
||||
for i = n - 1; 0 <= i; i-- {
|
||||
if SA[i] > 0 {
|
||||
if SA[i] < n {
|
||||
SA[i] += n
|
||||
for j = i - 1; SA[j] < n; j-- {
|
||||
}
|
||||
SA[j] -= n
|
||||
i = j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SAs.
|
||||
getBuckets_byte(C, B, k, true) // Find ends of buckets
|
||||
c1 = 0
|
||||
b = B[c1]
|
||||
for i, d = n-1, d+1; i >= 0; i-- {
|
||||
if j = SA[i]; j > 0 {
|
||||
if n <= j {
|
||||
d += 1
|
||||
j -= n
|
||||
}
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
t = int(c0) << 1
|
||||
if int(T[j]) > c1 {
|
||||
t |= 1
|
||||
}
|
||||
if D[t] != d {
|
||||
j += n
|
||||
D[t] = d
|
||||
}
|
||||
b--
|
||||
if t&1 > 0 {
|
||||
SA[b] = ^(j + 1)
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
SA[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func postProcLMS2_byte(SA []int, n, m int) int {
|
||||
var i, j, d, name int
|
||||
|
||||
// Compact all the sorted LMS substrings into the first m items of SA.
|
||||
name = 0
|
||||
for i = 0; SA[i] < 0; i++ {
|
||||
j = ^SA[i]
|
||||
if n <= j {
|
||||
name += 1
|
||||
}
|
||||
SA[i] = j
|
||||
}
|
||||
if i < m {
|
||||
for d, i = i, i+1; ; i++ {
|
||||
if j = SA[i]; j < 0 {
|
||||
j = ^j
|
||||
if n <= j {
|
||||
name += 1
|
||||
}
|
||||
SA[d] = j
|
||||
d++
|
||||
SA[i] = 0
|
||||
if d == m {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if name < m {
|
||||
// Store the lexicographic names.
|
||||
for i, d = m-1, name+1; 0 <= i; i-- {
|
||||
if j = SA[i]; n <= j {
|
||||
j -= n
|
||||
d--
|
||||
}
|
||||
SA[m+(j>>1)] = d
|
||||
}
|
||||
} else {
|
||||
// Unset flags.
|
||||
for i = 0; i < m; i++ {
|
||||
if j = SA[i]; n <= j {
|
||||
j -= n
|
||||
SA[i] = j
|
||||
}
|
||||
}
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
func induceSA_byte(T []byte, SA, C, B []int, n, k int) {
|
||||
var b, i, j int
|
||||
var c0, c1 int
|
||||
|
||||
// Compute SAl.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_byte(T, C, n, k)
|
||||
}
|
||||
getBuckets_byte(C, B, k, false) // Find starts of buckets
|
||||
j = n - 1
|
||||
c1 = int(T[j])
|
||||
b = B[c1]
|
||||
if j > 0 && int(T[j-1]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
for i = 0; i < n; i++ {
|
||||
j = SA[i]
|
||||
SA[i] = ^j
|
||||
if j > 0 {
|
||||
j--
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
if j > 0 && int(T[j-1]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SAs.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_byte(T, C, n, k)
|
||||
}
|
||||
getBuckets_byte(C, B, k, true) // Find ends of buckets
|
||||
c1 = 0
|
||||
b = B[c1]
|
||||
for i = n - 1; i >= 0; i-- {
|
||||
if j = SA[i]; j > 0 {
|
||||
j--
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
b--
|
||||
if (j == 0) || (int(T[j-1]) > c1) {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
} else {
|
||||
SA[i] = ^j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func computeSA_byte(T []byte, SA []int, fs, n, k int) {
|
||||
const (
|
||||
minBucketSize = 512
|
||||
sortLMS2Limit = 0x3fffffff
|
||||
)
|
||||
|
||||
var C, B, D, RA []int
|
||||
var bo int // Offset of B relative to SA
|
||||
var b, i, j, m, p, q, name, newfs int
|
||||
var c0, c1 int
|
||||
var flags uint
|
||||
|
||||
if k <= minBucketSize {
|
||||
C = make([]int, k)
|
||||
if k <= fs {
|
||||
bo = n + fs - k
|
||||
B = SA[bo:]
|
||||
flags = 1
|
||||
} else {
|
||||
B = make([]int, k)
|
||||
flags = 3
|
||||
}
|
||||
} else if k <= fs {
|
||||
C = SA[n+fs-k:]
|
||||
if k <= fs-k {
|
||||
bo = n + fs - 2*k
|
||||
B = SA[bo:]
|
||||
flags = 0
|
||||
} else if k <= 4*minBucketSize {
|
||||
B = make([]int, k)
|
||||
flags = 2
|
||||
} else {
|
||||
B = C
|
||||
flags = 8
|
||||
}
|
||||
} else {
|
||||
C = make([]int, k)
|
||||
B = C
|
||||
flags = 4 | 8
|
||||
}
|
||||
if n <= sortLMS2Limit && 2 <= (n/k) {
|
||||
if flags&1 > 0 {
|
||||
if 2*k <= fs-k {
|
||||
flags |= 32
|
||||
} else {
|
||||
flags |= 16
|
||||
}
|
||||
} else if flags == 0 && 2*k <= (fs-2*k) {
|
||||
flags |= 32
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 1: Reduce the problem by at least 1/2.
|
||||
// Sort all the LMS-substrings.
|
||||
getCounts_byte(T, C, n, k)
|
||||
getBuckets_byte(C, B, k, true) // Find ends of buckets
|
||||
for i = 0; i < n; i++ {
|
||||
SA[i] = 0
|
||||
}
|
||||
b = -1
|
||||
i = n - 1
|
||||
j = n
|
||||
m = 0
|
||||
c0 = int(T[n-1])
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for i >= 0 {
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 > c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i >= 0 {
|
||||
if b >= 0 {
|
||||
SA[b] = j
|
||||
}
|
||||
B[c1]--
|
||||
b = B[c1]
|
||||
j = i
|
||||
m++
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if m > 1 {
|
||||
if flags&(16|32) > 0 {
|
||||
if flags&16 > 0 {
|
||||
D = make([]int, 2*k)
|
||||
} else {
|
||||
D = SA[bo-2*k:]
|
||||
}
|
||||
B[T[j+1]]++
|
||||
for i, j = 0, 0; i < k; i++ {
|
||||
j += C[i]
|
||||
if B[i] != j {
|
||||
SA[B[i]] += n
|
||||
}
|
||||
D[i] = 0
|
||||
D[i+k] = 0
|
||||
}
|
||||
sortLMS2_byte(T, SA, C, B, D, n, k)
|
||||
name = postProcLMS2_byte(SA, n, m)
|
||||
} else {
|
||||
sortLMS1_byte(T, SA, C, B, n, k)
|
||||
name = postProcLMS1_byte(T, SA, n, m)
|
||||
}
|
||||
} else if m == 1 {
|
||||
SA[b] = j + 1
|
||||
name = 1
|
||||
} else {
|
||||
name = 0
|
||||
}
|
||||
|
||||
// Stage 2: Solve the reduced problem.
|
||||
// Recurse if names are not yet unique.
|
||||
if name < m {
|
||||
newfs = n + fs - 2*m
|
||||
if flags&(1|4|8) == 0 {
|
||||
if k+name <= newfs {
|
||||
newfs -= k
|
||||
} else {
|
||||
flags |= 8
|
||||
}
|
||||
}
|
||||
RA = SA[m+newfs:]
|
||||
for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
|
||||
if SA[i] != 0 {
|
||||
RA[j] = SA[i] - 1
|
||||
j--
|
||||
}
|
||||
}
|
||||
computeSA_int(RA, SA, newfs, m, name)
|
||||
|
||||
i = n - 1
|
||||
j = m - 1
|
||||
c0 = int(T[n-1])
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for i >= 0 {
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 > c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i >= 0 {
|
||||
RA[j] = i + 1
|
||||
j--
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for i = 0; i < m; i++ {
|
||||
SA[i] = RA[SA[i]]
|
||||
}
|
||||
if flags&4 > 0 {
|
||||
B = make([]int, k)
|
||||
C = B
|
||||
}
|
||||
if flags&2 > 0 {
|
||||
B = make([]int, k)
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 3: Induce the result for the original problem.
|
||||
if flags&8 > 0 {
|
||||
getCounts_byte(T, C, n, k)
|
||||
}
|
||||
// Put all left-most S characters into their buckets.
|
||||
if m > 1 {
|
||||
getBuckets_byte(C, B, k, true) // Find ends of buckets
|
||||
i = m - 1
|
||||
j = n
|
||||
p = SA[m-1]
|
||||
c1 = int(T[p])
|
||||
for {
|
||||
c0 = c1
|
||||
q = B[c0]
|
||||
for q < j {
|
||||
j--
|
||||
SA[j] = 0
|
||||
}
|
||||
for {
|
||||
j--
|
||||
SA[j] = p
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
p = SA[i]
|
||||
if c1 = int(T[p]); c1 != c0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for j > 0 {
|
||||
j--
|
||||
SA[j] = 0
|
||||
}
|
||||
}
|
||||
induceSA_byte(T, SA, C, B, n, k)
|
||||
}
|
|
@ -0,0 +1,661 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Code generated by sais_gen.go. DO NOT EDIT.
|
||||
|
||||
// ====================================================
|
||||
// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
// ====================================================
|
||||
|
||||
package sais
|
||||
|
||||
func getCounts_int(T []int, C []int, n, k int) {
|
||||
var i int
|
||||
for i = 0; i < k; i++ {
|
||||
C[i] = 0
|
||||
}
|
||||
for i = 0; i < n; i++ {
|
||||
C[T[i]]++
|
||||
}
|
||||
}
|
||||
|
||||
func getBuckets_int(C, B []int, k int, end bool) {
|
||||
var i, sum int
|
||||
if end {
|
||||
for i = 0; i < k; i++ {
|
||||
sum += C[i]
|
||||
B[i] = sum
|
||||
}
|
||||
} else {
|
||||
for i = 0; i < k; i++ {
|
||||
sum += C[i]
|
||||
B[i] = sum - C[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sortLMS1_int(T []int, SA, C, B []int, n, k int) {
|
||||
var b, i, j int
|
||||
var c0, c1 int
|
||||
|
||||
// Compute SAl.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_int(T, C, n, k)
|
||||
}
|
||||
getBuckets_int(C, B, k, false) // Find starts of buckets
|
||||
j = n - 1
|
||||
c1 = int(T[j])
|
||||
b = B[c1]
|
||||
j--
|
||||
if int(T[j]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
for i = 0; i < n; i++ {
|
||||
if j = SA[i]; j > 0 {
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
if int(T[j]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
SA[i] = 0
|
||||
} else if j < 0 {
|
||||
SA[i] = ^j
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SAs.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_int(T, C, n, k)
|
||||
}
|
||||
getBuckets_int(C, B, k, true) // Find ends of buckets
|
||||
c1 = 0
|
||||
b = B[c1]
|
||||
for i = n - 1; i >= 0; i-- {
|
||||
if j = SA[i]; j > 0 {
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
b--
|
||||
if int(T[j]) > c1 {
|
||||
SA[b] = ^(j + 1)
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
SA[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func postProcLMS1_int(T []int, SA []int, n, m int) int {
|
||||
var i, j, p, q, plen, qlen, name int
|
||||
var c0, c1 int
|
||||
var diff bool
|
||||
|
||||
// Compact all the sorted substrings into the first m items of SA.
|
||||
// 2*m must be not larger than n (provable).
|
||||
for i = 0; SA[i] < 0; i++ {
|
||||
SA[i] = ^SA[i]
|
||||
}
|
||||
if i < m {
|
||||
for j, i = i, i+1; ; i++ {
|
||||
if p = SA[i]; p < 0 {
|
||||
SA[j] = ^p
|
||||
j++
|
||||
SA[i] = 0
|
||||
if j == m {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store the length of all substrings.
|
||||
i = n - 1
|
||||
j = n - 1
|
||||
c0 = int(T[n-1])
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for i >= 0 {
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 > c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i >= 0 {
|
||||
SA[m+((i+1)>>1)] = j - i
|
||||
j = i + 1
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the lexicographic names of all substrings.
|
||||
name = 0
|
||||
qlen = 0
|
||||
for i, q = 0, n; i < m; i++ {
|
||||
p = SA[i]
|
||||
plen = SA[m+(p>>1)]
|
||||
diff = true
|
||||
if (plen == qlen) && ((q + plen) < n) {
|
||||
for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
|
||||
}
|
||||
if j == plen {
|
||||
diff = false
|
||||
}
|
||||
}
|
||||
if diff {
|
||||
name++
|
||||
q = p
|
||||
qlen = plen
|
||||
}
|
||||
SA[m+(p>>1)] = name
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
func sortLMS2_int(T []int, SA, C, B, D []int, n, k int) {
|
||||
var b, i, j, t, d int
|
||||
var c0, c1 int
|
||||
|
||||
// Compute SAl.
|
||||
getBuckets_int(C, B, k, false) // Find starts of buckets
|
||||
j = n - 1
|
||||
c1 = int(T[j])
|
||||
b = B[c1]
|
||||
j--
|
||||
if int(T[j]) < c1 {
|
||||
t = 1
|
||||
} else {
|
||||
t = 0
|
||||
}
|
||||
j += n
|
||||
if t&1 > 0 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
for i, d = 0, 0; i < n; i++ {
|
||||
if j = SA[i]; j > 0 {
|
||||
if n <= j {
|
||||
d += 1
|
||||
j -= n
|
||||
}
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
t = int(c0) << 1
|
||||
if int(T[j]) < c1 {
|
||||
t |= 1
|
||||
}
|
||||
if D[t] != d {
|
||||
j += n
|
||||
D[t] = d
|
||||
}
|
||||
if t&1 > 0 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
SA[i] = 0
|
||||
} else if j < 0 {
|
||||
SA[i] = ^j
|
||||
}
|
||||
}
|
||||
for i = n - 1; 0 <= i; i-- {
|
||||
if SA[i] > 0 {
|
||||
if SA[i] < n {
|
||||
SA[i] += n
|
||||
for j = i - 1; SA[j] < n; j-- {
|
||||
}
|
||||
SA[j] -= n
|
||||
i = j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SAs.
|
||||
getBuckets_int(C, B, k, true) // Find ends of buckets
|
||||
c1 = 0
|
||||
b = B[c1]
|
||||
for i, d = n-1, d+1; i >= 0; i-- {
|
||||
if j = SA[i]; j > 0 {
|
||||
if n <= j {
|
||||
d += 1
|
||||
j -= n
|
||||
}
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
j--
|
||||
t = int(c0) << 1
|
||||
if int(T[j]) > c1 {
|
||||
t |= 1
|
||||
}
|
||||
if D[t] != d {
|
||||
j += n
|
||||
D[t] = d
|
||||
}
|
||||
b--
|
||||
if t&1 > 0 {
|
||||
SA[b] = ^(j + 1)
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
SA[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func postProcLMS2_int(SA []int, n, m int) int {
|
||||
var i, j, d, name int
|
||||
|
||||
// Compact all the sorted LMS substrings into the first m items of SA.
|
||||
name = 0
|
||||
for i = 0; SA[i] < 0; i++ {
|
||||
j = ^SA[i]
|
||||
if n <= j {
|
||||
name += 1
|
||||
}
|
||||
SA[i] = j
|
||||
}
|
||||
if i < m {
|
||||
for d, i = i, i+1; ; i++ {
|
||||
if j = SA[i]; j < 0 {
|
||||
j = ^j
|
||||
if n <= j {
|
||||
name += 1
|
||||
}
|
||||
SA[d] = j
|
||||
d++
|
||||
SA[i] = 0
|
||||
if d == m {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if name < m {
|
||||
// Store the lexicographic names.
|
||||
for i, d = m-1, name+1; 0 <= i; i-- {
|
||||
if j = SA[i]; n <= j {
|
||||
j -= n
|
||||
d--
|
||||
}
|
||||
SA[m+(j>>1)] = d
|
||||
}
|
||||
} else {
|
||||
// Unset flags.
|
||||
for i = 0; i < m; i++ {
|
||||
if j = SA[i]; n <= j {
|
||||
j -= n
|
||||
SA[i] = j
|
||||
}
|
||||
}
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
func induceSA_int(T []int, SA, C, B []int, n, k int) {
|
||||
var b, i, j int
|
||||
var c0, c1 int
|
||||
|
||||
// Compute SAl.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_int(T, C, n, k)
|
||||
}
|
||||
getBuckets_int(C, B, k, false) // Find starts of buckets
|
||||
j = n - 1
|
||||
c1 = int(T[j])
|
||||
b = B[c1]
|
||||
if j > 0 && int(T[j-1]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
for i = 0; i < n; i++ {
|
||||
j = SA[i]
|
||||
SA[i] = ^j
|
||||
if j > 0 {
|
||||
j--
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
if j > 0 && int(T[j-1]) < c1 {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
b++
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SAs.
|
||||
if &C[0] == &B[0] {
|
||||
getCounts_int(T, C, n, k)
|
||||
}
|
||||
getBuckets_int(C, B, k, true) // Find ends of buckets
|
||||
c1 = 0
|
||||
b = B[c1]
|
||||
for i = n - 1; i >= 0; i-- {
|
||||
if j = SA[i]; j > 0 {
|
||||
j--
|
||||
if c0 = int(T[j]); c0 != c1 {
|
||||
B[c1] = b
|
||||
c1 = c0
|
||||
b = B[c1]
|
||||
}
|
||||
b--
|
||||
if (j == 0) || (int(T[j-1]) > c1) {
|
||||
SA[b] = ^j
|
||||
} else {
|
||||
SA[b] = j
|
||||
}
|
||||
} else {
|
||||
SA[i] = ^j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func computeSA_int(T []int, SA []int, fs, n, k int) {
|
||||
const (
|
||||
minBucketSize = 512
|
||||
sortLMS2Limit = 0x3fffffff
|
||||
)
|
||||
|
||||
var C, B, D, RA []int
|
||||
var bo int // Offset of B relative to SA
|
||||
var b, i, j, m, p, q, name, newfs int
|
||||
var c0, c1 int
|
||||
var flags uint
|
||||
|
||||
if k <= minBucketSize {
|
||||
C = make([]int, k)
|
||||
if k <= fs {
|
||||
bo = n + fs - k
|
||||
B = SA[bo:]
|
||||
flags = 1
|
||||
} else {
|
||||
B = make([]int, k)
|
||||
flags = 3
|
||||
}
|
||||
} else if k <= fs {
|
||||
C = SA[n+fs-k:]
|
||||
if k <= fs-k {
|
||||
bo = n + fs - 2*k
|
||||
B = SA[bo:]
|
||||
flags = 0
|
||||
} else if k <= 4*minBucketSize {
|
||||
B = make([]int, k)
|
||||
flags = 2
|
||||
} else {
|
||||
B = C
|
||||
flags = 8
|
||||
}
|
||||
} else {
|
||||
C = make([]int, k)
|
||||
B = C
|
||||
flags = 4 | 8
|
||||
}
|
||||
if n <= sortLMS2Limit && 2 <= (n/k) {
|
||||
if flags&1 > 0 {
|
||||
if 2*k <= fs-k {
|
||||
flags |= 32
|
||||
} else {
|
||||
flags |= 16
|
||||
}
|
||||
} else if flags == 0 && 2*k <= (fs-2*k) {
|
||||
flags |= 32
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 1: Reduce the problem by at least 1/2.
|
||||
// Sort all the LMS-substrings.
|
||||
getCounts_int(T, C, n, k)
|
||||
getBuckets_int(C, B, k, true) // Find ends of buckets
|
||||
for i = 0; i < n; i++ {
|
||||
SA[i] = 0
|
||||
}
|
||||
b = -1
|
||||
i = n - 1
|
||||
j = n
|
||||
m = 0
|
||||
c0 = int(T[n-1])
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for i >= 0 {
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 > c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i >= 0 {
|
||||
if b >= 0 {
|
||||
SA[b] = j
|
||||
}
|
||||
B[c1]--
|
||||
b = B[c1]
|
||||
j = i
|
||||
m++
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if m > 1 {
|
||||
if flags&(16|32) > 0 {
|
||||
if flags&16 > 0 {
|
||||
D = make([]int, 2*k)
|
||||
} else {
|
||||
D = SA[bo-2*k:]
|
||||
}
|
||||
B[T[j+1]]++
|
||||
for i, j = 0, 0; i < k; i++ {
|
||||
j += C[i]
|
||||
if B[i] != j {
|
||||
SA[B[i]] += n
|
||||
}
|
||||
D[i] = 0
|
||||
D[i+k] = 0
|
||||
}
|
||||
sortLMS2_int(T, SA, C, B, D, n, k)
|
||||
name = postProcLMS2_int(SA, n, m)
|
||||
} else {
|
||||
sortLMS1_int(T, SA, C, B, n, k)
|
||||
name = postProcLMS1_int(T, SA, n, m)
|
||||
}
|
||||
} else if m == 1 {
|
||||
SA[b] = j + 1
|
||||
name = 1
|
||||
} else {
|
||||
name = 0
|
||||
}
|
||||
|
||||
// Stage 2: Solve the reduced problem.
|
||||
// Recurse if names are not yet unique.
|
||||
if name < m {
|
||||
newfs = n + fs - 2*m
|
||||
if flags&(1|4|8) == 0 {
|
||||
if k+name <= newfs {
|
||||
newfs -= k
|
||||
} else {
|
||||
flags |= 8
|
||||
}
|
||||
}
|
||||
RA = SA[m+newfs:]
|
||||
for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
|
||||
if SA[i] != 0 {
|
||||
RA[j] = SA[i] - 1
|
||||
j--
|
||||
}
|
||||
}
|
||||
computeSA_int(RA, SA, newfs, m, name)
|
||||
|
||||
i = n - 1
|
||||
j = m - 1
|
||||
c0 = int(T[n-1])
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for i >= 0 {
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 > c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i >= 0 {
|
||||
RA[j] = i + 1
|
||||
j--
|
||||
for {
|
||||
c1 = c0
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
if c0 = int(T[i]); c0 < c1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for i = 0; i < m; i++ {
|
||||
SA[i] = RA[SA[i]]
|
||||
}
|
||||
if flags&4 > 0 {
|
||||
B = make([]int, k)
|
||||
C = B
|
||||
}
|
||||
if flags&2 > 0 {
|
||||
B = make([]int, k)
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 3: Induce the result for the original problem.
|
||||
if flags&8 > 0 {
|
||||
getCounts_int(T, C, n, k)
|
||||
}
|
||||
// Put all left-most S characters into their buckets.
|
||||
if m > 1 {
|
||||
getBuckets_int(C, B, k, true) // Find ends of buckets
|
||||
i = m - 1
|
||||
j = n
|
||||
p = SA[m-1]
|
||||
c1 = int(T[p])
|
||||
for {
|
||||
c0 = c1
|
||||
q = B[c0]
|
||||
for q < j {
|
||||
j--
|
||||
SA[j] = 0
|
||||
}
|
||||
for {
|
||||
j--
|
||||
SA[j] = p
|
||||
if i--; i < 0 {
|
||||
break
|
||||
}
|
||||
p = SA[i]
|
||||
if c1 = int(T[p]); c1 != c0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
for j > 0 {
|
||||
j--
|
||||
SA[j] = 0
|
||||
}
|
||||
}
|
||||
induceSA_int(T, SA, C, B, n, k)
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import "github.com/dsnet/compress/internal/errors"
|
||||
|
||||
// moveToFront implements both the MTF and RLE stages of bzip2 at the same time.
|
||||
// Any runs of zeros in the encoded output will be replaced by a sequence of
|
||||
// RUNA and RUNB symbols are encode the length of the run.
|
||||
//
|
||||
// The RLE encoding used can actually be encoded to and decoded from using
|
||||
// normal two's complement arithmetic. The methodology for doing so is below.
|
||||
//
|
||||
// Assuming the following:
|
||||
// num: The value being encoded by RLE encoding.
|
||||
// run: A sequence of RUNA and RUNB symbols represented as a binary integer,
|
||||
// where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN
|
||||
// symbols are at the least-significant bit positions.
|
||||
// cnt: The number of RUNA and RUNB symbols.
|
||||
//
|
||||
// Then the RLE encoding used by bzip2 has this mathematical property:
|
||||
// num+1 == (1<<cnt) | run
|
||||
type moveToFront struct {
|
||||
dictBuf [256]uint8
|
||||
dictLen int
|
||||
|
||||
vals []byte
|
||||
syms []uint16
|
||||
blkSize int
|
||||
}
|
||||
|
||||
func (mtf *moveToFront) Init(dict []uint8, blkSize int) {
|
||||
if len(dict) > len(mtf.dictBuf) {
|
||||
panicf(errors.Internal, "alphabet too large")
|
||||
}
|
||||
copy(mtf.dictBuf[:], dict)
|
||||
mtf.dictLen = len(dict)
|
||||
mtf.blkSize = blkSize
|
||||
}
|
||||
|
||||
func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) {
|
||||
dict := mtf.dictBuf[:mtf.dictLen]
|
||||
syms = mtf.syms[:0]
|
||||
|
||||
if len(vals) > mtf.blkSize {
|
||||
panicf(errors.Internal, "exceeded block size")
|
||||
}
|
||||
|
||||
var lastNum uint32
|
||||
for _, val := range vals {
|
||||
// Normal move-to-front transform.
|
||||
var idx uint8 // Reverse lookup idx in dict
|
||||
for di, dv := range dict {
|
||||
if dv == val {
|
||||
idx = uint8(di)
|
||||
break
|
||||
}
|
||||
}
|
||||
copy(dict[1:], dict[:idx])
|
||||
dict[0] = val
|
||||
|
||||
// Run-length encoding augmentation.
|
||||
if idx == 0 {
|
||||
lastNum++
|
||||
continue
|
||||
}
|
||||
if lastNum > 0 {
|
||||
for rc := lastNum + 1; rc != 1; rc >>= 1 {
|
||||
syms = append(syms, uint16(rc&1))
|
||||
}
|
||||
lastNum = 0
|
||||
}
|
||||
syms = append(syms, uint16(idx)+1)
|
||||
}
|
||||
if lastNum > 0 {
|
||||
for rc := lastNum + 1; rc != 1; rc >>= 1 {
|
||||
syms = append(syms, uint16(rc&1))
|
||||
}
|
||||
}
|
||||
mtf.syms = syms
|
||||
return syms
|
||||
}
|
||||
|
||||
func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) {
|
||||
dict := mtf.dictBuf[:mtf.dictLen]
|
||||
vals = mtf.vals[:0]
|
||||
|
||||
var lastCnt uint
|
||||
var lastRun uint32
|
||||
for _, sym := range syms {
|
||||
// Run-length encoding augmentation.
|
||||
if sym < 2 {
|
||||
lastRun |= uint32(sym) << lastCnt
|
||||
lastCnt++
|
||||
continue
|
||||
}
|
||||
if lastCnt > 0 {
|
||||
cnt := int((1<<lastCnt)|lastRun) - 1
|
||||
if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
|
||||
panicf(errors.Corrupted, "run-length decoding exceeded block size")
|
||||
}
|
||||
for i := cnt; i > 0; i-- {
|
||||
vals = append(vals, dict[0])
|
||||
}
|
||||
lastCnt, lastRun = 0, 0
|
||||
}
|
||||
|
||||
// Normal move-to-front transform.
|
||||
val := dict[sym-1] // Forward lookup val in dict
|
||||
copy(dict[1:], dict[:sym-1])
|
||||
dict[0] = val
|
||||
|
||||
if len(vals) >= mtf.blkSize {
|
||||
panicf(errors.Corrupted, "run-length decoding exceeded block size")
|
||||
}
|
||||
vals = append(vals, val)
|
||||
}
|
||||
if lastCnt > 0 {
|
||||
cnt := int((1<<lastCnt)|lastRun) - 1
|
||||
if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
|
||||
panicf(errors.Corrupted, "run-length decoding exceeded block size")
|
||||
}
|
||||
for i := cnt; i > 0; i-- {
|
||||
vals = append(vals, dict[0])
|
||||
}
|
||||
}
|
||||
mtf.vals = vals
|
||||
return vals
|
||||
}
|
|
@ -0,0 +1,374 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
"github.com/dsnet/compress/internal/prefix"
|
||||
)
|
||||
|
||||
const (
|
||||
minNumTrees = 2
|
||||
maxNumTrees = 6
|
||||
|
||||
maxPrefixBits = 20 // Maximum bit-width of a prefix code
|
||||
maxNumSyms = 256 + 2 // Maximum number of symbols in the alphabet
|
||||
numBlockSyms = 50 // Number of bytes in a block
|
||||
)
|
||||
|
||||
// encSel and decSel are used to handle the prefix encoding for tree selectors.
|
||||
// The prefix encoding is as follows:
|
||||
//
|
||||
// Code TreeIdx
|
||||
// 0 <=> 0
|
||||
// 10 <=> 1
|
||||
// 110 <=> 2
|
||||
// 1110 <=> 3
|
||||
// 11110 <=> 4
|
||||
// 111110 <=> 5
|
||||
// 111111 <=> 6 Invalid tree index, so should fail
|
||||
//
|
||||
var encSel, decSel = func() (e prefix.Encoder, d prefix.Decoder) {
|
||||
var selCodes [maxNumTrees + 1]prefix.PrefixCode
|
||||
for i := range selCodes {
|
||||
selCodes[i] = prefix.PrefixCode{Sym: uint32(i), Len: uint32(i + 1)}
|
||||
}
|
||||
selCodes[maxNumTrees] = prefix.PrefixCode{Sym: maxNumTrees, Len: maxNumTrees}
|
||||
prefix.GeneratePrefixes(selCodes[:])
|
||||
e.Init(selCodes[:])
|
||||
d.Init(selCodes[:])
|
||||
return
|
||||
}()
|
||||
|
||||
type prefixReader struct{ prefix.Reader }
|
||||
|
||||
func (pr *prefixReader) Init(r io.Reader) {
|
||||
pr.Reader.Init(r, true)
|
||||
}
|
||||
|
||||
func (pr *prefixReader) ReadBitsBE64(nb uint) uint64 {
|
||||
if nb <= 32 {
|
||||
v := uint32(pr.ReadBits(nb))
|
||||
return uint64(internal.ReverseUint32N(v, nb))
|
||||
}
|
||||
v0 := internal.ReverseUint32(uint32(pr.ReadBits(32)))
|
||||
v1 := internal.ReverseUint32(uint32(pr.ReadBits(nb - 32)))
|
||||
v := uint64(v0)<<32 | uint64(v1)
|
||||
return v >> (64 - nb)
|
||||
}
|
||||
|
||||
func (pr *prefixReader) ReadPrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Decoder) {
|
||||
for i, pc := range codes {
|
||||
clen := int(pr.ReadBitsBE64(5))
|
||||
sum := 1 << maxPrefixBits
|
||||
for sym := range pc {
|
||||
for {
|
||||
if clen < 1 || clen > maxPrefixBits {
|
||||
panicf(errors.Corrupted, "invalid prefix bit-length: %d", clen)
|
||||
}
|
||||
|
||||
b, ok := pr.TryReadBits(1)
|
||||
if !ok {
|
||||
b = pr.ReadBits(1)
|
||||
}
|
||||
if b == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
b, ok = pr.TryReadBits(1)
|
||||
if !ok {
|
||||
b = pr.ReadBits(1)
|
||||
}
|
||||
clen -= int(b*2) - 1 // +1 or -1
|
||||
}
|
||||
pc[sym] = prefix.PrefixCode{Sym: uint32(sym), Len: uint32(clen)}
|
||||
sum -= (1 << maxPrefixBits) >> uint(clen)
|
||||
}
|
||||
|
||||
if sum == 0 {
|
||||
// Fast path, but only handles complete trees.
|
||||
if err := prefix.GeneratePrefixes(pc); err != nil {
|
||||
errors.Panic(err) // Using complete trees; should never fail
|
||||
}
|
||||
} else {
|
||||
// Slow path, but handles anything.
|
||||
pc = handleDegenerateCodes(pc) // Never fails, but may fail later
|
||||
codes[i] = pc
|
||||
}
|
||||
trees[i].Init(pc)
|
||||
}
|
||||
}
|
||||
|
||||
type prefixWriter struct{ prefix.Writer }
|
||||
|
||||
func (pw *prefixWriter) Init(w io.Writer) {
|
||||
pw.Writer.Init(w, true)
|
||||
}
|
||||
|
||||
func (pw *prefixWriter) WriteBitsBE64(v uint64, nb uint) {
|
||||
if nb <= 32 {
|
||||
v := internal.ReverseUint32N(uint32(v), nb)
|
||||
pw.WriteBits(uint(v), nb)
|
||||
return
|
||||
}
|
||||
v <<= (64 - nb)
|
||||
v0 := internal.ReverseUint32(uint32(v >> 32))
|
||||
v1 := internal.ReverseUint32(uint32(v))
|
||||
pw.WriteBits(uint(v0), 32)
|
||||
pw.WriteBits(uint(v1), nb-32)
|
||||
return
|
||||
}
|
||||
|
||||
func (pw *prefixWriter) WritePrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Encoder) {
|
||||
for i, pc := range codes {
|
||||
if err := prefix.GeneratePrefixes(pc); err != nil {
|
||||
errors.Panic(err) // Using complete trees; should never fail
|
||||
}
|
||||
trees[i].Init(pc)
|
||||
|
||||
clen := int(pc[0].Len)
|
||||
pw.WriteBitsBE64(uint64(clen), 5)
|
||||
for _, c := range pc {
|
||||
for int(c.Len) < clen {
|
||||
pw.WriteBits(3, 2) // 11
|
||||
clen--
|
||||
}
|
||||
for int(c.Len) > clen {
|
||||
pw.WriteBits(1, 2) // 10
|
||||
clen++
|
||||
}
|
||||
pw.WriteBits(0, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleDegenerateCodes converts a degenerate tree into a canonical tree.
|
||||
//
|
||||
// For example, when the input is an under-subscribed tree:
|
||||
// input: []PrefixCode{
|
||||
// {Sym: 0, Len: 3},
|
||||
// {Sym: 1, Len: 4},
|
||||
// {Sym: 2, Len: 3},
|
||||
// }
|
||||
// output: []PrefixCode{
|
||||
// {Sym: 0, Len: 3, Val: 0}, // 000
|
||||
// {Sym: 1, Len: 4, Val: 2}, // 0010
|
||||
// {Sym: 2, Len: 3, Val: 4}, // 100
|
||||
// {Sym: 258, Len: 4, Val: 10}, // 1010
|
||||
// {Sym: 259, Len: 3, Val: 6}, // 110
|
||||
// {Sym: 260, Len: 1, Val: 1}, // 1
|
||||
// }
|
||||
//
|
||||
// For example, when the input is an over-subscribed tree:
|
||||
// input: []PrefixCode{
|
||||
// {Sym: 0, Len: 1},
|
||||
// {Sym: 1, Len: 3},
|
||||
// {Sym: 2, Len: 4},
|
||||
// {Sym: 3, Len: 3},
|
||||
// {Sym: 4, Len: 2},
|
||||
// }
|
||||
// output: []PrefixCode{
|
||||
// {Sym: 0, Len: 1, Val: 0}, // 0
|
||||
// {Sym: 1, Len: 3, Val: 3}, // 011
|
||||
// {Sym: 3, Len: 3, Val: 7}, // 111
|
||||
// {Sym: 4, Len: 2, Val: 1}, // 01
|
||||
// }
|
||||
func handleDegenerateCodes(codes prefix.PrefixCodes) prefix.PrefixCodes {
|
||||
// Since there is no formal definition for the BZip2 format, there is no
|
||||
// specification that says that the code lengths must form a complete
|
||||
// prefix tree (IE: it is neither over-subscribed nor under-subscribed).
|
||||
// Thus, the original C implementation becomes the reference for how prefix
|
||||
// decoding is done in these edge cases. Unfortunately, the C version does
|
||||
// not error when an invalid tree is used, but rather allows decoding to
|
||||
// continue and only errors if some bit pattern happens to cause an error.
|
||||
// Thus, it is possible for an invalid tree to end up decoding an input
|
||||
// "properly" so long as invalid bit patterns are not present. In order to
|
||||
// replicate this non-specified behavior, we use a ported version of the
|
||||
// C code to generate the codes as a valid canonical tree by substituting
|
||||
// invalid nodes with invalid symbols.
|
||||
//
|
||||
// ====================================================
|
||||
// This program, "bzip2", the associated library "libbzip2", and all
|
||||
// documentation, are copyright (C) 1996-2010 Julian R Seward. All
|
||||
// rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. The origin of this software must not be misrepresented; you must
|
||||
// not claim that you wrote the original software. If you use this
|
||||
// software in a product, an acknowledgment in the product
|
||||
// documentation would be appreciated but is not required.
|
||||
//
|
||||
// 3. Altered source versions must be plainly marked as such, and must
|
||||
// not be misrepresented as being the original software.
|
||||
//
|
||||
// 4. The name of the author may not be used to endorse or promote
|
||||
// products derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||
// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Julian Seward, jseward@bzip.org
|
||||
// bzip2/libbzip2 version 1.0.6 of 6 September 2010
|
||||
// ====================================================
|
||||
var (
|
||||
limits [maxPrefixBits + 2]int32
|
||||
bases [maxPrefixBits + 2]int32
|
||||
perms [maxNumSyms]int32
|
||||
|
||||
minLen = uint32(maxPrefixBits)
|
||||
maxLen = uint32(0)
|
||||
)
|
||||
|
||||
const (
|
||||
statusOkay = iota
|
||||
statusInvalid
|
||||
statusNeedBits
|
||||
statusMaxBits
|
||||
)
|
||||
|
||||
// createTables is the BZ2_hbCreateDecodeTables function from the C code.
|
||||
createTables := func(codes []prefix.PrefixCode) {
|
||||
for _, c := range codes {
|
||||
if c.Len > maxLen {
|
||||
maxLen = c.Len
|
||||
}
|
||||
if c.Len < minLen {
|
||||
minLen = c.Len
|
||||
}
|
||||
}
|
||||
|
||||
var pp int
|
||||
for i := minLen; i <= maxLen; i++ {
|
||||
for j, c := range codes {
|
||||
if c.Len == i {
|
||||
perms[pp] = int32(j)
|
||||
pp++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var vec int32
|
||||
for _, c := range codes {
|
||||
bases[c.Len+1]++
|
||||
}
|
||||
for i := 1; i < len(bases); i++ {
|
||||
bases[i] += bases[i-1]
|
||||
}
|
||||
for i := minLen; i <= maxLen; i++ {
|
||||
vec += bases[i+1] - bases[i]
|
||||
limits[i] = vec - 1
|
||||
vec <<= 1
|
||||
}
|
||||
for i := minLen + 1; i <= maxLen; i++ {
|
||||
bases[i] = ((limits[i-1] + 1) << 1) - bases[i]
|
||||
}
|
||||
}
|
||||
|
||||
// getSymbol is the GET_MTF_VAL macro from the C code.
|
||||
getSymbol := func(c prefix.PrefixCode) (uint32, int) {
|
||||
v := internal.ReverseUint32(c.Val)
|
||||
n := c.Len
|
||||
|
||||
zn := minLen
|
||||
if zn > n {
|
||||
return 0, statusNeedBits
|
||||
}
|
||||
zvec := int32(v >> (32 - zn))
|
||||
v <<= zn
|
||||
for {
|
||||
if zn > maxLen {
|
||||
return 0, statusMaxBits
|
||||
}
|
||||
if zvec <= limits[zn] {
|
||||
break
|
||||
}
|
||||
zn++
|
||||
if zn > n {
|
||||
return 0, statusNeedBits
|
||||
}
|
||||
zvec = (zvec << 1) | int32(v>>31)
|
||||
v <<= 1
|
||||
}
|
||||
if zvec-bases[zn] < 0 || zvec-bases[zn] >= maxNumSyms {
|
||||
return 0, statusInvalid
|
||||
}
|
||||
return uint32(perms[zvec-bases[zn]]), statusOkay
|
||||
}
|
||||
|
||||
// Step 1: Create the prefix trees using the C algorithm.
|
||||
createTables(codes)
|
||||
|
||||
// Step 2: Starting with the shortest bit pattern, explore the whole tree.
|
||||
// If tree is under-subscribed, the worst-case runtime is O(1<<maxLen).
|
||||
// If tree is over-subscribed, the worst-case runtime is O(maxNumSyms).
|
||||
var pcodesArr [2 * maxNumSyms]prefix.PrefixCode
|
||||
pcodes := pcodesArr[:maxNumSyms]
|
||||
var exploreCode func(prefix.PrefixCode) bool
|
||||
exploreCode = func(c prefix.PrefixCode) (term bool) {
|
||||
sym, status := getSymbol(c)
|
||||
switch status {
|
||||
case statusOkay:
|
||||
// This code is valid, so insert it.
|
||||
c.Sym = sym
|
||||
pcodes[sym] = c
|
||||
term = true
|
||||
case statusInvalid:
|
||||
// This code is invalid, so insert an invalid symbol.
|
||||
c.Sym = uint32(len(pcodes))
|
||||
pcodes = append(pcodes, c)
|
||||
term = true
|
||||
case statusNeedBits:
|
||||
// This code is too short, so explore both children.
|
||||
c.Len++
|
||||
c0, c1 := c, c
|
||||
c1.Val |= 1 << (c.Len - 1)
|
||||
|
||||
b0 := exploreCode(c0)
|
||||
b1 := exploreCode(c1)
|
||||
switch {
|
||||
case !b0 && b1:
|
||||
c0.Sym = uint32(len(pcodes))
|
||||
pcodes = append(pcodes, c0)
|
||||
case !b1 && b0:
|
||||
c1.Sym = uint32(len(pcodes))
|
||||
pcodes = append(pcodes, c1)
|
||||
}
|
||||
term = b0 || b1
|
||||
case statusMaxBits:
|
||||
// This code is too long, so report it upstream.
|
||||
term = false
|
||||
}
|
||||
return term // Did this code terminate?
|
||||
}
|
||||
exploreCode(prefix.PrefixCode{})
|
||||
|
||||
// Step 3: Copy new sparse codes to old output codes.
|
||||
codes = codes[:0]
|
||||
for _, c := range pcodes {
|
||||
if c.Len > 0 {
|
||||
codes = append(codes, c)
|
||||
}
|
||||
}
|
||||
return codes
|
||||
}
|
|
@ -0,0 +1,274 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
"github.com/dsnet/compress/internal/prefix"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
InputOffset int64 // Total number of bytes read from underlying io.Reader
|
||||
OutputOffset int64 // Total number of bytes emitted from Read
|
||||
|
||||
rd prefixReader
|
||||
err error
|
||||
level int // The current compression level
|
||||
rdHdrFtr int // Number of times we read the stream header and footer
|
||||
blkCRC uint32 // CRC-32 IEEE of each block (as stored)
|
||||
endCRC uint32 // Checksum of all blocks using bzip2's custom method
|
||||
|
||||
crc crc
|
||||
mtf moveToFront
|
||||
bwt burrowsWheelerTransform
|
||||
rle runLengthEncoding
|
||||
|
||||
// These fields are allocated with Reader and re-used later.
|
||||
treeSels []uint8
|
||||
codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
|
||||
codes1D [maxNumTrees]prefix.PrefixCodes
|
||||
trees1D [maxNumTrees]prefix.Decoder
|
||||
syms []uint16
|
||||
|
||||
fuzzReader // Exported functionality when fuzz testing
|
||||
}
|
||||
|
||||
type ReaderConfig struct {
|
||||
_ struct{} // Blank field to prevent unkeyed struct literals
|
||||
}
|
||||
|
||||
func NewReader(r io.Reader, conf *ReaderConfig) (*Reader, error) {
|
||||
zr := new(Reader)
|
||||
zr.Reset(r)
|
||||
return zr, nil
|
||||
}
|
||||
|
||||
func (zr *Reader) Reset(r io.Reader) error {
|
||||
*zr = Reader{
|
||||
rd: zr.rd,
|
||||
|
||||
mtf: zr.mtf,
|
||||
bwt: zr.bwt,
|
||||
rle: zr.rle,
|
||||
|
||||
treeSels: zr.treeSels,
|
||||
trees1D: zr.trees1D,
|
||||
syms: zr.syms,
|
||||
}
|
||||
zr.rd.Init(r)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (zr *Reader) Read(buf []byte) (int, error) {
|
||||
for {
|
||||
cnt, err := zr.rle.Read(buf)
|
||||
if err != rleDone && zr.err == nil {
|
||||
zr.err = err
|
||||
}
|
||||
if cnt > 0 {
|
||||
zr.crc.update(buf[:cnt])
|
||||
zr.OutputOffset += int64(cnt)
|
||||
return cnt, nil
|
||||
}
|
||||
if zr.err != nil || len(buf) == 0 {
|
||||
return 0, zr.err
|
||||
}
|
||||
|
||||
// Read the next chunk.
|
||||
zr.rd.Offset = zr.InputOffset
|
||||
func() {
|
||||
defer errors.Recover(&zr.err)
|
||||
if zr.rdHdrFtr%2 == 0 {
|
||||
// Check if we are already at EOF.
|
||||
if err := zr.rd.PullBits(1); err != nil {
|
||||
if err == io.ErrUnexpectedEOF && zr.rdHdrFtr > 0 {
|
||||
err = io.EOF // EOF is okay if we read at least one stream
|
||||
}
|
||||
errors.Panic(err)
|
||||
}
|
||||
|
||||
// Read stream header.
|
||||
if zr.rd.ReadBitsBE64(16) != hdrMagic {
|
||||
panicf(errors.Corrupted, "invalid stream magic")
|
||||
}
|
||||
if ver := zr.rd.ReadBitsBE64(8); ver != 'h' {
|
||||
if ver == '0' {
|
||||
panicf(errors.Deprecated, "bzip1 format is not supported")
|
||||
}
|
||||
panicf(errors.Corrupted, "invalid version: %q", ver)
|
||||
}
|
||||
lvl := int(zr.rd.ReadBitsBE64(8)) - '0'
|
||||
if lvl < BestSpeed || lvl > BestCompression {
|
||||
panicf(errors.Corrupted, "invalid block size: %d", lvl*blockSize)
|
||||
}
|
||||
zr.level = lvl
|
||||
zr.rdHdrFtr++
|
||||
} else {
|
||||
// Check and update the CRC.
|
||||
if internal.GoFuzz {
|
||||
zr.updateChecksum(-1, zr.crc.val) // Update with value
|
||||
zr.blkCRC = zr.crc.val // Suppress CRC failures
|
||||
}
|
||||
if zr.blkCRC != zr.crc.val {
|
||||
panicf(errors.Corrupted, "mismatching block checksum")
|
||||
}
|
||||
zr.endCRC = (zr.endCRC<<1 | zr.endCRC>>31) ^ zr.blkCRC
|
||||
}
|
||||
buf := zr.decodeBlock()
|
||||
zr.rle.Init(buf)
|
||||
}()
|
||||
if zr.InputOffset, err = zr.rd.Flush(); zr.err == nil {
|
||||
zr.err = err
|
||||
}
|
||||
if zr.err != nil {
|
||||
zr.err = errWrap(zr.err, errors.Corrupted)
|
||||
return 0, zr.err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (zr *Reader) Close() error {
|
||||
if zr.err == io.EOF || zr.err == errClosed {
|
||||
zr.rle.Init(nil) // Make sure future reads fail
|
||||
zr.err = errClosed
|
||||
return nil
|
||||
}
|
||||
return zr.err // Return the persistent error
|
||||
}
|
||||
|
||||
func (zr *Reader) decodeBlock() []byte {
|
||||
if magic := zr.rd.ReadBitsBE64(48); magic != blkMagic {
|
||||
if magic == endMagic {
|
||||
endCRC := uint32(zr.rd.ReadBitsBE64(32))
|
||||
if internal.GoFuzz {
|
||||
zr.updateChecksum(zr.rd.BitsRead()-32, zr.endCRC)
|
||||
endCRC = zr.endCRC // Suppress CRC failures
|
||||
}
|
||||
if zr.endCRC != endCRC {
|
||||
panicf(errors.Corrupted, "mismatching stream checksum")
|
||||
}
|
||||
zr.endCRC = 0
|
||||
zr.rd.ReadPads()
|
||||
zr.rdHdrFtr++
|
||||
return nil
|
||||
}
|
||||
panicf(errors.Corrupted, "invalid block or footer magic")
|
||||
}
|
||||
|
||||
zr.crc.val = 0
|
||||
zr.blkCRC = uint32(zr.rd.ReadBitsBE64(32))
|
||||
if internal.GoFuzz {
|
||||
zr.updateChecksum(zr.rd.BitsRead()-32, 0) // Record offset only
|
||||
}
|
||||
if zr.rd.ReadBitsBE64(1) != 0 {
|
||||
panicf(errors.Deprecated, "block randomization is not supported")
|
||||
}
|
||||
|
||||
// Read BWT related fields.
|
||||
ptr := int(zr.rd.ReadBitsBE64(24)) // BWT origin pointer
|
||||
|
||||
// Read MTF related fields.
|
||||
var dictArr [256]uint8
|
||||
dict := dictArr[:0]
|
||||
bmapHi := uint16(zr.rd.ReadBits(16))
|
||||
for i := 0; i < 256; i, bmapHi = i+16, bmapHi>>1 {
|
||||
if bmapHi&1 > 0 {
|
||||
bmapLo := uint16(zr.rd.ReadBits(16))
|
||||
for j := 0; j < 16; j, bmapLo = j+1, bmapLo>>1 {
|
||||
if bmapLo&1 > 0 {
|
||||
dict = append(dict, uint8(i+j))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 1: Prefix encoding.
|
||||
syms := zr.decodePrefix(len(dict))
|
||||
|
||||
// Step 2: Move-to-front transform and run-length encoding.
|
||||
zr.mtf.Init(dict, zr.level*blockSize)
|
||||
buf := zr.mtf.Decode(syms)
|
||||
|
||||
// Step 3: Burrows-Wheeler transformation.
|
||||
if ptr >= len(buf) {
|
||||
panicf(errors.Corrupted, "origin pointer (0x%06x) exceeds block size: %d", ptr, len(buf))
|
||||
}
|
||||
zr.bwt.Decode(buf, ptr)
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
func (zr *Reader) decodePrefix(numSyms int) (syms []uint16) {
|
||||
numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols
|
||||
if numSyms < 3 {
|
||||
panicf(errors.Corrupted, "not enough prefix symbols: %d", numSyms)
|
||||
}
|
||||
|
||||
// Read information about the trees and tree selectors.
|
||||
var mtf internal.MoveToFront
|
||||
numTrees := int(zr.rd.ReadBitsBE64(3))
|
||||
if numTrees < minNumTrees || numTrees > maxNumTrees {
|
||||
panicf(errors.Corrupted, "invalid number of prefix trees: %d", numTrees)
|
||||
}
|
||||
numSels := int(zr.rd.ReadBitsBE64(15))
|
||||
if cap(zr.treeSels) < numSels {
|
||||
zr.treeSels = make([]uint8, numSels)
|
||||
}
|
||||
treeSels := zr.treeSels[:numSels]
|
||||
for i := range treeSels {
|
||||
sym, ok := zr.rd.TryReadSymbol(&decSel)
|
||||
if !ok {
|
||||
sym = zr.rd.ReadSymbol(&decSel)
|
||||
}
|
||||
if int(sym) >= numTrees {
|
||||
panicf(errors.Corrupted, "invalid prefix tree selector: %d", sym)
|
||||
}
|
||||
treeSels[i] = uint8(sym)
|
||||
}
|
||||
mtf.Decode(treeSels)
|
||||
zr.treeSels = treeSels
|
||||
|
||||
// Initialize prefix codes.
|
||||
for i := range zr.codes2D[:numTrees] {
|
||||
zr.codes1D[i] = zr.codes2D[i][:numSyms]
|
||||
}
|
||||
zr.rd.ReadPrefixCodes(zr.codes1D[:numTrees], zr.trees1D[:numTrees])
|
||||
|
||||
// Read prefix encoded symbols of compressed data.
|
||||
var tree *prefix.Decoder
|
||||
var blkLen, selIdx int
|
||||
syms = zr.syms[:0]
|
||||
for {
|
||||
if blkLen == 0 {
|
||||
blkLen = numBlockSyms
|
||||
if selIdx >= len(treeSels) {
|
||||
panicf(errors.Corrupted, "not enough prefix tree selectors")
|
||||
}
|
||||
tree = &zr.trees1D[treeSels[selIdx]]
|
||||
selIdx++
|
||||
}
|
||||
blkLen--
|
||||
sym, ok := zr.rd.TryReadSymbol(tree)
|
||||
if !ok {
|
||||
sym = zr.rd.ReadSymbol(tree)
|
||||
}
|
||||
|
||||
if int(sym) == numSyms-1 {
|
||||
break // EOF marker
|
||||
}
|
||||
if int(sym) >= numSyms {
|
||||
panicf(errors.Corrupted, "invalid prefix symbol: %d", sym)
|
||||
}
|
||||
if len(syms) >= zr.level*blockSize {
|
||||
panicf(errors.Corrupted, "number of prefix symbols exceeds block size")
|
||||
}
|
||||
syms = append(syms, uint16(sym))
|
||||
}
|
||||
zr.syms = syms
|
||||
return syms
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import "github.com/dsnet/compress/internal/errors"
|
||||
|
||||
// rleDone is a special "error" to indicate that the RLE stage is done.
|
||||
var rleDone = errorf(errors.Unknown, "RLE1 stage is completed")
|
||||
|
||||
// runLengthEncoding implements the first RLE stage of bzip2. Every sequence
|
||||
// of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
|
||||
// single byte representing the repeat length. Similar to the C bzip2
|
||||
// implementation, the encoder will always terminate repeat sequences with a
|
||||
// count (even if it is the end of the buffer), and it will also never produce
|
||||
// run lengths of 256..259. The decoder can handle the latter case.
|
||||
//
|
||||
// For example, if the input was:
|
||||
// input: "AAAAAAABBBBCCCD"
|
||||
//
|
||||
// Then the output will be:
|
||||
// output: "AAAA\x03BBBB\x00CCCD"
|
||||
type runLengthEncoding struct {
|
||||
buf []byte
|
||||
idx int
|
||||
lastVal byte
|
||||
lastCnt int
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Init(buf []byte) {
|
||||
*rle = runLengthEncoding{buf: buf}
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Write(buf []byte) (int, error) {
|
||||
for i, b := range buf {
|
||||
if rle.lastVal != b {
|
||||
rle.lastCnt = 0
|
||||
}
|
||||
rle.lastCnt++
|
||||
switch {
|
||||
case rle.lastCnt < 4:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
rle.buf[rle.idx] = b
|
||||
rle.idx++
|
||||
case rle.lastCnt == 4:
|
||||
if rle.idx+1 >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
rle.buf[rle.idx] = b
|
||||
rle.idx++
|
||||
rle.buf[rle.idx] = 0
|
||||
rle.idx++
|
||||
case rle.lastCnt < 256:
|
||||
rle.buf[rle.idx-1]++
|
||||
default:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
rle.lastCnt = 1
|
||||
rle.buf[rle.idx] = b
|
||||
rle.idx++
|
||||
}
|
||||
rle.lastVal = b
|
||||
}
|
||||
return len(buf), nil
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
|
||||
for i := range buf {
|
||||
switch {
|
||||
case rle.lastCnt == -4:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, errorf(errors.Corrupted, "missing terminating run-length repeater")
|
||||
}
|
||||
rle.lastCnt = int(rle.buf[rle.idx])
|
||||
rle.idx++
|
||||
if rle.lastCnt > 0 {
|
||||
break // Break the switch
|
||||
}
|
||||
fallthrough // Count was zero, continue the work
|
||||
case rle.lastCnt <= 0:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
b := rle.buf[rle.idx]
|
||||
rle.idx++
|
||||
if b != rle.lastVal {
|
||||
rle.lastCnt = 0
|
||||
rle.lastVal = b
|
||||
}
|
||||
}
|
||||
buf[i] = rle.lastVal
|
||||
rle.lastCnt--
|
||||
}
|
||||
return len(buf), nil
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] }
|
|
@ -0,0 +1,307 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
"github.com/dsnet/compress/internal/prefix"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
InputOffset int64 // Total number of bytes issued to Write
|
||||
OutputOffset int64 // Total number of bytes written to underlying io.Writer
|
||||
|
||||
wr prefixWriter
|
||||
err error
|
||||
level int // The current compression level
|
||||
wrHdr bool // Have we written the stream header?
|
||||
blkCRC uint32 // CRC-32 IEEE of each block
|
||||
endCRC uint32 // Checksum of all blocks using bzip2's custom method
|
||||
|
||||
crc crc
|
||||
rle runLengthEncoding
|
||||
bwt burrowsWheelerTransform
|
||||
mtf moveToFront
|
||||
|
||||
// These fields are allocated with Writer and re-used later.
|
||||
buf []byte
|
||||
treeSels []uint8
|
||||
treeSelsMTF []uint8
|
||||
codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
|
||||
codes1D [maxNumTrees]prefix.PrefixCodes
|
||||
trees1D [maxNumTrees]prefix.Encoder
|
||||
}
|
||||
|
||||
type WriterConfig struct {
|
||||
Level int
|
||||
|
||||
_ struct{} // Blank field to prevent unkeyed struct literals
|
||||
}
|
||||
|
||||
func NewWriter(w io.Writer, conf *WriterConfig) (*Writer, error) {
|
||||
var lvl int
|
||||
if conf != nil {
|
||||
lvl = conf.Level
|
||||
}
|
||||
if lvl == 0 {
|
||||
lvl = DefaultCompression
|
||||
}
|
||||
if lvl < BestSpeed || lvl > BestCompression {
|
||||
return nil, errorf(errors.Invalid, "compression level: %d", lvl)
|
||||
}
|
||||
zw := new(Writer)
|
||||
zw.level = lvl
|
||||
zw.Reset(w)
|
||||
return zw, nil
|
||||
}
|
||||
|
||||
func (zw *Writer) Reset(w io.Writer) error {
|
||||
*zw = Writer{
|
||||
wr: zw.wr,
|
||||
level: zw.level,
|
||||
|
||||
rle: zw.rle,
|
||||
bwt: zw.bwt,
|
||||
mtf: zw.mtf,
|
||||
|
||||
buf: zw.buf,
|
||||
treeSels: zw.treeSels,
|
||||
treeSelsMTF: zw.treeSelsMTF,
|
||||
trees1D: zw.trees1D,
|
||||
}
|
||||
zw.wr.Init(w)
|
||||
if len(zw.buf) != zw.level*blockSize {
|
||||
zw.buf = make([]byte, zw.level*blockSize)
|
||||
}
|
||||
zw.rle.Init(zw.buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (zw *Writer) Write(buf []byte) (int, error) {
|
||||
if zw.err != nil {
|
||||
return 0, zw.err
|
||||
}
|
||||
|
||||
cnt := len(buf)
|
||||
for {
|
||||
wrCnt, err := zw.rle.Write(buf)
|
||||
if err != rleDone && zw.err == nil {
|
||||
zw.err = err
|
||||
}
|
||||
zw.crc.update(buf[:wrCnt])
|
||||
buf = buf[wrCnt:]
|
||||
if len(buf) == 0 {
|
||||
zw.InputOffset += int64(cnt)
|
||||
return cnt, nil
|
||||
}
|
||||
if zw.err = zw.flush(); zw.err != nil {
|
||||
return 0, zw.err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flush encodes the current contents of the RLE buffer as one bzip2 block
// and writes it out. It is a no-op when the buffer is empty. The stream
// header is written lazily, just before the first block.
func (zw *Writer) flush() error {
	vals := zw.rle.Bytes()
	if len(vals) == 0 {
		return nil
	}
	zw.wr.Offset = zw.OutputOffset
	func() {
		// Internal encoding errors are raised as panics; convert them back
		// into an error value here (see the errors package).
		defer errors.Recover(&zw.err)
		if !zw.wrHdr {
			// Write stream header.
			zw.wr.WriteBitsBE64(hdrMagic, 16)
			zw.wr.WriteBitsBE64('h', 8)
			zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8)
			zw.wrHdr = true
		}
		zw.encodeBlock(vals)
	}()
	var err error
	if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil {
		zw.err = err
	}
	if zw.err != nil {
		zw.err = errWrap(zw.err, errors.Internal)
		return zw.err
	}
	// Fold this block's CRC into the running stream CRC
	// (rotate-left by 1, then XOR).
	zw.endCRC = (zw.endCRC<<1 | zw.endCRC>>31) ^ zw.blkCRC
	zw.blkCRC = 0
	zw.rle.Init(zw.buf)
	return nil
}
|
||||
|
||||
// Close flushes any buffered data, writes the stream footer (end magic and
// stream CRC), and marks the Writer as closed. It does not close the
// underlying io.Writer. Calling Close again after a successful close is a
// no-op.
func (zw *Writer) Close() error {
	if zw.err == errClosed {
		return nil
	}

	// Flush RLE buffer if there is left-over data.
	if zw.err = zw.flush(); zw.err != nil {
		return zw.err
	}

	// Write stream footer.
	zw.wr.Offset = zw.OutputOffset
	func() {
		defer errors.Recover(&zw.err)
		if !zw.wrHdr {
			// Write stream header. This path is taken only when no data was
			// ever flushed, so that even an empty stream is well-formed.
			zw.wr.WriteBitsBE64(hdrMagic, 16)
			zw.wr.WriteBitsBE64('h', 8)
			zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8)
			zw.wrHdr = true
		}
		zw.wr.WriteBitsBE64(endMagic, 48)
		zw.wr.WriteBitsBE64(uint64(zw.endCRC), 32)
		zw.wr.WritePads(0)
	}()
	var err error
	if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil {
		zw.err = err
	}
	if zw.err != nil {
		zw.err = errWrap(zw.err, errors.Internal)
		return zw.err
	}

	zw.err = errClosed
	return nil
}
|
||||
|
||||
// encodeBlock compresses one full block of RLE-processed data: it writes the
// block header, then applies the Burrows-Wheeler, move-to-front, and prefix
// coding stages in sequence.
func (zw *Writer) encodeBlock(buf []byte) {
	zw.blkCRC = zw.crc.val
	// Block header: block magic, block CRC, and a single zero bit
	// (the deprecated "randomized" flag, always unset).
	zw.wr.WriteBitsBE64(blkMagic, 48)
	zw.wr.WriteBitsBE64(uint64(zw.blkCRC), 32)
	zw.wr.WriteBitsBE64(0, 1)
	zw.crc.val = 0

	// Step 1: Burrows-Wheeler transformation.
	ptr := zw.bwt.Encode(buf)
	zw.wr.WriteBitsBE64(uint64(ptr), 24)

	// Step 2: Move-to-front transform and run-length encoding.
	// The symbol dictionary is the set of byte values actually present,
	// encoded as a two-level bitmap: a 16-bit high map with one bit per
	// 16-value row, followed by one 16-bit map for each used row.
	var dictMap [256]bool
	for _, c := range buf {
		dictMap[c] = true
	}

	var dictArr [256]uint8
	var bmapLo [16]uint16
	dict := dictArr[:0]
	bmapHi := uint16(0)
	for i, b := range dictMap {
		if b {
			c := uint8(i)
			dict = append(dict, c)
			bmapHi |= 1 << (c >> 4)
			bmapLo[c>>4] |= 1 << (c & 0xf)
		}
	}

	zw.wr.WriteBits(uint(bmapHi), 16)
	for _, m := range bmapLo {
		if m > 0 {
			zw.wr.WriteBits(uint(m), 16)
		}
	}

	zw.mtf.Init(dict, len(buf))
	syms := zw.mtf.Encode(buf)

	// Step 3: Prefix encoding.
	zw.encodePrefix(syms, len(dict))
}
|
||||
|
||||
// encodePrefix performs the final prefix-coding stage for one block: it
// appends the EOB marker, chooses how many prefix trees to use, assigns a
// tree to each group of numBlockSyms symbols, generates code lengths from
// the symbol frequencies, and writes the tree metadata followed by the
// prefix-encoded symbols.
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) {
	numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOB symbols
	if numSyms < 3 {
		panicf(errors.Internal, "unable to encode EOB marker")
	}
	syms = append(syms, uint16(numSyms-1)) // EOB marker

	// Compute number of prefix trees needed.
	// Short blocks get fewer trees; the thresholds pick minNumTrees+i.
	numTrees := maxNumTrees
	for i, lim := range []int{200, 600, 1200, 2400} {
		if len(syms) < lim {
			numTrees = minNumTrees + i
			break
		}
	}

	// Compute number of block selectors (one per numBlockSyms symbols).
	numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms
	if cap(zw.treeSels) < numSels {
		zw.treeSels = make([]uint8, numSels)
	}
	treeSels := zw.treeSels[:numSels]
	for i := range treeSels {
		treeSels[i] = uint8(i % numTrees)
	}

	// Initialize prefix codes.
	for i := range zw.codes2D[:numTrees] {
		pc := zw.codes2D[i][:numSyms]
		for j := range pc {
			pc[j] = prefix.PrefixCode{Sym: uint32(j)}
		}
		zw.codes1D[i] = pc
	}

	// First cut at assigning prefix trees to each group:
	// groups are assigned round-robin, and each group's symbols are
	// tallied into its tree's frequency counts.
	var codes prefix.PrefixCodes
	var blkLen, selIdx int
	for _, sym := range syms {
		if blkLen == 0 {
			blkLen = numBlockSyms
			codes = zw.codes2D[treeSels[selIdx]][:numSyms]
			selIdx++
		}
		blkLen--
		codes[sym].Cnt++
	}

	// TODO(dsnet): Use K-means to cluster groups to each prefix tree.

	// Generate lengths and prefixes based on symbol frequencies.
	for i := range zw.trees1D[:numTrees] {
		pc := prefix.PrefixCodes(zw.codes2D[i][:numSyms])
		pc.SortByCount()
		if err := prefix.GenerateLengths(pc, maxPrefixBits); err != nil {
			errors.Panic(err)
		}
		pc.SortBySymbol()
	}

	// Write out information about the trees and tree selectors.
	// Selectors are themselves MTF-transformed before being written.
	var mtf internal.MoveToFront
	zw.wr.WriteBitsBE64(uint64(numTrees), 3)
	zw.wr.WriteBitsBE64(uint64(numSels), 15)
	zw.treeSelsMTF = append(zw.treeSelsMTF[:0], treeSels...)
	mtf.Encode(zw.treeSelsMTF)
	for _, sym := range zw.treeSelsMTF {
		zw.wr.WriteSymbol(uint(sym), &encSel)
	}
	zw.wr.WritePrefixCodes(zw.codes1D[:numTrees], zw.trees1D[:numTrees])

	// Write out prefix encoded symbols of compressed data.
	var tree *prefix.Encoder
	blkLen, selIdx = 0, 0
	for _, sym := range syms {
		if blkLen == 0 {
			blkLen = numBlockSyms
			tree = &zw.trees1D[treeSels[selIdx]]
			selIdx++
		}
		blkLen--
		// Fast path first; fall back to the general writer on failure.
		ok := zw.wr.TryWriteSymbol(uint(sym), tree)
		if !ok {
			zw.wr.WriteSymbol(uint(sym), tree)
		}
	}
}
|
|
@ -0,0 +1,10 @@
|
|||
module github.com/dsnet/compress
|
||||
|
||||
go 1.9
|
||||
|
||||
require (
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780
|
||||
github.com/klauspost/compress v1.4.1
|
||||
github.com/klauspost/cpuid v1.2.0 // indirect
|
||||
github.com/ulikunitz/xz v0.5.6
|
||||
)
|
|
@ -0,0 +1,8 @@
|
|||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780 h1:tFh1tRc4CA31yP6qDcu+Trax5wW5GuMxvkIba07qVLY=
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||
github.com/klauspost/compress v1.4.1 h1:8VMb5+0wMgdBykOV96DwNwKFQ+WTI4pzYURP99CcB9E=
|
||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/ulikunitz/xz v0.5.6 h1:jGHAfXawEGZQ3blwU5wnWKQJvAraT7Ftq9EXjnXYgt8=
|
||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
|
@ -0,0 +1,107 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package internal is a collection of common compression algorithms.
|
||||
//
|
||||
// For performance reasons, these packages lack strong error checking and
|
||||
// require that the caller to ensure that strict invariants are kept.
|
||||
package internal
|
||||
|
||||
var (
	// IdentityLUT maps every byte value to itself.
	IdentityLUT = func() (lut [256]byte) {
		for i := range lut {
			lut[i] = uint8(i)
		}
		return lut
	}()

	// ReverseLUT maps every byte value to that value with its bits reversed.
	ReverseLUT = func() (lut [256]byte) {
		for i := range lut {
			v := uint8(i)
			var r uint8
			for bit := 0; bit < 8; bit++ {
				r = r<<1 | v&1
				v >>= 1
			}
			lut[i] = r
		}
		return lut
	}()
)

// ReverseUint32 reverses all 32 bits of v.
func ReverseUint32(v uint32) (x uint32) {
	// Reverse byte-by-byte via the LUT, reversing byte order as we go.
	for i := 0; i < 4; i++ {
		x = x<<8 | uint32(ReverseLUT[byte(v)])
		v >>= 8
	}
	return x
}

// ReverseUint32N reverses the lower n bits of v.
func ReverseUint32N(v uint32, n uint) (x uint32) {
	// Shift the n interesting bits to the top, then reverse the whole word.
	shifted := v << (32 - n)
	return ReverseUint32(shifted)
}

// ReverseUint64 reverses all 64 bits of v.
func ReverseUint64(v uint64) (x uint64) {
	// Reverse byte-by-byte via the LUT, reversing byte order as we go.
	for i := 0; i < 8; i++ {
		x = x<<8 | uint64(ReverseLUT[byte(v)])
		v >>= 8
	}
	return x
}

// ReverseUint64N reverses the lower n bits of v.
func ReverseUint64N(v uint64, n uint) (x uint64) {
	// Shift the n interesting bits to the top, then reverse the whole word.
	shifted := v << (64 - n)
	return ReverseUint64(shifted)
}
|
||||
|
||||
// MoveToFront is a data structure that allows for more efficient move-to-front
// transformations. This specific implementation assumes that the alphabet is
// densely packed within 0..255.
//
// Encode and Decode reuse dict across calls: tail records how many trailing
// entries are already in identity order, so the next call only needs to
// re-copy the first 256-tail entries from IdentityLUT.
type MoveToFront struct {
	dict [256]uint8 // Mapping from indexes to values
	tail int        // Number of tail bytes that are already ordered
}
|
||||
|
||||
// Encode performs an in-place forward move-to-front transform on vals,
// replacing each value with its current index in the dictionary and then
// moving that value to the front.
func (m *MoveToFront) Encode(vals []uint8) {
	copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity

	// max accumulates a bitwise-OR of all emitted indexes, giving an upper
	// bound on the largest index used; entries beyond it stay in identity
	// order, which lets the next call skip part of the reset above.
	var max int
	for i, val := range vals {
		var idx uint8 // Reverse lookup idx in dict
		for di, dv := range m.dict {
			if dv == val {
				idx = uint8(di)
				break
			}
		}
		vals[i] = idx

		max |= int(idx)
		// Move-to-front: shift dict[0:idx] up by one, place val at the front.
		copy(m.dict[1:], m.dict[:idx])
		m.dict[0] = val
	}
	m.tail = 256 - max - 1
}
|
||||
|
||||
// Decode performs an in-place inverse move-to-front transform on idxs,
// replacing each index with the dictionary value it refers to and then
// moving that value to the front.
func (m *MoveToFront) Decode(idxs []uint8) {
	copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity

	// See Encode: max bounds the largest index touched so that untouched
	// tail entries can be skipped on the next reset.
	var max int
	for i, idx := range idxs {
		val := m.dict[idx] // Forward lookup val in dict
		idxs[i] = val

		max |= int(idx)
		// Move-to-front: shift dict[0:idx] up by one, place val at the front.
		copy(m.dict[1:], m.dict[:idx])
		m.dict[0] = val
	}
	m.tail = 256 - max - 1
}
|
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build debug,!gofuzz
|
||||
|
||||
package internal
|
||||
|
||||
const (
	// Debug enables additional validation checks in debug builds
	// (e.g. the sorted/complete-code assertions in prefix.Decoder.Init).
	Debug = true
	// GoFuzz is false here; the gofuzz-tagged variant of this file sets it.
	GoFuzz = false
)
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright 2016, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package errors implements functions to manipulate compression errors.
|
||||
//
|
||||
// In idiomatic Go, it is an anti-pattern to use panics as a form of error
|
||||
// reporting in the API. Instead, the expected way to transmit errors is by
|
||||
// returning an error value. Unfortunately, the checking of "err != nil" in
|
||||
// tight loops commonly found in compression causes non-negligible performance
|
||||
// degradation. While this may not be idiomatic, the internal packages of this
|
||||
// repository rely on panics as a normal means to convey errors. In order to
|
||||
// ensure that these panics do not leak across the public API, the public
|
||||
// packages must recover from these panics and present an error value.
|
||||
//
|
||||
// The Panic and Recover functions in this package provide a safe way to
|
||||
// recover from errors only generated from within this repository.
|
||||
//
|
||||
// Example usage:
|
||||
// func Foo() (err error) {
|
||||
// defer errors.Recover(&err)
|
||||
//
|
||||
// if rand.Intn(2) == 0 {
|
||||
// // Unexpected panics will not be caught by Recover.
|
||||
// io.Closer(nil).Close()
|
||||
// } else {
|
||||
// // Errors thrown by Panic will be caught by Recover.
|
||||
// errors.Panic(errors.New("whoopsie"))
|
||||
// }
|
||||
// }
|
||||
//
|
||||
package errors
|
||||
|
||||
import "strings"
|
||||
|
||||
// Error classification codes; the zero value is Unknown.
const (
	// Unknown means the error has no specific classification.
	Unknown = iota

	// Internal means the error was caused by a bug in this repository;
	// users encountering it should file an issue report.
	Internal

	// Invalid means the caller misused the API.
	Invalid

	// Deprecated means a deprecated, unsupported feature was used.
	Deprecated

	// Corrupted means the input stream is corrupted.
	Corrupted

	// Closed means the handler has already been closed.
	Closed
)

// codeMap maps each classification code to a human-readable description.
var codeMap = map[int]string{
	Unknown:    "unknown error",
	Internal:   "internal error",
	Invalid:    "invalid argument",
	Deprecated: "deprecated format",
	Corrupted:  "corrupted input",
	Closed:     "closed handler",
}

// Error is the concrete error type raised by packages in this repository.
type Error struct {
	Code int    // The error type
	Pkg  string // Name of the package where the error originated
	Msg  string // Descriptive message about the error (optional)
}

// Error formats the error as "pkg: description: message", omitting any
// component that is empty.
func (e Error) Error() string {
	parts := make([]string, 0, 3)
	if e.Pkg != "" {
		parts = append(parts, e.Pkg)
	}
	if desc := codeMap[e.Code]; desc != "" {
		parts = append(parts, desc)
	}
	if e.Msg != "" {
		parts = append(parts, e.Msg)
	}
	return strings.Join(parts, ": ")
}

// CompressError marks Error as originating from this repository.
func (e Error) CompressError() {}

// Per-code predicates on the concrete Error type.
func (e Error) IsInternal() bool   { return e.Code == Internal }
func (e Error) IsInvalid() bool    { return e.Code == Invalid }
func (e Error) IsDeprecated() bool { return e.Code == Deprecated }
func (e Error) IsCorrupted() bool  { return e.Code == Corrupted }
func (e Error) IsClosed() bool     { return e.Code == Closed }

// Per-code predicates on arbitrary error values.
func IsInternal(err error) bool   { return isCode(err, Internal) }
func IsInvalid(err error) bool    { return isCode(err, Invalid) }
func IsDeprecated(err error) bool { return isCode(err, Deprecated) }
func IsCorrupted(err error) bool  { return isCode(err, Corrupted) }
func IsClosed(err error) bool     { return isCode(err, Closed) }

// isCode reports whether err is an Error carrying the given code.
func isCode(err error, code int) bool {
	cerr, ok := err.(Error)
	return ok && cerr.Code == code
}

// errWrap is used by Panic and Recover to ensure that only errors raised by
// Panic are recovered by Recover.
type errWrap struct{ e *error }

// Recover is intended for use in a defer statement. It converts a panic
// raised by Panic back into an error value stored in *err; any other panic
// is re-raised untouched.
func Recover(err *error) {
	ex := recover()
	if ex == nil {
		return
	}
	if wrapped, ok := ex.(errWrap); ok {
		*err = *wrapped.e
		return
	}
	panic(ex)
}

// Panic raises err as a panic that Recover knows how to unwrap.
func Panic(err error) {
	panic(errWrap{&err})
}
|
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2016, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build gofuzz
|
||||
|
||||
package internal
|
||||
|
||||
const (
	// Debug enables additional validation checks; fuzzing builds keep
	// them on to catch invariant violations.
	Debug = true
	// GoFuzz reports that this is a go-fuzz build (gofuzz build tag).
	GoFuzz = true
)
|
|
@ -0,0 +1,159 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build debug
|
||||
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}

// lenBase2 returns the number of binary digits needed to print n
// (computed as ceil(log2(n+1))).
func lenBase2(n uint) int {
	return int(math.Ceil(math.Log2(float64(n + 1))))
}

// padBase2 renders v as an n-digit binary string, right-aligned with
// spaces to a total width of m.
func padBase2(v, n uint, m int) string {
	// Prepending a sentinel bit forces exactly n digits after slicing it off.
	s := fmt.Sprintf("%b", 1<<n|v)[1:]
	if len(s) >= m {
		return s
	}
	return strings.Repeat(" ", m-len(s)) + s
}

// lenBase10 returns the number of decimal digits needed to print n
// (computed as ceil(log10(n+1))).
func lenBase10(n int) int {
	return int(math.Ceil(math.Log10(float64(n + 1))))
}

// padBase10 renders n in decimal, right-aligned with spaces to a total
// width of m.
func padBase10(n, m int) string {
	s := fmt.Sprintf("%d", n)
	if len(s) >= m {
		return s
	}
	return strings.Repeat(" ", m-len(s)) + s
}
|
||||
|
||||
// String renders the range codes as a column-aligned, human-readable table
// (available in debug builds only).
func (rc RangeCodes) String() string {
	// First pass: find the widest Len and Base so columns line up.
	var maxLen, maxBase int
	for _, c := range rc {
		maxLen = max(maxLen, int(c.Len))
		maxBase = max(maxBase, int(c.Base))
	}

	var ss []string
	ss = append(ss, "{")
	for i, c := range rc {
		base := padBase10(int(c.Base), lenBase10(maxBase))
		if c.Len > 0 {
			// Non-trivial ranges print as "base-end".
			base += fmt.Sprintf("-%d", c.End()-1)
		}
		ss = append(ss, fmt.Sprintf("\t%s: {len: %s, range: %s},",
			padBase10(int(i), lenBase10(len(rc)-1)),
			padBase10(int(c.Len), lenBase10(maxLen)),
			base,
		))
	}
	ss = append(ss, "}")
	return strings.Join(ss, "\n")
}
|
||||
|
||||
// String renders the prefix codes as a column-aligned table with a small
// '#' histogram of symbol counts (available in debug builds only).
func (pc PrefixCodes) String() string {
	// First pass: column widths for symbols, code lengths, and counts.
	var maxSym, maxLen, maxCnt int
	for _, c := range pc {
		maxSym = max(maxSym, int(c.Sym))
		maxLen = max(maxLen, int(c.Len))
		maxCnt = max(maxCnt, int(c.Cnt))
	}

	var ss []string
	ss = append(ss, "{")
	for _, c := range pc {
		var cntStr string
		if maxCnt > 0 {
			// Scale the count to a 0..32 character bar, rounding to nearest.
			cnt := int(32*float32(c.Cnt)/float32(maxCnt) + 0.5)
			cntStr = fmt.Sprintf("%s |%s",
				padBase10(int(c.Cnt), lenBase10(maxCnt)),
				strings.Repeat("#", cnt),
			)
		}
		ss = append(ss, fmt.Sprintf("\t%s: %s, %s",
			padBase10(int(c.Sym), lenBase10(maxSym)),
			padBase2(uint(c.Val), uint(c.Len), maxLen),
			cntStr,
		))
	}
	ss = append(ss, "}")
	return strings.Join(ss, "\n")
}
|
||||
|
||||
// String renders the decoder's chunk and link tables in a human-readable
// form (available in debug builds only).
func (pd Decoder) String() string {
	var ss []string
	ss = append(ss, "{")
	if len(pd.chunks) > 0 {
		ss = append(ss, "\tchunks: {")
		for i, c := range pd.chunks {
			// A stored length greater than chunkBits means the entry is an
			// index into the links tables rather than a decoded symbol.
			label := "sym"
			if uint(c&countMask) > uint(pd.chunkBits) {
				label = "idx"
			}
			ss = append(ss, fmt.Sprintf("\t\t%s: {%s: %s, len: %s}",
				padBase2(uint(i), uint(pd.chunkBits), int(pd.chunkBits)),
				label, padBase10(int(c>>countBits), 3),
				padBase10(int(c&countMask), 2),
			))
		}
		ss = append(ss, "\t},")

		for j, links := range pd.links {
			ss = append(ss, fmt.Sprintf("\tlinks[%d]: {", j))
			linkBits := lenBase2(uint(pd.linkMask))
			for i, c := range links {
				ss = append(ss, fmt.Sprintf("\t\t%s: {sym: %s, len: %s},",
					padBase2(uint(i), uint(linkBits), int(linkBits)),
					padBase10(int(c>>countBits), 3),
					padBase10(int(c&countMask), 2),
				))
			}
			ss = append(ss, "\t},")
		}
	}
	ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pd.chunkMask))
	ss = append(ss, fmt.Sprintf("\tlinkMask: %b,", pd.linkMask))
	ss = append(ss, fmt.Sprintf("\tchunkBits: %d,", pd.chunkBits))
	ss = append(ss, fmt.Sprintf("\tMinBits: %d,", pd.MinBits))
	ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pd.NumSyms))
	ss = append(ss, "}")
	return strings.Join(ss, "\n")
}
|
||||
|
||||
// String renders the encoder's chunk table in a human-readable form
// (available in debug builds only).
func (pe Encoder) String() string {
	// Widest code length, for aligning the binary column.
	var maxLen int
	for _, c := range pe.chunks {
		maxLen = max(maxLen, int(c&countMask))
	}

	var ss []string
	ss = append(ss, "{")
	if len(pe.chunks) > 0 {
		ss = append(ss, "\tchunks: {")
		for i, c := range pe.chunks {
			// Each chunk packs the code value (high bits) and its
			// bit-length (low countBits).
			ss = append(ss, fmt.Sprintf("\t\t%s: %s,",
				padBase10(i, 3),
				padBase2(uint(c>>countBits), uint(c&countMask), maxLen),
			))
		}
		ss = append(ss, "\t},")
	}
	ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pe.chunkMask))
	ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pe.NumSyms))
	ss = append(ss, "}")
	return strings.Join(ss, "\n")
}
|
|
@ -0,0 +1,136 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
)
|
||||
|
||||
// The algorithm used to decode variable length codes is based on the lookup
|
||||
// method in zlib. If the code is less-than-or-equal to maxChunkBits,
|
||||
// then the symbol can be decoded using a single lookup into the chunks table.
|
||||
// Otherwise, the links table will be used for a second level lookup.
|
||||
//
|
||||
// The chunks slice is keyed by the contents of the bit buffer ANDed with
|
||||
// the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple
|
||||
// that is decoded as follow:
|
||||
//
|
||||
// var length = chunks[bitBuffer&chunkMask] & countMask
|
||||
// var symbol = chunks[bitBuffer&chunkMask] >> countBits
|
||||
//
|
||||
// If the decoded length is larger than chunkBits, then an overflow link table
|
||||
// must be used for further decoding. In this case, the symbol is actually the
|
||||
// index into the links tables. The second-level links table returned is
|
||||
// processed in the same way as the chunks table.
|
||||
//
|
||||
// if length > chunkBits {
|
||||
// var index = symbol // Previous symbol is index into links tables
|
||||
// length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
|
||||
// symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
|
||||
// }
|
||||
//
|
||||
// See the following:
|
||||
// http://www.gzip.org/algorithm.txt
|
||||
|
||||
// Decoder is a prefix-code decoding table using the two-level chunk/link
// lookup scheme described in the comment above.
type Decoder struct {
	chunks    []uint32   // First-level lookup map
	links     [][]uint32 // Second-level lookup map
	chunkMask uint32     // Mask the length of the chunks table
	linkMask  uint32     // Mask the length of the link table
	chunkBits uint32     // Bit-length of the chunks table

	MinBits uint32 // The minimum number of bits to safely make progress
	NumSyms uint32 // Number of symbols
}
|
||||
|
||||
// Init initializes Decoder according to the codes provided.
// It builds the first-level chunks table and, when the longest code exceeds
// maxChunkBits, a set of second-level links tables. Existing slice capacity
// is reused where possible.
func (pd *Decoder) Init(codes PrefixCodes) {
	// Handle special case trees.
	if len(codes) <= 1 {
		switch {
		case len(codes) == 0: // Empty tree (should error if used later)
			*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
		case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
			pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0)
			*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
		default:
			panic("invalid codes")
		}
		return
	}
	// Debug-only sanity checks: the codes must be sorted by symbol and form
	// a complete, non-overlapping prefix tree.
	if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
		panic("input codes is not sorted")
	}
	if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
		panic("detected incomplete or overlapping codes")
	}

	// Find the shortest and longest code lengths.
	var minBits, maxBits uint32 = valueBits, 0
	for _, c := range codes {
		if minBits > c.Len {
			minBits = c.Len
		}
		if maxBits < c.Len {
			maxBits = c.Len
		}
	}

	// Allocate chunks table as needed.
	const maxChunkBits = 9 // This can be tuned for better performance
	pd.NumSyms = uint32(len(codes))
	pd.MinBits = minBits
	pd.chunkBits = maxBits
	if pd.chunkBits > maxChunkBits {
		pd.chunkBits = maxChunkBits
	}
	numChunks := 1 << pd.chunkBits
	pd.chunks = allocUint32s(pd.chunks, numChunks)
	pd.chunkMask = uint32(numChunks - 1)

	// Allocate links tables as needed. Only codes longer than chunkBits
	// require a second-level table.
	pd.links = pd.links[:0]
	pd.linkMask = 0
	if pd.chunkBits < maxBits {
		numLinks := 1 << (maxBits - pd.chunkBits)
		pd.linkMask = uint32(numLinks - 1)

		// First pass: reserve one link table per distinct long-code chunk,
		// and point that chunk entry at it (length field > chunkBits marks
		// the entry as an index rather than a symbol).
		var linkIdx uint32
		for i := range pd.chunks {
			pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized
		}
		for _, c := range codes {
			if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 {
				pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1)
				linkIdx++
			}
		}

		// Allocate all link tables as views into one flat backing slice.
		pd.links = extendSliceUint32s(pd.links, int(linkIdx))
		linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx))
		for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {
			pd.links[i] = linksFlat[j : j+numLinks]
		}
	}

	// Fill out chunks and links tables with values. A code of length L owns
	// every table index congruent to its value modulo 1<<L, so each entry is
	// replicated at that stride.
	for _, c := range codes {
		chunk := c.Sym<<countBits | c.Len
		if c.Len <= pd.chunkBits {
			skip := 1 << uint(c.Len)
			for j := int(c.Val); j < len(pd.chunks); j += skip {
				pd.chunks[j] = chunk
			}
		} else {
			linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits
			links := pd.links[linkIdx]
			skip := 1 << uint(c.Len-pd.chunkBits)
			for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {
				links[j] = chunk
			}
		}
	}
}
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
)
|
||||
|
||||
// Encoder is a prefix-code encoding table. It maps symbols to packed
// (value, bit-length) entries via a single-level, power-of-two sized
// lookup table keyed by symbol.
type Encoder struct {
	chunks    []uint32 // First-level lookup map
	chunkMask uint32   // Mask the length of the chunks table

	NumSyms uint32 // Number of symbols
}
|
||||
|
||||
// Init initializes Encoder according to the codes provided.
// Symbols are placed into a power-of-two sized table keyed by
// Sym&chunkMask; the table doubles in size until no two symbols collide.
func (pe *Encoder) Init(codes PrefixCodes) {
	// Handle special case trees.
	if len(codes) <= 1 {
		switch {
		case len(codes) == 0: // Empty tree (should error if used later)
			*pe = Encoder{chunks: pe.chunks[:0], NumSyms: 0}
		case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
			pe.chunks = append(pe.chunks[:0], codes[0].Val<<countBits|0)
			*pe = Encoder{chunks: pe.chunks[:1], NumSyms: 1}
		default:
			panic("invalid codes")
		}
		return
	}
	// Debug-only sanity checks: the codes must be sorted by symbol and form
	// a complete, non-overlapping prefix tree.
	if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
		panic("input codes is not sorted")
	}
	if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
		panic("detected incomplete or overlapping codes")
	}

	// Enough chunks to contain all the symbols.
	numChunks := 1
	for n := len(codes) - 1; n > 0; n >>= 1 {
		numChunks <<= 1
	}
	pe.NumSyms = uint32(len(codes))

retry:
	// Allocate and reset chunks.
	pe.chunks = allocUint32s(pe.chunks, numChunks)
	pe.chunkMask = uint32(numChunks - 1)
	for i := range pe.chunks {
		pe.chunks[i] = 0 // Logic below relies on zero value as uninitialized
	}

	// Insert each symbol, checking that there are no conflicts.
	for _, c := range codes {
		if pe.chunks[c.Sym&pe.chunkMask] > 0 {
			// Collision found our "hash" table, so grow and try again.
			numChunks <<= 1
			goto retry
		}
		pe.chunks[c.Sym&pe.chunkMask] = c.Val<<countBits | c.Len
	}
}
|
|
@ -0,0 +1,400 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package prefix implements bit readers and writers that use prefix encoding.
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
)
|
||||
|
||||
func errorf(c int, f string, a ...interface{}) error {
|
||||
return errors.Error{Code: c, Pkg: "prefix", Msg: fmt.Sprintf(f, a...)}
|
||||
}
|
||||
|
||||
func panicf(c int, f string, a ...interface{}) {
|
||||
errors.Panic(errorf(c, f, a...))
|
||||
}
|
||||
|
||||
const (
	countBits = 5  // Number of bits to store the bit-length of the code
	valueBits = 27 // Number of bits to store the code value

	// countMask extracts the bit-length stored in the low countBits of a
	// packed table entry (the value/symbol occupies the remaining high bits).
	countMask = (1 << countBits) - 1
)
|
||||
|
||||
// PrefixCode is a representation of a prefix code, which is conceptually a
// mapping from some arbitrary symbol to some bit-string.
//
// The Sym and Cnt fields are typically provided by the user,
// while the Len and Val fields are generated by this package.
type PrefixCode struct {
	Sym uint32 // The symbol being mapped
	Cnt uint32 // The number times this symbol is used
	Len uint32 // Bit-length of the prefix code
	Val uint32 // Value of the prefix code (must be in 0..(1<<Len)-1)
}

// PrefixCodes is a list of prefix codes.
type PrefixCodes []PrefixCode
|
||||
|
||||
// prefixCodesBySymbol orders codes in ascending order of symbol value.
type prefixCodesBySymbol []PrefixCode

func (c prefixCodesBySymbol) Len() int           { return len(c) }
func (c prefixCodesBySymbol) Less(i, j int) bool { return c[i].Sym < c[j].Sym }
func (c prefixCodesBySymbol) Swap(i, j int)      { c[i], c[j] = c[j], c[i] }

// prefixCodesByCount orders codes in ascending order of count,
// breaking ties by symbol value.
type prefixCodesByCount []PrefixCode

func (c prefixCodesByCount) Len() int { return len(c) }
func (c prefixCodesByCount) Less(i, j int) bool {
	return c[i].Cnt < c[j].Cnt || (c[i].Cnt == c[j].Cnt && c[i].Sym < c[j].Sym)
}
func (c prefixCodesByCount) Swap(i, j int) { c[i], c[j] = c[j], c[i] }

// SortBySymbol sorts the codes in ascending order of their symbols.
func (pc PrefixCodes) SortBySymbol() { sort.Sort(prefixCodesBySymbol(pc)) }

// SortByCount sorts the codes in ascending order of their counts.
func (pc PrefixCodes) SortByCount() { sort.Sort(prefixCodesByCount(pc)) }
|
||||
|
||||
// Length computes the total bit-length using the Len and Cnt fields.
|
||||
func (pc PrefixCodes) Length() (nb uint) {
|
||||
for _, c := range pc {
|
||||
nb += uint(c.Len * c.Cnt)
|
||||
}
|
||||
return nb
|
||||
}
|
||||
|
||||
// checkLengths reports whether the codes form a complete prefix tree.
|
||||
func (pc PrefixCodes) checkLengths() bool {
|
||||
sum := 1 << valueBits
|
||||
for _, c := range pc {
|
||||
sum -= (1 << valueBits) >> uint(c.Len)
|
||||
}
|
||||
return sum == 0 || len(pc) == 0
|
||||
}
|
||||
|
||||
// checkPrefixes reports whether all codes have non-overlapping prefixes.
// This is an O(n^2) pairwise scan, used only as a validation pass
// (callers gate it behind internal.Debug).
func (pc PrefixCodes) checkPrefixes() bool {
	for i, c1 := range pc {
		for j, c2 := range pc {
			// Two codes overlap when the shorter one's bits match the
			// corresponding low bits of the longer one's value.
			mask := uint32(1)<<c1.Len - 1
			if i != j && c1.Len <= c2.Len && c1.Val&mask == c2.Val&mask {
				return false
			}
		}
	}
	return true
}
|
||||
|
||||
// checkCanonical reports whether all codes are canonical.
// That is, they have the following properties:
//
//	1. All codes of a given bit-length are consecutive values.
//	2. Shorter codes lexicographically precede longer codes.
//
// The codes must have unique symbols and be sorted by the symbol
// The Len and Val fields in each code must be populated.
func (pc PrefixCodes) checkCanonical() bool {
	// Rule 1.
	// vals[L] tracks, per bit-length L, the last code value seen and how
	// many codes of that length exist. Values are bit-reversed before
	// comparison so that consecutive codes compare as consecutive integers.
	var vals [valueBits + 1]PrefixCode
	for _, c := range pc {
		if c.Len > 0 {
			c.Val = internal.ReverseUint32N(c.Val, uint(c.Len))
			if vals[c.Len].Cnt > 0 && vals[c.Len].Val+1 != c.Val {
				return false
			}
			vals[c.Len].Val = c.Val
			vals[c.Len].Cnt++
		}
	}

	// Rule 2.
	// Walk lengths in increasing order; each length's first code value must
	// come strictly after the previous (shorter) length's last code value.
	var last PrefixCode
	for _, v := range vals {
		if v.Cnt > 0 {
			curVal := v.Val - v.Cnt + 1
			if last.Cnt != 0 && last.Val >= curVal {
				return false
			}
			last = v
		}
	}
	return true
}
|
||||
|
||||
// GenerateLengths assigns non-zero bit-lengths to all codes. Codes with high
// frequency counts will be assigned shorter codes to reduce bit entropy.
// This function is used primarily by compressors.
//
// The input codes must have the Cnt field populated, be sorted by count.
// Even if a code has a count of 0, a non-zero bit-length will be assigned.
//
// The result will have the Len field populated. The algorithm used guarantees
// that Len <= maxBits and that it is a complete prefix tree. The resulting
// codes will remain sorted by count.
func GenerateLengths(codes PrefixCodes, maxBits uint) error {
	// Trivial cases: zero or one symbol needs no real tree.
	if len(codes) <= 1 {
		if len(codes) == 1 {
			codes[0].Len = 0
		}
		return nil
	}

	// Verify that the codes are in ascending order by count.
	cntLast := codes[0].Cnt
	for _, c := range codes[1:] {
		if c.Cnt < cntLast {
			return errorf(errors.Invalid, "non-monotonically increasing symbol counts")
		}
		cntLast = c.Cnt
	}

	// Construct a Huffman tree used to generate the bit-lengths.
	//
	// The Huffman tree is a binary tree where each symbol lies as a leaf node
	// on this tree. The length of the prefix code to assign is the depth of
	// that leaf from the root. The Huffman algorithm, which runs in O(n),
	// is used to generate the tree. It assumes that codes are sorted in
	// increasing order of frequency.
	//
	// The algorithm is as follows:
	//	1. Start with two queues, F and Q, where F contains all of the starting
	//	symbols sorted such that symbols with lowest counts come first.
	//	2. While len(F)+len(Q) > 1:
	//		2a. Dequeue the node from F or Q that has the lowest weight as N0.
	//		2b. Dequeue the node from F or Q that has the lowest weight as N1.
	//		2c. Create a new node N that has N0 and N1 as its children.
	//		2d. Enqueue N into the back of Q.
	//	3. The tree's root node is Q[0].
	type node struct {
		cnt uint32

		// n0 or c0 represent the left child of this node.
		// Since Go does not have unions, only one of these will be set.
		// Similarly, n1 or c1 represent the right child of this node.
		//
		// If n0 or n1 is set, then it represents a "pointer" to another
		// node in the Huffman tree. Since Go's pointer analysis cannot reason
		// that these node pointers do not escape (golang.org/issue/13493),
		// we use an index to a node in the nodes slice as a pseudo-pointer.
		//
		// If c0 or c1 is set, then it represents a leaf "node" in the
		// Huffman tree. The leaves are the PrefixCode values themselves.
		n0, n1 int // Index to child nodes
		c0, c1 *PrefixCode
	}
	var nodeIdx int
	var nodeArr [1024]node // Large enough to handle most cases on the stack
	nodes := nodeArr[:]
	if len(nodes) < len(codes) {
		nodes = make([]node, len(codes)) // Number of internal nodes < number of leaves
	}
	// freqs is queue F (the leaves); queue is Q (the internal nodes).
	// Both are already sorted by weight, so the minimum is always at a front.
	freqs, queue := codes, nodes[:0]
	for len(freqs)+len(queue) > 1 {
		// These are the two smallest nodes at the front of freqs and queue.
		var n node
		if len(queue) == 0 || (len(freqs) > 0 && freqs[0].Cnt <= queue[0].cnt) {
			n.c0, freqs = &freqs[0], freqs[1:]
			n.cnt += n.c0.Cnt
		} else {
			n.cnt += queue[0].cnt
			n.n0 = nodeIdx // nodeIdx is same as &queue[0] - &nodes[0]
			nodeIdx++
			queue = queue[1:]
		}
		if len(queue) == 0 || (len(freqs) > 0 && freqs[0].Cnt <= queue[0].cnt) {
			n.c1, freqs = &freqs[0], freqs[1:]
			n.cnt += n.c1.Cnt
		} else {
			n.cnt += queue[0].cnt
			n.n1 = nodeIdx // nodeIdx is same as &queue[0] - &nodes[0]
			nodeIdx++
			queue = queue[1:]
		}
		queue = append(queue, n)
	}
	rootIdx := nodeIdx

	// Search the whole binary tree, noting when we hit each leaf node.
	// We do not care about the exact Huffman tree structure, but rather we only
	// care about depth of each of the leaf nodes. That is, the depth determines
	// how long each symbol is in bits.
	//
	// Since the number of leaves is n, there is at most n internal nodes.
	// Thus, this algorithm runs in O(n).
	var fixBits bool
	var explore func(int, uint)
	explore = func(rootIdx int, level uint) {
		root := &nodes[rootIdx]

		// Explore left branch.
		if root.c0 == nil {
			explore(root.n0, level+1)
		} else {
			fixBits = fixBits || (level > maxBits)
			root.c0.Len = uint32(level)
		}

		// Explore right branch.
		if root.c1 == nil {
			explore(root.n1, level+1)
		} else {
			fixBits = fixBits || (level > maxBits)
			root.c1.Len = uint32(level)
		}
	}
	explore(rootIdx, 1)

	// Fix the bit-lengths if we violate the maxBits requirement.
	if fixBits {
		// Create histogram for number of symbols with each bit-length.
		var symBitsArr [valueBits + 1]uint32
		symBits := symBitsArr[:] // symBits[nb] indicates number of symbols using nb bits
		for _, c := range codes {
			for int(c.Len) >= len(symBits) {
				symBits = append(symBits, 0)
			}
			symBits[c.Len]++
		}

		// Fudge the tree such that the largest bit-length is <= maxBits.
		// This is accomplish by effectively doing a tree rotation. That is, we
		// increase the bit-length of some higher frequency code, so that the
		// bit-lengths of lower frequency codes can be decreased.
		//
		// Visually, this looks like the following transform:
		//
		//	Level          Before       After
		//	             __           ___
		//	            /  \         /   \
		//	  n-1      X   /        /\   /\
		//	  n       X  /\        X  X X  X
		//	  n+1       X  X
		//
		var treeRotate func(uint)
		treeRotate = func(nb uint) {
			// Recurse upward until a level with a spare node is found.
			if symBits[nb-1] == 0 {
				treeRotate(nb - 1)
			}
			symBits[nb-1] -= 1 // Push this node to the level below
			symBits[nb] += 3   // This level gets one node from above, two from below
			symBits[nb+1] -= 2 // Push two nodes to the level above
		}
		for i := uint(len(symBits)) - 1; i > maxBits; i-- {
			for symBits[i] > 0 {
				treeRotate(i - 1)
			}
		}

		// Assign bit-lengths to each code. Since codes is sorted in increasing
		// order of frequency, that means that the most frequently used symbols
		// should have the shortest bit-lengths. Thus, we copy symbols to codes
		// from the back of codes first.
		cs := codes
		for nb, cnt := range symBits {
			if cnt > 0 {
				pos := len(cs) - int(cnt)
				cs2 := cs[pos:]
				for i := range cs2 {
					cs2[i].Len = uint32(nb)
				}
				cs = cs[:pos]
			}
		}
		if len(cs) != 0 {
			panic("not all codes were used up")
		}
	}

	// Debug-only sanity check that the lengths form a complete tree.
	if internal.Debug && !codes.checkLengths() {
		panic("incomplete prefix tree detected")
	}
	return nil
}
|
||||
|
||||
// GeneratePrefixes assigns a prefix value to all codes according to the
// bit-lengths. This function is used by both compressors and decompressors.
//
// The input codes must have the Sym and Len fields populated and be
// sorted by symbol. The bit-lengths of each code must be properly allocated,
// such that it forms a complete tree.
//
// The result will have the Val field populated and will produce a canonical
// prefix tree. The resulting codes will remain sorted by symbol.
func GeneratePrefixes(codes PrefixCodes) error {
	// Trivial cases: a single code must have zero length and gets value 0.
	if len(codes) <= 1 {
		if len(codes) == 1 {
			if codes[0].Len != 0 {
				return errorf(errors.Invalid, "degenerate prefix tree with one node")
			}
			codes[0].Val = 0
		}
		return nil
	}

	// Compute basic statistics on the symbols.
	var bitCnts [valueBits + 1]uint
	c0 := codes[0]
	bitCnts[c0.Len]++
	minBits, maxBits, symLast := c0.Len, c0.Len, c0.Sym
	for _, c := range codes[1:] {
		if c.Sym <= symLast {
			return errorf(errors.Invalid, "non-unique or non-monotonically increasing symbols")
		}
		if minBits > c.Len {
			minBits = c.Len
		}
		if maxBits < c.Len {
			maxBits = c.Len
		}
		bitCnts[c.Len]++ // Histogram of bit counts
		symLast = c.Sym  // Keep track of last symbol
	}
	if minBits == 0 {
		return errorf(errors.Invalid, "invalid prefix bit-length")
	}

	// Compute the next code for a symbol of a given bit length.
	// This is the standard canonical-code construction: the first code of
	// each length follows the last code of the previous length, doubled.
	var nextCodes [valueBits + 1]uint
	var code uint
	for i := minBits; i <= maxBits; i++ {
		code <<= 1
		nextCodes[i] = code
		code += bitCnts[i]
	}
	// A complete tree uses exactly 1<<maxBits code points at the deepest level.
	if code != 1<<maxBits {
		return errorf(errors.Invalid, "degenerate prefix tree")
	}

	// Assign the code to each symbol.
	// Values are stored bit-reversed so decoders can mask low-order bits.
	for i, c := range codes {
		codes[i].Val = internal.ReverseUint32N(uint32(nextCodes[c.Len]), uint(c.Len))
		nextCodes[c.Len]++
	}

	// Debug-only sanity checks on the generated code assignment.
	if internal.Debug && !codes.checkPrefixes() {
		panic("overlapping prefixes detected")
	}
	if internal.Debug && !codes.checkCanonical() {
		panic("non-canonical prefixes detected")
	}
	return nil
}
|
||||
|
||||
// allocUint32s returns a uint32 slice of length n, reusing the capacity of s
// when it is large enough. On growth it over-allocates by 50% to amortize
// future calls.
func allocUint32s(s []uint32, n int) []uint32 {
	if n <= cap(s) {
		return s[:n]
	}
	return make([]uint32, n, n*3/2)
}
|
||||
|
||||
// extendSliceUint32s returns a [][]uint32 slice of length n, reusing the
// capacity of s when possible. When a new backing array is allocated, every
// element up to cap(s) is carried over so previously allocated sub-slices
// remain reusable.
func extendSliceUint32s(s [][]uint32, n int) [][]uint32 {
	if n <= cap(s) {
		return s[:n]
	}
	ss := make([][]uint32, n, n*3/2)
	copy(ss, s[:cap(s)])
	return ss
}
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package prefix
|
||||
|
||||
// RangeCode describes a half-open interval [Base, Base+1<<Len) of integers.
// A value inside the interval is represented as this code's symbol followed
// by Len extra bits holding the offset from Base.
type RangeCode struct {
	Base uint32 // Starting base offset of the range
	Len  uint32 // Bit-length of a subsequent integer to add to base offset
}

// RangeCodes is an ordered list of RangeCodes (see checkValid for the
// invariants a well-formed list must satisfy).
type RangeCodes []RangeCode
|
||||
|
||||
// RangeEncoder maps an integer offset back to the symbol whose range contains
// it. Offsets near minBase are resolved via a constant-time table lookup;
// larger offsets fall back to a scan in Encode.
type RangeEncoder struct {
	rcs     RangeCodes
	lut     [1024]uint32 // lut[offset-minBase] is the symbol for that offset
	minBase uint         // Smallest base offset among all ranges
}
|
||||
|
||||
// End reports the non-inclusive ending range.
|
||||
func (rc RangeCode) End() uint32 { return rc.Base + (1 << rc.Len) }
|
||||
|
||||
// MakeRangeCodes creates a RangeCodes, where each region is assumed to be
|
||||
// contiguously stacked, without any gaps, with bit-lengths taken from bits.
|
||||
func MakeRangeCodes(minBase uint, bits []uint) (rc RangeCodes) {
|
||||
for _, nb := range bits {
|
||||
rc = append(rc, RangeCode{Base: uint32(minBase), Len: uint32(nb)})
|
||||
minBase += 1 << nb
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
// Base reports the inclusive starting range for all ranges.
|
||||
func (rcs RangeCodes) Base() uint32 { return rcs[0].Base }
|
||||
|
||||
// End reports the non-inclusive ending range for all ranges.
|
||||
func (rcs RangeCodes) End() uint32 { return rcs[len(rcs)-1].End() }
|
||||
|
||||
// checkValid reports whether the RangeCodes is valid. In order to be valid,
|
||||
// the following must hold true:
|
||||
// rcs[i-1].Base <= rcs[i].Base
|
||||
// rcs[i-1].End <= rcs[i].End
|
||||
// rcs[i-1].End >= rcs[i].Base
|
||||
//
|
||||
// Practically speaking, each range must be increasing and must not have any
|
||||
// gaps in between. It is okay for ranges to overlap.
|
||||
func (rcs RangeCodes) checkValid() bool {
|
||||
if len(rcs) == 0 {
|
||||
return false
|
||||
}
|
||||
pre := rcs[0]
|
||||
for _, cur := range rcs[1:] {
|
||||
preBase, preEnd := pre.Base, pre.End()
|
||||
curBase, curEnd := cur.Base, cur.End()
|
||||
if preBase > curBase || preEnd > curEnd || preEnd < curBase {
|
||||
return false
|
||||
}
|
||||
pre = cur
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Init initializes the RangeEncoder for the given set of range codes.
// It panics if rcs does not satisfy checkValid.
func (re *RangeEncoder) Init(rcs RangeCodes) {
	if !rcs.checkValid() {
		panic("invalid range codes")
	}
	*re = RangeEncoder{rcs: rcs, minBase: uint(rcs.Base())}
	// Fill the offset->symbol lookup table. When ranges overlap, later
	// symbols overwrite earlier ones, so each table slot ends up holding the
	// last symbol whose range covers that offset.
	for sym, rc := range rcs {
		base := int(rc.Base) - int(re.minBase)
		end := int(rc.End()) - int(re.minBase)
		if base >= len(re.lut) {
			// This and all later ranges lie beyond the table; Encode falls
			// back to a scan for such offsets.
			break
		}
		if end > len(re.lut) {
			end = len(re.lut)
		}
		for i := base; i < end; i++ {
			re.lut[i] = uint32(sym)
		}
	}
}
|
||||
|
||||
// Encode returns the symbol whose range contains offset. Offsets covered by
// the lookup table resolve in constant time; larger offsets scan forward
// starting from the last table entry.
func (re *RangeEncoder) Encode(offset uint) (sym uint) {
	if idx := int(offset - re.minBase); idx < len(re.lut) {
		return uint(re.lut[idx])
	}
	// Start the scan at the symbol covering the last table slot.
	sym = uint(re.lut[len(re.lut)-1])
retry:
	// Advance until the first range whose Base exceeds offset, then step
	// back one: that previous range is the one containing offset.
	if int(sym) >= len(re.rcs) || re.rcs[sym].Base > uint32(offset) {
		return sym - 1
	}
	sym++
	goto retry // Avoid for-loop so that this function can be inlined
}
|
|
@ -0,0 +1,335 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/dsnet/compress"
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
)
|
||||
|
||||
// Reader implements a prefix decoder. If the input io.Reader satisfies the
// compress.ByteReader or compress.BufferedReader interface, then it also
// guarantees that it will never read more bytes than is necessary.
//
// For high performance, provide an io.Reader that satisfies the
// compress.BufferedReader interface. If the input does not satisfy either
// compress.ByteReader or compress.BufferedReader, then it will be internally
// wrapped with a bufio.Reader.
type Reader struct {
	Offset int64 // Number of bytes read from the underlying io.Reader

	rd     io.Reader
	byteRd compress.ByteReader     // Set if rd is a ByteReader
	bufRd  compress.BufferedReader // Set if rd is a BufferedReader

	bufBits   uint64 // Buffer to hold some bits
	numBits   uint   // Number of valid bits in bufBits
	bigEndian bool   // Do we treat input bytes as big endian?

	// These fields are only used if rd is a compress.BufferedReader.
	bufPeek     []byte // Buffer for the Peek data
	discardBits int    // Number of bits to discard from reader
	fedBits     uint   // Number of bits fed in last call to PullBits

	// These fields are used to reduce allocations.
	// Init carries them over so wrappers are reused across calls.
	bb *buffer       // Wrapper for *bytes.Buffer inputs
	br *bytesReader  // Wrapper for *bytes.Reader inputs
	sr *stringReader // Wrapper for *strings.Reader inputs
	bu *bufio.Reader // Fallback wrapper for unknown reader types
}
|
||||
|
||||
// Init initializes the bit Reader to read from r. If bigEndian is true, then
// bits will be read starting from the most-significant bits of a byte
// (as done in bzip2), otherwise it will read starting from the
// least-significant bits of a byte (such as for deflate and brotli).
func (pr *Reader) Init(r io.Reader, bigEndian bool) {
	*pr = Reader{
		rd:        r,
		bigEndian: bigEndian,

		// Carry over the lazily allocated wrappers so that repeated Init
		// calls do not reallocate them.
		bb: pr.bb,
		br: pr.br,
		sr: pr.sr,
		bu: pr.bu,
	}
	// Pick the fastest access path available for the concrete reader type.
	switch rr := r.(type) {
	case *bytes.Buffer:
		if pr.bb == nil {
			pr.bb = new(buffer)
		}
		*pr.bb = buffer{Buffer: rr}
		pr.bufRd = pr.bb
	case *bytes.Reader:
		if pr.br == nil {
			pr.br = new(bytesReader)
		}
		*pr.br = bytesReader{Reader: rr}
		pr.bufRd = pr.br
	case *strings.Reader:
		if pr.sr == nil {
			pr.sr = new(stringReader)
		}
		*pr.sr = stringReader{Reader: rr}
		pr.bufRd = pr.sr
	case compress.BufferedReader:
		pr.bufRd = rr
	case compress.ByteReader:
		pr.byteRd = rr
	default:
		// Unknown reader type: wrap with bufio so the buffered fast path
		// can still be used (at the cost of possible over-reading).
		if pr.bu == nil {
			pr.bu = bufio.NewReader(nil)
		}
		pr.bu.Reset(r)
		pr.rd, pr.bufRd = pr.bu, pr.bu
	}
}
|
||||
|
||||
// BitsRead reports the total number of bits emitted from any Read method.
func (pr *Reader) BitsRead() int64 {
	// Default: bytes consumed from rd, minus bits still held in bufBits.
	offset := 8*pr.Offset - int64(pr.numBits)
	if pr.bufRd != nil {
		// Buffered mode: Offset only advances when Flush discards bytes, so
		// instead add the bits that are pending discard.
		discardBits := pr.discardBits + int(pr.fedBits-pr.numBits)
		offset = 8*pr.Offset + int64(discardBits)
	}
	return offset
}
|
||||
|
||||
// IsBufferedReader reports whether the underlying io.Reader is also a
// compress.BufferedReader.
//
// Init sets bufRd for every buffered input path (including the bufio
// fallback), so its presence is the marker.
func (pr *Reader) IsBufferedReader() bool {
	return pr.bufRd != nil
}
|
||||
|
||||
// ReadPads reads 0-7 bits from the bit buffer to achieve byte-alignment.
|
||||
func (pr *Reader) ReadPads() uint {
|
||||
nb := pr.numBits % 8
|
||||
val := uint(pr.bufBits & uint64(1<<nb-1))
|
||||
pr.bufBits >>= nb
|
||||
pr.numBits -= nb
|
||||
return val
|
||||
}
|
||||
|
||||
// Read reads bytes into buf.
// The bit-ordering mode does not affect this method.
func (pr *Reader) Read(buf []byte) (cnt int, err error) {
	if pr.numBits > 0 {
		// Drain whole bytes out of the bit buffer first; this is only legal
		// when the stream is currently byte-aligned.
		if pr.numBits%8 != 0 {
			return 0, errorf(errors.Invalid, "non-aligned bit buffer")
		}
		for cnt = 0; len(buf) > cnt && pr.numBits > 0; cnt++ {
			if pr.bigEndian {
				// Undo the per-byte bit reversal applied when filling bufBits.
				buf[cnt] = internal.ReverseLUT[byte(pr.bufBits)]
			} else {
				buf[cnt] = byte(pr.bufBits)
			}
			pr.bufBits >>= 8
			pr.numBits -= 8
		}
		return cnt, nil
	}
	// Synchronize the underlying reader's position before reading directly.
	if _, err := pr.Flush(); err != nil {
		return 0, err
	}
	cnt, err = pr.rd.Read(buf)
	pr.Offset += int64(cnt)
	return cnt, err
}
|
||||
|
||||
// ReadOffset reads an offset value using the provided RangeCodes indexed by
|
||||
// the symbol read.
|
||||
func (pr *Reader) ReadOffset(pd *Decoder, rcs RangeCodes) uint {
|
||||
rc := rcs[pr.ReadSymbol(pd)]
|
||||
return uint(rc.Base) + pr.ReadBits(uint(rc.Len))
|
||||
}
|
||||
|
||||
// TryReadBits attempts to read nb bits using the contents of the bit buffer
// alone. It returns the value and whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pr *Reader) TryReadBits(nb uint) (uint, bool) {
	if pr.numBits < nb {
		return 0, false // Not enough buffered bits; caller falls back to ReadBits
	}
	val := uint(pr.bufBits & uint64(1<<nb-1)) // Extract the low nb bits
	pr.bufBits >>= nb
	pr.numBits -= nb
	return val, true
}
|
||||
|
||||
// ReadBits reads nb bits in from the underlying reader.
// Read failures are reported by panicking (via errors.Panic) rather than by
// an error return, keeping the hot path free of error plumbing.
func (pr *Reader) ReadBits(nb uint) uint {
	if err := pr.PullBits(nb); err != nil {
		errors.Panic(err)
	}
	val := uint(pr.bufBits & uint64(1<<nb-1)) // Extract the low nb bits
	pr.bufBits >>= nb
	pr.numBits -= nb
	return val
}
|
||||
|
||||
// TryReadSymbol attempts to decode the next symbol using the contents of the
// bit buffer alone. It returns the decoded symbol and whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pr *Reader) TryReadSymbol(pd *Decoder) (uint, bool) {
	if pr.numBits < uint(pd.MinBits) || len(pd.chunks) == 0 {
		return 0, false
	}
	// Single-level table lookup only; a chunk packs (symbol<<countBits | nb).
	chunk := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
	nb := uint(chunk & countMask)
	if nb > pr.numBits || nb > uint(pd.chunkBits) {
		// Either not enough buffered bits, or the code needs the secondary
		// link tables; fall back to ReadSymbol.
		return 0, false
	}
	pr.bufBits >>= nb
	pr.numBits -= nb
	return uint(chunk >> countBits), true
}
|
||||
|
||||
// ReadSymbol reads the next symbol using the provided prefix Decoder.
// Read failures are reported by panicking (via errors.Panic).
func (pr *Reader) ReadSymbol(pd *Decoder) uint {
	if len(pd.chunks) == 0 {
		panicf(errors.Invalid, "decode with empty prefix tree")
	}

	nb := uint(pd.MinBits)
	for {
		if err := pr.PullBits(nb); err != nil {
			errors.Panic(err)
		}
		// First-level lookup; a chunk packs (symbol<<countBits | length).
		chunk := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
		nb = uint(chunk & countMask)
		if nb > uint(pd.chunkBits) {
			// Long code: the chunk holds an index into a secondary table.
			linkIdx := chunk >> countBits
			chunk = pd.links[linkIdx][uint32(pr.bufBits>>pd.chunkBits)&pd.linkMask]
			nb = uint(chunk & countMask)
		}
		if nb <= pr.numBits {
			pr.bufBits >>= nb
			pr.numBits -= nb
			return uint(chunk >> countBits)
		}
		// Not enough buffered bits for this code; loop pulls more and retries.
	}
}
|
||||
|
||||
// Flush updates the read offset of the underlying ByteReader.
// If reader is a compress.BufferedReader, then this calls Discard to update
// the read offset.
func (pr *Reader) Flush() (int64, error) {
	// Non-buffered readers track Offset eagerly; nothing to do.
	if pr.bufRd == nil {
		return pr.Offset, nil
	}

	// Update the number of total bits to discard.
	pr.discardBits += int(pr.fedBits - pr.numBits)
	pr.fedBits = pr.numBits

	// Discard some bytes to update read offset.
	var err error
	nd := (pr.discardBits + 7) / 8 // Round up to nearest byte
	nd, err = pr.bufRd.Discard(nd)
	pr.discardBits -= nd * 8 // -7..0
	pr.Offset += int64(nd)

	// These are invalid after Discard.
	pr.bufPeek = nil
	return pr.Offset, err
}
|
||||
|
||||
// PullBits ensures that at least nb bits exist in the bit buffer.
// If the underlying reader is a compress.BufferedReader, then this will fill
// the bit buffer with as many bits as possible, relying on Peek and Discard to
// properly advance the read offset. Otherwise, it will use ReadByte to fill the
// buffer with just the right number of bits.
func (pr *Reader) PullBits(nb uint) error {
	if pr.bufRd != nil {
		pr.discardBits += int(pr.fedBits - pr.numBits)
		for {
			if len(pr.bufPeek) == 0 {
				pr.fedBits = pr.numBits // Don't discard bits just added
				if _, err := pr.Flush(); err != nil {
					return err
				}

				// Peek no more bytes than necessary.
				// The computation for cntPeek computes the minimum number of
				// bytes to Peek to fill nb bits.
				var err error
				cntPeek := int(nb+(-nb&7)) / 8 // nb rounded up to whole bytes
				if cntPeek < pr.bufRd.Buffered() {
					// Grab everything already buffered; it costs no extra read.
					cntPeek = pr.bufRd.Buffered()
				}
				pr.bufPeek, err = pr.bufRd.Peek(cntPeek)
				pr.bufPeek = pr.bufPeek[int(pr.numBits/8):] // Skip buffered bits
				if len(pr.bufPeek) == 0 {
					if pr.numBits >= nb {
						break
					}
					if err == io.EOF {
						err = io.ErrUnexpectedEOF
					}
					return err
				}
			}

			n := int(64-pr.numBits) / 8 // Number of bytes to copy to bit buffer
			if len(pr.bufPeek) >= 8 {
				// Starting with Go 1.7, the compiler should use a wide integer
				// load here if the architecture supports it.
				u := binary.LittleEndian.Uint64(pr.bufPeek)
				if pr.bigEndian {
					// Swap all the bits within each byte.
					u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
					u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
					u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
				}

				pr.bufBits |= u << pr.numBits
				pr.numBits += uint(n * 8)
				pr.bufPeek = pr.bufPeek[n:]
				break
			} else {
				// Fewer than 8 peeked bytes remain: feed them in one at a time.
				if n > len(pr.bufPeek) {
					n = len(pr.bufPeek)
				}
				for _, c := range pr.bufPeek[:n] {
					if pr.bigEndian {
						c = internal.ReverseLUT[c]
					}
					pr.bufBits |= uint64(c) << pr.numBits
					pr.numBits += 8
				}
				pr.bufPeek = pr.bufPeek[n:]
				if pr.numBits > 56 {
					break // No room for another whole byte
				}
			}
		}
		pr.fedBits = pr.numBits
	} else {
		// ByteReader path: read exactly as many bytes as nb requires.
		for pr.numBits < nb {
			c, err := pr.byteRd.ReadByte()
			if err != nil {
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				return err
			}
			if pr.bigEndian {
				c = internal.ReverseLUT[c]
			}
			pr.bufBits |= uint64(c) << pr.numBits
			pr.numBits += 8
			pr.Offset++
		}
	}
	return nil
}
|
|
@ -0,0 +1,146 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// For some of the common Readers, we wrap and extend them to satisfy the
|
||||
// compress.BufferedReader interface to improve performance.
|
||||
|
||||
// buffer extends *bytes.Buffer with Buffered/Peek/Discard so it satisfies the
// compress.BufferedReader interface.
type buffer struct {
	*bytes.Buffer
}
|
||||
|
||||
// bytesReader extends *bytes.Reader with Buffered/Peek/Discard so it satisfies
// the compress.BufferedReader interface. Peeked data is staged in a small
// local array to avoid allocations.
type bytesReader struct {
	*bytes.Reader
	pos int64     // Reader position corresponding to the start of buf
	buf []byte    // Valid window into arr (see update)
	arr [512]byte // Backing storage for Peek results
}
|
||||
|
||||
// stringReader extends *strings.Reader with Buffered/Peek/Discard so it
// satisfies the compress.BufferedReader interface. It mirrors bytesReader.
type stringReader struct {
	*strings.Reader
	pos int64     // Reader position corresponding to the start of buf
	buf []byte    // Valid window into arr (see update)
	arr [512]byte // Backing storage for Peek results
}
|
||||
|
||||
// Buffered reports the number of bytes available without blocking, which for
// an in-memory buffer is everything unread.
func (r *buffer) Buffered() int {
	return r.Len()
}
|
||||
|
||||
func (r *buffer) Peek(n int) ([]byte, error) {
|
||||
b := r.Bytes()
|
||||
if len(b) < n {
|
||||
return b, io.EOF
|
||||
}
|
||||
return b[:n], nil
|
||||
}
|
||||
|
||||
func (r *buffer) Discard(n int) (int, error) {
|
||||
b := r.Next(n)
|
||||
if len(b) < n {
|
||||
return len(b), io.EOF
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (r *bytesReader) Buffered() int {
|
||||
r.update()
|
||||
if r.Len() > len(r.buf) {
|
||||
return len(r.buf)
|
||||
}
|
||||
return r.Len()
|
||||
}
|
||||
|
||||
// Peek returns the next n bytes without advancing the reader.
// n is limited to the size of the local array; larger requests fail with
// io.ErrShortBuffer. If fewer than n bytes remain, the shorter prefix is
// returned with the error from ReadAt (typically io.EOF).
func (r *bytesReader) Peek(n int) ([]byte, error) {
	if n > len(r.arr) {
		return nil, io.ErrShortBuffer
	}

	// Return sub-slice of local buffer if possible.
	r.update()
	if len(r.buf) >= n {
		return r.buf[:n], nil
	}

	// Fill entire local buffer, and return appropriate sub-slice.
	// ReadAt does not move the read position, so Discard/Seek stays accurate.
	cnt, err := r.ReadAt(r.arr[:], r.pos)
	r.buf = r.arr[:cnt]
	if cnt < n {
		return r.arr[:cnt], err
	}
	return r.arr[:n], nil
}
|
||||
|
||||
func (r *bytesReader) Discard(n int) (int, error) {
|
||||
var err error
|
||||
if n > r.Len() {
|
||||
n, err = r.Len(), io.EOF
|
||||
}
|
||||
r.Seek(int64(n), io.SeekCurrent)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// update reslices the internal buffer to be consistent with the read offset.
func (r *bytesReader) update() {
	// Current absolute position (Seek with zero offset is a pure query).
	pos, _ := r.Seek(0, io.SeekCurrent)
	if off := pos - r.pos; off >= 0 && off < int64(len(r.buf)) {
		// The reader advanced within the peeked window; slide the window.
		r.buf, r.pos = r.buf[off:], pos
	} else {
		// Moved backward or past the window; the cached bytes are stale.
		r.buf, r.pos = nil, pos
	}
}
|
||||
|
||||
func (r *stringReader) Buffered() int {
|
||||
r.update()
|
||||
if r.Len() > len(r.buf) {
|
||||
return len(r.buf)
|
||||
}
|
||||
return r.Len()
|
||||
}
|
||||
|
||||
// Peek returns the next n bytes without advancing the reader.
// n is limited to the size of the local array; larger requests fail with
// io.ErrShortBuffer. If fewer than n bytes remain, the shorter prefix is
// returned with the error from ReadAt (typically io.EOF).
func (r *stringReader) Peek(n int) ([]byte, error) {
	if n > len(r.arr) {
		return nil, io.ErrShortBuffer
	}

	// Return sub-slice of local buffer if possible.
	r.update()
	if len(r.buf) >= n {
		return r.buf[:n], nil
	}

	// Fill entire local buffer, and return appropriate sub-slice.
	// ReadAt does not move the read position, so Discard/Seek stays accurate.
	cnt, err := r.ReadAt(r.arr[:], r.pos)
	r.buf = r.arr[:cnt]
	if cnt < n {
		return r.arr[:cnt], err
	}
	return r.arr[:n], nil
}
|
||||
|
||||
func (r *stringReader) Discard(n int) (int, error) {
|
||||
var err error
|
||||
if n > r.Len() {
|
||||
n, err = r.Len(), io.EOF
|
||||
}
|
||||
r.Seek(int64(n), io.SeekCurrent)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// update reslices the internal buffer to be consistent with the read offset.
func (r *stringReader) update() {
	// Current absolute position (Seek with zero offset is a pure query).
	pos, _ := r.Seek(0, io.SeekCurrent)
	if off := pos - r.pos; off >= 0 && off < int64(len(r.buf)) {
		// The reader advanced within the peeked window; slide the window.
		r.buf, r.pos = r.buf[off:], pos
	} else {
		// Moved backward or past the window; the cached bytes are stale.
		r.buf, r.pos = nil, pos
	}
}
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package prefix
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
)
|
||||
|
||||
// Writer implements a prefix encoder. For performance reasons, Writer will not
// write bytes immediately to the underlying stream.
type Writer struct {
	Offset int64 // Number of bytes written to the underlying io.Writer

	wr        io.Writer
	bufBits   uint64 // Buffer to hold some bits
	numBits   uint   // Number of valid bits in bufBits
	bigEndian bool   // Are bits written in big-endian order?

	// Completed bytes are staged here before being flushed to wr.
	buf    [512]byte
	cntBuf int // Number of valid bytes in buf
}
|
||||
|
||||
// Init initializes the bit Writer to write to w. If bigEndian is true, then
|
||||
// bits will be written starting from the most-significant bits of a byte
|
||||
// (as done in bzip2), otherwise it will write starting from the
|
||||
// least-significant bits of a byte (such as for deflate and brotli).
|
||||
func (pw *Writer) Init(w io.Writer, bigEndian bool) {
|
||||
*pw = Writer{wr: w, bigEndian: bigEndian}
|
||||
return
|
||||
}
|
||||
|
||||
// BitsWritten reports the total number of bits issued to any Write method.
|
||||
func (pw *Writer) BitsWritten() int64 {
|
||||
return 8*pw.Offset + 8*int64(pw.cntBuf) + int64(pw.numBits)
|
||||
}
|
||||
|
||||
// WritePads writes 0-7 bits to the bit buffer to achieve byte-alignment.
// The value v supplies the padding bit pattern.
func (pw *Writer) WritePads(v uint) {
	nb := -pw.numBits & 7 // Bits needed to reach the next byte boundary
	pw.bufBits |= uint64(v) << pw.numBits
	pw.numBits += nb
}
|
||||
|
||||
// Write writes bytes from buf.
// The bit-ordering mode does not affect this method.
func (pw *Writer) Write(buf []byte) (cnt int, err error) {
	if pw.numBits > 0 || pw.cntBuf > 0 {
		// Pending bits must end on a byte boundary before a direct write.
		if pw.numBits%8 != 0 {
			return 0, errorf(errors.Invalid, "non-aligned bit buffer")
		}
		// Drain the bit and byte buffers so ordering is preserved.
		if _, err := pw.Flush(); err != nil {
			return 0, err
		}
	}
	cnt, err = pw.wr.Write(buf)
	pw.Offset += int64(cnt)
	return cnt, err
}
|
||||
|
||||
// WriteOffset writes ofs in a (sym, extra) fashion using the provided prefix
|
||||
// Encoder and RangeEncoder.
|
||||
func (pw *Writer) WriteOffset(ofs uint, pe *Encoder, re *RangeEncoder) {
|
||||
sym := re.Encode(ofs)
|
||||
pw.WriteSymbol(sym, pe)
|
||||
rc := re.rcs[sym]
|
||||
pw.WriteBits(ofs-uint(rc.Base), uint(rc.Len))
|
||||
}
|
||||
|
||||
// TryWriteBits attempts to write nb bits using the contents of the bit buffer
// alone. It reports whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pw *Writer) TryWriteBits(v, nb uint) bool {
	if 64-pw.numBits < nb {
		return false // Bit buffer full; caller falls back to WriteBits
	}
	pw.bufBits |= uint64(v) << pw.numBits
	pw.numBits += nb
	return true
}
|
||||
|
||||
// WriteBits writes nb bits of v to the underlying writer.
// Write failures are reported by panicking (via errors.Panic).
func (pw *Writer) WriteBits(v, nb uint) {
	// PushBits drains completed bytes first so the bit buffer has room.
	if _, err := pw.PushBits(); err != nil {
		errors.Panic(err)
	}
	pw.bufBits |= uint64(v) << pw.numBits
	pw.numBits += nb
}
|
||||
|
||||
// TryWriteSymbol attempts to encode the next symbol using the contents of the
// bit buffer alone. It reports whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pw *Writer) TryWriteSymbol(sym uint, pe *Encoder) bool {
	// A chunk packs (code<<countBits | bit-length).
	chunk := pe.chunks[uint32(sym)&pe.chunkMask]
	nb := uint(chunk & countMask)
	if 64-pw.numBits < nb {
		return false // Bit buffer full; caller falls back to WriteSymbol
	}
	pw.bufBits |= uint64(chunk>>countBits) << pw.numBits
	pw.numBits += nb
	return true
}
|
||||
|
||||
// WriteSymbol writes the symbol using the provided prefix Encoder.
// Write failures are reported by panicking (via errors.Panic).
func (pw *Writer) WriteSymbol(sym uint, pe *Encoder) {
	// PushBits drains completed bytes first so the bit buffer has room.
	if _, err := pw.PushBits(); err != nil {
		errors.Panic(err)
	}
	// A chunk packs (code<<countBits | bit-length).
	chunk := pe.chunks[uint32(sym)&pe.chunkMask]
	nb := uint(chunk & countMask)
	pw.bufBits |= uint64(chunk>>countBits) << pw.numBits
	pw.numBits += nb
}
|
||||
|
||||
// Flush flushes all complete bytes from the bit buffer to the byte buffer, and
// then flushes all bytes in the byte buffer to the underlying writer.
// After this call, the bit Writer will only withhold 7 bits at most.
func (pw *Writer) Flush() (int64, error) {
	// Fast path: nothing buffered that could be flushed.
	if pw.numBits < 8 && pw.cntBuf == 0 {
		return pw.Offset, nil
	}
	// Move complete bytes from the bit buffer into the byte buffer.
	if _, err := pw.PushBits(); err != nil {
		return pw.Offset, err
	}
	// Drain the byte buffer to the underlying writer; a short write leaves
	// the unwritten remainder accounted for in cntBuf.
	cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf])
	pw.cntBuf -= cnt
	pw.Offset += int64(cnt)
	return pw.Offset, err
}
|
||||
|
||||
// PushBits pushes as many bytes as possible from the bit buffer to the byte
// buffer, reporting the number of bits pushed.
func (pw *Writer) PushBits() (uint, error) {
	// If fewer than 8 spare bytes remain, drain the byte buffer to the
	// underlying writer so the unconditional 8-byte store below cannot
	// overrun pw.buf.
	if pw.cntBuf >= len(pw.buf)-8 {
		cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf])
		pw.cntBuf -= cnt
		pw.Offset += int64(cnt)
		if err != nil {
			return 0, err
		}
	}

	u := pw.bufBits
	if pw.bigEndian {
		// Swap all the bits within each byte (bit-reversal in three
		// mask-and-shift rounds: pairs, nibbles, then half-bytes).
		u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
		u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
		u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
	}
	// Starting with Go 1.7, the compiler should use a wide integer
	// store here if the architecture supports it.
	binary.LittleEndian.PutUint64(pw.buf[pw.cntBuf:], u)

	nb := pw.numBits / 8 // Number of bytes to copy from bit buffer
	pw.cntBuf += int(nb)
	pw.bufBits >>= 8 * nb
	pw.numBits -= 8 * nb
	return 8 * nb, nil
}
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build !debug,!gofuzz
|
||||
|
||||
package internal
|
||||
|
||||
const (
	// Debug indicates whether the debug build tag was set.
	//
	// If set, programs may choose to print with more human-readable
	// debug information and also perform sanity checks that would otherwise
	// be too expensive to run in a release build.
	Debug = false

	// GoFuzz indicates whether the gofuzz build tag was set.
	//
	// If set, programs may choose to disable certain checks (like checksums)
	// that would be nearly impossible for gofuzz to properly get right.
	// If GoFuzz is set, it implies that Debug is set as well.
	GoFuzz = false
)
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/bash
#
# Copyright 2017, Joe Tsai. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE.md file.

# zbench wraps internal/tool/bench and is useful for comparing benchmarks from
# the implementations in this repository relative to other implementations.
#
# See internal/tool/bench/main.go for more details.

# Abort if the bench tool directory cannot be entered (SC2164); otherwise
# `go run` below would silently execute from the caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")/internal/tool/bench" || exit 1
# The go-list substitution is intentionally unquoted: it expands to one
# argument per Go source file.
go run $(go list -f '{{ join .GoFiles "\n" }}') "$@"
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
#
# Copyright 2017, Joe Tsai. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE.md file.

# zfuzz wraps internal/tool/fuzz and is useful for fuzz testing each of
# the implementations in this repository.

# Abort if the fuzz directory cannot be entered (SC2164); otherwise ./fuzz.sh
# would be resolved relative to the caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")/internal/tool/fuzz" || exit 1
./fuzz.sh "$@"
|
|
@ -0,0 +1,54 @@
|
|||
#!/bin/bash
#
# Copyright 2017, Joe Tsai. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE.md file.

if [ $# == 0 ]; then
	echo "Usage: $0 PKG_PATH TEST_ARGS..."
	echo ""
	echo "Runs coverage and performance benchmarks for a given package."
	echo "The results are stored in the _zprof_ directory."
	echo ""
	echo "Example:"
	echo "	$0 flate -test.bench=Decode/Twain/Default"
	exit 1
fi

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PKG_PATH=$1
PKG_NAME=$(basename $PKG_PATH)
shift

# Scratch space for raw profiles; cleaned up (with the test binary) on exit.
TMPDIR=$(mktemp -d)
trap "rm -rf $TMPDIR $PKG_PATH/$PKG_NAME.test" SIGINT SIGTERM EXIT

(
	# Abort if the package directory cannot be entered (SC2164); otherwise
	# coverage and profiling would run against the wrong package.
	cd "$DIR/$PKG_PATH" || exit 1

	# Print the go version.
	go version

	# Perform coverage profiling.
	go test github.com/dsnet/compress/$PKG_PATH -coverprofile $TMPDIR/cover.profile
	if [ $? != 0 ]; then exit 1; fi
	go tool cover -html $TMPDIR/cover.profile -o cover.html

	# Perform performance profiling.
	if [ $# != 0 ]; then
		go test -c github.com/dsnet/compress/$PKG_PATH
		if [ $? != 0 ]; then exit 1; fi
		./$PKG_NAME.test -test.cpuprofile $TMPDIR/cpu.profile -test.memprofile $TMPDIR/mem.profile -test.run - "$@"
		PPROF="go tool pprof"
		$PPROF -output=cpu.svg -web $PKG_NAME.test $TMPDIR/cpu.profile 2> /dev/null
		$PPROF -output=cpu.html -weblist=. $PKG_NAME.test $TMPDIR/cpu.profile 2> /dev/null
		$PPROF -output=mem_objects.svg -alloc_objects -web $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
		$PPROF -output=mem_objects.html -alloc_objects -weblist=. $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
		$PPROF -output=mem_space.svg -alloc_space -web $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
		$PPROF -output=mem_space.html -alloc_space -weblist=. $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null
	fi

	# Publish the rendered reports under _zprof_/<package>.
	rm -rf $DIR/_zprof_/$PKG_NAME
	mkdir -p $DIR/_zprof_/$PKG_NAME
	mv *.html *.svg $DIR/_zprof_/$PKG_NAME 2> /dev/null
)
|
|
@ -0,0 +1,54 @@
|
|||
#!/bin/bash
#
# Copyright 2017, Joe Tsai. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE.md file.

# Abort if the module root cannot be resolved/entered (SC2164); otherwise the
# checks below would run over whatever tree the caller happens to be in.
cd "$(go list -f '{{ .Dir }}' github.com/dsnet/compress)" || exit 1

BOLD="\x1b[1mRunning: "
PASS="\x1b[32mPASS"
FAIL="\x1b[31mFAIL"
RESET="\x1b[0m"

# gofmt: report any file that is not canonically formatted.
echo -e "${BOLD}fmt${RESET}"
RET_FMT=$(find . -name "*.go" | egrep -v "/(_.*_|\..*|testdata)/" | xargs gofmt -d)
if [[ ! -z "$RET_FMT" ]]; then echo "$RET_FMT"; echo; fi

# go test: keep only failing output (drop the "ok"/"?" summary lines).
echo -e "${BOLD}test${RESET}"
RET_TEST=$(go test -race ./... | egrep -v "^(ok|[?])\s+")
if [[ ! -z "$RET_TEST" ]]; then echo "$RET_TEST"; echo; fi

# staticcheck, with a per-package ignore list of known findings.
echo -e "${BOLD}staticcheck${RESET}"
RET_SCHK=$(staticcheck \
	-ignore "
	github.com/dsnet/compress/brotli/*.go:SA4016
	github.com/dsnet/compress/brotli/*.go:S1023
	github.com/dsnet/compress/brotli/*.go:U1000
	github.com/dsnet/compress/bzip2/*.go:S1023
	github.com/dsnet/compress/flate/*.go:U1000
	github.com/dsnet/compress/internal/cgo/lzma/*.go:SA4000
	github.com/dsnet/compress/internal/prefix/*.go:S1004
	github.com/dsnet/compress/internal/prefix/*.go:S1023
	github.com/dsnet/compress/internal/prefix/*.go:SA4016
	github.com/dsnet/compress/internal/tool/bench/*.go:S1007
	github.com/dsnet/compress/xflate/internal/meta/*.go:S1023
	" ./... 2>&1)
if [[ ! -z "$RET_SCHK" ]]; then echo "$RET_SCHK"; echo; fi

# golint, filtering out accepted deviations from its style suggestions.
echo -e "${BOLD}lint${RESET}"
RET_LINT=$(golint ./... 2>&1 |
	egrep -v "^vendor/" |
	egrep -v "should have comment(.*)or be unexported" |
	egrep -v "^(.*)type name will be used as(.*)by other packages" |
	egrep -v "^brotli/transform.go:(.*)replace i [+]= 1 with i[+]{2}" |
	egrep -v "^internal/prefix/prefix.go:(.*)replace symBits(.*) [-]= 1 with symBits(.*)[-]{2}" |
	egrep -v "^xflate/common.go:(.*)NoCompression should be of the form" |
	egrep -v "^exit status")
if [[ ! -z "$RET_LINT" ]]; then echo "$RET_LINT"; echo; fi

# Overall verdict: fail if any check produced output.
if [[ ! -z "$RET_FMT" ]] || [ ! -z "$RET_TEST" ] || [[ ! -z "$RET_SCHK" ]] || [[ ! -z "$RET_LINT" ]]; then
	echo -e "${FAIL}${RESET}"; exit 1
else
	echo -e "${PASS}${RESET}"; exit 0
fi
|
|
@ -86,6 +86,10 @@
|
|||
|
||||
## Log
|
||||
|
||||
### 2019-02-20
|
||||
|
||||
Release v0.5.6 supports the go.mod file.
|
||||
|
||||
### 2018-10-28
|
||||
|
||||
Release v0.5.5 fixes issue #19 concerning ErrLimit outputs.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
module github.com/ulikunitz/xz
|
|
@ -290,6 +290,14 @@ github.com/digitalocean/go-qemu/qmp
|
|||
github.com/digitalocean/godo
|
||||
# github.com/dimchansky/utfbom v1.1.0
|
||||
github.com/dimchansky/utfbom
|
||||
# github.com/dsnet/compress v0.0.1
|
||||
## explicit
|
||||
github.com/dsnet/compress
|
||||
github.com/dsnet/compress/bzip2
|
||||
github.com/dsnet/compress/bzip2/internal/sais
|
||||
github.com/dsnet/compress/internal
|
||||
github.com/dsnet/compress/internal/errors
|
||||
github.com/dsnet/compress/internal/prefix
|
||||
# github.com/dylanmei/iso8601 v0.1.0
|
||||
github.com/dylanmei/iso8601
|
||||
# github.com/exoscale/egoscale v0.43.1
|
||||
|
@ -775,7 +783,7 @@ github.com/ucloud/ucloud-sdk-go/ucloud/version
|
|||
github.com/ufilesdk-dev/ufile-gosdk
|
||||
# github.com/ugorji/go/codec v1.2.4
|
||||
github.com/ugorji/go/codec
|
||||
# github.com/ulikunitz/xz v0.5.5
|
||||
# github.com/ulikunitz/xz v0.5.6
|
||||
## explicit
|
||||
github.com/ulikunitz/xz
|
||||
github.com/ulikunitz/xz/internal/hash
|
||||
|
|
Loading…
Reference in New Issue