From 52269b66b98e119bb4600a470007997c6476dafc Mon Sep 17 00:00:00 2001 From: Chris Bednarski Date: Wed, 10 Jun 2015 12:30:18 -0700 Subject: [PATCH] Added new compress post-processor, contributed by Vasiliy Tolstov --- post-processor/compress/LICENSE | 21 ++ post-processor/compress/artifact.go | 32 +- post-processor/compress/benchmark.go | 197 ++++++++++ post-processor/compress/post-processor.go | 419 +++++++++++++++++++--- 4 files changed, 600 insertions(+), 69 deletions(-) create mode 100644 post-processor/compress/LICENSE create mode 100644 post-processor/compress/benchmark.go diff --git a/post-processor/compress/LICENSE b/post-processor/compress/LICENSE new file mode 100644 index 000000000..38bbf26f3 --- /dev/null +++ b/post-processor/compress/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Vasiliy Tolstov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/post-processor/compress/artifact.go b/post-processor/compress/artifact.go index 34a7ce8d6..f428a3b55 100644 --- a/post-processor/compress/artifact.go +++ b/post-processor/compress/artifact.go @@ -5,40 +5,34 @@ import ( "os" ) -const BuilderId = "packer.post-processor.compress" +const BuilderId = "vtolstov.compress" type Artifact struct { - Path string - Provider string + builderId string + dir string + f []string } -func NewArtifact(provider, path string) *Artifact { - return &Artifact{ - Path: path, - Provider: provider, - } -} - -func (*Artifact) BuilderId() string { +func (a *Artifact) BuilderId() string { return BuilderId } -func (self *Artifact) Id() string { - return "" +func (a *Artifact) Files() []string { + return a.f } -func (self *Artifact) Files() []string { - return []string{self.Path} +func (*Artifact) Id() string { + return "COMPRESS" } -func (self *Artifact) String() string { - return fmt.Sprintf("'%s' compressing: %s", self.Provider, self.Path) +func (a *Artifact) String() string { + return fmt.Sprintf("VM compressed files in directory: %s", a.dir) } func (*Artifact) State(name string) interface{} { return nil } -func (self *Artifact) Destroy() error { - return os.Remove(self.Path) +func (a *Artifact) Destroy() error { + return os.RemoveAll(a.dir) } diff --git a/post-processor/compress/benchmark.go b/post-processor/compress/benchmark.go new file mode 100644 index 000000000..a2585bc89 --- /dev/null +++ b/post-processor/compress/benchmark.go @@ -0,0 +1,197 @@ +// +build ignore + +package main + +import ( + "compress/flate" + gzip "compress/gzip" + "io" + "io/ioutil" + "fmt" + "os" + "runtime" + "testing" + + bgzf "github.com/biogo/hts/bgzf" + pgzip "github.com/klauspost/pgzip" + lz4 "github.com/pierrec/lz4" +) + +type Compressor struct { + r *os.File + w *os.File + sr int64 + sw int64 +} + +func (c *Compressor) Close() error { + var err error + + fi, _ := c.w.Stat() + c.sw = fi.Size() + if err = c.w.Close(); err != nil { + return err + } + + fi, _ = c.r.Stat() + c.sr = fi.Size() + if err = c.r.Close(); err != nil { + return err + } + + return nil +} + +func NewCompressor(src, dst string) (*Compressor, error) { + r, err := os.Open(src) + if err != nil { + return nil, err + } + + w, err := os.Create(dst) + if err != nil { + r.Close() + return nil, err + } + + c := &Compressor{r: r, w: w} + return c, nil +} + +func main() { + + runtime.GOMAXPROCS(runtime.NumCPU()) + + var resw testing.BenchmarkResult + var resr testing.BenchmarkResult + + c, err := NewCompressor("/tmp/image.r", "/tmp/image.w") + if err != nil { + panic(err) + } + resw = testing.Benchmark(c.BenchmarkGZIPWriter) + c.w.Seek(0, 0) + resr = testing.Benchmark(c.BenchmarkGZIPReader) + c.Close() + fmt.Printf("gzip:\twriter %s\treader %s\tsize %d\n", resw.T.String(), resr.T.String(), c.sw) + + c, err = NewCompressor("/tmp/image.r", "/tmp/image.w") + if err != nil { + panic(err) + } + resw = testing.Benchmark(c.BenchmarkBGZFWriter) + c.w.Seek(0, 0) + resr = testing.Benchmark(c.BenchmarkBGZFReader) + c.Close() + fmt.Printf("bgzf:\twriter %s\treader %s\tsize %d\n", resw.T.String(), resr.T.String(), c.sw) + + c, err = NewCompressor("/tmp/image.r", "/tmp/image.w") + if err != nil { + panic(err) + } + resw = testing.Benchmark(c.BenchmarkPGZIPWriter) + c.w.Seek(0, 0) + resr = testing.Benchmark(c.BenchmarkPGZIPReader) + c.Close() + fmt.Printf("pgzip:\twriter %s\treader %s\tsize %d\n", resw.T.String(), resr.T.String(), c.sw) + + c, err = NewCompressor("/tmp/image.r", "/tmp/image.w") + if err != nil { + panic(err) + } + resw = testing.Benchmark(c.BenchmarkLZ4Writer) + c.w.Seek(0, 0) + resr = testing.Benchmark(c.BenchmarkLZ4Reader) + c.Close() + fmt.Printf("lz4:\twriter %s\treader %s\tsize %d\n", resw.T.String(), resr.T.String(), c.sw) + +} + +func (c *Compressor) BenchmarkGZIPWriter(b *testing.B) { + cw, _ := gzip.NewWriterLevel(c.w, flate.BestSpeed) + b.ResetTimer() + + _, err := io.Copy(cw, c.r) + if err != nil { + b.Fatal(err) + } + cw.Close() + c.w.Sync() +} + +func (c *Compressor) BenchmarkGZIPReader(b *testing.B) { + cr, _ := gzip.NewReader(c.w) + b.ResetTimer() + + _, err := io.Copy(ioutil.Discard, cr) + if err != nil { + b.Fatal(err) + } +} + +func (c *Compressor) BenchmarkBGZFWriter(b *testing.B) { + cw, _ := bgzf.NewWriterLevel(c.w, flate.BestSpeed, runtime.NumCPU()) + b.ResetTimer() + + _, err := io.Copy(cw, c.r) + if err != nil { + b.Fatal(err) + } + c.w.Sync() +} + +func (c *Compressor) BenchmarkBGZFReader(b *testing.B) { + cr, _ := bgzf.NewReader(c.w, 0) + b.ResetTimer() + + _, err := io.Copy(ioutil.Discard, cr) + if err != nil { + b.Fatal(err) + } +} + +func (c *Compressor) BenchmarkPGZIPWriter(b *testing.B) { + cw, _ := pgzip.NewWriterLevel(c.w, flate.BestSpeed) + b.ResetTimer() + + _, err := io.Copy(cw, c.r) + if err != nil { + b.Fatal(err) + } + cw.Close() + c.w.Sync() +} + +func (c *Compressor) BenchmarkPGZIPReader(b *testing.B) { + cr, _ := pgzip.NewReader(c.w) + b.ResetTimer() + + _, err := io.Copy(ioutil.Discard, cr) + if err != nil { + b.Fatal(err) + } +} + +func (c *Compressor) BenchmarkLZ4Writer(b *testing.B) { + cw := lz4.NewWriter(c.w) +// cw.Header.HighCompression = true + cw.Header.NoChecksum = true + b.ResetTimer() + + _, err := io.Copy(cw, c.r) + if err != nil { + b.Fatal(err) + } + cw.Close() + c.w.Sync() +} + +func (c *Compressor) BenchmarkLZ4Reader(b *testing.B) { + cr := lz4.NewReader(c.w) + b.ResetTimer() + + _, err := io.Copy(ioutil.Discard, cr) + if err != nil { + b.Fatal(err) + } +} diff --git a/post-processor/compress/post-processor.go b/post-processor/compress/post-processor.go index ccf300946..f62bea858 100644 --- a/post-processor/compress/post-processor.go +++ b/post-processor/compress/post-processor.go @@ -1,98 +1,417 @@ package compress import ( - "archive/tar" - "compress/gzip" + tar "archive/tar" + zip "archive/zip" + "compress/flate" + gzip "compress/gzip" "fmt" "io" "os" + "path/filepath" + "runtime" + "strings" + "time" + bgzf "github.com/biogo/hts/bgzf" + pgzip "github.com/klauspost/pgzip" "github.com/mitchellh/packer/common" - "github.com/mitchellh/packer/helper/config" "github.com/mitchellh/packer/packer" - "github.com/mitchellh/packer/template/interpolate" + lz4 "github.com/pierrec/lz4" + "gopkg.in/yaml.v2" ) +type Metadata map[string]Metaitem + +type Metaitem struct { + CompSize int64 `yaml:"compsize"` + OrigSize int64 `yaml:"origsize"` + CompType string `yaml:"comptype"` + CompDate string `yaml:"compdate"` +} + type Config struct { common.PackerConfig `mapstructure:",squash"` - OutputPath string `mapstructure:"output"` - - ctx interpolate.Context + OutputPath string `mapstructure:"output"` + OutputFile string `mapstructure:"file"` + Compression int `mapstructure:"compression"` + Metadata bool `mapstructure:"metadata"` + NumCPU int `mapstructure:"numcpu"` + Format string `mapstructure:"format"` + KeepInputArtifact bool `mapstructure:"keep_input_artifact"` + tpl *packer.ConfigTemplate } -type PostProcessor struct { - config Config +type CompressPostProcessor struct { + cfg Config } -func (self *PostProcessor) Configure(raws ...interface{}) error { - err := config.Decode(&self.config, &config.DecodeOpts{ - Interpolate: true, - InterpolateFilter: &interpolate.RenderFilter{ - Exclude: []string{}, - }, - }, raws...) +func (p *CompressPostProcessor) Configure(raws ...interface{}) error { + p.cfg.Compression = -1 + _, err := common.DecodeConfig(&p.cfg, raws...) if err != nil { return err } + errs := new(packer.MultiError) + + p.cfg.tpl, err = packer.NewConfigTemplate() + if err != nil { + return err + } + p.cfg.tpl.UserVars = p.cfg.PackerUserVars + + if p.cfg.OutputPath == "" { + p.cfg.OutputPath = "packer_{{.BuildName}}_{{.Provider}}" + } + + if err = p.cfg.tpl.Validate(p.cfg.OutputPath); err != nil { + errs = packer.MultiErrorAppend( + errs, fmt.Errorf("Error parsing target template: %s", err)) + } + + templates := map[string]*string{ + "output": &p.cfg.OutputPath, + } + + if p.cfg.Compression > flate.BestCompression { + p.cfg.Compression = flate.BestCompression + } + if p.cfg.Compression == -1 { + p.cfg.Compression = flate.DefaultCompression + } + + if p.cfg.NumCPU < 1 { + p.cfg.NumCPU = runtime.NumCPU() + } + + runtime.GOMAXPROCS(p.cfg.NumCPU) + + for key, ptr := range templates { + if *ptr == "" { + errs = packer.MultiErrorAppend( + errs, fmt.Errorf("%s must be set", key)) + } + + *ptr, err = p.cfg.tpl.Process(*ptr, nil) + if err != nil { + errs = packer.MultiErrorAppend( + errs, fmt.Errorf("Error processing %s: %s", key, err)) + } + } + + if len(errs.Errors) > 0 { + return errs + } + return nil } -func (self *PostProcessor) PostProcess(ui packer.Ui, artifact packer.Artifact) (packer.Artifact, bool, error) { - ui.Say(fmt.Sprintf("Creating archive for '%s'", artifact.BuilderId())) +func (p *CompressPostProcessor) fillMetadata(metadata Metadata, files []string) Metadata { + // layout shows by example how the reference time should be represented. + const layout = "2006-01-02_15-04-05" + t := time.Now() - // Create the compressed archive file at the appropriate OutputPath. - fw, err := os.Create(self.config.OutputPath) + if !p.cfg.Metadata { + return metadata + } + for _, f := range files { + if fi, err := os.Stat(f); err != nil { + continue + } else { + if i, ok := metadata[filepath.Base(f)]; !ok { + metadata[filepath.Base(f)] = Metaitem{CompType: p.cfg.Format, OrigSize: fi.Size(), CompDate: t.Format(layout)} + } else { + i.CompSize = fi.Size() + i.CompDate = t.Format(layout) + metadata[filepath.Base(f)] = i + } + } + } + return metadata +} + +func (p *CompressPostProcessor) PostProcess(ui packer.Ui, artifact packer.Artifact) (packer.Artifact, bool, error) { + newartifact := &Artifact{builderId: artifact.BuilderId(), dir: p.cfg.OutputPath} + var metafile string = filepath.Join(p.cfg.OutputPath, "metadata") + + _, err := os.Stat(newartifact.dir) + if err == nil { + return nil, false, fmt.Errorf("output dir must not exists: %s", err) + } + err = os.MkdirAll(newartifact.dir, 0755) if err != nil { - return nil, false, fmt.Errorf( - "Failed creating file for compressed archive: %s", self.config.OutputPath) + return nil, false, fmt.Errorf("failed to create output: %s", err) + } + + formats := strings.Split(p.cfg.Format, ".") + files := artifact.Files() + + metadata := make(Metadata, 0) + metadata = p.fillMetadata(metadata, files) + + for _, compress := range formats { + switch compress { + case "tar": + files, err = p.cmpTAR(files, filepath.Join(p.cfg.OutputPath, p.cfg.OutputFile)) + metadata = p.fillMetadata(metadata, files) + case "zip": + files, err = p.cmpZIP(files, filepath.Join(p.cfg.OutputPath, p.cfg.OutputFile)) + metadata = p.fillMetadata(metadata, files) + case "pgzip": + files, err = p.cmpPGZIP(files, p.cfg.OutputPath) + metadata = p.fillMetadata(metadata, files) + case "gzip": + files, err = p.cmpGZIP(files, p.cfg.OutputPath) + metadata = p.fillMetadata(metadata, files) + case "bgzf": + files, err = p.cmpBGZF(files, p.cfg.OutputPath) + metadata = p.fillMetadata(metadata, files) + case "lz4": + files, err = p.cmpLZ4(files, p.cfg.OutputPath) + metadata = p.fillMetadata(metadata, files) + case "e2fs": + files, err = p.cmpE2FS(files, filepath.Join(p.cfg.OutputPath, p.cfg.OutputFile)) + metadata = p.fillMetadata(metadata, files) + } + if err != nil { + return nil, false, fmt.Errorf("Failed to compress: %s", err) + } + } + + if p.cfg.Metadata { + fp, err := os.Create(metafile) + if err != nil { + return nil, false, err + } + if buf, err := yaml.Marshal(metadata); err != nil { + fp.Close() + return nil, false, err + } else { + if _, err = fp.Write(buf); err != nil { + fp.Close() + return nil, false, err + } + fp.Close() + } + } + + newartifact.f = append(newartifact.f, files...) + if p.cfg.Metadata { + newartifact.f = append(newartifact.f, metafile) + } + + return newartifact, p.cfg.KeepInputArtifact, nil +} + +func (p *CompressPostProcessor) cmpTAR(src []string, dst string) ([]string, error) { + fw, err := os.Create(dst) + if err != nil { + return nil, fmt.Errorf("tar error: %s", err) } defer fw.Close() - gw := gzip.NewWriter(fw) - defer gw.Close() + tw := tar.NewWriter(fw) + defer tw.Close() - // Iterate through all of the artifact's files and put them into the - // compressed archive using the tar/gzip writers. - for _, path := range artifact.Files() { - fi, err := os.Stat(path) + for _, name := range src { + fi, err := os.Stat(name) if err != nil { - return nil, false, fmt.Errorf( - "Failed stating file: %s", path) + return nil, fmt.Errorf("tar error: %s", err) } - target, _ := os.Readlink(path) + target, _ := os.Readlink(name) header, err := tar.FileInfoHeader(fi, target) if err != nil { - return nil, false, fmt.Errorf( - "Failed creating archive header: %s", path) + return nil, fmt.Errorf("tar erorr: %s", err) } - tw := tar.NewWriter(gw) - defer tw.Close() - - // Write the header first to the archive. This takes partial data - // from the FileInfo that is grabbed by running the stat command. - if err := tw.WriteHeader(header); err != nil { - return nil, false, fmt.Errorf( - "Failed writing archive header: %s", path) + if err = tw.WriteHeader(header); err != nil { + return nil, fmt.Errorf("tar error: %s", err) } - // Open the target file for archiving and compressing. - fr, err := os.Open(path) + fr, err := os.Open(name) if err != nil { - return nil, false, fmt.Errorf( - "Failed opening file '%s' to write compressed archive.", path) + return nil, fmt.Errorf("tar error: %s", err) } - defer fr.Close() if _, err = io.Copy(tw, fr); err != nil { - return nil, false, fmt.Errorf( - "Failed copying file to archive: %s", path) + fr.Close() + return nil, fmt.Errorf("tar error: %s", err) } + fr.Close() } - - return NewArtifact(artifact.BuilderId(), self.config.OutputPath), false, nil + return []string{dst}, nil +} + +func (p *CompressPostProcessor) cmpGZIP(src []string, dst string) ([]string, error) { + var res []string + for _, name := range src { + filename := filepath.Join(dst, filepath.Base(name)) + fw, err := os.Create(filename) + if err != nil { + return nil, fmt.Errorf("gzip error: %s", err) + } + cw, err := gzip.NewWriterLevel(fw, p.cfg.Compression) + if err != nil { + fw.Close() + return nil, fmt.Errorf("gzip error: %s", err) + } + fr, err := os.Open(name) + if err != nil { + cw.Close() + fw.Close() + return nil, fmt.Errorf("gzip error: %s", err) + } + if _, err = io.Copy(cw, fr); err != nil { + cw.Close() + fr.Close() + fw.Close() + return nil, fmt.Errorf("gzip error: %s", err) + } + cw.Close() + fr.Close() + fw.Close() + res = append(res, filename) + } + return res, nil +} + +func (p *CompressPostProcessor) cmpPGZIP(src []string, dst string) ([]string, error) { + var res []string + for _, name := range src { + filename := filepath.Join(dst, filepath.Base(name)) + fw, err := os.Create(filename) + if err != nil { + return nil, fmt.Errorf("pgzip error: %s", err) + } + cw, err := pgzip.NewWriterLevel(fw, p.cfg.Compression) + if err != nil { + fw.Close() + return nil, fmt.Errorf("pgzip error: %s", err) + } + fr, err := os.Open(name) + if err != nil { + cw.Close() + fw.Close() + return nil, fmt.Errorf("pgzip error: %s", err) + } + if _, err = io.Copy(cw, fr); err != nil { + cw.Close() + fr.Close() + fw.Close() + return nil, fmt.Errorf("pgzip error: %s", err) + } + cw.Close() + fr.Close() + fw.Close() + res = append(res, filename) + } + return res, nil +} + +func (p *CompressPostProcessor) cmpLZ4(src []string, dst string) ([]string, error) { + var res []string + for _, name := range src { + filename := filepath.Join(dst, filepath.Base(name)) + fw, err := os.Create(filename) + if err != nil { + return nil, fmt.Errorf("lz4 error: %s", err) + } + cw := lz4.NewWriter(fw) + if err != nil { + fw.Close() + return nil, fmt.Errorf("lz4 error: %s", err) + } + if p.cfg.Compression > flate.DefaultCompression { + cw.Header.HighCompression = true + } + fr, err := os.Open(name) + if err != nil { + cw.Close() + fw.Close() + return nil, fmt.Errorf("lz4 error: %s", err) + } + if _, err = io.Copy(cw, fr); err != nil { + cw.Close() + fr.Close() + fw.Close() + return nil, fmt.Errorf("lz4 error: %s", err) + } + cw.Close() + fr.Close() + fw.Close() + res = append(res, filename) + } + return res, nil +} + +func (p *CompressPostProcessor) cmpBGZF(src []string, dst string) ([]string, error) { + var res []string + for _, name := range src { + filename := filepath.Join(dst, filepath.Base(name)) + fw, err := os.Create(filename) + if err != nil { + return nil, fmt.Errorf("bgzf error: %s", err) + } + + cw, err := bgzf.NewWriterLevel(fw, p.cfg.Compression, runtime.NumCPU()) + if err != nil { + return nil, fmt.Errorf("bgzf error: %s", err) + } + fr, err := os.Open(name) + if err != nil { + cw.Close() + fw.Close() + return nil, fmt.Errorf("bgzf error: %s", err) + } + if _, err = io.Copy(cw, fr); err != nil { + cw.Close() + fr.Close() + fw.Close() + return nil, fmt.Errorf("bgzf error: %s", err) + } + cw.Close() + fr.Close() + fw.Close() + res = append(res, filename) + } + return res, nil +} + +func (p *CompressPostProcessor) cmpE2FS(src []string, dst string) ([]string, error) { + panic("not implemented") +} + +func (p *CompressPostProcessor) cmpZIP(src []string, dst string) ([]string, error) { + fw, err := os.Create(dst) + if err != nil { + return nil, fmt.Errorf("zip error: %s", err) + } + defer fw.Close() + + zw := zip.NewWriter(fw) + defer zw.Close() + + for _, name := range src { + header, err := zw.Create(name) + if err != nil { + return nil, fmt.Errorf("zip erorr: %s", err) + } + + fr, err := os.Open(name) + if err != nil { + return nil, fmt.Errorf("zip error: %s", err) + } + + if _, err = io.Copy(header, fr); err != nil { + fr.Close() + return nil, fmt.Errorf("zip error: %s", err) + } + fr.Close() + } + return []string{dst}, nil + }