// packer-cn/vendor/github.com/aliyun/aliyun-oss-go-sdk/oss/upload.go

package oss

import (
"crypto/md5"
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"time"
)

// UploadFile uploads a file with multipart upload.
//
// objectKey    the object name.
// filePath     the local file path to upload.
// partSize     the part size in bytes.
// options      the options for uploading the object.
//
// error    it's nil if the operation succeeds, otherwise it's an error object.
//
func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
if partSize < MinPartSize || partSize > MaxPartSize {
return errors.New("oss: part size invalid range [100KB, 5GB]")
}

cpConf := getCpConfig(options)
routines := getRoutines(options)

if cpConf != nil && cpConf.IsEnable {
cpFilePath := getUploadCpFilePath(cpConf, filePath, bucket.BucketName, objectKey)
if cpFilePath != "" {
return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpFilePath, routines)
}
}

return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
}
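
// A minimal usage sketch (not part of this file's API surface): it assumes the
// SDK's New, Client.Bucket, Routines and Checkpoint helpers and uses placeholder
// endpoint, credentials, object and file names.
//
//	client, err := New("oss-cn-hangzhou.aliyuncs.com", "<accessKeyID>", "<accessKeySecret>")
//	if err != nil {
//		// handle error
//	}
//	bucket, err := client.Bucket("my-bucket")
//	if err != nil {
//		// handle error
//	}
//	// 100 KB parts, 3 concurrent part uploads, resumable via a checkpoint file.
//	err = bucket.UploadFile("my-object", "/tmp/my-file", 100*1024, Routines(3), Checkpoint(true, "/tmp/my-file.cp"))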

func getUploadCpFilePath(cpConf *cpConfig, srcFile, destBucket, destObject string) string {
if cpConf.FilePath == "" && cpConf.DirPath != "" {
dest := fmt.Sprintf("oss://%v/%v", destBucket, destObject)
absPath, _ := filepath.Abs(srcFile)
cpFileName := getCpFileName(absPath, dest)
cpConf.FilePath = cpConf.DirPath + string(os.PathSeparator) + cpFileName
}
return cpConf.FilePath
}
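
// For illustration only (hypothetical values): uploading /data/video.mp4 to bucket
// "my-bucket" as object "videos/video.mp4" with cpConf.DirPath set to /tmp/cp yields
// a checkpoint path of the form
//
//	/tmp/cp/<md5hex(absolute source path)>-<md5hex("oss://my-bucket/videos/video.mp4")>.cp
//
// (see getCpFileName below for the name construction).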

// ----- concurrent upload without checkpoint -----

// getCpConfig gets checkpoint configuration
func getCpConfig(options []Option) *cpConfig {
cpcOpt, err := findOption(options, checkpointConfig, nil)
if err != nil || cpcOpt == nil {
return nil
}
return cpcOpt.(*cpConfig)
}

// getCpFileName returns the name of the checkpoint file
func getCpFileName(src, dest string) string {
md5Ctx := md5.New()
md5Ctx.Write([]byte(src))
srcCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))
md5Ctx.Reset()
md5Ctx.Write([]byte(dest))
destCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

return fmt.Sprintf("%v-%v.cp", srcCheckSum, destCheckSum)
}

// getRoutines gets the routine count. By default it's 1.
func getRoutines(options []Option) int {
rtnOpt, err := findOption(options, routineNum, nil)
if err != nil || rtnOpt == nil {
return 1
}
rs := rtnOpt.(int)
if rs < 1 {
rs = 1
} else if rs > 100 {
rs = 100
}
return rs
}

// getPayer returns the payer of the request
func getPayer(options []Option) string {
payerOpt, err := findOption(options, HTTPHeaderOSSRequester, nil)
if err != nil || payerOpt == nil {
return ""
}
return payerOpt.(string)
}

// getProgressListener gets the progress callback
func getProgressListener(options []Option) ProgressListener {
isSet, listener, _ := isOptionSet(options, progressListener)
if !isSet {
return nil
}
return listener.(ProgressListener)
}
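
// A minimal sketch (not part of the SDK source) of wiring a progress callback into
// an upload; it assumes this package's Progress option and the ProgressListener
// interface, and uses placeholder names.
//
//	type uploadListener struct{}
//
//	func (l *uploadListener) ProgressChanged(event *ProgressEvent) {
//		fmt.Printf("uploaded %d of %d bytes\n", event.ConsumedBytes, event.TotalBytes)
//	}
//
//	// err := bucket.UploadFile("my-object", "/tmp/my-file", 100*1024, Progress(&uploadListener{}))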

// uploadPartHook is for testing usage
type uploadPartHook func(id int, chunk FileChunk) error

var uploadPartHooker uploadPartHook = defaultUploadPart

func defaultUploadPart(id int, chunk FileChunk) error {
return nil
}

// workerArg defines worker argument structure
type workerArg struct {
bucket *Bucket
filePath string
imur InitiateMultipartUploadResult
options []Option
hook uploadPartHook
}

// worker is the worker goroutine function
func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
for chunk := range jobs {
if err := arg.hook(id, chunk); err != nil {
failed <- err
break
}

part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number, arg.options...)
if err != nil {
failed <- err
break
}
select {
case <-die:
return
default:
}
results <- part
}
}

// scheduler feeds the chunks to the jobs channel and closes it when done
func scheduler(jobs chan FileChunk, chunks []FileChunk) {
for _, chunk := range chunks {
jobs <- chunk
}
close(jobs)
}
func getTotalBytes(chunks []FileChunk) int64 {
var tb int64
for _, chunk := range chunks {
tb += chunk.Size
}
return tb
}

// uploadFile is a concurrent upload, without checkpoint
func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
listener := getProgressListener(options)
chunks, err := SplitFileByPartSize(filePath, partSize)
if err != nil {
return err
}

payerOptions := []Option{}
payer := getPayer(options)
if payer != "" {
payerOptions = append(payerOptions, RequestPayer(PayerType(payer)))
}

// Initialize the multipart upload
imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
if err != nil {
return err
}
jobs := make(chan FileChunk, len(chunks))
results := make(chan UploadPart, len(chunks))
failed := make(chan error)
die := make(chan bool)
var completedBytes int64
totalBytes := getTotalBytes(chunks)
event := newProgressEvent(TransferStartedEvent, 0, totalBytes)
publishProgress(listener, event)

// Start the worker goroutines
arg := workerArg{&bucket, filePath, imur, payerOptions, uploadPartHooker}
for w := 1; w <= routines; w++ {
go worker(w, arg, jobs, results, failed, die)
}

// Schedule the jobs
go scheduler(jobs, chunks)

// Wait for the upload to finish
completed := 0
parts := make([]UploadPart, len(chunks))
for completed < len(chunks) {
select {
case part := <-results:
completed++
parts[part.PartNumber-1] = part
completedBytes += chunks[part.PartNumber-1].Size
event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes)
publishProgress(listener, event)
case err := <-failed:
close(die)
event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes)
publishProgress(listener, event)
bucket.AbortMultipartUpload(imur, payerOptions...)
return err
}
if completed >= len(chunks) {
break
}
}
event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes)
publishProgress(listener, event)

// Complete the multipart upload
_, err = bucket.CompleteMultipartUpload(imur, parts, payerOptions...)
if err != nil {
bucket.AbortMultipartUpload(imur, payerOptions...)
return err
}
return nil
}

// ----- concurrent upload with checkpoint -----
const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

type uploadCheckpoint struct {
Magic string // Magic
MD5 string // Checkpoint file content's MD5
FilePath string // Local file path
FileStat cpStat // File state
ObjectKey string // Key
UploadID string // Upload ID
Parts []cpPart // All parts of the local file
}

type cpStat struct {
Size int64 // File size
LastModified time.Time // File's last modified time
MD5 string // Local file's MD5
}

type cpPart struct {
Chunk FileChunk // File chunk
Part UploadPart // Uploaded part
IsCompleted bool // Upload complete flag
}
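
// For illustration only: the checkpoint file written by dump below is plain JSON
// keyed by the field names above. A hypothetical (truncated) example:
//
//	{
//	  "Magic": "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62",
//	  "MD5": "<base64 MD5 of this JSON with the MD5 field cleared>",
//	  "FilePath": "/tmp/my-file",
//	  "FileStat": {"Size": 1048576, "LastModified": "2019-10-14T10:21:52Z", "MD5": ""},
//	  "ObjectKey": "my-object",
//	  "UploadID": "<upload id>",
//	  "Parts": [{"Chunk": {...}, "Part": {...}, "IsCompleted": true}]
//	}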

// isValid checks whether the checkpoint is valid: the checkpoint content must be intact
// (its magic number and MD5 must match) and the local file must not have been modified
// since the checkpoint was written.
func (cp uploadCheckpoint) isValid(filePath string) (bool, error) {
// Compare the CP's magic number and MD5.
cpb := cp
cpb.MD5 = ""
js, _ := json.Marshal(cpb)
sum := md5.Sum(js)
b64 := base64.StdEncoding.EncodeToString(sum[:])
if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
return false, nil
}

// Check whether the local file has been modified.
fd, err := os.Open(filePath)
if err != nil {
return false, err
}
defer fd.Close()
st, err := fd.Stat()
if err != nil {
return false, err
}
md, err := calcFileMD5(filePath)
if err != nil {
return false, err
}

// Compare the file size, file's last modified time and file's MD5
if cp.FileStat.Size != st.Size() ||
cp.FileStat.LastModified != st.ModTime() ||
cp.FileStat.MD5 != md {
return false, nil
}
return true, nil
}

// load loads the checkpoint data from the local checkpoint file
func (cp *uploadCheckpoint) load(filePath string) error {
contents, err := ioutil.ReadFile(filePath)
if err != nil {
return err
}
err = json.Unmarshal(contents, cp)
return err
}

// dump dumps the checkpoint data to the local checkpoint file
func (cp *uploadCheckpoint) dump(filePath string) error {
bcp := *cp

// Calculate MD5
bcp.MD5 = ""
js, err := json.Marshal(bcp)
if err != nil {
return err
}
sum := md5.Sum(js)
b64 := base64.StdEncoding.EncodeToString(sum[:])
bcp.MD5 = b64

// Serialization
js, err = json.Marshal(bcp)
if err != nil {
return err
}

// Dump
return ioutil.WriteFile(filePath, js, FilePermMode)
}

// updatePart updates the part status
func (cp *uploadCheckpoint) updatePart(part UploadPart) {
cp.Parts[part.PartNumber-1].Part = part
cp.Parts[part.PartNumber-1].IsCompleted = true
}

// todoParts returns unfinished parts
func (cp *uploadCheckpoint) todoParts() []FileChunk {
fcs := []FileChunk{}
for _, part := range cp.Parts {
if !part.IsCompleted {
fcs = append(fcs, part.Chunk)
}
}
return fcs
}

// allParts returns all parts
func (cp *uploadCheckpoint) allParts() []UploadPart {
ps := []UploadPart{}
for _, part := range cp.Parts {
ps = append(ps, part.Part)
}
return ps
}

// getCompletedBytes returns completed bytes count
func (cp *uploadCheckpoint) getCompletedBytes() int64 {
var completedBytes int64
for _, part := range cp.Parts {
if part.IsCompleted {
completedBytes += part.Chunk.Size
}
}
return completedBytes
}

// calcFileMD5 calculates the MD5 for the specified local file.
// It is left unimplemented here and always returns "", so checkpoint validation
// effectively relies on the file size and last modified time.
func calcFileMD5(filePath string) (string, error) {
return "", nil
}

// prepare initializes the multipart upload
func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
// CP
cp.Magic = uploadCpMagic
cp.FilePath = filePath
cp.ObjectKey = objectKey

// Local file
fd, err := os.Open(filePath)
if err != nil {
return err
}
defer fd.Close()
st, err := fd.Stat()
if err != nil {
return err
}
cp.FileStat.Size = st.Size()
cp.FileStat.LastModified = st.ModTime()
md, err := calcFileMD5(filePath)
if err != nil {
return err
}
cp.FileStat.MD5 = md

// Chunks
parts, err := SplitFileByPartSize(filePath, partSize)
if err != nil {
return err
}
cp.Parts = make([]cpPart, len(parts))
for i, part := range parts {
cp.Parts[i].Chunk = part
cp.Parts[i].IsCompleted = false
}

// Initiate the multipart upload and record the upload ID
imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
if err != nil {
return err
}
cp.UploadID = imur.UploadID
return nil
}

// complete completes the multipart upload and deletes the local checkpoint file
func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string, options []Option) error {
imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
Key: cp.ObjectKey, UploadID: cp.UploadID}
_, err := bucket.CompleteMultipartUpload(imur, parts, options...)
if err != nil {
return err
}
os.Remove(cpFilePath)
return err
}

// uploadFileWithCp handles concurrent upload with checkpoint
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
listener := getProgressListener(options)

payerOptions := []Option{}
payer := getPayer(options)
if payer != "" {
payerOptions = append(payerOptions, RequestPayer(PayerType(payer)))
}

// Load CP data
ucp := uploadCheckpoint{}
err := ucp.load(cpFilePath)
if err != nil {
os.Remove(cpFilePath)
}

// If loading failed or the CP data is invalid, re-initialize the upload
valid, err := ucp.isValid(filePath)
if err != nil || !valid {
if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
return err
}
os.Remove(cpFilePath)
}
chunks := ucp.todoParts()
imur := InitiateMultipartUploadResult{
Bucket: bucket.BucketName,
Key: objectKey,
UploadID: ucp.UploadID}
jobs := make(chan FileChunk, len(chunks))
results := make(chan UploadPart, len(chunks))
failed := make(chan error)
die := make(chan bool)
completedBytes := ucp.getCompletedBytes()
event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size)
publishProgress(listener, event)

// Start the workers
arg := workerArg{&bucket, filePath, imur, payerOptions, uploadPartHooker}
for w := 1; w <= routines; w++ {
go worker(w, arg, jobs, results, failed, die)
}

// Schedule jobs
go scheduler(jobs, chunks)

// Wait for the uploads to finish
completed := 0
for completed < len(chunks) {
select {
case part := <-results:
completed++
ucp.updatePart(part)
ucp.dump(cpFilePath)
completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size)
publishProgress(listener, event)
case err := <-failed:
close(die)
event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size)
publishProgress(listener, event)
return err
}
if completed >= len(chunks) {
break
}
}
event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size)
publishProgress(listener, event)

// Complete the multipart upload
err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath, payerOptions)
return err
}