clipper/backend/media/uploader.go

213 lines
5.3 KiB
Go
Raw Normal View History

2021-10-27 19:34:59 +00:00
package media
import (
"bytes"
"context"
2021-11-08 01:54:43 +00:00
"errors"
2021-10-27 19:34:59 +00:00
"fmt"
"io"
"sort"
2021-11-08 01:54:43 +00:00
"sync"
2021-10-27 19:34:59 +00:00
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
2021-11-22 20:35:51 +00:00
"go.uber.org/zap"
2021-10-27 19:34:59 +00:00
)
2021-11-16 06:48:30 +00:00
// multipartUploader uploads a file to S3.
//
// TODO: extract to s3 package
type multipartUploader struct {
2021-11-22 20:35:51 +00:00
s3 S3Client
logger *zap.SugaredLogger
2021-11-08 01:54:43 +00:00
}
type uploadResult struct {
completedPart types.CompletedPart
size int64
2021-10-27 19:34:59 +00:00
}
const (
	// targetPartSizeBytes is the number of buffered bytes at which a part is
	// cut and handed off for upload (5 MiB).
	targetPartSizeBytes = 5 << 20

	// readBufferSizeBytes is the size of the scratch buffer used for each
	// read from the source reader (32 KiB).
	readBufferSizeBytes = 32 << 10
)
2021-11-22 20:35:51 +00:00
// newMultipartUploader builds a multipartUploader that talks to S3 through
// s3Client and reports progress and failures through logger.
func newMultipartUploader(s3Client S3Client, logger *zap.SugaredLogger) *multipartUploader {
	uploader := new(multipartUploader)
	uploader.s3 = s3Client
	uploader.logger = logger
	return uploader
}
// Upload uploads to an S3 bucket in 5MB parts. It buffers data internally
// until a part is ready to send over the network. Parts are sent as soon as
// they exceed the minimum part size of 5MB.
//
// TODO: expire after configurable period.
func (u *multipartUploader) Upload(ctx context.Context, r io.Reader, bucket, key, contentType string) (int64, error) {
var uploaded bool
2021-10-27 19:34:59 +00:00
input := s3.CreateMultipartUploadInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
ContentType: aws.String(contentType),
}
output, err := u.s3.CreateMultipartUpload(ctx, &input)
2021-10-27 19:34:59 +00:00
if err != nil {
return 0, fmt.Errorf("error creating multipart upload: %v", err)
2021-10-27 19:34:59 +00:00
}
// abort the upload if possible, logging any errors, on exit.
defer func() {
if uploaded {
return
}
input := s3.AbortMultipartUploadInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
UploadId: output.UploadId,
}
2021-10-27 19:34:59 +00:00
// if the context was cancelled, just use the background context.
ctxToUse := ctx
if ctxToUse.Err() != nil {
ctxToUse = context.Background()
}
_, abortErr := u.s3.AbortMultipartUpload(ctxToUse, &input)
if abortErr != nil {
2021-11-22 20:35:51 +00:00
u.logger.Errorf("uploader: error aborting upload: %v", abortErr)
} else {
2021-11-22 20:35:51 +00:00
u.logger.Infof("aborted upload, key = %s", key)
}
}()
2021-10-27 19:34:59 +00:00
uploadResultChan := make(chan uploadResult)
uploadErrorChan := make(chan error, 1)
2021-10-27 19:34:59 +00:00
// uploadPart uploads an individual part.
uploadPart := func(wg *sync.WaitGroup, buf []byte, partNum int32) {
defer wg.Done()
2021-10-27 19:34:59 +00:00
partLen := int64(len(buf))
2021-11-22 20:35:51 +00:00
u.logger.With("key", key, "partNum", partNum, "partLen", partLen).Debug("uploading part")
2021-11-08 01:54:43 +00:00
input := s3.UploadPartInput{
Body: bytes.NewReader(buf),
Bucket: aws.String(bucket),
Key: aws.String(key),
PartNumber: partNum,
UploadId: output.UploadId,
ContentLength: partLen,
}
2021-10-27 19:34:59 +00:00
output, uploadErr := u.s3.UploadPart(ctx, &input)
if uploadErr != nil {
// TODO: retry on failure
uploadErrorChan <- uploadErr
return
}
2021-10-27 19:34:59 +00:00
2021-11-22 20:35:51 +00:00
u.logger.With("key", key, "partNum", partNum, "partLen", partLen, "etag", *output.ETag).Debug("uploaded part")
2021-10-27 19:34:59 +00:00
uploadResultChan <- uploadResult{
completedPart: types.CompletedPart{ETag: output.ETag, PartNumber: partNum},
size: partLen,
}
2021-11-08 01:54:43 +00:00
}
wgDone := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1) // done when the reader goroutine returns
go func() {
wg.Wait()
wgDone <- struct{}{}
}()
2021-11-08 01:54:43 +00:00
readChan := make(chan error, 1)
2021-11-08 01:54:43 +00:00
go func() {
defer wg.Done()
var closing bool
currPart := bytes.NewBuffer(make([]byte, 0, targetPartSizeBytes+readBufferSizeBytes))
partNum := int32(1)
buf := make([]byte, readBufferSizeBytes)
for {
n, readErr := r.Read(buf)
if readErr == io.EOF {
closing = true
} else if readErr != nil {
readChan <- readErr
return
}
_, _ = currPart.Write(buf[:n])
if closing || currPart.Len() >= targetPartSizeBytes {
part := make([]byte, currPart.Len())
copy(part, currPart.Bytes())
currPart.Truncate(0)
wg.Add(1)
go uploadPart(&wg, part, partNum)
partNum++
}
if closing {
return
}
}
2021-11-08 01:54:43 +00:00
}()
results := make([]uploadResult, 0, 64)
2021-11-08 01:54:43 +00:00
outer:
for {
select {
case readErr := <-readChan:
if readErr != io.EOF {
return 0, fmt.Errorf("reader error: %v", readErr)
}
case uploadResult := <-uploadResultChan:
results = append(results, uploadResult)
case uploadErr := <-uploadErrorChan:
return 0, fmt.Errorf("error while uploading part: %v", uploadErr)
case <-ctx.Done():
return 0, ctx.Err()
case <-wgDone:
break outer
2021-11-08 01:54:43 +00:00
}
}
if len(results) == 0 {
2021-11-08 01:54:43 +00:00
return 0, errors.New("no parts available to upload")
2021-10-27 19:34:59 +00:00
}
completedParts := make([]types.CompletedPart, 0, 64)
var uploadedBytes int64
for _, result := range results {
completedParts = append(completedParts, result.completedPart)
uploadedBytes += result.size
}
// the parts may be out of order, especially with slow network conditions:
sort.Slice(completedParts, func(i, j int) bool {
return completedParts[i].PartNumber < completedParts[j].PartNumber
})
completeInput := s3.CompleteMultipartUploadInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
UploadId: output.UploadId,
2021-11-08 01:54:43 +00:00
MultipartUpload: &types.CompletedMultipartUpload{Parts: completedParts},
}
if _, err = u.s3.CompleteMultipartUpload(ctx, &completeInput); err != nil {
2021-11-08 01:54:43 +00:00
return 0, fmt.Errorf("error completing upload: %v", err)
}
2021-11-22 20:35:51 +00:00
u.logger.With("key", key, "numParts", len(completedParts), "len", uploadedBytes).Debug("completed upload")
uploaded = true
2021-11-08 01:54:43 +00:00
return uploadedBytes, nil
2021-10-27 19:34:59 +00:00
}