Add grpc retry (#1160)
* Add grpc retry * Update grpc retry * Add tests * Fix a bug * Add api && some tests * Add comment * Add double check * Add server retry quota * Update optimize code * Fix bug * Update optimize code * Update optimize code * Fix bug
This commit is contained in:
179
core/retry/retryinterceptor.go
Normal file
179
core/retry/retryinterceptor.go
Normal file
@@ -0,0 +1,179 @@
|
||||
package retry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/tal-tech/go-zero/core/logx"
|
||||
"github.com/tal-tech/go-zero/core/retry/backoff"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/metadata"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
const AttemptMetadataKey = "x-retry-attempt"
|
||||
|
||||
var (
|
||||
// DefaultRetriableCodes default retry code
|
||||
DefaultRetriableCodes = []codes.Code{codes.ResourceExhausted, codes.Unavailable}
|
||||
// defaultRetryOptions default retry configuration
|
||||
defaultRetryOptions = &options{
|
||||
max: 0, // disabled
|
||||
perCallTimeout: 0, // disabled
|
||||
includeRetryHeader: true,
|
||||
codes: DefaultRetriableCodes,
|
||||
backoffFunc: backoff.LinearWithJitter(50*time.Millisecond /*jitter*/, 0.10),
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
// options retry the configuration
|
||||
options struct {
|
||||
max int
|
||||
perCallTimeout time.Duration
|
||||
includeRetryHeader bool
|
||||
codes []codes.Code
|
||||
backoffFunc backoff.Func
|
||||
}
|
||||
// CallOption is a grpc.CallOption that is local to grpc retry.
|
||||
CallOption struct {
|
||||
grpc.EmptyCallOption // make sure we implement private after() and before() fields so we don't panic.
|
||||
apply func(opt *options)
|
||||
}
|
||||
)
|
||||
|
||||
func waitRetryBackoff(logger logx.Logger, attempt int, ctx context.Context, retryOptions *options) error {
|
||||
var waitTime time.Duration = 0
|
||||
if attempt > 0 {
|
||||
waitTime = retryOptions.backoffFunc(attempt)
|
||||
}
|
||||
if waitTime > 0 {
|
||||
timer := time.NewTimer(waitTime)
|
||||
logger.Infof("grpc retry attempt: %d, backoff for %v", attempt, waitTime)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return status.FromContextError(ctx.Err()).Err()
|
||||
case <-timer.C:
|
||||
// double check
|
||||
err := ctx.Err()
|
||||
if err != nil {
|
||||
return status.FromContextError(err).Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isRetriable(err error, retryOptions *options) bool {
|
||||
errCode := status.Code(err)
|
||||
if isContextError(err) {
|
||||
return false
|
||||
}
|
||||
for _, code := range retryOptions.codes {
|
||||
if code == errCode {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isContextError(err error) bool {
|
||||
code := status.Code(err)
|
||||
return code == codes.DeadlineExceeded || code == codes.Canceled
|
||||
}
|
||||
|
||||
func reuseOrNewWithCallOptions(opt *options, retryCallOptions []*CallOption) *options {
|
||||
if len(retryCallOptions) == 0 {
|
||||
return opt
|
||||
}
|
||||
return parseRetryCallOptions(opt, retryCallOptions...)
|
||||
}
|
||||
|
||||
func parseRetryCallOptions(opt *options, opts ...*CallOption) *options {
|
||||
for _, option := range opts {
|
||||
option.apply(opt)
|
||||
}
|
||||
return opt
|
||||
}
|
||||
|
||||
func perCallContext(ctx context.Context, callOpts *options, attempt int) context.Context {
|
||||
if attempt > 0 {
|
||||
if callOpts.perCallTimeout != 0 {
|
||||
var cancel context.CancelFunc
|
||||
ctx, cancel = context.WithTimeout(ctx, callOpts.perCallTimeout)
|
||||
_ = cancel
|
||||
}
|
||||
if callOpts.includeRetryHeader {
|
||||
cloneMd := extractIncomingAndClone(ctx)
|
||||
cloneMd.Set(AttemptMetadataKey, strconv.Itoa(attempt))
|
||||
ctx = metadata.NewOutgoingContext(ctx, cloneMd)
|
||||
}
|
||||
}
|
||||
|
||||
return ctx
|
||||
}
|
||||
|
||||
func extractIncomingAndClone(ctx context.Context) metadata.MD {
|
||||
md, ok := metadata.FromIncomingContext(ctx)
|
||||
if !ok {
|
||||
return metadata.MD{}
|
||||
}
|
||||
// clone
|
||||
return md.Copy()
|
||||
}
|
||||
|
||||
func filterCallOptions(callOptions []grpc.CallOption) (grpcOptions []grpc.CallOption, retryOptions []*CallOption) {
|
||||
for _, opt := range callOptions {
|
||||
if co, ok := opt.(*CallOption); ok {
|
||||
retryOptions = append(retryOptions, co)
|
||||
} else {
|
||||
grpcOptions = append(grpcOptions, opt)
|
||||
}
|
||||
}
|
||||
return grpcOptions, retryOptions
|
||||
}
|
||||
|
||||
func Do(ctx context.Context, call func(ctx context.Context, opts ...grpc.CallOption) error, opts ...grpc.CallOption) error {
|
||||
logger := logx.WithContext(ctx)
|
||||
grpcOpts, retryOpts := filterCallOptions(opts)
|
||||
callOpts := reuseOrNewWithCallOptions(defaultRetryOptions, retryOpts)
|
||||
|
||||
if callOpts.max == 0 {
|
||||
return call(ctx, opts...)
|
||||
}
|
||||
var lastErr error
|
||||
for attempt := 0; attempt <= callOpts.max; attempt++ {
|
||||
if err := waitRetryBackoff(logger, attempt, ctx, callOpts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
callCtx := perCallContext(ctx, callOpts, attempt)
|
||||
lastErr = call(callCtx, grpcOpts...)
|
||||
|
||||
if lastErr == nil {
|
||||
return nil
|
||||
}
|
||||
if attempt == 0 {
|
||||
logger.Errorf("grpc call failed, got err: %v", lastErr)
|
||||
} else {
|
||||
logger.Errorf("grpc retry attempt: %d, got err: %v", attempt, lastErr)
|
||||
}
|
||||
if isContextError(lastErr) {
|
||||
if ctx.Err() != nil {
|
||||
logger.Errorf("grpc retry attempt: %d, parent context error: %v", attempt, ctx.Err())
|
||||
return lastErr
|
||||
} else if callOpts.perCallTimeout != 0 {
|
||||
logger.Errorf("grpc retry attempt: %d, context error from retry call", attempt)
|
||||
continue
|
||||
}
|
||||
}
|
||||
if !isRetriable(lastErr, callOpts) {
|
||||
return lastErr
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
Reference in New Issue
Block a user