add the opentelemetry tracing (#908)

* add the opentelemetry tracing

* fix the error sampler config

* 添加stream的链路跟踪

* fix the error field name
This commit is contained in:
neosu
2021-08-22 10:03:56 +08:00
committed by GitHub
parent 9672298fa8
commit 5b35fa17de
17 changed files with 739 additions and 18 deletions

View File

@@ -0,0 +1,60 @@
package opentelemetry
import (
"sync"
"github.com/tal-tech/go-zero/core/logx"
"github.com/tal-tech/go-zero/core/syncx"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/jaeger"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
tracesdk "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
)
var (
once sync.Once
enabled syncx.AtomicBool
)
// Enabled returns if prometheus is enabled.
func Enabled() bool {
return enabled.True()
}
// StartAgent starts a prometheus agent.
func StartAgent(c Config) {
once.Do(func() {
if len(c.Endpoint) == 0 {
return
}
// Just support jaeger now
if c.Batcher != "jaeger" {
return
}
exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(c.Endpoint)))
if err != nil {
logx.Error(err)
return
}
tp := tracesdk.NewTracerProvider(
// Set the sampling rate based on the parent span to 100%
tracesdk.WithSampler(tracesdk.ParentBased(tracesdk.TraceIDRatioBased(c.Sampler))),
// Always be sure to batch in production.
tracesdk.WithBatcher(exp),
// Record information about this application in an Resource.
tracesdk.WithResource(resource.NewSchemaless(
semconv.ServiceNameKey.String(c.Name),
)),
)
otel.SetTracerProvider(tp)
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{}))
enabled.Set(true)
})
}

View File

@@ -0,0 +1,45 @@
package opentelemetry
import (
"go.opentelemetry.io/otel/attribute"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
grpc_codes "google.golang.org/grpc/codes"
)
const (
// GRPCStatusCodeKey is convention for numeric status code of a gRPC request.
GRPCStatusCodeKey = attribute.Key("rpc.grpc.status_code")
// Name of message transmitted or received.
RPCNameKey = attribute.Key("name")
// Type of message transmitted or received.
RPCMessageTypeKey = attribute.Key("message.type")
// Identifier of message transmitted or received.
RPCMessageIDKey = attribute.Key("message.id")
// The compressed size of the message transmitted or received in bytes.
RPCMessageCompressedSizeKey = attribute.Key("message.compressed_size")
// The uncompressed size of the message transmitted or received in
// bytes.
RPCMessageUncompressedSizeKey = attribute.Key("message.uncompressed_size")
)
// Semantic conventions for common RPC attributes.
var (
// Semantic convention for gRPC as the remoting system.
RPCSystemGRPC = semconv.RPCSystemKey.String("grpc")
// Semantic convention for a message named message.
RPCNameMessage = RPCNameKey.String("message")
// Semantic conventions for RPC message types.
RPCMessageTypeSent = RPCMessageTypeKey.String("SENT")
RPCMessageTypeReceived = RPCMessageTypeKey.String("RECEIVED")
)
func StatusCodeAttr(c grpc_codes.Code) attribute.KeyValue {
return GRPCStatusCodeKey.Int64(int64(c))
}

View File

@@ -0,0 +1,128 @@
package opentelemetry
import (
"context"
"io"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
"google.golang.org/protobuf/proto"
)
type streamEventType int
type streamEvent struct {
Type streamEventType
Err error
}
const (
receiveEndEvent streamEventType = iota
errorEvent
)
type clientStream struct {
grpc.ClientStream
desc *grpc.StreamDesc
events chan streamEvent
eventsDone chan struct{}
Finished chan error
receivedMessageID int
sentMessageID int
}
var _ = proto.Marshal
func (w *clientStream) RecvMsg(m interface{}) error {
err := w.ClientStream.RecvMsg(m)
if err == nil && !w.desc.ServerStreams {
w.sendStreamEvent(receiveEndEvent, nil)
} else if err == io.EOF {
w.sendStreamEvent(receiveEndEvent, nil)
} else if err != nil {
w.sendStreamEvent(errorEvent, err)
} else {
w.receivedMessageID++
MessageReceived.Event(w.Context(), w.receivedMessageID, m)
}
return err
}
func (w *clientStream) SendMsg(m interface{}) error {
err := w.ClientStream.SendMsg(m)
w.sentMessageID++
MessageSent.Event(w.Context(), w.sentMessageID, m)
if err != nil {
w.sendStreamEvent(errorEvent, err)
}
return err
}
func (w *clientStream) Header() (metadata.MD, error) {
md, err := w.ClientStream.Header()
if err != nil {
w.sendStreamEvent(errorEvent, err)
}
return md, err
}
func (w *clientStream) CloseSend() error {
err := w.ClientStream.CloseSend()
if err != nil {
w.sendStreamEvent(errorEvent, err)
}
return err
}
func (w *clientStream) sendStreamEvent(eventType streamEventType, err error) {
select {
case <-w.eventsDone:
case w.events <- streamEvent{Type: eventType, Err: err}:
}
}
func WrapClientStream(ctx context.Context, s grpc.ClientStream, desc *grpc.StreamDesc) *clientStream {
events := make(chan streamEvent)
eventsDone := make(chan struct{})
finished := make(chan error)
go func() {
defer close(eventsDone)
for {
select {
case event := <-events:
switch event.Type {
case receiveEndEvent:
finished <- nil
return
case errorEvent:
finished <- event.Err
return
}
case <-ctx.Done():
finished <- ctx.Err()
return
}
}
}()
return &clientStream{
ClientStream: s,
desc: desc,
events: events,
eventsDone: eventsDone,
Finished: finished,
}
}

View File

@@ -0,0 +1,13 @@
package opentelemetry
const (
TraceName = "go-zero"
)
// A Config is a opentelemetry config.
type Config struct {
Name string `json:",optional"`
Endpoint string `json:",optional"`
Sampler float64 `json:",default=1.0"`
Batcher string `json:",default=jaeger"`
}

View File

@@ -0,0 +1,34 @@
package opentelemetry
import (
"context"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"google.golang.org/protobuf/proto"
)
var (
MessageSent = messageType(RPCMessageTypeSent)
MessageReceived = messageType(RPCMessageTypeReceived)
)
type messageType attribute.KeyValue
// Event adds an event of the messageType to the span associated with the
// passed context with id and size (if message is a proto message).
func (m messageType) Event(ctx context.Context, id int, message interface{}) {
span := trace.SpanFromContext(ctx)
if p, ok := message.(proto.Message); ok {
span.AddEvent("message", trace.WithAttributes(
attribute.KeyValue(m),
RPCMessageIDKey.Int(id),
RPCMessageUncompressedSizeKey.Int(proto.Size(p)),
))
} else {
span.AddEvent("message", trace.WithAttributes(
attribute.KeyValue(m),
RPCMessageIDKey.Int(id),
))
}
}

View File

@@ -0,0 +1,48 @@
package opentelemetry
import (
"context"
"google.golang.org/grpc"
)
// serverStream wraps around the embedded grpc.ServerStream, and intercepts the RecvMsg and
// SendMsg method call.
type serverStream struct {
grpc.ServerStream
ctx context.Context
receivedMessageID int
sentMessageID int
}
func (w *serverStream) Context() context.Context {
return w.ctx
}
func (w *serverStream) RecvMsg(m interface{}) error {
err := w.ServerStream.RecvMsg(m)
if err == nil {
w.receivedMessageID++
MessageReceived.Event(w.Context(), w.receivedMessageID, m)
}
return err
}
func (w *serverStream) SendMsg(m interface{}) error {
err := w.ServerStream.SendMsg(m)
w.sentMessageID++
MessageSent.Event(w.Context(), w.sentMessageID, m)
return err
}
func WrapServerStream(ctx context.Context, ss grpc.ServerStream) *serverStream {
return &serverStream{
ServerStream: ss,
ctx: ctx,
}
}

View File

@@ -0,0 +1,50 @@
package opentelemetry
import (
"context"
"go.opentelemetry.io/otel/baggage"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc/metadata"
)
type metadataSupplier struct {
metadata *metadata.MD
}
// assert that metadataSupplier implements the TextMapCarrier interface
var _ propagation.TextMapCarrier = &metadataSupplier{}
func (s *metadataSupplier) Get(key string) string {
values := s.metadata.Get(key)
if len(values) == 0 {
return ""
}
return values[0]
}
func (s *metadataSupplier) Set(key string, value string) {
s.metadata.Set(key, value)
}
func (s *metadataSupplier) Keys() []string {
out := make([]string, 0, len(*s.metadata))
for key := range *s.metadata {
out = append(out, key)
}
return out
}
func Inject(ctx context.Context, p propagation.TextMapPropagator, metadata *metadata.MD) {
p.Inject(ctx, &metadataSupplier{
metadata: metadata,
})
}
func Extract(ctx context.Context, p propagation.TextMapPropagator, metadata *metadata.MD) (baggage.Baggage, trace.SpanContext) {
ctx = p.Extract(ctx, &metadataSupplier{
metadata: metadata,
})
return baggage.FromContext(ctx), trace.SpanContextFromContext(ctx)
}

View File

@@ -0,0 +1,61 @@
package opentelemetry
import (
"context"
"net"
"strings"
"go.opentelemetry.io/otel/attribute"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
"google.golang.org/grpc/peer"
)
func PeerFromCtx(ctx context.Context) string {
p, ok := peer.FromContext(ctx)
if !ok {
return ""
}
return p.Addr.String()
}
func SpanInfo(fullMethod, peerAddress string) (string, []attribute.KeyValue) {
attrs := []attribute.KeyValue{RPCSystemGRPC}
name, mAttrs := ParseFullMethod(fullMethod)
attrs = append(attrs, mAttrs...)
attrs = append(attrs, PeerAttr(peerAddress)...)
return name, attrs
}
func ParseFullMethod(fullMethod string) (string, []attribute.KeyValue) {
name := strings.TrimLeft(fullMethod, "/")
parts := strings.SplitN(name, "/", 2)
if len(parts) != 2 {
// Invalid format, does not follow `/package.service/method`.
return name, []attribute.KeyValue(nil)
}
var attrs []attribute.KeyValue
if service := parts[0]; service != "" {
attrs = append(attrs, semconv.RPCServiceKey.String(service))
}
if method := parts[1]; method != "" {
attrs = append(attrs, semconv.RPCMethodKey.String(method))
}
return name, attrs
}
func PeerAttr(addr string) []attribute.KeyValue {
host, port, err := net.SplitHostPort(addr)
if err != nil {
return []attribute.KeyValue(nil)
}
if host == "" {
host = "127.0.0.1"
}
return []attribute.KeyValue{
semconv.NetPeerIPKey.String(host),
semconv.NetPeerPortKey.String(port),
}
}

View File

@@ -5,6 +5,7 @@ import (
"github.com/tal-tech/go-zero/core/load"
"github.com/tal-tech/go-zero/core/logx"
"github.com/tal-tech/go-zero/core/opentelemetry"
"github.com/tal-tech/go-zero/core/prometheus"
"github.com/tal-tech/go-zero/core/stat"
)
@@ -24,11 +25,12 @@ const (
// A ServiceConf is a service config.
type ServiceConf struct {
Name string
Log logx.LogConf
Mode string `json:",default=pro,options=dev|test|rt|pre|pro"`
MetricsUrl string `json:",optional"`
Prometheus prometheus.Config `json:",optional"`
Name string
Log logx.LogConf
Mode string `json:",default=pro,options=dev|test|rt|pre|pro"`
MetricsUrl string `json:",optional"`
Prometheus prometheus.Config `json:",optional"`
OpenTelemetry opentelemetry.Config `json:",optional"`
}
// MustSetUp sets up the service, exits on error.
@@ -49,6 +51,8 @@ func (sc ServiceConf) SetUp() error {
sc.initMode()
prometheus.StartAgent(sc.Prometheus)
opentelemetry.StartAgent(sc.OpenTelemetry)
if len(sc.MetricsUrl) > 0 {
stat.SetReportWriter(stat.NewRemoteWriter(sc.MetricsUrl))
}