From 5bd7a9920e770b6fa3009d4f3de271fd003326bc Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Mon, 16 Feb 2026 14:43:10 +0100 Subject: [PATCH 1/7] fix: improve otel span map cleanup Previously the span map was holding all span information in a tree structure. When a span finished, MarkFinished acquired a global write lock and recursively traversed the tree to clean up. The fix usees the already existin spanRecorder to keep track of the spans in the transaction. Instead of linking each child, we keep track of the root transaction and override the ParentSpanID so that the spanRecorder reconstructs the span hierarchy. --- internal/util/map.go | 39 +++++++++++ otel/propagator_test.go | 10 +-- otel/span_map.go | 136 ++++++++++++------------------------ otel/span_processor.go | 50 ++++++------- otel/span_processor_test.go | 2 +- 5 files changed, 115 insertions(+), 122 deletions(-) create mode 100644 internal/util/map.go diff --git a/internal/util/map.go b/internal/util/map.go new file mode 100644 index 000000000..8da46b28f --- /dev/null +++ b/internal/util/map.go @@ -0,0 +1,39 @@ +package util + +import "sync" + +type SyncMap[K comparable, V any] struct { + m sync.Map +} + +func (s *SyncMap[K, V]) Store(key K, value V) { + s.m.Store(key, value) +} + +func (s *SyncMap[K, V]) Load(key K) (V, bool) { + v, ok := s.m.Load(key) + if !ok { + var zero V + return zero, false + } + return v.(V), true +} + +func (s *SyncMap[K, V]) Delete(key K) { + s.m.Delete(key) +} + +func (s *SyncMap[K, V]) LoadOrStore(key K, value V) (V, bool) { + actual, loaded := s.m.LoadOrStore(key, value) + return actual.(V), loaded +} + +func (s *SyncMap[K, V]) Clear() { + s.m.Clear() +} + +func (s *SyncMap[K, V]) Range(f func(key K, value V) bool) { + s.m.Range(func(key, value any) bool { + return f(key.(K), value.(V)) + }) +} diff --git a/otel/propagator_test.go b/otel/propagator_test.go index 5c0d4b682..aaf9b36f9 100644 --- a/otel/propagator_test.go +++ b/otel/propagator_test.go @@ -35,19 +35,21 @@ func createTransactionAndMaybeSpan(transactionContext transactionTestContext, wi transaction.TraceID = TraceIDFromHex(transactionContext.traceID) transaction.SpanID = SpanIDFromHex(transactionContext.spanID) + otelTraceID := otelTraceIDFromHex(transactionContext.traceID) + otelSpanID := otelSpanIDFromHex(transactionContext.spanID) if withSpan { span := transaction.StartChild("op") // We want the child to have the SpanID from transactionContext, so // we "swap" span IDs from the transaction and the child span. transaction.SpanID = span.SpanID span.SpanID = SpanIDFromHex(transactionContext.spanID) - sentrySpanMap.Set(trace.SpanID(span.SpanID), span, trace.SpanID{}) + sentrySpanMap.Set(trace.SpanID(span.SpanID), span, otelTraceID) } - sentrySpanMap.Set(trace.SpanID(transaction.SpanID), transaction, trace.SpanID{}) + sentrySpanMap.Set(trace.SpanID(transaction.SpanID), transaction, otelTraceID) otelContext := trace.SpanContextConfig{ - TraceID: otelTraceIDFromHex(transactionContext.traceID), - SpanID: otelSpanIDFromHex(transactionContext.spanID), + TraceID: otelTraceID, + SpanID: otelSpanID, TraceFlags: trace.FlagsSampled, } return otelContext diff --git a/otel/span_map.go b/otel/span_map.go index 52ac7e539..9e2db91d0 100644 --- a/otel/span_map.go +++ b/otel/span_map.go @@ -1,123 +1,79 @@ package sentryotel import ( - "sync" + "sync/atomic" "github.com/getsentry/sentry-go" + "github.com/getsentry/sentry-go/internal/util" otelTrace "go.opentelemetry.io/otel/trace" ) -type spanInfo struct { - span *sentry.Span - finished bool - children map[otelTrace.SpanID]struct{} - parentID otelTrace.SpanID +// TransactionEntry holds a reference to the root transaction span and +// tracks the number of active spans belonging to this trace. +type TransactionEntry struct { + root *sentry.Span + activeCount atomic.Int64 } // SentrySpanMap is a mapping between OpenTelemetry spans and Sentry spans. -// It helps Sentry span processor and propagator to keep track of unfinished -// Sentry spans and to establish parent-child links between spans. +// It stores spans for lookup by the propagator and event processor, and +// manages transaction entries for creating child spans via the shared spanRecorder. type SentrySpanMap struct { - spanMap map[otelTrace.SpanID]*spanInfo - mu sync.RWMutex + spans util.SyncMap[otelTrace.SpanID, *sentry.Span] + transactions util.SyncMap[otelTrace.TraceID, *TransactionEntry] } -func (ssm *SentrySpanMap) Get(otelSpandID otelTrace.SpanID) (*sentry.Span, bool) { - ssm.mu.RLock() - defer ssm.mu.RUnlock() - info, ok := ssm.spanMap[otelSpandID] - if !ok { - return nil, false - } - return info.span, true -} - -func (ssm *SentrySpanMap) Set(otelSpandID otelTrace.SpanID, sentrySpan *sentry.Span, parentID otelTrace.SpanID) { - ssm.mu.Lock() - defer ssm.mu.Unlock() - - info := &spanInfo{ - span: sentrySpan, - finished: false, - children: make(map[otelTrace.SpanID]struct{}), - parentID: parentID, - } - ssm.spanMap[otelSpandID] = info - - if parentID != (otelTrace.SpanID{}) { - if parentInfo, ok := ssm.spanMap[parentID]; ok { - parentInfo.children[otelSpandID] = struct{}{} - } - } +// Get returns the current sentry.Span associated with the given OTel spanID. +func (ssm *SentrySpanMap) Get(spanID otelTrace.SpanID) (*sentry.Span, bool) { + return ssm.spans.Load(spanID) } -func (ssm *SentrySpanMap) MarkFinished(otelSpandID otelTrace.SpanID) { - ssm.mu.Lock() - defer ssm.mu.Unlock() - - info, ok := ssm.spanMap[otelSpandID] - if !ok { - return - } - - info.finished = true - ssm.tryCleanupSpan(otelSpandID) +// GetTransaction returns the transaction information for the given OTel traceID. +func (ssm *SentrySpanMap) GetTransaction(traceID otelTrace.TraceID) (*TransactionEntry, bool) { + return ssm.transactions.Load(traceID) } -// tryCleanupSpan deletes a parent and all children only if the whole subtree is marked finished. -// Must be called with lock held. -func (ssm *SentrySpanMap) tryCleanupSpan(spanID otelTrace.SpanID) { - info, ok := ssm.spanMap[spanID] - if !ok || !info.finished { - return - } +// Set stores the span and transaction information on the map. It handles both root and child spans automatically. +// +// If there is a cache miss on the given traceID, a transaction entry is created. Subsequent calls for the same traceID +// just increment the active span count. +func (ssm *SentrySpanMap) Set(spanID otelTrace.SpanID, span *sentry.Span, traceID otelTrace.TraceID) { + ssm.spans.Store(spanID, span) - if !info.span.IsTransaction() { - parentID := info.parentID - if parentID != (otelTrace.SpanID{}) { - if parentInfo, parentExists := ssm.spanMap[parentID]; parentExists && !parentInfo.finished { - return - } - } - } + t := &TransactionEntry{root: span} + t.activeCount.Store(1) - // We need to have a lookup first to see if every child is marked as finished to actually cleanup everything. - // There probably is a better way to do this - for childID := range info.children { - if childInfo, exists := ssm.spanMap[childID]; exists && !childInfo.finished { - return - } + if existing, loaded := ssm.transactions.LoadOrStore(traceID, t); loaded { + existing.activeCount.Add(1) } +} - parentID := info.parentID - if parentID != (otelTrace.SpanID{}) { - if parentInfo, ok := ssm.spanMap[parentID]; ok { - delete(parentInfo.children, spanID) - } - } +// MarkFinished removes a span from the map and decrements the transaction's active count. When the count reaches zero, +// the transaction entry is removed. +func (ssm *SentrySpanMap) MarkFinished(spanID otelTrace.SpanID, traceID otelTrace.TraceID) { + ssm.spans.Delete(spanID) - for childID := range info.children { - if childInfo, exists := ssm.spanMap[childID]; exists && childInfo.finished { - ssm.tryCleanupSpan(childID) + if entry, ok := ssm.transactions.Load(traceID); ok { + if entry.activeCount.Add(-1) <= 0 { + ssm.transactions.Delete(traceID) } } - - delete(ssm.spanMap, spanID) - if parentID != (otelTrace.SpanID{}) { - ssm.tryCleanupSpan(parentID) - } } +// Clear removes all spans stored on the map. func (ssm *SentrySpanMap) Clear() { - ssm.mu.Lock() - defer ssm.mu.Unlock() - ssm.spanMap = make(map[otelTrace.SpanID]*spanInfo) + ssm.spans.Clear() + ssm.transactions.Clear() } +// Len returns the number of spans on the map. func (ssm *SentrySpanMap) Len() int { - ssm.mu.RLock() - defer ssm.mu.RUnlock() - return len(ssm.spanMap) + count := 0 + ssm.spans.Range(func(_ otelTrace.SpanID, _ *sentry.Span) bool { + count++ + return true + }) + return count } -var sentrySpanMap = SentrySpanMap{spanMap: make(map[otelTrace.SpanID]*spanInfo)} +var sentrySpanMap = SentrySpanMap{} diff --git a/otel/span_processor.go b/otel/span_processor.go index ba2f67a16..dcffb3feb 100644 --- a/otel/span_processor.go +++ b/otel/span_processor.go @@ -32,46 +32,42 @@ func (ssp *sentrySpanProcessor) OnStart(parent context.Context, s otelSdkTrace.R otelTraceID := otelSpanContext.TraceID() otelParentSpanID := s.Parent().SpanID() - var sentryParentSpan *sentry.Span - if otelSpanContext.IsValid() { - sentryParentSpan, _ = sentrySpanMap.Get(otelParentSpanID) - } - - if sentryParentSpan != nil { - span := sentryParentSpan.StartChild(s.Name()) + txn, ok := sentrySpanMap.GetTransaction(otelTraceID) + if ok { + span := txn.root.StartChild(s.Name()) span.SpanID = sentry.SpanID(otelSpanID) + span.ParentSpanID = sentry.SpanID(otelParentSpanID) span.StartTime = s.StartTime() - - sentrySpanMap.Set(otelSpanID, span, otelParentSpanID) - } else { - traceParentContext := getTraceParentContext(parent) - transaction := sentry.StartTransaction( - parent, - s.Name(), - sentry.WithSpanSampled(traceParentContext.Sampled), - ) - transaction.SpanID = sentry.SpanID(otelSpanID) - transaction.TraceID = sentry.TraceID(otelTraceID) - transaction.ParentSpanID = sentry.SpanID(otelParentSpanID) - transaction.StartTime = s.StartTime() - if dynamicSamplingContext, valid := parent.Value(dynamicSamplingContextKey{}).(sentry.DynamicSamplingContext); valid { - transaction.SetDynamicSamplingContext(dynamicSamplingContext) - } - - sentrySpanMap.Set(otelSpanID, transaction, otelParentSpanID) + sentrySpanMap.Set(otelSpanID, span, otelTraceID) + return + } + traceParentContext := getTraceParentContext(parent) + transaction := sentry.StartTransaction( + parent, + s.Name(), + sentry.WithSpanSampled(traceParentContext.Sampled), + ) + transaction.SpanID = sentry.SpanID(otelSpanID) + transaction.TraceID = sentry.TraceID(otelTraceID) + transaction.ParentSpanID = sentry.SpanID(otelParentSpanID) + transaction.StartTime = s.StartTime() + if dsc, valid := parent.Value(dynamicSamplingContextKey{}).(sentry.DynamicSamplingContext); valid { + transaction.SetDynamicSamplingContext(dsc) } + sentrySpanMap.Set(otelSpanID, transaction, otelTraceID) } // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#onendspan func (ssp *sentrySpanProcessor) OnEnd(s otelSdkTrace.ReadOnlySpan) { otelSpanId := s.SpanContext().SpanID() + otelTraceId := s.SpanContext().TraceID() sentrySpan, ok := sentrySpanMap.Get(otelSpanId) if !ok || sentrySpan == nil { return } if utils.IsSentryRequestSpan(sentrySpan.Context(), s) { - sentrySpanMap.MarkFinished(otelSpanId) + sentrySpanMap.MarkFinished(otelSpanId, otelTraceId) return } @@ -84,7 +80,7 @@ func (ssp *sentrySpanProcessor) OnEnd(s otelSdkTrace.ReadOnlySpan) { sentrySpan.EndTime = s.EndTime() sentrySpan.Finish() - sentrySpanMap.MarkFinished(otelSpanId) + sentrySpanMap.MarkFinished(otelSpanId, otelTraceId) } // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/sdk.md#shutdown-1 diff --git a/otel/span_processor_test.go b/otel/span_processor_test.go index 23d4a31df..e424a6ea7 100644 --- a/otel/span_processor_test.go +++ b/otel/span_processor_test.go @@ -446,7 +446,7 @@ func TestSpanWithFinishedParentShouldBeDeleted(t *testing.T) { parent.End() _, parentExists = sentrySpanMap.Get(parentSpanID) - assertEqual(t, parentExists, true) + assertEqual(t, parentExists, false) _, childExists = sentrySpanMap.Get(childSpanID) assertEqual(t, childExists, true) From d3c9cf4d43400d50d4ab5770de2f05dca8c99d33 Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:18:23 +0100 Subject: [PATCH 2/7] fix: race on MarkFinished --- internal/util/map.go | 4 ++++ otel/span_map.go | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/util/map.go b/internal/util/map.go index 8da46b28f..7ca9d17a8 100644 --- a/internal/util/map.go +++ b/internal/util/map.go @@ -10,6 +10,10 @@ func (s *SyncMap[K, V]) Store(key K, value V) { s.m.Store(key, value) } +func (s *SyncMap[K, V]) CompareAndDelete(key K, value V) { + s.m.CompareAndDelete(key, value) +} + func (s *SyncMap[K, V]) Load(key K) (V, bool) { v, ok := s.m.Load(key) if !ok { diff --git a/otel/span_map.go b/otel/span_map.go index 9e2db91d0..757ff582c 100644 --- a/otel/span_map.go +++ b/otel/span_map.go @@ -55,7 +55,7 @@ func (ssm *SentrySpanMap) MarkFinished(spanID otelTrace.SpanID, traceID otelTrac if entry, ok := ssm.transactions.Load(traceID); ok { if entry.activeCount.Add(-1) <= 0 { - ssm.transactions.Delete(traceID) + ssm.transactions.CompareAndDelete(traceID, entry) } } } From d4f8f6788364aab4f1fccef7af84463a49ad50c1 Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Mon, 16 Feb 2026 17:05:48 +0100 Subject: [PATCH 3/7] check for remote parent span --- otel/span_processor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otel/span_processor.go b/otel/span_processor.go index dcffb3feb..6311313f9 100644 --- a/otel/span_processor.go +++ b/otel/span_processor.go @@ -33,7 +33,7 @@ func (ssp *sentrySpanProcessor) OnStart(parent context.Context, s otelSdkTrace.R otelParentSpanID := s.Parent().SpanID() txn, ok := sentrySpanMap.GetTransaction(otelTraceID) - if ok { + if ok && s.Parent().IsValid() && !s.Parent().IsRemote() { span := txn.root.StartChild(s.Name()) span.SpanID = sentry.SpanID(otelSpanID) span.ParentSpanID = sentry.SpanID(otelParentSpanID) From affa2ab112506dbb712940519c9a165b0a27192a Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Thu, 19 Feb 2026 10:57:29 +0100 Subject: [PATCH 4/7] identify root transactions correctly --- otel/event_processor.go | 2 +- otel/event_processor_test.go | 2 +- otel/propagator.go | 2 +- otel/span_map.go | 56 ++++++++++++++++++++++++------------ otel/span_processor.go | 4 +-- otel/span_processor_test.go | 41 +++++++++++++------------- 6 files changed, 63 insertions(+), 44 deletions(-) diff --git a/otel/event_processor.go b/otel/event_processor.go index c86a63732..ad3277310 100644 --- a/otel/event_processor.go +++ b/otel/event_processor.go @@ -20,7 +20,7 @@ func linkTraceContextToErrorEvent(event *sentry.Event, hint *sentry.EventHint) * otelSpanContext := trace.SpanContextFromContext(hint.Context) var sentrySpan *sentry.Span if otelSpanContext.IsValid() { - sentrySpan, _ = sentrySpanMap.Get(otelSpanContext.SpanID()) + sentrySpan, _ = sentrySpanMap.Get(otelSpanContext.TraceID(), otelSpanContext.SpanID()) } if sentrySpan == nil { return event diff --git a/otel/event_processor_test.go b/otel/event_processor_test.go index 9cde8f577..aea840b01 100644 --- a/otel/event_processor_test.go +++ b/otel/event_processor_test.go @@ -19,7 +19,7 @@ func TestLinkTraceContextToErrorEventSetsContext(t *testing.T) { t.Run(name, func(t *testing.T) { _, _, tracer := setupSpanProcessorTest() ctx, otelSpan := tracer.Start(emptyContextWithSentry(), "spanName") - sentrySpan, _ := sentrySpanMap.Get(otelSpan.SpanContext().SpanID()) + sentrySpan, _ := sentrySpanMap.Get(otelSpan.SpanContext().TraceID(), otelSpan.SpanContext().SpanID()) hub := sentry.GetHubFromContext(ctx) client, scope := hub.Client(), hub.Scope() diff --git a/otel/propagator.go b/otel/propagator.go index 8f17ff4fe..de785dea2 100644 --- a/otel/propagator.go +++ b/otel/propagator.go @@ -24,7 +24,7 @@ func (p sentryPropagator) Inject(ctx context.Context, carrier propagation.TextMa var sentrySpan *sentry.Span if spanContext.IsValid() { - sentrySpan, _ = sentrySpanMap.Get(spanContext.SpanID()) + sentrySpan, _ = sentrySpanMap.Get(spanContext.TraceID(), spanContext.SpanID()) } else { sentrySpan = nil } diff --git a/otel/span_map.go b/otel/span_map.go index 757ff582c..bacb3edd4 100644 --- a/otel/span_map.go +++ b/otel/span_map.go @@ -13,19 +13,32 @@ import ( type TransactionEntry struct { root *sentry.Span activeCount atomic.Int64 + // spans holds active (not yet finished) spans for Get lookups. + spans util.SyncMap[otelTrace.SpanID, *sentry.Span] + // knownSpanIDs tracks all span IDs ever added to this transaction. + knownSpanIDs util.SyncMap[otelTrace.SpanID, struct{}] +} + +// HasSpan returns true if the given spanID was ever part of this transaction. +func (te *TransactionEntry) HasSpan(spanID otelTrace.SpanID) bool { + _, ok := te.knownSpanIDs.Load(spanID) + return ok } // SentrySpanMap is a mapping between OpenTelemetry spans and Sentry spans. -// It stores spans for lookup by the propagator and event processor, and -// manages transaction entries for creating child spans via the shared spanRecorder. +// It stores spans per transaction for lookup by the propagator and event processor, +// and manages transaction entries for creating child spans via the shared spanRecorder. type SentrySpanMap struct { - spans util.SyncMap[otelTrace.SpanID, *sentry.Span] transactions util.SyncMap[otelTrace.TraceID, *TransactionEntry] } -// Get returns the current sentry.Span associated with the given OTel spanID. -func (ssm *SentrySpanMap) Get(spanID otelTrace.SpanID) (*sentry.Span, bool) { - return ssm.spans.Load(spanID) +// Get returns the current sentry.Span associated with the given OTel traceID and spanID. +func (ssm *SentrySpanMap) Get(traceID otelTrace.TraceID, spanID otelTrace.SpanID) (*sentry.Span, bool) { + entry, ok := ssm.transactions.Load(traceID) + if !ok { + return nil, false + } + return entry.spans.Load(spanID) } // GetTransaction returns the transaction information for the given OTel traceID. @@ -36,41 +49,46 @@ func (ssm *SentrySpanMap) GetTransaction(traceID otelTrace.TraceID) (*Transactio // Set stores the span and transaction information on the map. It handles both root and child spans automatically. // // If there is a cache miss on the given traceID, a transaction entry is created. Subsequent calls for the same traceID -// just increment the active span count. +// just increment the active span count and store the span in the entry. func (ssm *SentrySpanMap) Set(spanID otelTrace.SpanID, span *sentry.Span, traceID otelTrace.TraceID) { - ssm.spans.Store(spanID, span) - t := &TransactionEntry{root: span} t.activeCount.Store(1) + t.spans.Store(spanID, span) + t.knownSpanIDs.Store(spanID, struct{}{}) if existing, loaded := ssm.transactions.LoadOrStore(traceID, t); loaded { existing.activeCount.Add(1) + existing.spans.Store(spanID, span) + existing.knownSpanIDs.Store(spanID, struct{}{}) } } -// MarkFinished removes a span from the map and decrements the transaction's active count. When the count reaches zero, -// the transaction entry is removed. +// MarkFinished removes a span from the active set and decrements the transaction's active count. +// When the count reaches zero, the transaction entry is removed. +// The span ID is kept in knownSpanIDs so that HasSpan continues to work for child span creation. func (ssm *SentrySpanMap) MarkFinished(spanID otelTrace.SpanID, traceID otelTrace.TraceID) { - ssm.spans.Delete(spanID) + entry, ok := ssm.transactions.Load(traceID) + if !ok { + return + } + + entry.spans.Delete(spanID) - if entry, ok := ssm.transactions.Load(traceID); ok { - if entry.activeCount.Add(-1) <= 0 { - ssm.transactions.CompareAndDelete(traceID, entry) - } + if entry.activeCount.Add(-1) <= 0 { + ssm.transactions.CompareAndDelete(traceID, entry) } } // Clear removes all spans stored on the map. func (ssm *SentrySpanMap) Clear() { - ssm.spans.Clear() ssm.transactions.Clear() } // Len returns the number of spans on the map. func (ssm *SentrySpanMap) Len() int { count := 0 - ssm.spans.Range(func(_ otelTrace.SpanID, _ *sentry.Span) bool { - count++ + ssm.transactions.Range(func(_ otelTrace.TraceID, entry *TransactionEntry) bool { + count += int(entry.activeCount.Load()) return true }) return count diff --git a/otel/span_processor.go b/otel/span_processor.go index 6311313f9..1b1e9aa0e 100644 --- a/otel/span_processor.go +++ b/otel/span_processor.go @@ -33,7 +33,7 @@ func (ssp *sentrySpanProcessor) OnStart(parent context.Context, s otelSdkTrace.R otelParentSpanID := s.Parent().SpanID() txn, ok := sentrySpanMap.GetTransaction(otelTraceID) - if ok && s.Parent().IsValid() && !s.Parent().IsRemote() { + if ok && s.Parent().IsValid() && !s.Parent().IsRemote() && txn.HasSpan(otelParentSpanID) { span := txn.root.StartChild(s.Name()) span.SpanID = sentry.SpanID(otelSpanID) span.ParentSpanID = sentry.SpanID(otelParentSpanID) @@ -61,7 +61,7 @@ func (ssp *sentrySpanProcessor) OnStart(parent context.Context, s otelSdkTrace.R func (ssp *sentrySpanProcessor) OnEnd(s otelSdkTrace.ReadOnlySpan) { otelSpanId := s.SpanContext().SpanID() otelTraceId := s.SpanContext().TraceID() - sentrySpan, ok := sentrySpanMap.Get(otelSpanId) + sentrySpan, ok := sentrySpanMap.Get(otelTraceId, otelSpanId) if !ok || sentrySpan == nil { return } diff --git a/otel/span_processor_test.go b/otel/span_processor_test.go index e424a6ea7..5bbdcd795 100644 --- a/otel/span_processor_test.go +++ b/otel/span_processor_test.go @@ -103,7 +103,7 @@ func TestOnStartRootSpan(t *testing.T) { if sentrySpanMap.Len() != 1 { t.Errorf("Span map size is %d, expected: 1", sentrySpanMap.Len()) } - sentrySpan, ok := sentrySpanMap.Get(otelSpan.SpanContext().SpanID()) + sentrySpan, ok := sentrySpanMap.Get(otelSpan.SpanContext().TraceID(), otelSpan.SpanContext().SpanID()) if !ok { t.Errorf("Sentry span not found in the map") } @@ -158,7 +158,7 @@ func TestOnStartWithTraceParentContext(t *testing.T) { if sentrySpanMap.Len() != 1 { t.Errorf("Span map size is %d, expected: 1", sentrySpanMap.Len()) } - sentrySpan, ok := sentrySpanMap.Get(otelSpan.SpanContext().SpanID()) + sentrySpan, ok := sentrySpanMap.Get(otelSpan.SpanContext().TraceID(), otelSpan.SpanContext().SpanID()) if !ok { t.Errorf("Sentry span not found in the map") } @@ -201,11 +201,11 @@ func TestOnStartWithExistingParentSpan(t *testing.T) { t.Errorf("Span map size is %d, expected: 2", sentrySpanMap.Len()) } - sentryTransaction, ok1 := sentrySpanMap.Get(otelRootSpan.SpanContext().SpanID()) + sentryTransaction, ok1 := sentrySpanMap.Get(otelRootSpan.SpanContext().TraceID(), otelRootSpan.SpanContext().SpanID()) if !ok1 { t.Errorf("Sentry span not found in the map") } - sentryChildSpan, ok2 := sentrySpanMap.Get(otelChildSpan.SpanContext().SpanID()) + sentryChildSpan, ok2 := sentrySpanMap.Get(otelChildSpan.SpanContext().TraceID(), otelChildSpan.SpanContext().SpanID()) if !ok2 { t.Errorf("Sentry span not found in the map") } @@ -229,7 +229,7 @@ func TestOnEndWithTransaction(t *testing.T) { attribute.String("key1", "value1"), ), ) - sentryTransaction, _ := sentrySpanMap.Get(otelSpan.SpanContext().SpanID()) + sentryTransaction, _ := sentrySpanMap.Get(otelSpan.SpanContext().TraceID(), otelSpan.SpanContext().SpanID()) assertTrue(t, sentryTransaction.EndTime.IsZero()) otelSpan.End() @@ -275,8 +275,8 @@ func TestOnEndWithChildSpan(t *testing.T) { attribute.String("childKey1", "value1"), ), ) - sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().SpanID()) - sentryChildSpan, _ := sentrySpanMap.Get(otelChildSpan.SpanContext().SpanID()) + sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().TraceID(), otelRootSpan.SpanContext().SpanID()) + sentryChildSpan, _ := sentrySpanMap.Get(otelChildSpan.SpanContext().TraceID(), otelChildSpan.SpanContext().SpanID()) otelChildSpan.End() otelRootSpan.End() @@ -313,7 +313,7 @@ func TestOnEndDoesNotFinishSentryRequests(t *testing.T) { // Hostname is same as in Sentry DSN trace.WithAttributes(attribute.String("http.url", "https://example.com/api/123/envelope/")), ) - sentrySpan, _ := sentrySpanMap.Get(otelSpan.SpanContext().SpanID()) + sentrySpan, _ := sentrySpanMap.Get(otelSpan.SpanContext().TraceID(), otelSpan.SpanContext().SpanID()) otelSpan.End() // The span map should be empty @@ -342,8 +342,8 @@ func TestParseSpanAttributesHttpClient(t *testing.T) { trace.WithAttributes(attribute.String("http.url", "http://localhost:2345/api/checkout2?q1=q2#fragment")), ) - sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().SpanID()) - sentrySpan, _ := sentrySpanMap.Get(otelChildSpan.SpanContext().SpanID()) + sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().TraceID(), otelRootSpan.SpanContext().SpanID()) + sentrySpan, _ := sentrySpanMap.Get(otelChildSpan.SpanContext().TraceID(), otelChildSpan.SpanContext().SpanID()) otelChildSpan.End() otelRootSpan.End() @@ -381,8 +381,8 @@ func TestParseSpanAttributesHttpServer(t *testing.T) { // We ignore "http.url" if "http.target" is present trace.WithAttributes(attribute.String("http.url", "http://localhost:2345/api/checkout?q1=q2#fragment")), ) - sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().SpanID()) - sentrySpan, _ := sentrySpanMap.Get(otelChildSpan.SpanContext().SpanID()) + sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().TraceID(), otelRootSpan.SpanContext().SpanID()) + sentrySpan, _ := sentrySpanMap.Get(otelChildSpan.SpanContext().TraceID(), otelChildSpan.SpanContext().SpanID()) otelChildSpan.End() otelRootSpan.End() @@ -406,20 +406,20 @@ func TestSpanBecomesChildOfFinishedSpan(t *testing.T) { emptyContextWithSentry(), "rootSpan", ) - sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().SpanID()) + sentryTransaction, _ := sentrySpanMap.Get(otelRootSpan.SpanContext().TraceID(), otelRootSpan.SpanContext().SpanID()) ctx, childSpan1 := tracer.Start( ctx, "span name 1", ) - sentrySpan1, _ := sentrySpanMap.Get(childSpan1.SpanContext().SpanID()) + sentrySpan1, _ := sentrySpanMap.Get(childSpan1.SpanContext().TraceID(), childSpan1.SpanContext().SpanID()) childSpan1.End() _, childSpan2 := tracer.Start( ctx, "span name 2", ) - sentrySpan2, _ := sentrySpanMap.Get(childSpan2.SpanContext().SpanID()) + sentrySpan2, _ := sentrySpanMap.Get(childSpan2.SpanContext().TraceID(), childSpan2.SpanContext().SpanID()) childSpan2.End() otelRootSpan.End() @@ -435,23 +435,24 @@ func TestSpanWithFinishedParentShouldBeDeleted(t *testing.T) { _, _, tracer := setupSpanProcessorTest() ctx, parent := tracer.Start(context.Background(), "parent") + traceID := parent.SpanContext().TraceID() parentSpanID := parent.SpanContext().SpanID() _, child := tracer.Start(ctx, "child") childSpanID := child.SpanContext().SpanID() - _, parentExists := sentrySpanMap.Get(parentSpanID) - _, childExists := sentrySpanMap.Get(childSpanID) + _, parentExists := sentrySpanMap.Get(traceID, parentSpanID) + _, childExists := sentrySpanMap.Get(traceID, childSpanID) assertEqual(t, parentExists, true) assertEqual(t, childExists, true) parent.End() - _, parentExists = sentrySpanMap.Get(parentSpanID) + _, parentExists = sentrySpanMap.Get(traceID, parentSpanID) assertEqual(t, parentExists, false) - _, childExists = sentrySpanMap.Get(childSpanID) + _, childExists = sentrySpanMap.Get(traceID, childSpanID) assertEqual(t, childExists, true) child.End() - _, childExists = sentrySpanMap.Get(childSpanID) + _, childExists = sentrySpanMap.Get(traceID, childSpanID) assertEqual(t, childExists, false) assertEqual(t, sentrySpanMap.Len(), 0) } From 0e5fed59d50ced7fb093ead263744c75ff6326b8 Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Fri, 20 Feb 2026 10:17:48 +0100 Subject: [PATCH 5/7] add test for transaction independence --- otel/span_processor_test.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/otel/span_processor_test.go b/otel/span_processor_test.go index 5bbdcd795..51491f759 100644 --- a/otel/span_processor_test.go +++ b/otel/span_processor_test.go @@ -456,3 +456,39 @@ func TestSpanWithFinishedParentShouldBeDeleted(t *testing.T) { assertEqual(t, childExists, false) assertEqual(t, sentrySpanMap.Len(), 0) } + +func TestTransactionsStayIndependent(t *testing.T) { + _, _, tracer := setupSpanProcessorTest() + sharedTraceID := "bc6d53f15eb88f4320054569b8c553d4" + + ctx1 := trace.ContextWithSpanContext( + emptyContextWithSentry(), + trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: otelTraceIDFromHex(sharedTraceID), + SpanID: otelSpanIDFromHex("1111111111111111"), + TraceFlags: trace.FlagsSampled, + }), + ) + ctx2 := trace.ContextWithSpanContext( + emptyContextWithSentry(), + trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: otelTraceIDFromHex(sharedTraceID), + SpanID: otelSpanIDFromHex("2222222222222222"), + TraceFlags: trace.FlagsSampled, + }), + ) + + _, otelRoot1 := tracer.Start(ctx1, "request1") + sentryTxn1, _ := sentrySpanMap.Get(otelRoot1.SpanContext().TraceID(), otelRoot1.SpanContext().SpanID()) + + _, otelRoot2 := tracer.Start(ctx2, "request2") + sentryTxn2, _ := sentrySpanMap.Get(otelRoot2.SpanContext().TraceID(), otelRoot2.SpanContext().SpanID()) + + assertEqual(t, sentryTxn1.IsTransaction(), true) + assertEqual(t, sentryTxn2.IsTransaction(), true) + + assertTrue(t, sentryTxn1.SpanID != sentryTxn2.SpanID) + + otelRoot1.End() + otelRoot2.End() +} From 981cbccbdcdb337b19ac4f7a17b1482a4d90d391 Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Fri, 20 Feb 2026 10:51:34 +0100 Subject: [PATCH 6/7] add span map bench --- otel/span_map_test.go | 89 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 otel/span_map_test.go diff --git a/otel/span_map_test.go b/otel/span_map_test.go new file mode 100644 index 000000000..753bf74ea --- /dev/null +++ b/otel/span_map_test.go @@ -0,0 +1,89 @@ +package sentryotel + +import ( + "context" + "fmt" + "sync" + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" +) + +type noopExporter struct{} + +func (e *noopExporter) ExportSpans(_ context.Context, _ []sdktrace.ReadOnlySpan) error { + return nil +} +func (e *noopExporter) Shutdown(_ context.Context) error { return nil } + +func setupTracerProvider(useSentry bool) (*sdktrace.TracerProvider, func()) { + res, _ := resource.New(context.Background()) + + tp := sdktrace.NewTracerProvider( + sdktrace.WithResource(res), + sdktrace.WithSampler(sdktrace.AlwaysSample()), + ) + + if useSentry { + sentryProcessor := NewSentrySpanProcessor() + tp.RegisterSpanProcessor(sentryProcessor) + } + + tp.RegisterSpanProcessor(sdktrace.NewBatchSpanProcessor(&noopExporter{})) + + otel.SetTracerProvider(tp) + return tp, func() { _ = tp.Shutdown(context.Background()) } +} + +func simulateWorkflowBatch(ctx context.Context, tracer trace.Tracer, numInstances, dbSpansPerInstance int) { + ctx, rootSpan := tracer.Start(ctx, "job.workflow_runner") + + var wg sync.WaitGroup + for i := 0; i < numInstances; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + wfCtx, wfSpan := tracer.Start(ctx, fmt.Sprintf("workflow.debt_reminder_%d", idx)) + + for j := 0; j < dbSpansPerInstance; j++ { + _, dbSpan := tracer.Start(wfCtx, fmt.Sprintf("postgres.query_%d", j)) + dbSpan.End() + } + + wfSpan.End() + }(i) + } + wg.Wait() + rootSpan.End() +} + +// BenchmarkSpanMapContention measures how much the Sentry span processor slows down unrelated handler +// spans when a large workflow transaction is being created and cleaned up concurrently. +func BenchmarkSpanMapContention(b *testing.B) { + _, cleanup := setupTracerProvider(true) + defer cleanup() + tracer := otel.Tracer("bench") + + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + iter := 0 + for pb.Next() { + if iter%500 == 0 { + // Every 500th iteration, simulate a large workflow batch. + // This creates 100×33 = 3300 child spans under a single root, + // then ends them all — exercising the cleanup path under contention. + simulateWorkflowBatch(context.Background(), tracer, 100, 33) + } else { + // This is the hot path that gets blocked from span cleanup. + ctx, span := tracer.Start(context.Background(), "GET /api/ping") + _, child := tracer.Start(ctx, "db.query") + child.End() + span.End() + } + iter++ + } + }) +} From 373e58cbeea06416cbb76c9bb287f2d131c91dfb Mon Sep 17 00:00:00 2001 From: Giannis Gkiortzis <58184179+giortzisg@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:56:30 +0100 Subject: [PATCH 7/7] fix: concurrent Set and MarkFinished --- otel/span_map.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/otel/span_map.go b/otel/span_map.go index bacb3edd4..4317981c9 100644 --- a/otel/span_map.go +++ b/otel/span_map.go @@ -75,7 +75,15 @@ func (ssm *SentrySpanMap) MarkFinished(spanID otelTrace.SpanID, traceID otelTrac entry.spans.Delete(spanID) if entry.activeCount.Add(-1) <= 0 { - ssm.transactions.CompareAndDelete(traceID, entry) + // CompareAndSwap(CAS) is used to prevent a race between Set and MarkFinished. + // The race has two windows: + // 1. MarkFinished decremented activeCount to 0 but hasn't CAS'd yet -> Set Adds(1), and CAS fails keeping the + // transaction, since we just added a new span. + // 2. MarkFinished already CAS'd -> Set will store on the transaction marked for deletion (better than + // creating a new orphaned span). + if entry.activeCount.CompareAndSwap(0, -1) { + ssm.transactions.CompareAndDelete(traceID, entry) + } } }