fix: improve otel span map cleanup performance #1200
base: master
Changes from all commits: 5bd7a99, d3c9cf4, d4f8f67, affa2ab, 0e5fed5, 981cbcc, 7a9f24c, 373e58c
New file, package util (imported below as github.com/getsentry/sentry-go/internal/util):

@@ -0,0 +1,43 @@
package util

import "sync"

// SyncMap is a thin generic wrapper around sync.Map that hides the
// type assertions behind a typed API.
type SyncMap[K comparable, V any] struct {
	m sync.Map
}

// Store sets the value for a key.
func (s *SyncMap[K, V]) Store(key K, value V) {
	s.m.Store(key, value)
}

// CompareAndDelete deletes the entry for key only if its current value equals value.
func (s *SyncMap[K, V]) CompareAndDelete(key K, value V) {
	s.m.CompareAndDelete(key, value)
}

// Load returns the value stored for key, or the zero value of V if the key is absent.
func (s *SyncMap[K, V]) Load(key K) (V, bool) {
	v, ok := s.m.Load(key)
	if !ok {
		var zero V
		return zero, false
	}
	return v.(V), true
}

// Delete removes the entry for key.
func (s *SyncMap[K, V]) Delete(key K) {
	s.m.Delete(key)
}

// LoadOrStore returns the existing value for key if present; otherwise it
// stores and returns the given value. The second result is true if the
// value was already present.
func (s *SyncMap[K, V]) LoadOrStore(key K, value V) (V, bool) {
	actual, loaded := s.m.LoadOrStore(key, value)
	return actual.(V), loaded
}

// Clear removes all entries.
func (s *SyncMap[K, V]) Clear() {
	s.m.Clear()
}

// Range calls f sequentially for each key/value pair; iteration stops if f returns false.
func (s *SyncMap[K, V]) Range(f func(key K, value V) bool) {
	s.m.Range(func(key, value any) bool {
		return f(key.(K), value.(V))
	})
}
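For context, a minimal sketch of how the wrapper is used. The string/int key and value types are illustrative, not the span types from this PR, and since the package is internal it only compiles from within the sentry-go module:

package main

import (
	"fmt"

	"github.com/getsentry/sentry-go/internal/util" // internal: only importable inside the sentry-go module
)

func main() {
	var m util.SyncMap[string, int] // the zero value is ready to use, like sync.Map

	m.Store("a", 1)

	// LoadOrStore keeps the existing value and reports loaded=true.
	actual, loaded := m.LoadOrStore("a", 2)
	fmt.Println(actual, loaded) // 1 true

	// CompareAndDelete removes the entry only if the stored value matches.
	m.CompareAndDelete("a", 2) // no-op: the stored value is 1
	m.CompareAndDelete("a", 1) // deletes the entry

	if _, ok := m.Load("a"); !ok {
		fmt.Println("deleted")
	}

	// Range iterates until the callback returns false.
	m.Store("b", 2)
	m.Range(func(k string, v int) bool {
		fmt.Println(k, v)
		return true
	})
}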
Modified file, package sentryotel (the span map):

@@ -1,123 +1,105 @@

Before (a single mutex-guarded map with recursive subtree cleanup):

package sentryotel

import (
	"sync"

	"github.com/getsentry/sentry-go"
	otelTrace "go.opentelemetry.io/otel/trace"
)

type spanInfo struct {
	span     *sentry.Span
	finished bool
	children map[otelTrace.SpanID]struct{}
	parentID otelTrace.SpanID
}

// SentrySpanMap is a mapping between OpenTelemetry spans and Sentry spans.
// It helps Sentry span processor and propagator to keep track of unfinished
// Sentry spans and to establish parent-child links between spans.
type SentrySpanMap struct {
	spanMap map[otelTrace.SpanID]*spanInfo
	mu      sync.RWMutex
}

func (ssm *SentrySpanMap) Get(otelSpandID otelTrace.SpanID) (*sentry.Span, bool) {
	ssm.mu.RLock()
	defer ssm.mu.RUnlock()
	info, ok := ssm.spanMap[otelSpandID]
	if !ok {
		return nil, false
	}
	return info.span, true
}

func (ssm *SentrySpanMap) Set(otelSpandID otelTrace.SpanID, sentrySpan *sentry.Span, parentID otelTrace.SpanID) {
	ssm.mu.Lock()
	defer ssm.mu.Unlock()

	info := &spanInfo{
		span:     sentrySpan,
		finished: false,
		children: make(map[otelTrace.SpanID]struct{}),
		parentID: parentID,
	}
	ssm.spanMap[otelSpandID] = info

	if parentID != (otelTrace.SpanID{}) {
		if parentInfo, ok := ssm.spanMap[parentID]; ok {
			parentInfo.children[otelSpandID] = struct{}{}
		}
	}
}

func (ssm *SentrySpanMap) MarkFinished(otelSpandID otelTrace.SpanID) {
	ssm.mu.Lock()
	defer ssm.mu.Unlock()

	info, ok := ssm.spanMap[otelSpandID]
	if !ok {
		return
	}

	info.finished = true
	ssm.tryCleanupSpan(otelSpandID)
}

// tryCleanupSpan deletes a parent and all children only if the whole subtree is marked finished.
// Must be called with lock held.
func (ssm *SentrySpanMap) tryCleanupSpan(spanID otelTrace.SpanID) {
	info, ok := ssm.spanMap[spanID]
	if !ok || !info.finished {
		return
	}

	if !info.span.IsTransaction() {
		parentID := info.parentID
		if parentID != (otelTrace.SpanID{}) {
			if parentInfo, parentExists := ssm.spanMap[parentID]; parentExists && !parentInfo.finished {
				return
			}
		}
	}

	// We need to have a lookup first to see if every child is marked as finished to actually cleanup everything.
	// There probably is a better way to do this
	for childID := range info.children {
		if childInfo, exists := ssm.spanMap[childID]; exists && !childInfo.finished {
			return
		}
	}

	parentID := info.parentID
	if parentID != (otelTrace.SpanID{}) {
		if parentInfo, ok := ssm.spanMap[parentID]; ok {
			delete(parentInfo.children, spanID)
		}
	}

	for childID := range info.children {
		if childInfo, exists := ssm.spanMap[childID]; exists && childInfo.finished {
			ssm.tryCleanupSpan(childID)
		}
	}

	delete(ssm.spanMap, spanID)
	if parentID != (otelTrace.SpanID{}) {
		ssm.tryCleanupSpan(parentID)
	}
}

// Clear removes all spans stored on the map.
func (ssm *SentrySpanMap) Clear() {
	ssm.mu.Lock()
	defer ssm.mu.Unlock()
	ssm.spanMap = make(map[otelTrace.SpanID]*spanInfo)
}

// Len returns the number of spans on the map.
func (ssm *SentrySpanMap) Len() int {
	ssm.mu.RLock()
	defer ssm.mu.RUnlock()
	return len(ssm.spanMap)
}

var sentrySpanMap = SentrySpanMap{spanMap: make(map[otelTrace.SpanID]*spanInfo)}

After (per-transaction entries with atomic active-span counters):

package sentryotel

import (
	"sync/atomic"

	"github.com/getsentry/sentry-go"
	"github.com/getsentry/sentry-go/internal/util"
	otelTrace "go.opentelemetry.io/otel/trace"
)

// TransactionEntry holds a reference to the root transaction span and
// tracks the number of active spans belonging to this trace.
type TransactionEntry struct {
	root        *sentry.Span
	activeCount atomic.Int64
	// spans holds active (not yet finished) spans for Get lookups.
	spans util.SyncMap[otelTrace.SpanID, *sentry.Span]
	// knownSpanIDs tracks all span IDs ever added to this transaction.
	knownSpanIDs util.SyncMap[otelTrace.SpanID, struct{}]
}

// HasSpan returns true if the given spanID was ever part of this transaction.
func (te *TransactionEntry) HasSpan(spanID otelTrace.SpanID) bool {
	_, ok := te.knownSpanIDs.Load(spanID)
	return ok
}

// SentrySpanMap is a mapping between OpenTelemetry spans and Sentry spans.
// It stores spans per transaction for lookup by the propagator and event processor,
// and manages transaction entries for creating child spans via the shared spanRecorder.
type SentrySpanMap struct {
	transactions util.SyncMap[otelTrace.TraceID, *TransactionEntry]
}

// Get returns the current sentry.Span associated with the given OTel traceID and spanID.
func (ssm *SentrySpanMap) Get(traceID otelTrace.TraceID, spanID otelTrace.SpanID) (*sentry.Span, bool) {
	entry, ok := ssm.transactions.Load(traceID)
	if !ok {
		return nil, false
	}
	return entry.spans.Load(spanID)
}

// GetTransaction returns the transaction information for the given OTel traceID.
func (ssm *SentrySpanMap) GetTransaction(traceID otelTrace.TraceID) (*TransactionEntry, bool) {
	return ssm.transactions.Load(traceID)
}

// Set stores the span and transaction information on the map. It handles both root and child spans automatically.
//
// If there is a cache miss on the given traceID, a transaction entry is created. Subsequent calls for the same traceID
// just increment the active span count and store the span in the entry.
func (ssm *SentrySpanMap) Set(spanID otelTrace.SpanID, span *sentry.Span, traceID otelTrace.TraceID) {
	t := &TransactionEntry{root: span}
	t.activeCount.Store(1)
	t.spans.Store(spanID, span)
	t.knownSpanIDs.Store(spanID, struct{}{})

	if existing, loaded := ssm.transactions.LoadOrStore(traceID, t); loaded {
		existing.activeCount.Add(1)
		existing.spans.Store(spanID, span)
		existing.knownSpanIDs.Store(spanID, struct{}{})
	}
}

// MarkFinished removes a span from the active set and decrements the transaction's active count.
// When the count reaches zero, the transaction entry is removed.
// The span ID is kept in knownSpanIDs so that HasSpan continues to work for child span creation.
func (ssm *SentrySpanMap) MarkFinished(spanID otelTrace.SpanID, traceID otelTrace.TraceID) {
	entry, ok := ssm.transactions.Load(traceID)
	if !ok {
		return
	}

	entry.spans.Delete(spanID)

	if entry.activeCount.Add(-1) <= 0 {
		// CompareAndSwap (CAS) is used to prevent a race between Set and MarkFinished.
		// The race has two windows:
		// 1. MarkFinished decremented activeCount to 0 but hasn't CAS'd yet -> Set adds 1
		//    and the CAS fails, keeping the transaction, since a new span was just added.
		// 2. MarkFinished already CAS'd -> Set will store on the transaction marked for
		//    deletion (better than creating a new orphaned span).
		if entry.activeCount.CompareAndSwap(0, -1) {
			ssm.transactions.CompareAndDelete(traceID, entry)
		}
	}
}

// Clear removes all spans stored on the map.
func (ssm *SentrySpanMap) Clear() {
	ssm.transactions.Clear()
}

// Len returns the number of spans on the map.
func (ssm *SentrySpanMap) Len() int {
	count := 0
	ssm.transactions.Range(func(_ otelTrace.TraceID, entry *TransactionEntry) bool {
		count += int(entry.activeCount.Load())
		return true
	})
	return count
}

var sentrySpanMap = SentrySpanMap{}

Review comments:

- On lines +53 to +63 (Set), sentry[bot], marked as resolved: Bug: a race condition between Set and MarkFinished; the CompareAndSwap in MarkFinished addresses it (see the explanation in that function's comment).
- On lines +77 to +86 (the CAS block in MarkFinished), PR author: @sl0thentr0py @Litarnus, I would appreciate it if you could verify my logic here. I added the fix due to the comment above, and I think it makes sense.
  Reply: It looks good to me; your comments describe it accurately, imo.
- On Len, reviewer: Maybe we should add a comment that this is very expensive and used for testing, to discourage anyone from using it by accident.
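A standalone sketch of the counter-plus-CAS pattern described in the MarkFinished comment above (toy types and stdlib only, not the SDK's actual code): the final decrement deletes the entry only if it can swap the counter from 0 to -1, so a Set that increments in between keeps the transaction alive.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// entry mimics TransactionEntry's active-span counter (illustrative type only).
type entry struct {
	active atomic.Int64
}

func main() {
	var m sync.Map // traceID -> *entry
	e := &entry{}
	e.active.Store(1) // one active span
	m.Store("trace-1", e)

	var wg sync.WaitGroup
	wg.Add(2)

	// Finisher (the MarkFinished role): decrement, then delete only if we can swap 0 -> -1.
	go func() {
		defer wg.Done()
		if e.active.Add(-1) <= 0 {
			// Window 1: if the other goroutine increments here, the counter is
			// back above zero, the CAS fails, and the entry survives.
			if e.active.CompareAndSwap(0, -1) {
				m.CompareAndDelete("trace-1", e)
			}
		}
	}()

	// Racing Set: a new span for the same trace increments the counter.
	go func() {
		defer wg.Done()
		e.active.Add(1)
	}()

	wg.Wait()
	_, alive := m.Load("trace-1")
	// The outcome depends on interleaving (window 2 is the case where the entry
	// was already removed); the CAS only guarantees that an entry which just
	// gained a span is never deleted out from under it.
	fmt.Println("entry still present:", alive)
}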
New file, package sentryotel (benchmark):

@@ -0,0 +1,89 @@
package sentryotel

import (
	"context"
	"fmt"
	"sync"
	"testing"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/sdk/resource"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/trace"
)

type noopExporter struct{}

func (e *noopExporter) ExportSpans(_ context.Context, _ []sdktrace.ReadOnlySpan) error {
	return nil
}
func (e *noopExporter) Shutdown(_ context.Context) error { return nil }

func setupTracerProvider(useSentry bool) (*sdktrace.TracerProvider, func()) {
	res, _ := resource.New(context.Background())

	tp := sdktrace.NewTracerProvider(
		sdktrace.WithResource(res),
		sdktrace.WithSampler(sdktrace.AlwaysSample()),
	)

	if useSentry {
		sentryProcessor := NewSentrySpanProcessor()
		tp.RegisterSpanProcessor(sentryProcessor)
	}

	tp.RegisterSpanProcessor(sdktrace.NewBatchSpanProcessor(&noopExporter{}))

	otel.SetTracerProvider(tp)
	return tp, func() { _ = tp.Shutdown(context.Background()) }
}

func simulateWorkflowBatch(ctx context.Context, tracer trace.Tracer, numInstances, dbSpansPerInstance int) {
	ctx, rootSpan := tracer.Start(ctx, "job.workflow_runner")

	var wg sync.WaitGroup
	for i := 0; i < numInstances; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			wfCtx, wfSpan := tracer.Start(ctx, fmt.Sprintf("workflow.debt_reminder_%d", idx))

			for j := 0; j < dbSpansPerInstance; j++ {
				_, dbSpan := tracer.Start(wfCtx, fmt.Sprintf("postgres.query_%d", j))
				dbSpan.End()
			}

			wfSpan.End()
		}(i)
	}
	wg.Wait()
	rootSpan.End()
}

// BenchmarkSpanMapContention measures how much the Sentry span processor slows down unrelated handler
// spans when a large workflow transaction is being created and cleaned up concurrently.
func BenchmarkSpanMapContention(b *testing.B) {
	_, cleanup := setupTracerProvider(true)
	defer cleanup()
	tracer := otel.Tracer("bench")

	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		iter := 0
		for pb.Next() {
			if iter%500 == 0 {
				// Every 500th iteration, simulate a large workflow batch.
				// This creates 100×33 = 3300 child spans under a single root,
				// then ends them all, exercising the cleanup path under contention.
				simulateWorkflowBatch(context.Background(), tracer, 100, 33)
			} else {
				// This is the hot path that gets blocked by span cleanup.
				ctx, span := tracer.Start(context.Background(), "GET /api/ping")
				_, child := tracer.Start(ctx, "db.query")
				child.End()
				span.End()
			}
			iter++
		}
	})
}