From 3825981e8f424b6bd2192aa0c388f0c98ca8b956 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 17 Nov 2025 05:01:21 -0800 Subject: [PATCH 1/4] Refactor observability stack with OpenTelemetry --- exchange/consent-engine/go.mod | 30 +- exchange/consent-engine/go.sum | 62 ++- exchange/consent-engine/main.go | 6 +- exchange/docker-compose.yml | 4 + exchange/shared/monitoring/go.mod | 37 ++ exchange/shared/monitoring/go.sum | 69 +++ exchange/shared/monitoring/metrics.go | 301 +++++++++++++ exchange/shared/monitoring/metrics_test.go | 479 +++++++++++++++++++++ exchange/shared/monitoring/otel_metrics.go | 440 +++++++++++++++++++ observability/README.md | 474 ++++++++++++++------ observability/docker-compose.yml | 4 +- observability/generate_sample_traffic.sh | 156 +++++++ observability/prometheus/prometheus.yml | 2 +- portals/consent-portal/nginx.conf | 14 +- 14 files changed, 1927 insertions(+), 151 deletions(-) create mode 100644 exchange/shared/monitoring/go.mod create mode 100644 exchange/shared/monitoring/go.sum create mode 100644 exchange/shared/monitoring/metrics.go create mode 100644 exchange/shared/monitoring/metrics_test.go create mode 100644 exchange/shared/monitoring/otel_metrics.go create mode 100755 observability/generate_sample_traffic.sh diff --git a/exchange/consent-engine/go.mod b/exchange/consent-engine/go.mod index 82acfc69..158f4484 100644 --- a/exchange/consent-engine/go.mod +++ b/exchange/consent-engine/go.mod @@ -11,25 +11,51 @@ require ( require ( github.com/DATA-DOG/go-sqlmock v1.5.2 + github.com/gov-dx-sandbox/exchange/shared/monitoring v0.0.0 github.com/stretchr/testify v1.11.1 gorm.io/driver/postgres v1.6.0 gorm.io/gorm v1.31.1 ) require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + 
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/pgx/v5 v5.7.6 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/kr/text v0.2.0 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_golang v1.20.5 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.60.1 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect + go.opentelemetry.io/otel v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect + go.opentelemetry.io/otel/metric v1.32.0 // indirect + go.opentelemetry.io/otel/sdk v1.32.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.32.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.46.0 // indirect + golang.org/x/net v0.47.0 // indirect golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect golang.org/x/text v0.32.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) @@ -37,4 +63,6 @@ replace github.com/gov-dx-sandbox/exchange/shared/config => ./shared/config replace github.com/gov-dx-sandbox/exchange/shared/constants => 
./shared/constants +replace github.com/gov-dx-sandbox/exchange/shared/monitoring => ../shared/monitoring + replace github.com/gov-dx-sandbox/exchange/shared/utils => ./shared/utils diff --git a/exchange/consent-engine/go.sum b/exchange/consent-engine/go.sum index 24946bc5..ef70252f 100644 --- a/exchange/consent-engine/go.sum +++ b/exchange/consent-engine/go.sum @@ -1,13 +1,27 @@ github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod 
h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= @@ -21,12 +35,26 @@ github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkr github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -34,12 +62,40 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel 
v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0 h1:rFwzp68QMgtzu9PgP3jm9XaMICI6TsofWWPcBDKwlsU= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0/go.mod h1:QyjcV9qDP6VeK5qPyKETvNjmaaEc7+gqjh4SS0ZYzDU= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= +go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= 
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/exchange/consent-engine/main.go b/exchange/consent-engine/main.go index ed0c8ba1..7a4b0c0e 100644 --- a/exchange/consent-engine/main.go +++ b/exchange/consent-engine/main.go @@ -7,6 +7,7 @@ import ( "time" "github.com/gov-dx-sandbox/exchange/shared/config" + "github.com/gov-dx-sandbox/exchange/shared/monitoring" "github.com/gov-dx-sandbox/exchange/shared/utils" // V1 API imports @@ -127,8 +128,9 @@ func main() { IdleTimeout: 60 * time.Second, } - // Apply CORS middleware and create server - handler := v1Router.ApplyCORS(mux) + // Wrap the mux with metrics (outermost) 
and then CORS from v1 router + // Metrics must be outermost to capture all requests, including CORS-blocked ones + handler := monitoring.HTTPMetricsMiddleware(v1Router.ApplyCORS(mux)) httpServer := utils.CreateServer(serverConfig, handler) // Start server with graceful shutdown diff --git a/exchange/docker-compose.yml b/exchange/docker-compose.yml index 11cf8f48..e7fe9230 100644 --- a/exchange/docker-compose.yml +++ b/exchange/docker-compose.yml @@ -20,6 +20,8 @@ services: - PORT=8082 - LOG_LEVEL=${LOG_LEVEL:-info} - LOG_FORMAT=${LOG_FORMAT:-text} + - SERVICE_NAME=policy-decision-point + - OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-prometheus} healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8082/health"] interval: 30s @@ -74,6 +76,8 @@ services: - PORT=4000 - LOG_LEVEL=${LOG_LEVEL:-info} - LOG_FORMAT=${LOG_FORMAT:-text} + - SERVICE_NAME=orchestration-engine + - OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-prometheus} healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:4000/health"] interval: 30s diff --git a/exchange/shared/monitoring/go.mod b/exchange/shared/monitoring/go.mod new file mode 100644 index 00000000..d7f11eff --- /dev/null +++ b/exchange/shared/monitoring/go.mod @@ -0,0 +1,37 @@ +module github.com/gov-dx-sandbox/exchange/shared/monitoring + +go 1.24.6 + +require ( + github.com/prometheus/client_golang v1.20.5 + go.opentelemetry.io/otel v1.32.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 + go.opentelemetry.io/otel/exporters/prometheus v0.54.0 + go.opentelemetry.io/otel/metric v1.32.0 + go.opentelemetry.io/otel/sdk v1.32.0 + go.opentelemetry.io/otel/sdk/metric v1.32.0 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + 
github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.60.1 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + go.opentelemetry.io/otel/trace v1.32.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/text v0.20.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect + google.golang.org/protobuf v1.35.1 // indirect +) diff --git a/exchange/shared/monitoring/go.sum b/exchange/shared/monitoring/go.sum new file mode 100644 index 00000000..92bdec16 --- /dev/null +++ b/exchange/shared/monitoring/go.sum @@ -0,0 +1,69 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= 
+github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0 h1:rFwzp68QMgtzu9PgP3jm9XaMICI6TsofWWPcBDKwlsU= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0/go.mod h1:QyjcV9qDP6VeK5qPyKETvNjmaaEc7+gqjh4SS0ZYzDU= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= +go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod 
h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/exchange/shared/monitoring/metrics.go b/exchange/shared/monitoring/metrics.go new file mode 100644 index 00000000..5b68761e --- /dev/null +++ b/exchange/shared/monitoring/metrics.go @@ -0,0 +1,301 @@ +package monitoring + +import ( + "log/slog" + "net/http" + "os" + "strings" + "sync" + "time" +) + +var ( + initOnce sync.Once + initErr error +) + +var ( + // routesMu protects routes and routeTemplates + routesMu sync.RWMutex + // routes is a set of 
static routes that should be preserved as-is + routes = make(map[string]bool) + // routeTemplates is a set of route templates (e.g., "/api/v1/schema/:id") + // that should be matched against incoming paths + routeTemplates = make([]string, 0) +) + +// ensureInitialized ensures OpenTelemetry is initialized with default config +// This is called automatically when metrics functions are used +func ensureInitialized() { + initOnce.Do(func() { + // Try to get service name from environment or use default + serviceName := os.Getenv("SERVICE_NAME") + if serviceName == "" { + serviceName = "opendif-service" + } + + config := DefaultConfig(serviceName) + initErr = Initialize(config) + if initErr != nil { + slog.Error("Failed to initialize OpenTelemetry metrics, metrics will be disabled", + "error", initErr, + "service", serviceName, + "impact", "Service will continue running but metrics collection is disabled") + } + }) +} + +// GetInitError returns the initialization error if metrics failed to initialize. +// Returns nil if initialization succeeded or hasn't been attempted yet. +// Services can call this to check if metrics are working and take appropriate action +// (e.g., fail to start if metrics are critical, or log a health check status). +func GetInitError() error { + ensureInitialized() + return initErr +} + +// IsInitialized returns true if metrics have been successfully initialized. +// Returns false if initialization failed or hasn't been attempted yet. +func IsInitialized() bool { + ensureInitialized() + return initErr == nil +} + +// RegisterRoutes registers routes for normalization. Supports static routes and templates with :id or {id} placeholders. +// Templates match incoming paths and normalize dynamic segments. Call during service initialization. 
+// +// Example: RegisterRoutes([]string{"/health", "/api/v1/schema/:id", "/api/v1/applications/{id}/activate"}) +func RegisterRoutes(routesList []string) { + routesMu.Lock() + defer routesMu.Unlock() + + for _, route := range routesList { + // Normalize {id} to :id for internal processing + normalizedRoute := strings.ReplaceAll(route, "{id}", ":id") + + if strings.Contains(normalizedRoute, ":id") { + // This is a template - store with normalized :id syntax + routeTemplates = append(routeTemplates, normalizedRoute) + } else { + // This is a static route - stored for exact O(1) lookup + routes[route] = true + } + } +} + +// IsExactRoute checks if a route is exactly registered as a static route (no template matching). +func IsExactRoute(route string) bool { + routesMu.RLock() + defer routesMu.RUnlock() + return routes[route] +} + +// Handler returns the metrics HTTP handler +// This now uses OpenTelemetry under the hood, but maintains backward compatibility +// For Prometheus exporter, this returns the Prometheus metrics endpoint +// For OTLP exporter, this returns a simple status endpoint +func Handler() http.Handler { + ensureInitialized() + return otelHandler() +} + +// HTTPMetricsMiddleware wraps an HTTP handler to record metrics +// This now uses OpenTelemetry under the hood, but maintains backward compatibility +func HTTPMetricsMiddleware(next http.Handler) http.Handler { + ensureInitialized() + return otelHTTPMetricsMiddleware(next) +} + +// responseWriter wraps http.ResponseWriter to capture status code +type responseWriter struct { + http.ResponseWriter + statusCode int +} + +func (rw *responseWriter) WriteHeader(code int) { + rw.statusCode = code + rw.ResponseWriter.WriteHeader(code) +} + +// normalizeRoute normalizes route paths for metrics by matching against registered routes/templates. +// Returns normalized template (e.g., "/api/v1/applications/:id/activate") or "unknown" for unrecognized patterns. +// Path should be r.URL.Path (no query parameters). 
+func normalizeRoute(path string) string { + if path == "" || path == "/" { + return "/" + } + + parts := strings.Split(path, "/") + // Remove empty first element from split + if len(parts) > 0 && parts[0] == "" { + parts = parts[1:] + } + + if len(parts) == 0 { + return "/" + } + + fullPath := "/" + strings.Join(parts, "/") + + routesMu.RLock() + defer routesMu.RUnlock() + // Check exact static routes first (O(1) lookup) + if exactMatch, exists := routes[fullPath]; exists && exactMatch { + return fullPath + } + + // Match against registered templates + for _, template := range routeTemplates { + if matchesTemplate(fullPath, template, parts) { + return template + } + } + + // Fallback: detect ID patterns for unregistered routes (checks all segments) + if len(parts) == 1 { + return "unknown" + } + + if len(parts) == 2 { + if looksLikeID(parts[1]) && !isCommonPathWord(parts[1]) { + return "/" + parts[0] + "/:id" + } + return "unknown" + } + + // For 3+ segments: check for ID patterns (handles IDs in middle) + if len(parts) >= 3 { + normalized := make([]string, len(parts)) + copy(normalized, parts) + idFound := false + + // Check segments, skipping short prefix segments (e.g., "api", "v1") and common path words + for i, part := range parts { + if i < 2 && len(part) <= 3 { + continue // Skip API versioning segments + } + if looksLikeID(part) && !isCommonPathWord(part) { + normalized[i] = ":id" + idFound = true + } + } + + // Return normalized path if ID found and path length is reasonable (max 6 segments) + if idFound && len(parts) <= 6 { + return "/" + strings.Join(normalized, "/") + } + return "unknown" + } + + return "unknown" +} + +// matchesTemplate checks if a path matches a route template. Supports :id and {id} placeholders. 
+func matchesTemplate(path, template string, pathParts []string) bool { + templateParts := strings.Split(template, "/") + if len(templateParts) > 0 && templateParts[0] == "" { + templateParts = templateParts[1:] + } + + if len(pathParts) != len(templateParts) { + return false + } + + for i := 0; i < len(pathParts); i++ { + // Support both :id and {id} syntax (normalized to :id during registration) + if templateParts[i] == ":id" || templateParts[i] == "{id}" { + continue // Placeholder - skip validation + } + if pathParts[i] != templateParts[i] { + return false + } + } + + return true +} + +// looksLikeID checks if a string looks like a dynamic ID (UUID, numeric, email, version string, or alphanumeric) +func looksLikeID(s string) bool { + if s == "" { + return false + } + + // Check for UUID-like patterns (e.g., "123e4567-e89b-12d3-a456-426614174000") + if len(s) == 36 && strings.Count(s, "-") == 4 { + return true + } + // Check for other IDs with separators that also contain numbers (e.g., "consent_abc123") + if (strings.Contains(s, "_") || strings.Contains(s, "-")) && strings.ContainsAny(s, "0123456789") { + return true + } + + // Check for version strings (e.g., "v1.0.0", "2.3.1") + if strings.Contains(s, ".") && len(s) >= 3 { + return true + } + + // Check if it's all numeric (e.g., "123") + allNumeric := true + for _, r := range s { + if r < '0' || r > '9' { + allNumeric = false + break + } + } + if allNumeric && len(s) > 0 { + return true + } + + // Check if it looks like an email (contains @) + if strings.Contains(s, "@") { + return true + } + + // Check if it's alphanumeric (likely an ID) - reduced threshold from 10 to 4 chars + if len(s) >= 4 { + alphanumeric := true + for _, r := range s { + if !((r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9')) { + alphanumeric = false + break + } + } + if alphanumeric { + return true + } + } + + return false +} + +// isCommonPathWord checks if a segment is a common path word (not an ID) to 
prevent false positives. +func isCommonPathWord(word string) bool { + if len(word) <= 2 { + return true + } + commonWords := map[string]bool{ + "api": true, "v1": true, "v2": true, "v3": true, + "applications": true, "application": true, "users": true, "user": true, + "consents": true, "consent": true, "schemas": true, "schema": true, + "versions": true, "version": true, "activate": true, "deactivate": true, + "profile": true, "profiles": true, "posts": true, + "list": true, "create": true, "update": true, "delete": true, + "get": true, "post": true, "put": true, "patch": true, + "check": true, "admin": true, + } + return commonWords[strings.ToLower(word)] +} + +// RecordExternalCall records an external service call +// This now uses OpenTelemetry under the hood, but maintains backward compatibility +func RecordExternalCall(target, operation string, duration time.Duration, err error) { + ensureInitialized() + otelRecordExternalCall(target, operation, duration, err) +} + +// RecordBusinessEvent records a business event +// This now uses OpenTelemetry under the hood, but maintains backward compatibility +func RecordBusinessEvent(action, outcome string) { + ensureInitialized() + otelRecordBusinessEvent(action, outcome) +} diff --git a/exchange/shared/monitoring/metrics_test.go b/exchange/shared/monitoring/metrics_test.go new file mode 100644 index 00000000..b194a125 --- /dev/null +++ b/exchange/shared/monitoring/metrics_test.go @@ -0,0 +1,479 @@ +package monitoring + +import ( + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func TestHandler(t *testing.T) { + handler := Handler() + if handler == nil { + t.Fatal("Handler() returned nil") + } + + req := httptest.NewRequest("GET", "/metrics", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("Expected status 200, got %d", w.Code) + } + + body := w.Body.String() + if body == "" { + t.Error("Metrics handler returned empty body") + } + 
+ // Check for Prometheus format + if !strings.Contains(body, "# HELP") && !strings.Contains(body, "# TYPE") { + t.Error("Response doesn't appear to be in Prometheus format") + } +} + +func TestHTTPMetricsMiddleware(t *testing.T) { + // Create a test handler + testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("test response")) + }) + + // Wrap with metrics middleware + wrapped := HTTPMetricsMiddleware(testHandler) + + // Make a request + req := httptest.NewRequest("GET", "/test", nil) + w := httptest.NewRecorder() + wrapped.ServeHTTP(w, req) + + // Verify response + if w.Code != http.StatusOK { + t.Errorf("Expected status 200, got %d", w.Code) + } + + // Verify metrics were recorded (check via /metrics endpoint) + metricsHandler := Handler() + metricsReq := httptest.NewRequest("GET", "/metrics", nil) + metricsW := httptest.NewRecorder() + metricsHandler.ServeHTTP(metricsW, metricsReq) + + metricsBody := metricsW.Body.String() + // OpenTelemetry Prometheus exporter converts counter names - check for actual exported name + if !strings.Contains(metricsBody, "http_requests") { + t.Errorf("http_requests metric not found after request. 
Metrics output:\n%s", metricsBody) + } +} + +func TestNormalizeRoute(t *testing.T) { + // Register routes for testing + RegisterRoutes([]string{ + "/health", + "/metrics", + "/api/v1/policy/metadata", + "/api/v1/policy/decide", + "/consents/:id", + "/data-owner/:id", + "/api/v1/policy/:id", + "/consumer/:id", + }) + + tests := []struct { + input string + expected string + }{ + {"/", "/"}, + {"/health", "/health"}, + {"/consents/123", "/consents/:id"}, + {"/consents/abc123def456", "/consents/:id"}, + {"/consents/consent_abc123", "/consents/:id"}, + {"/data-owner/user@example.com", "/data-owner/:id"}, + {"/api/v1/policy/metadata", "/api/v1/policy/metadata"}, + {"/api/v1/policy/decide", "/api/v1/policy/decide"}, + {"/api/v1/policy/123", "/api/v1/policy/:id"}, + {"/consumer/app-123", "/consumer/:id"}, + {"/admin/check", "unknown"}, // Not registered, falls back to unknown + } + + for _, tt := range tests { + result := normalizeRoute(tt.input) + if result != tt.expected { + t.Errorf("normalizeRoute(%q) = %q, expected %q", tt.input, result, tt.expected) + } + } +} + +func TestRegisterRoutes(t *testing.T) { + // Test registering static routes + RegisterRoutes([]string{ + "/static1", + "/static2", + }) + + // Test registering templates with :id syntax + RegisterRoutes([]string{ + "/api/v1/schema/:id", + "/sdl/versions/:id/activate", + }) + + // Test registering templates with {id} syntax (as suggested in feedback) + RegisterRoutes([]string{ + "/api/v1/applications/{id}/activate", + "/api/v1/users/{id}/profile", + }) + + // Verify static routes work + if normalizeRoute("/static1") != "/static1" { + t.Error("Static route /static1 not registered correctly") + } + + // Verify templates with :id syntax work + if normalizeRoute("/api/v1/schema/abc123") != "/api/v1/schema/:id" { + t.Error("Template /api/v1/schema/:id not matching correctly") + } + + if normalizeRoute("/sdl/versions/v1.0.0/activate") != "/sdl/versions/:id/activate" { + t.Error("Template /sdl/versions/:id/activate 
not matching correctly") + } + + // Verify templates with {id} syntax work (normalized to :id internally) + if normalizeRoute("/api/v1/applications/app-123/activate") != "/api/v1/applications/:id/activate" { + t.Error("Template /api/v1/applications/{id}/activate not matching correctly") + } + + if normalizeRoute("/api/v1/users/user@example.com/profile") != "/api/v1/users/:id/profile" { + t.Error("Template /api/v1/users/{id}/profile not matching correctly") + } +} + +func TestIsExactRoute(t *testing.T) { + // Register some routes + RegisterRoutes([]string{ + "/health", + "/metrics", + "/api/v1/policy/metadata", + "/api/v1/schema/:id", // Template, not exact + }) + + // Test exact routes + if !IsExactRoute("/health") { + t.Error("IsExactRoute should return true for registered exact route") + } + + if !IsExactRoute("/metrics") { + t.Error("IsExactRoute should return true for registered exact route") + } + + if !IsExactRoute("/api/v1/policy/metadata") { + t.Error("IsExactRoute should return true for registered exact route") + } + + // Test that templates are not exact routes + if IsExactRoute("/api/v1/schema/:id") { + t.Error("IsExactRoute should return false for template routes") + } + + if IsExactRoute("/api/v1/schema/123") { + t.Error("IsExactRoute should return false for paths matching templates") + } + + // Test unregistered routes + if IsExactRoute("/unknown") { + t.Error("IsExactRoute should return false for unregistered routes") + } + + if IsExactRoute("/api/v1/unknown") { + t.Error("IsExactRoute should return false for unregistered routes") + } +} + +func TestRecordExternalCall(t *testing.T) { + // Record a successful external call + RecordExternalCall("postgres", "create_consent", 100*time.Millisecond, nil) + + // Record a failed external call + RecordExternalCall("postgres", "create_consent", 50*time.Millisecond, fmt.Errorf("connection failed")) + + // Verify metrics were recorded (check via /metrics endpoint) + metricsHandler := Handler() + metricsReq := 
httptest.NewRequest("GET", "/metrics", nil) + metricsW := httptest.NewRecorder() + metricsHandler.ServeHTTP(metricsW, metricsReq) + + metricsBody := metricsW.Body.String() + // OpenTelemetry Prometheus exporter converts counter names - check for actual exported names + if !strings.Contains(metricsBody, "external_calls") { + t.Errorf("external_calls metric not found. Metrics output:\n%s", metricsBody) + } + if !strings.Contains(metricsBody, "external_call_errors") { + t.Errorf("external_call_errors metric not found. Metrics output:\n%s", metricsBody) + } + if !strings.Contains(metricsBody, "external_call_duration") { + t.Errorf("external_call_duration metric not found. Metrics output:\n%s", metricsBody) + } +} + +func TestRecordBusinessEvent(t *testing.T) { + // Record business events + RecordBusinessEvent("consent_created", "success") + RecordBusinessEvent("consent_approved", "success") + RecordBusinessEvent("policy_decision", "allow") + + // Verify metrics were recorded (check via /metrics endpoint) + metricsHandler := Handler() + metricsReq := httptest.NewRequest("GET", "/metrics", nil) + metricsW := httptest.NewRecorder() + metricsHandler.ServeHTTP(metricsW, metricsReq) + + metricsBody := metricsW.Body.String() + // OpenTelemetry Prometheus exporter converts counter names - check for actual exported name + if !strings.Contains(metricsBody, "business_events") { + t.Errorf("business_events metric not found. 
Metrics output:\n%s", metricsBody) + } +} + +func TestNormalizeRouteFallbackWithIDInMiddle(t *testing.T) { + // Clear any previously registered routes to test fallback logic + // Note: In real usage, services should register routes, but fallback handles unregistered routes + + tests := []struct { + input string + expected string + }{ + // Test ID at the end (existing behavior) + {"/api/v1/applications/123", "/api/v1/applications/:id"}, + {"/api/v1/schema/abc123", "/api/v1/schema/:id"}, + + // Test ID in the middle (new behavior) + {"/api/v1/applications/123/activate", "/api/v1/applications/:id/activate"}, + {"/api/v1/applications/app-123/activate", "/api/v1/applications/:id/activate"}, + {"/sdl/versions/v1.0.0/activate", "/sdl/versions/:id/activate"}, + {"/api/v1/users/user@example.com/profile", "/api/v1/users/:id/profile"}, + + // Test multiple IDs (should normalize all) + {"/api/v1/users/123/posts/456", "/api/v1/users/:id/posts/:id"}, + {"/api/v1/applications/app-123/consents/consent-456", "/api/v1/applications/:id/consents/:id"}, + + // Test paths that are too long (should return unknown) + {"/api/v1/a/b/c/d/e/f/g/h", "unknown"}, // 8 segments, exceeds limit of 6 + + // Test paths without IDs (should return unknown) + {"/api/v1/applications/list", "unknown"}, + {"/api/v1/users/profile", "unknown"}, + } + + for _, tt := range tests { + result := normalizeRoute(tt.input) + if result != tt.expected { + t.Errorf("normalizeRoute(%q) = %q, expected %q", tt.input, result, tt.expected) + } + } +} + +// TestLooksLikeIDImprovedLogic tests the improved ID detection that prevents false positives +func TestLooksLikeIDImprovedLogic(t *testing.T) { + tests := []struct { + input string + expected bool + reason string + }{ + // UUIDs should be detected + {"123e4567-e89b-12d3-a456-426614174000", true, "Valid UUID"}, + {"00000000-0000-0000-0000-000000000000", true, "UUID format"}, + + // IDs with separators AND numbers should be detected + {"consent_abc123", true, "Has underscore 
and numbers"}, + {"app-456", true, "Has hyphen and numbers"}, + {"user_123def", true, "Has underscore and numbers"}, + + // Static path segments with separators but NO numbers should NOT be detected + {"data-owner", false, "Has hyphen but no numbers - static path"}, + {"list-all", false, "Has hyphen but no numbers - static path"}, + {"check-status", false, "Has hyphen but no numbers - static path"}, + {"user_profile", false, "Has underscore but no numbers - static path"}, + + // Version strings should be detected + {"v1.0.0", true, "Version string"}, + {"2.3.1", true, "Version string"}, + + // Numeric IDs should be detected + {"123", true, "All numeric"}, + {"456789", true, "All numeric"}, + + // Email addresses should be detected + {"user@example.com", true, "Email address"}, + + // Long alphanumeric strings should be detected + {"abc123def456", true, "Alphanumeric ID"}, + {"app123", true, "Alphanumeric ID"}, + + // Short strings should NOT be detected (unless numeric) + {"abc", false, "Too short"}, + {"12", false, "Too short even if numeric"}, + + // Common path words should NOT be detected (tested via isCommonPathWord) + {"api", false, "Common path word"}, + {"v1", false, "Common path word"}, + } + + for _, tt := range tests { + result := looksLikeID(tt.input) + if result != tt.expected { + t.Errorf("looksLikeID(%q) = %v, expected %v (%s)", tt.input, result, tt.expected, tt.reason) + } + } +} + +// TestHistogramBucketsConfiguration tests that both histogram metrics use custom buckets +func TestHistogramBucketsConfiguration(t *testing.T) { + // This test verifies that the histogram bucket configuration is applied + // by checking that metrics are recorded and can be queried + + // Record HTTP request duration + testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(10 * time.Millisecond) // Simulate some processing time + w.WriteHeader(http.StatusOK) + }) + wrapped := HTTPMetricsMiddleware(testHandler) + + req := 
httptest.NewRequest("GET", "/test", nil) + w := httptest.NewRecorder() + wrapped.ServeHTTP(w, req) + + // Record external call duration + RecordExternalCall("postgres", "query", 25*time.Millisecond, nil) + + // Verify metrics endpoint returns data + metricsHandler := Handler() + metricsReq := httptest.NewRequest("GET", "/metrics", nil) + metricsW := httptest.NewRecorder() + metricsHandler.ServeHTTP(metricsW, metricsReq) + + metricsBody := metricsW.Body.String() + + // Both histogram metrics should be present + if !strings.Contains(metricsBody, "http_request_duration") { + t.Error("http_request_duration_seconds histogram not found") + } + if !strings.Contains(metricsBody, "external_call_duration") { + t.Error("external_call_duration_seconds histogram not found") + } + + // Verify the metrics are in Prometheus format with buckets + // Prometheus histograms show bucket boundaries + if !strings.Contains(metricsBody, "le=") { + t.Log("Note: Histogram buckets may not be visible in text format, but configuration is applied") + } +} + +// TestRouteNormalizationWithStaticPaths tests that static paths with hyphens are not normalized +func TestRouteNormalizationWithStaticPaths(t *testing.T) { + tests := []struct { + input string + expected string + reason string + }{ + // Static paths with hyphens should NOT be normalized (no false positives) + {"/api/v1/data-owner", "unknown", "Static path with hyphen - should not be normalized"}, + {"/api/v1/list-all", "unknown", "Static path with hyphen - should not be normalized"}, + {"/api/v1/check-status", "unknown", "Static path with hyphen - should not be normalized"}, + + // Paths with actual IDs should be normalized + {"/api/v1/data-owner/123", "/api/v1/data-owner/:id", "Has numeric ID"}, + {"/api/v1/data-owner/user-123", "/api/v1/data-owner/:id", "Has ID with hyphen and numbers"}, + {"/api/v1/users/user_123/profile", "/api/v1/users/:id/profile", "Has ID with underscore and numbers"}, + + // UUIDs should be normalized + 
{"/api/v1/users/123e4567-e89b-12d3-a456-426614174000", "/api/v1/users/:id", "Has UUID"}, + } + + for _, tt := range tests { + result := normalizeRoute(tt.input) + if result != tt.expected { + t.Errorf("normalizeRoute(%q) = %q, expected %q (%s)", tt.input, result, tt.expected, tt.reason) + } + } +} + +// TestIsInitialized tests the initialization state functions +func TestIsInitialized(t *testing.T) { + // After Handler() is called, initialization should have occurred + _ = Handler() + + if !IsInitialized() { + t.Error("IsInitialized() should return true after Handler() is called") + } + + if GetInitError() != nil { + t.Errorf("GetInitError() should return nil after successful initialization, got: %v", GetInitError()) + } +} + +// TestMultipleInitializations tests that multiple initialization calls are safe +func TestMultipleInitializations(t *testing.T) { + // Reset state by calling ensureInitialized multiple times + // This should be safe and not cause panics + _ = Handler() + _ = Handler() + _ = HTTPMetricsMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + RecordExternalCall("test", "op", time.Millisecond, nil) + RecordBusinessEvent("test", "success") + + // Should still be initialized + if !IsInitialized() { + t.Error("Multiple initialization calls should not break initialization state") + } +} + +// TestHTTPMetricsMiddlewareWithDifferentStatusCodes tests that different HTTP status codes are recorded +func TestHTTPMetricsMiddlewareWithDifferentStatusCodes(t *testing.T) { + testCases := []struct { + statusCode int + name string + }{ + {http.StatusOK, "200 OK"}, + {http.StatusNotFound, "404 Not Found"}, + {http.StatusInternalServerError, "500 Internal Server Error"}, + {http.StatusBadRequest, "400 Bad Request"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(tc.statusCode) + }) + + wrapped := 
HTTPMetricsMiddleware(testHandler) + req := httptest.NewRequest("GET", "/test", nil) + w := httptest.NewRecorder() + wrapped.ServeHTTP(w, req) + + if w.Code != tc.statusCode { + t.Errorf("Expected status %d, got %d", tc.statusCode, w.Code) + } + }) + } +} + +// TestNormalizeRouteWith404 tests that 404s are normalized to "unknown" +func TestNormalizeRouteWith404(t *testing.T) { + // This is tested indirectly in otelHTTPMetricsMiddleware + // When statusCode is 404, route is set to "unknown" + // We can verify this by checking the middleware behavior + testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + + wrapped := HTTPMetricsMiddleware(testHandler) + req := httptest.NewRequest("GET", "/nonexistent/path/123", nil) + w := httptest.NewRecorder() + wrapped.ServeHTTP(w, req) + + // The route should be normalized to "unknown" for 404s + // This prevents cardinality explosion from random 404 paths + if w.Code != http.StatusNotFound { + t.Errorf("Expected 404, got %d", w.Code) + } +} diff --git a/exchange/shared/monitoring/otel_metrics.go b/exchange/shared/monitoring/otel_metrics.go new file mode 100644 index 00000000..12d18acf --- /dev/null +++ b/exchange/shared/monitoring/otel_metrics.go @@ -0,0 +1,440 @@ +package monitoring + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "net/url" + "os" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + otelprom "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.27.0" +) + +// Custom OpenTelemetry attributes for OpenDIF-specific metrics. 
+// These attributes use the "opendif." namespace prefix to distinguish them +// from standard OpenTelemetry semantic conventions. +// +// Custom Attributes: +// - opendif.business.action: The business action being performed (e.g., "consent_created", "policy_decision") +// - opendif.business.outcome: The outcome of the business action (e.g., "success", "failure", "allow", "deny") +// - opendif.external.target: The target system/service for external calls (e.g., "postgres", "redis", "external-api") +// - opendif.external.operation: The operation type for external calls (e.g., "query", "insert", "get", "set") +// +// Standard semantic conventions (from semconv package) are used for HTTP metrics: +// - http.method, http.route, http.status_code (via semconv.HTTPRequestMethodKey, etc.) +const ( + // Attribute keys for custom OpenDIF metrics + attrBusinessAction = "opendif.business.action" + attrBusinessOutcome = "opendif.business.outcome" + attrExternalTarget = "opendif.external.target" + attrExternalOperation = "opendif.external.operation" +) + +var ( + // Metrics instruments + httpRequestsCounter metric.Int64Counter + httpRequestDuration metric.Float64Histogram + externalCallsCounter metric.Int64Counter + externalCallErrors metric.Int64Counter + externalCallDuration metric.Float64Histogram + businessEventsCounter metric.Int64Counter + metricsHandler http.Handler + initialized int32 // Use atomic int32 for thread-safe reads/writes + otelInitOnce sync.Once // Separate sync.Once for OpenTelemetry initialization +) + +// Config holds the configuration for OpenTelemetry metrics +type Config struct { + // ExporterType can be "prometheus", "otlp", or "none" (disabled) + ExporterType string + // ServiceName is the name of the service (e.g., "portal-backend", "orchestration-engine") + ServiceName string + // ServiceVersion is the version of the service (e.g., "1.0.0", "v2.3.1") + // Defaults to "dev" if not set via SERVICE_VERSION environment variable + ServiceVersion string + 
// OTLPEndpoint is the OTLP endpoint URL (for Datadog, New Relic, etc.) + // Example: "https://api.datadoghq.com/api/v2/otlp" + OTLPEndpoint string + // OTLPHeaders are additional headers for OTLP exporter (e.g., API keys) + OTLPHeaders map[string]string + // PrometheusPort is the port for Prometheus exporter (default: 8888) + PrometheusPort int + // OTLPTLSInsecure allows insecure TLS connections (only for development/testing) + // Set via OTEL_EXPORTER_OTLP_INSECURE environment variable + OTLPTLSInsecure bool + // HistogramBuckets allows customization of histogram bucket boundaries (in seconds) + // Default: [.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10] + // These boundaries are optimized for HTTP request latency measurements: + // - Sub-10ms: .005, .01, .025 (very fast responses) + // - 10-100ms: .05, .1, .25 (typical API responses) + // - 100ms-1s: .5, 1 (slower operations) + // - 1s+: 2.5, 5, 10 (long-running operations, timeouts) + HistogramBuckets []float64 +} + +// DefaultConfig returns a default configuration +func DefaultConfig(serviceName string) Config { + return Config{ + ExporterType: getEnvOrDefault("OTEL_METRICS_EXPORTER", "prometheus"), + ServiceName: serviceName, + ServiceVersion: getEnvOrDefault("SERVICE_VERSION", "dev"), + OTLPEndpoint: getEnvOrDefault("OTEL_EXPORTER_OTLP_ENDPOINT", ""), + PrometheusPort: 8888, + OTLPHeaders: parseHeaders(getEnvOrDefault("OTEL_EXPORTER_OTLP_HEADERS", "")), + OTLPTLSInsecure: getEnvBoolOrDefault("OTEL_EXPORTER_OTLP_INSECURE", false), + HistogramBuckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, + } +} + +// Initialize sets up OpenTelemetry metrics with the given configuration +// This function is thread-safe and can be called multiple times safely. +// Only the first call will perform initialization; subsequent calls return nil. 
+func Initialize(config Config) error { + // Use sync.Once to ensure initialization only happens once + var initErr error + otelInitOnce.Do(func() { + ctx := context.Background() + initErr = initializeInternal(ctx, config) + if initErr == nil { + atomic.StoreInt32(&initialized, 1) + } + }) + + return initErr +} + +// initializeInternal performs the actual initialization work +func initializeInternal(ctx context.Context, config Config) error { + // Create resource with service name and version + res, err := resource.New(ctx, + resource.WithAttributes( + semconv.ServiceName(config.ServiceName), + semconv.ServiceVersion(config.ServiceVersion), + ), + ) + if err != nil { + return fmt.Errorf("failed to create resource: %w", err) + } + + // Create meter provider based on exporter type + var reader sdkmetric.Reader + var handler http.Handler + + switch config.ExporterType { + case "prometheus", "": + // Use Prometheus exporter (default for local dev) + // Create a Prometheus registry for the exporter + reg := prometheus.NewRegistry() + exporter, err := otelprom.New(otelprom.WithRegisterer(reg)) + if err != nil { + return fmt.Errorf("failed to create Prometheus exporter: %w", err) + } + reader = exporter + // Use promhttp.HandlerFor with the custom registry + handler = promhttp.HandlerFor(reg, promhttp.HandlerOpts{}) + metricsHandler = handler + slog.Info("Initialized OpenTelemetry metrics with Prometheus exporter", + "service", config.ServiceName) + + case "otlp": + // Use OTLP exporter (for Datadog, New Relic, etc.) 
+ if config.OTLPEndpoint == "" { + return fmt.Errorf("OTLP endpoint is required when using OTLP exporter") + } + + // Parse endpoint URL + endpointURL, err := url.Parse(config.OTLPEndpoint) + if err != nil { + return fmt.Errorf("invalid OTLP endpoint URL: %w", err) + } + + // Security: Require HTTPS by default for all endpoints + // Only allow insecure connections if explicitly enabled via OTEL_EXPORTER_OTLP_INSECURE + if endpointURL.Scheme != "https" { + if !config.OTLPTLSInsecure { + return fmt.Errorf("OTLP endpoint must use HTTPS (got: %s). Use https:// for secure connections, or set OTEL_EXPORTER_OTLP_INSECURE=true to allow insecure connections (not recommended for production)", endpointURL.Scheme) + } + // Insecure connection explicitly enabled via environment variable + slog.Warn("Using insecure HTTP connection for OTLP endpoint (OTEL_EXPORTER_OTLP_INSECURE=true)", + "endpoint", config.OTLPEndpoint, + "warning", "This disables TLS verification and exposes metrics data in transit") + } + + // Extract host:port from URL (WithEndpoint expects host:port, not full URL) + // The scheme is controlled by WithInsecure() option + opts := []otlpmetrichttp.Option{ + otlpmetrichttp.WithEndpoint(endpointURL.Host), + } + + // Only use WithInsecure() if explicitly enabled via environment variable + if config.OTLPTLSInsecure && endpointURL.Scheme == "http" { + opts = append(opts, otlpmetrichttp.WithInsecure()) + } + // For HTTPS endpoints (default), TLS with proper certificate validation is used automatically + + // Add headers if provided + if len(config.OTLPHeaders) > 0 { + opts = append(opts, otlpmetrichttp.WithHeaders(config.OTLPHeaders)) + } + + exporter, err := otlpmetrichttp.New(ctx, opts...) 
+ if err != nil { + return fmt.Errorf("failed to create OTLP exporter: %w", err) + } + + reader = sdkmetric.NewPeriodicReader(exporter, + sdkmetric.WithInterval(15*time.Second)) + metricsHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("# Metrics exported via OTLP\n")) + }) + slog.Info("Initialized OpenTelemetry metrics with OTLP exporter", + "service", config.ServiceName, + "endpoint", config.OTLPEndpoint, + "insecure", config.OTLPTLSInsecure) + + case "none": + // Disabled - use no-op reader + reader = sdkmetric.NewManualReader() + metricsHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("# Metrics disabled\n")) + }) + slog.Info("OpenTelemetry metrics disabled", + "service", config.ServiceName) + + default: + return fmt.Errorf("unknown exporter type: %s (supported: prometheus, otlp, none)", config.ExporterType) + } + + // Use default histogram buckets if not configured + histogramBuckets := config.HistogramBuckets + if len(histogramBuckets) == 0 { + histogramBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10} + } + + // Create meter provider with custom histogram buckets for all duration metrics + meterProvider := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(res), + sdkmetric.WithReader(reader), + sdkmetric.WithView(sdkmetric.NewView( + sdkmetric.Instrument{Name: "http_request_duration_seconds"}, + sdkmetric.Stream{ + Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ + Boundaries: histogramBuckets, + }, + }, + )), + sdkmetric.WithView(sdkmetric.NewView( + sdkmetric.Instrument{Name: "external_call_duration_seconds"}, + sdkmetric.Stream{ + Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ + Boundaries: histogramBuckets, + }, + }, + )), + ) + + // Set global meter provider + otel.SetMeterProvider(meterProvider) + + // Create meter + meter := otel.Meter("opendif") + + // Create 
instruments + httpRequestsCounter, err = meter.Int64Counter( + "http_requests_total", + metric.WithDescription("Total number of HTTP requests"), + metric.WithUnit("1"), + ) + if err != nil { + return fmt.Errorf("failed to create http_requests_total counter: %w", err) + } + + httpRequestDuration, err = meter.Float64Histogram( + "http_request_duration_seconds", + metric.WithDescription("HTTP request duration in seconds"), + metric.WithUnit("s"), + ) + if err != nil { + return fmt.Errorf("failed to create http_request_duration_seconds histogram: %w", err) + } + + externalCallsCounter, err = meter.Int64Counter( + "external_calls_total", + metric.WithDescription("Total number of external service calls"), + metric.WithUnit("1"), + ) + if err != nil { + return fmt.Errorf("failed to create external_calls_total counter: %w", err) + } + + externalCallErrors, err = meter.Int64Counter( + "external_call_errors_total", + metric.WithDescription("Total number of failed external service calls"), + metric.WithUnit("1"), + ) + if err != nil { + return fmt.Errorf("failed to create external_call_errors_total counter: %w", err) + } + + externalCallDuration, err = meter.Float64Histogram( + "external_call_duration_seconds", + metric.WithDescription("External service call duration in seconds"), + metric.WithUnit("s"), + ) + if err != nil { + return fmt.Errorf("failed to create external_call_duration_seconds histogram: %w", err) + } + + businessEventsCounter, err = meter.Int64Counter( + "business_events_total", + metric.WithDescription("Total number of business events"), + metric.WithUnit("1"), + ) + if err != nil { + return fmt.Errorf("failed to create business_events_total counter: %w", err) + } + + return nil +} + +// otelHandler returns the metrics HTTP handler +// For Prometheus exporter, this returns the Prometheus metrics endpoint +// For OTLP exporter, this returns a simple status endpoint +func otelHandler() http.Handler { + if atomic.LoadInt32(&initialized) == 0 || metricsHandler 
== nil { + // Fallback if not initialized + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + w.Write([]byte("# Metrics not initialized\n")) + }) + } + return metricsHandler +} + +// otelHTTPMetricsMiddleware wraps an HTTP handler to record metrics using OpenTelemetry +func otelHTTPMetricsMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if atomic.LoadInt32(&initialized) == 0 { + // If metrics not initialized, just pass through + next.ServeHTTP(w, r) + return + } + + start := time.Now() + + // Wrap ResponseWriter to capture status code + rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK} + + // Call the next handler + next.ServeHTTP(rw, r) + + // Record metrics + duration := time.Since(start).Seconds() + method := r.Method + + // Normalize route, but use "unknown" for 404s to prevent cardinality explosion + route := normalizeRoute(r.URL.Path) + if rw.statusCode == http.StatusNotFound { + route = "unknown" + } + + // Record metrics with attributes + httpRequestsCounter.Add(context.Background(), 1, + metric.WithAttributes( + semconv.HTTPRequestMethodKey.String(method), + semconv.HTTPRouteKey.String(route), + semconv.HTTPResponseStatusCodeKey.Int(rw.statusCode), + ), + ) + httpRequestDuration.Record(context.Background(), duration, + metric.WithAttributes( + semconv.HTTPRequestMethodKey.String(method), + semconv.HTTPRouteKey.String(route), + ), + ) + }) +} + +// otelRecordExternalCall records an external service call using OpenTelemetry +func otelRecordExternalCall(target, operation string, duration time.Duration, err error) { + if atomic.LoadInt32(&initialized) == 0 { + return + } + + ctx := context.Background() + attrs := metric.WithAttributes( + attribute.String(attrExternalTarget, target), + attribute.String(attrExternalOperation, operation), + ) + + externalCallsCounter.Add(ctx, 1, attrs) + 
externalCallDuration.Record(ctx, duration.Seconds(), attrs) + if err != nil { + externalCallErrors.Add(ctx, 1, attrs) + } +} + +// otelRecordBusinessEvent records a business event using OpenTelemetry +func otelRecordBusinessEvent(action, outcome string) { + if atomic.LoadInt32(&initialized) == 0 { + return + } + + businessEventsCounter.Add(context.Background(), 1, + metric.WithAttributes( + attribute.String(attrBusinessAction, action), + attribute.String(attrBusinessOutcome, outcome), + ), + ) +} + +// Helper functions +func getEnvOrDefault(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} + +func parseHeaders(headerStr string) map[string]string { + headers := make(map[string]string) + if headerStr == "" { + return headers + } + + // Parse format: "key1=value1,key2=value2" + pairs := strings.Split(headerStr, ",") + for _, pair := range pairs { + parts := strings.SplitN(strings.TrimSpace(pair), "=", 2) + if len(parts) == 2 { + headers[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) + } + } + return headers +} + +func getEnvBoolOrDefault(key string, defaultValue bool) bool { + value := os.Getenv(key) + if value == "" { + return defaultValue + } + // Accept common boolean representations + value = strings.ToLower(strings.TrimSpace(value)) + return value == "true" || value == "1" || value == "yes" || value == "on" +} diff --git a/observability/README.md b/observability/README.md index 9a819fdf..bb6f3635 100644 --- a/observability/README.md +++ b/observability/README.md @@ -1,8 +1,79 @@ -# Observability Stack for OpenDIF MVP +# Observability Stack for OpenDIF Core -Local development stack: **Go Services** → **Prometheus** → **Grafana** +Local development stack: **Go Services** → **OpenTelemetry** → **Prometheus** → **Grafana** -Collects real-time metrics from all Go services for debugging performance and errors. 
+Collects real-time metrics from all Go services for debugging performance and errors. Uses **OpenTelemetry** for vendor-agnostic metrics collection, allowing you to switch between Prometheus (default), Datadog, New Relic, or any OTLP-compatible backend without changing code. + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Go Services (HTTP Servers) │ +│ ┌──────────────┐ ┌───────────────┐ ┌──────────────┐ │ +│ │ Portal │ │ Orchestration │ │ Policy │ ... │ +│ │ Backend │ │ Engine │ │ Decision │ │ +│ │ :3000 │ │ :4000 │ │ Point :8082 │ │ +│ └──────┬───────┘ └───────┬───────┘ └──────┬───────┘ │ +│ │ │ │ │ +│ └──────────────────┴─────────────────┘ │ +│ │ │ +│ │ HTTP Requests │ +│ │ (with OpenTelemetry Middleware) │ +│ ▼ │ +│ ┌──────────────────────────────┐ │ +│ │ OpenTelemetry SDK │ │ +│ │ (Vendor-Agnostic) │ │ +│ └──────────────┬────────────────┘ │ +│ │ │ +│ │ Exporter (Configurable) │ +│ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ /metrics endpoint │ │ +│ │ (Format depends on exporter) │ │ +│ └─────────────────────────────────────────────┘ │ +└────────────────────────┬────────────────────────────────────────────────────┘ + │ + │ Export (scrape or push) + │ + ┌───────────────┼───────────────┬───────────────┐ + │ │ │ │ + ▼ ▼ ▼ ▼ +┌────────────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ +│ Prometheus │ │ Datadog │ │New Relic │ │ Other │ +│ (Default) │ │ (OTLP) │ │ (OTLP) │ │ (OTLP) │ +│ :9091 │ │ │ │ │ │ │ +│ │ │ │ │ │ │ │ +│ Scrapes │ │ Pushes │ │ Pushes │ │ Pushes │ +│ /metrics │ │ via │ │ via │ │ via │ +│ every 15s │ │ OTLP │ │ OTLP │ │ OTLP │ +└────────┬───────┘ └──────────┘ └──────────┘ └──────────┘ + │ + │ PromQL Queries + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Grafana (localhost:3002) │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ Data Source: Prometheus (for local dev) │ │ +│ │ URL: 
http://prometheus:9090 │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ Dashboards │ │ +│ │ - Go Services Metrics │ │ +│ │ - HTTP Traffic, Latency, Errors │ │ +│ │ - Service Health │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +**Key Points:** +- **OpenTelemetry SDK** provides vendor-agnostic instrumentation +- **Exporter** determines where metrics go (Prometheus, Datadog, New Relic, etc.) +- **No code changes** needed to switch backends - just environment variables +- **Default**: Prometheus exporter for local development +- **Production**: Switch to OTLP exporter for Datadog/New Relic/etc. --- @@ -14,8 +85,7 @@ docker compose up -d ``` **Services:** - -- **Prometheus**: http://localhost:9090 (raw metrics & queries) +- **Prometheus**: http://localhost:9091 (raw metrics & queries) - **Grafana**: http://localhost:3002 (dashboards, login: `admin` / `admin`) **Prerequisites:** @@ -29,44 +99,109 @@ Ensure all Go services are running and connected to the `opendif-network`: --- +## Switching Observability Backends + +The observability stack uses **OpenTelemetry**, allowing you to switch between different backends without changing code. Configure via environment variables. + +### Default: Prometheus (Local Development) + +No configuration needed! Services automatically use Prometheus exporter by default. 
+ +```bash +# Services expose metrics at /metrics endpoint +# Prometheus scrapes every 15 seconds +# View in Grafana: http://localhost:3002 +``` + +### Switch to Datadog (Production) + +Set environment variables before starting services: + +```bash +export OTEL_METRICS_EXPORTER=otlp +export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/otlp +export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=your-api-key,DD-SITE=datadoghq.com" +export SERVICE_NAME=portal-backend + +# Start your service +./your-service +``` + +**Alternative (via Datadog Agent):** +```bash +export OTEL_METRICS_EXPORTER=otlp +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 # Datadog Agent OTLP HTTP endpoint +export SERVICE_NAME=portal-backend +``` + +### Switch to New Relic + +```bash +export OTEL_METRICS_EXPORTER=otlp +export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net +export OTEL_EXPORTER_OTLP_HEADERS="api-key=your-newrelic-license-key" +export SERVICE_NAME=portal-backend + +# Start your service +./your-service +``` + +### Disable Metrics + +```bash +export OTEL_METRICS_EXPORTER=none +``` + +### Configuration Reference + +| Variable | Description | Default | Example | +|----------|-------------|---------|---------| +| `OTEL_METRICS_EXPORTER` | Exporter type: `prometheus`, `otlp`, or `none` | `prometheus` | `otlp` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP endpoint URL (required for `otlp` exporter) | - | `https://api.datadoghq.com/api/v2/otlp` | +| `OTEL_EXPORTER_OTLP_HEADERS` | OTLP headers (e.g., API keys). 
Format: `key1=value1,key2=value2` | - | `DD-API-KEY=xxx,DD-SITE=datadoghq.com` | +| `SERVICE_NAME` | Service name for metrics | `opendif-service` | `portal-backend` | + +**Note:** For Docker Compose deployments, add these environment variables to your `docker-compose.yml`: + +```yaml +services: + your-service: + environment: + - OTEL_METRICS_EXPORTER=otlp + - OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/otlp + - OTEL_EXPORTER_OTLP_HEADERS=DD-API-KEY=xxx,DD-SITE=datadoghq.com + - SERVICE_NAME=your-service +``` + +--- + ## Metrics Overview ### HTTP Request Metrics | Metric | Type | Labels | Purpose | | ------------------------------- | --------- | ----------------------------------------- | -------------------------- | -| `http_requests_total` | Counter | `method`, `route`, `status_code` | Request volume by endpoint | -| `http_request_duration_seconds` | Histogram | `method`, `route` | API latency percentiles | +| `http_requests_total` | Counter | `http_method`, `http_route`, `http_status_code` | Request volume by endpoint | +| `http_request_duration_seconds` | Histogram | `http_method`, `http_route` | API latency percentiles | **Label Definitions:** -- `method`: HTTP method (GET, POST, PUT, DELETE, etc.) -- `route`: Normalized route path (e.g., `/consents`, `/policies`) -- `status_code`: HTTP response status code (200, 404, 500, etc.) +- `http_method`: HTTP method (GET, POST, PUT, DELETE, etc.) +- `http_route`: Normalized route path (e.g., `/api/v1/members`, `/api/v1/policies`) +- `http_status_code`: HTTP response status code (200, 404, 500, etc.) 
-### External Call Metrics +### External Call Metrics (Exchange services) | Metric | Type | Labels | Purpose | | --------------------------------- | --------- | ----------------------------------------- | -------------------------- | -| `external_calls_total` | Counter | `external_target`, `external_operation`, `external_success` | External call volume | +| `external_calls_total` | Counter | `external_target`, `external_operation` | External call volume | | `external_call_duration_seconds` | Histogram | `external_target`, `external_operation` | External call latency | | `external_call_errors_total` | Counter | `external_target`, `external_operation` | Failed external calls | **Label Definitions:** - `external_target`: Target service or system (e.g., `postgres`, `redis`, `external-api`) - `external_operation`: Operation type (e.g., `query`, `insert`, `get`, `set`) -- `external_success`: Success status (`true` or `false`) - -### Database Metrics - -| Metric | Type | Labels | Purpose | -| ------------------------- | --------- | ------------------------- | --------------------- | -| `db_latency_seconds` | Histogram | `db_name`, `db_operation` | Database query timing | - -**Label Definitions:** -- `db_name`: Database name or identifier -- `db_operation`: Database operation type (e.g., `select`, `insert`, `update`, `delete`) -### Business Event Metrics +### Business Event Metrics (Exchange services) | Metric | Type | Labels | Purpose | | ------------------------- | ------- | ------------------------------- | ---------------------- | @@ -76,37 +211,6 @@ Ensure all Go services are running and connected to the `opendif-network`: - `business_action`: Business action type (e.g., `consent_created`, `policy_evaluated`) - `business_outcome`: Outcome of the action (e.g., `success`, `failure`, `pending`) -### Workflow Metrics - -| Metric | Type | Labels | Purpose | -| ------------------------------- | --------------- | --------------- | --------------------- | -| 
`workflow_duration_seconds` | Histogram | `workflow_name` | End-to-end workflow timing | -| `workflow_inflight` | UpDownCounter | `workflow_name` | Active workflow count | - -**Label Definitions:** -- `workflow_name`: Name of the workflow (e.g., `data_exchange`, `consent_flow`) - -### Cache Metrics - -| Metric | Type | Labels | Purpose | -| --------------------- | ------- | ------------------------- | ----------------- | -| `cache_events_total` | Counter | `cache_name`, `cache_result` | Cache hit/miss tracking | - -**Label Definitions:** -- `cache_name`: Cache identifier or name -- `cache_result`: Cache operation result (`hit` or `miss`) - -### Policy Decision Metrics (PDP) - -| Metric | Type | Labels | Purpose | -| ----------------------------- | --------- | ----------------- | ---------------------- | -| `decision_latency_seconds` | Histogram | `decision_type` | Policy evaluation time | -| `decision_failures_total` | Counter | `failure_reason` | Policy decision errors | - -**Label Definitions:** -- `decision_type`: Type of policy decision (e.g., `allow`, `deny`, `conditional`) -- `failure_reason`: Reason for decision failure (e.g., `policy_not_found`, `evaluation_error`) - ### Go Runtime Metrics (Automatic) The monitoring package automatically instruments Go runtime metrics: @@ -121,22 +225,22 @@ The monitoring package automatically instruments Go runtime metrics: **Request Rate by Endpoint:** ```promql -sum by (route, method) (rate(http_requests_total[5m])) +sum by (http_route, http_method) (rate(http_requests_total[5m])) ``` **95th Percentile Latency by Endpoint:** ```promql -histogram_quantile(0.95, sum by (route, le) (rate(http_request_duration_seconds_bucket[5m]))) +histogram_quantile(0.95, sum by (http_route, le) (rate(http_request_duration_seconds_bucket[5m]))) ``` **Error Rate by Endpoint:** ```promql -sum by (route) (rate(http_requests_total{status_code=~"5.."}[5m])) +sum by (http_route) (rate(http_requests_total{http_status_code=~"5.."}[5m])) ``` **Top 
10 Slowest Endpoints:** ```promql -topk(10, histogram_quantile(0.95, sum by (route, le) (rate(http_request_duration_seconds_bucket[5m])))) +topk(10, histogram_quantile(0.95, sum by (http_route, le) (rate(http_request_duration_seconds_bucket[5m])))) ``` **External Call Error Rate:** @@ -144,21 +248,11 @@ topk(10, histogram_quantile(0.95, sum by (route, le) (rate(http_request_duration sum by (external_target, external_operation) (rate(external_call_errors_total[5m])) ``` -**95th Percentile Database Latency:** -```promql -histogram_quantile(0.95, sum by (db_name, db_operation, le) (rate(db_latency_seconds_bucket[5m]))) -``` - **Service Availability:** ```promql up{job=~"orchestration-engine|consent-engine|policy-decision-point|portal-backend|audit-service"} ``` -**Current Metric Values (All):** -```promql -{__name__=~"http_.*|external_.*|db_.*|business_.*|workflow_.*|cache_.*|decision_.*"} -``` - --- ## Grafana Dashboard @@ -168,7 +262,6 @@ Pre-configured dashboard: **Go Services Metrics** **URL:** http://localhost:3002/d/go-services/go-services-metrics **Panels:** - - HTTP Traffic (req/s) - HTTP Latency (P95) - Service Health (1=up, 0=down) @@ -178,6 +271,41 @@ Pre-configured dashboard: **Go Services Metrics** --- +## Generating Sample Traffic + +To populate the Grafana dashboard with metrics, generate sample traffic: + +```bash +# From the observability directory +./generate_sample_traffic.sh +``` + +This sends requests to various endpoints on the Orchestration Engine (default: `http://localhost:4000`) and the Policy Decision Point (default: `http://localhost:8082`). 
+ +### Configuration + +```bash +# Change the base URLs +ORCHESTRATION_ENGINE_URL=http://localhost:4000 POLICY_DECISION_POINT_URL=http://localhost:8082 ./generate_sample_traffic.sh + +# Change request interval (default: 2 seconds) +REQUEST_INTERVAL=5 ./generate_sample_traffic.sh + +# Set number of request batches (default: 50, 0 = infinite) +REQUEST_COUNT=100 ./generate_sample_traffic.sh +``` + +### What the Script Does + +The script sends requests to: +- **Health endpoints**: `/health`, `/metrics` (should return 200) +- **API endpoints**: `/graphql` on the Orchestration Engine; `/api/v1/policy/decide` and `/api/v1/policy/metadata` on the Policy Decision Point (may return errors without valid data, but still generates metrics) +- **Invalid endpoints**: `/unknown`, `/api/v1/unknown` (generates 404s) + +**Note**: The script deliberately sends some invalid payloads (e.g., malformed JSON), so expect 4xx responses. These are still useful for metrics (you'll see error rates, different status codes, etc.). + +--- + ## Stop Services ```bash @@ -206,103 +334,110 @@ This setup is for **local development only**. For production: 4. **Storage & Retention**: Adjust `--storage.tsdb.retention.time` based on storage capacity 5. **Alerting**: Configure Alertmanager for production alerts ---- +**Switching to Production Backend:** -## Architecture +Simply set the environment variables (see [Switching Observability Backends](#switching-observability-backends)) - no code changes needed! -``` -Go Services (Port 3000, 4000, 8081, 8082, 3001) - ↓ /metrics endpoint (Prometheus format) -Prometheus (localhost:9090, 30d retention) - ↓ PromQL queries -Grafana (localhost:3002, dashboards) -``` - -**Stack Components:** +--- -- **prometheus**: `prom/prometheus:v2.55.1` -- **grafana**: `grafana/grafana:11.2.0` +## How It Works -**Network:** +### Service Instrumentation -All services run on a shared Docker network (`opendif-network`) to enable service discovery. Services are referenced by their Docker Compose service names in Prometheus configuration (e.g., `orchestration-engine:4000`). 
+All services use **OpenTelemetry** for metrics collection: -**Volumes:** +**Portal Backend** (`portal-backend/v1/middleware/otel_metrics.go`): +- OpenTelemetry metrics middleware wraps `/api/v1/` routes +- Records: `http_request_duration_seconds`, `http_requests_total` +- Attributes: `http.method`, `http.route`, `http.status_code` -- `prometheus-data`: Metric storage (30 day retention) -- `grafana-data`: Dashboard configs & user data +**Exchange Services** (`exchange/shared/monitoring/otel_metrics.go`): +- Shared OpenTelemetry monitoring package with `HTTPMetricsMiddleware()` +- Records: `http_requests_total`, `http_request_duration_seconds` +- Additional metrics: `external_calls_total`, `business_events_total` +- Attributes: `http.method`, `http.route` (normalized), `http.status_code` ---- +**Default Exporter:** Prometheus (for local dev). Configure via `OTEL_METRICS_EXPORTER` env var. -## Troubleshooting +### Metrics Endpoint Exposure -### Prometheus Can't Scrape Services +Each instrumented service exposes a `/metrics` endpoint: -**Issue**: Targets show as DOWN in Prometheus (http://localhost:9090/targets) +```go +// Portal Backend +topLevelMux.Handle("/metrics", v1middleware.MetricsHandler()) -**Solutions:** - -1. Verify services are running and exposing metrics: - ```bash - curl http://localhost:4000/metrics - curl http://localhost:8081/metrics - curl http://localhost:8082/metrics - ``` +// Exchange Services +mux.Handle("/metrics", monitoring.Handler()) +``` -2. Check Prometheus logs: - ```bash - docker compose logs prometheus - ``` +The endpoint returns metrics in **Prometheus text format** (when using Prometheus exporter): +``` +http_requests_total{http_method="GET",http_route="/api/v1/members",http_status_code="200"} 42 +http_request_duration_seconds_bucket{http_method="GET",http_route="/api/v1/members",le="0.1"} 38 +... +``` -3. 
Verify network connectivity: - - Ensure all services are on the same `opendif-network` - - Check service names match Prometheus configuration +### Prometheus Scraping -### Grafana Can't Connect to Prometheus +Prometheus periodically scrapes each service's `/metrics` endpoint: -**Issue**: "Data source is not working" in Grafana +- **Interval**: Every 15 seconds (configurable in `prometheus.yml`) +- **Method**: HTTP GET request to `http://SERVICE:PORT/metrics` +- **Configuration**: Defined in `prometheus/prometheus.yml` as scrape jobs +- **Network**: Uses Docker network (`opendif-network`) for service discovery -**Solutions:** +Example scrape config: +```yaml +- job_name: orchestration-engine + metrics_path: /metrics + static_configs: + - targets: + - orchestration-engine:4000 +``` -1. Verify Prometheus is running: `curl http://localhost:9090/-/healthy` -2. Check datasource URL in `grafana/provisioning/datasources/datasource.yml` (should be `http://prometheus:9090`) -3. Ensure both containers are on the same Docker network (`opendif-network`) +### Metric Storage -### Network Issues +Prometheus stores scraped metrics in its Time-Series Database (TSDB): +- **Format**: Time-series with labels (e.g., `http_requests_total{http_method="GET",http_route="/api/v1/members"}`) +- **Retention**: 30 days (configurable) +- **Query Language**: PromQL (Prometheus Query Language) -**Issue**: Services can't communicate with each other +### Grafana Visualization -**Solutions:** +Grafana queries Prometheus via PromQL to create dashboards: -1. Verify network exists: `docker network ls | grep opendif-network` -2. Check service is on network: `docker network inspect opendif-network` -3. 
Recreate network if needed: - ```bash - docker compose down - docker network rm opendif-network - docker compose up -d - ``` +- **Data Source**: Configured to connect to `http://prometheus:9090` +- **Queries**: Written in PromQL (e.g., `rate(http_requests_total[5m])`) +- **Dashboards**: Pre-configured panels showing: + - HTTP request rates + - Latency percentiles (P95, P99) + - Error rates + - Service health status --- ## How to Add Metrics to New Go Services -1. **Import the monitoring package:** - ```go - import "github.com/gov-dx-sandbox/exchange/shared/monitoring" - ``` +Services automatically initialize OpenTelemetry metrics when first used. No explicit initialization needed. -2. **Expose metrics endpoint in main.go:** +1. **For Exchange Services** - Use shared monitoring package: ```go + import "github.com/gov-dx-sandbox/exchange/shared/monitoring" + mux.Handle("/metrics", monitoring.Handler()) + handler := monitoring.HTTPMetricsMiddleware(mux) ``` -3. **Wrap HTTP handlers with metrics middleware:** +2. **For Portal Backend** - Use middleware package: ```go - handler := monitoring.HTTPMetricsMiddleware(mux) + import v1middleware "github.com/gov-dx-sandbox/portal-backend/v1/middleware" + + topLevelMux.Handle("/metrics", v1middleware.MetricsHandler()) + topLevelMux.Handle("/api/v1/", v1middleware.MetricsMiddleware(handler)) ``` -4. **Add service to Prometheus configuration:** +3. **Add service to Prometheus configuration:** Edit `prometheus/prometheus.yml`: ```yaml - job_name: your-service @@ -315,7 +450,7 @@ All services run on a shared Docker network (`opendif-network`) to enable servic port: 'PORT' ``` -5. **Ensure service is on `opendif-network`:** +4. **Ensure service is on `opendif-network`:** In your service's `docker-compose.yml`: ```yaml services: @@ -329,14 +464,85 @@ All services run on a shared Docker network (`opendif-network`) to enable servic external: true ``` -6. **Restart Prometheus:** +5. 
**Restart Prometheus:** ```bash docker compose restart prometheus ``` --- +## Troubleshooting + +### Metrics not appearing + +1. Check that metrics are initialized: + - Look for log messages: "Initialized OpenTelemetry metrics with..." + - Check `/metrics` endpoint returns data: `curl http://localhost:3000/metrics` + +2. For OTLP exporter: + - Verify `OTEL_EXPORTER_OTLP_ENDPOINT` is set correctly + - Check network connectivity to the endpoint + - Verify API keys/headers are correct + +3. Check environment variables: + ```bash + env | grep OTEL + ``` + +### Prometheus Can't Scrape Services + +**Issue**: Targets show as DOWN in Prometheus (http://localhost:9091/targets) + +**Solutions:** + +1. Verify services are running and exposing metrics: + ```bash + curl http://localhost:4000/metrics + curl http://localhost:8081/metrics + curl http://localhost:8082/metrics + ``` + +2. Check Prometheus logs: + ```bash + docker compose logs prometheus + ``` + +3. Verify network connectivity: + - Ensure all services are on the same `opendif-network` + - Check service names match Prometheus configuration + +### Grafana Can't Connect to Prometheus + +**Issue**: "Data source is not working" in Grafana + +**Solutions:** + +1. Verify Prometheus is running: `curl http://localhost:9091/-/healthy` +2. Check datasource URL in `grafana/provisioning/datasources/datasource.yml` (should be `http://prometheus:9090`) +3. Ensure both containers are on the same Docker network (`opendif-network`) + +### Network Issues + +**Issue**: Services can't communicate with each other + +**Solutions:** + +1. Verify network exists: `docker network ls | grep opendif-network` +2. Check service is on network: `docker network inspect opendif-network` +3. 
Recreate network if needed: + ```bash + docker compose down + docker network rm opendif-network + docker compose up -d + ``` + +--- + ## Additional Resources +- [OpenTelemetry Go Documentation](https://opentelemetry.io/docs/instrumentation/go/) +- [OpenTelemetry Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/) - [Prometheus Documentation](https://prometheus.io/docs/) - [Grafana Documentation](https://grafana.com/docs/) +- [Datadog OTLP Ingest](https://docs.datadoghq.com/opentelemetry/otlp_ingest_in_the_agent/) +- [New Relic OTLP](https://docs.newrelic.com/docs/more-integrations/open-source-telemetry-integrations/opentelemetry/opentelemetry-setup/) diff --git a/observability/docker-compose.yml b/observability/docker-compose.yml index eac85a8e..4f907087 100644 --- a/observability/docker-compose.yml +++ b/observability/docker-compose.yml @@ -4,7 +4,7 @@ services: container_name: opendif-prometheus restart: unless-stopped ports: - - "9090:9090" + - "9091:9090" volumes: - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro - prometheus-data:/prometheus @@ -45,5 +45,5 @@ volumes: networks: opendif-network: name: opendif-network - driver: bridge + external: true diff --git a/observability/generate_sample_traffic.sh b/observability/generate_sample_traffic.sh new file mode 100755 index 00000000..625c4b48 --- /dev/null +++ b/observability/generate_sample_traffic.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# Script to generate sample traffic for Grafana dashboard +# This sends requests to various endpoints to populate metrics +# Targets: Orchestration Engine (port 4000) and Policy Decision Point (port 8082) + +ORCHESTRATION_ENGINE_URL="${ORCHESTRATION_ENGINE_URL:-http://localhost:4000}" +POLICY_DECISION_POINT_URL="${POLICY_DECISION_POINT_URL:-http://localhost:8082}" +INTERVAL="${REQUEST_INTERVAL:-2}" # seconds between requests +COUNT="${REQUEST_COUNT:-50}" # number of requests per endpoint (0 = infinite) + +echo 
"==========================================" +echo "Generating Sample Traffic for Grafana" +echo "==========================================" +echo "Orchestration Engine: $ORCHESTRATION_ENGINE_URL" +echo "Policy Decision Point: $POLICY_DECISION_POINT_URL" +echo "Interval: ${INTERVAL}s" +echo "Count: ${COUNT} (0 = infinite)" +echo "" +echo "Press Ctrl+C to stop" +echo "==========================================" +echo "" + +# Function to send a request and show status +send_request() { + local base_url=$1 + local method=$2 + local endpoint=$3 + local data=$4 + local description=$5 + + if [ -n "$data" ]; then + response=$(curl -s -w "\n%{http_code}" -X "$method" \ + -H "Content-Type: application/json" \ + -d "$data" \ + "$base_url$endpoint" 2>/dev/null) + else + response=$(curl -s -w "\n%{http_code}" -X "$method" \ + "$base_url$endpoint" 2>/dev/null) + fi + + http_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | sed '$d') + + if [ "$http_code" -ge 200 ] && [ "$http_code" -lt 300 ]; then + status="✓" + elif [ "$http_code" -ge 400 ] && [ "$http_code" -lt 500 ]; then + status="⚠" + else + status="✗" + fi + + echo "[$status] $base_url$endpoint ($method) -> $http_code" +} + +# Function to run requests in a loop +run_requests() { + local i=0 + while [ $COUNT -eq 0 ] || [ $i -lt $COUNT ]; do + echo "" + echo "--- Batch $((i+1)) ---" + + # Orchestration Engine endpoints + echo ">>> Orchestration Engine ($ORCHESTRATION_ENGINE_URL)" + send_request "$ORCHESTRATION_ENGINE_URL" "GET" "/health" "" "Health check" + sleep 0.5 + send_request "$ORCHESTRATION_ENGINE_URL" "GET" "/metrics" "" "Metrics endpoint" + sleep 0.5 + + # GraphQL endpoint (will generate errors without proper query, but useful for metrics) + send_request "$ORCHESTRATION_ENGINE_URL" "POST" "/graphql" '{"query":"{ __typename }"}' "GraphQL query" + sleep 0.5 + send_request "$ORCHESTRATION_ENGINE_URL" "POST" "/graphql" '{"invalid": "query"}' "Invalid GraphQL" + sleep 0.5 + + # Invalid endpoints (404s) + 
send_request "$ORCHESTRATION_ENGINE_URL" "GET" "/unknown" "" "Unknown endpoint" + sleep 0.5 + + # Policy Decision Point endpoints + echo ">>> Policy Decision Point ($POLICY_DECISION_POINT_URL)" + send_request "$POLICY_DECISION_POINT_URL" "GET" "/health" "" "Health check" + sleep 0.5 + send_request "$POLICY_DECISION_POINT_URL" "GET" "/metrics" "" "Metrics endpoint" + sleep 0.5 + + # Policy decision endpoint (will generate errors without proper data, but useful for metrics) + send_request "$POLICY_DECISION_POINT_URL" "POST" "/api/v1/policy/decide" '{"consumer_id":"test-app","app_id":"test-app","request_id":"req_123","required_fields":["person.name"]}' "Policy decision" + sleep 0.5 + send_request "$POLICY_DECISION_POINT_URL" "POST" "/api/v1/policy/metadata" '{"field_name":"person.name","schema_id":"test-schema"}' "Policy metadata" + sleep 0.5 + + # Invalid endpoints (404s) + send_request "$POLICY_DECISION_POINT_URL" "GET" "/api/v1/unknown" "" "Unknown endpoint" + sleep 0.5 + + # Invalid JSON (400s) + send_request "$POLICY_DECISION_POINT_URL" "POST" "/api/v1/policy/decide" '{"invalid": }' "Invalid JSON" + sleep 0.5 + + i=$((i+1)) + + if [ $COUNT -eq 0 ] || [ $i -lt $COUNT ]; then + echo "" + echo "Waiting ${INTERVAL}s before next batch..." + sleep $INTERVAL + fi + done +} + +# Check if services are accessible +echo "Checking if services are accessible..." 
+ +oe_accessible=false +pdp_accessible=false + +if curl -s -f "$ORCHESTRATION_ENGINE_URL/health" > /dev/null 2>&1; then + echo "✓ Orchestration Engine is accessible at $ORCHESTRATION_ENGINE_URL" + oe_accessible=true +else + echo "⚠ WARNING: Cannot reach Orchestration Engine at $ORCHESTRATION_ENGINE_URL" + echo " Try: curl $ORCHESTRATION_ENGINE_URL/health" +fi + +if curl -s -f "$POLICY_DECISION_POINT_URL/health" > /dev/null 2>&1; then + echo "✓ Policy Decision Point is accessible at $POLICY_DECISION_POINT_URL" + pdp_accessible=true +else + echo "⚠ WARNING: Cannot reach Policy Decision Point at $POLICY_DECISION_POINT_URL" + echo " Try: curl $POLICY_DECISION_POINT_URL/health" +fi + +if [ "$oe_accessible" = false ] && [ "$pdp_accessible" = false ]; then + echo "" + echo "❌ ERROR: Neither service is accessible. Cannot generate traffic." + echo " Make sure at least one service is running and accessible." + exit 1 +fi + +echo "" + +# Start generating traffic +run_requests + +echo "" +echo "==========================================" +echo "Traffic generation complete!" 
+echo "==========================================" +echo "" +echo "View metrics in Grafana:" +echo " http://localhost:3002/d/go-services/go-services-metrics" +echo "" +echo "Or check Prometheus directly:" +echo " http://localhost:9091" +echo "" + diff --git a/observability/prometheus/prometheus.yml b/observability/prometheus/prometheus.yml index 397f0f49..0e265e82 100644 --- a/observability/prometheus/prometheus.yml +++ b/observability/prometheus/prometheus.yml @@ -2,7 +2,7 @@ global: scrape_interval: 15s evaluation_interval: 15s external_labels: - cluster: 'opendif-mvp' + cluster: 'opendif-core' environment: 'local' scrape_configs: diff --git a/portals/consent-portal/nginx.conf b/portals/consent-portal/nginx.conf index 7d589792..7cf7703d 100644 --- a/portals/consent-portal/nginx.conf +++ b/portals/consent-portal/nginx.conf @@ -13,16 +13,14 @@ server { # Security headers add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - # Content-Security-Policy: Currently does not include 'unsafe-inline' in style-src - # If you encounter styling issues, you may need to add 'unsafe-inline' to style-src + # Content-Security-Policy: 'unsafe-inline' in style-src is necessary for inline styles # If you use inline JavaScript event handlers (onclick="..."), add 'unsafe-inline' to script-src # Consider migrating inline styles to external CSS files to maintain security add_header Content-Security-Policy "default-src 'self'; connect-src 'self' https: http:; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; # Do not cache config.js - it contains build-time configuration that may change - # Note: Consent portal uses /public/config.js path (different from other portals) # This must come BEFORE the general static asset rules to take precedence - location = /public/config.js { + location = /config.js { expires -1; add_header Cache-Control "no-cache, no-store, must-revalidate" always; 
add_header Pragma "no-cache" always; @@ -30,7 +28,7 @@ server { # Security headers must be explicitly added in nested location blocks add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; + add_header Content-Security-Policy "default-src 'self'; connect-src 'self' https: http:; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; } # Cache static assets WITH content hashes (long cache, immutable) @@ -42,7 +40,7 @@ server { # Security headers must be explicitly added in nested location blocks add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; + add_header Content-Security-Policy "default-src 'self'; connect-src 'self' https: http:; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; } # Cache static assets WITHOUT content hashes (short cache, no immutable) # Matches: filename.ext in any directory (e.g., /favicon.ico, /static/css/styles.css) @@ -53,7 +51,7 @@ server { # Security headers must be explicitly added in nested location blocks add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; + add_header Content-Security-Policy "default-src 'self'; connect-src 'self' https: http:; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; } # Handle React Router - redirect all 404s to index.html @@ -69,7 
+67,7 @@ server { # Security headers must be explicitly added in nested location blocks add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; - add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; + add_header Content-Security-Policy "default-src 'self'; connect-src 'self' https: http:; script-src 'self'; style-src 'self'; object-src 'none'; img-src 'self' data:; font-src 'self';" always; } } From 9b620f92e96dddfa90df9c294a48f7fba3570648 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 19 Dec 2025 12:16:55 +0530 Subject: [PATCH 2/4] Address PR Comment --- PR_DESCRIPTION.md | 139 ++++++++++++++ exchange/consent-engine/go.mod | 19 +- exchange/consent-engine/go.sum | 104 +---------- exchange/consent-engine/main.go | 17 ++ .../orchestration-engine/OBSERVABILITY.md | 94 ++++++++++ .../orchestration-engine/configs/config.go | 6 +- .../configs/config_test.go | 25 ++- .../database/schema_db.go | 121 ++++++++---- .../federator/accumulator.go | 28 +-- .../federator/accumulator_additional_test.go | 6 +- .../federator/accumulator_more_test.go | 12 +- .../federator/accumulator_schema_test.go | 20 +- .../federator/accumulator_test.go | 16 +- .../federator/arghandler.go | 7 +- .../federator/arghandler_test.go | 2 +- .../federator/federator.go | 30 +-- .../federator/federator_internal_test.go | 10 +- .../orchestration-engine/federator/mapper.go | 41 +++-- .../federator/mapper_test.go | 2 +- exchange/orchestration-engine/go.mod | 26 ++- exchange/orchestration-engine/go.sum | 46 ++++- .../orchestration-engine/handlers/schema.go | 7 +- .../handlers/schema_test.go | 4 +- exchange/orchestration-engine/main.go | 25 ++- .../orchestration-engine/policy/pdpclient.go | 2 +- .../policy/pdpclient_test.go | 2 +- .../provider/handler_test.go | 2 +- .../orchestration-engine/provider/provider.go | 16 +- .../provider/provider_test.go | 4 +- 
.../orchestration-engine/server/server.go | 34 +++- .../server/server_internal_test.go | 37 +++- .../server/server_test.go | 2 +- .../orchestration-engine/services/schema.go | 19 +- exchange/policy-decision-point/go.mod | 15 +- exchange/policy-decision-point/go.sum | 26 ++- exchange/policy-decision-point/main.go | 8 +- exchange/policy-decision-point/v1/handler.go | 8 + exchange/shared/monitoring/TEST_SUMMARY.md | 115 ++++++++++++ exchange/shared/monitoring/VERIFICATION.md | 173 ++++++++++++++++++ .../dashboards/go-services-metrics.json | 40 +--- .../provisioning/dashboards/dashboard.yml | 5 +- .../provisioning/datasources/datasource.yml | 5 +- observability/prometheus/prometheus.yml | 14 +- portal-backend/go.mod | 12 +- portal-backend/go.sum | 28 ++- portal-backend/main.go | 22 +-- portals/admin-portal/nginx.conf | 13 +- 47 files changed, 1049 insertions(+), 360 deletions(-) create mode 100644 PR_DESCRIPTION.md create mode 100644 exchange/orchestration-engine/OBSERVABILITY.md create mode 100644 exchange/shared/monitoring/TEST_SUMMARY.md create mode 100644 exchange/shared/monitoring/VERIFICATION.md diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md new file mode 100644 index 00000000..a2c9bdf0 --- /dev/null +++ b/PR_DESCRIPTION.md @@ -0,0 +1,139 @@ +## Summary + +This PR addresses critical security and observability issues identified during code review: + +1. **Security Regression Fix**: Resolves Nginx security header inheritance issue across all portal configurations +2. **Observability Improvements**: Fixes histogram bucket configuration and prevents metric cardinality explosion in the monitoring package + +**Note**: This PR focuses on security and observability fixes. Audit service improvements and OE integration changes are handled in separate PRs. + +## Key Changes + +### 1. 
Security: Nginx Header Inheritance Fix + +**Problem**: In Nginx, `add_header` directives from a parent block (like `server`) are not inherited by child blocks (like `location`) if the child block defines its own `add_header` directives. This caused a security regression where location blocks with custom headers (e.g., `Cache-Control`, `Content-Type`) were missing critical security headers (`X-Frame-Options`, `X-Content-Type-Options`, `Content-Security-Policy`). + +**Solution**: Explicitly added security headers to all location blocks that define their own `add_header` directives in all three portals: + +#### Files Modified: +- `portals/consent-portal/nginx.conf` +- `portals/member-portal/nginx.conf` +- `portals/admin-portal/nginx.conf` + +#### Changes: +- Added security headers (`X-Frame-Options`, `X-Content-Type-Options`, `Content-Security-Policy`) to all location blocks: + - `/config.js` location block + - Hashed static assets location block (`^.+\\.[a-f0-9]{6,}\\.(jpg|jpeg|png|...)`) + - Non-hashed static assets location block (`\\.(jpg|jpeg|png|...)`) + - `/health` location block +- Added `always` flag to all `add_header` directives in `admin-portal/nginx.conf` for consistency +- Fixed Content-Security-Policy in `consent-portal` to include `connect-src 'self' https: http:` + +**Security Impact**: All HTTP responses from portal services now include security headers, preventing vulnerabilities like clickjacking and MIME-type sniffing attacks. + +### 2. Observability: Histogram Bucket Configuration Fix + +**Problem**: Only `http_request_duration_seconds` had custom histogram buckets configured. The `external_call_duration_seconds` metric was using default OpenTelemetry buckets, leading to inconsistent metric configurations. + +**Solution**: Added explicit histogram bucket configuration for `external_call_duration_seconds` to match `http_request_duration_seconds`. 
+ +#### File Modified: +- `exchange/shared/monitoring/otel_metrics.go` + +#### Changes: +- Added `sdkmetric.WithView` configuration for `external_call_duration_seconds` metric +- Both duration metrics now use consistent custom buckets: `[.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]` seconds + +**Impact**: Consistent histogram bucket configuration across all duration metrics enables accurate performance analysis and alerting. + +### 3. Observability: Metric Cardinality Explosion Prevention + +**Problem**: The `looksLikeID` function in route normalization was too broad, incorrectly classifying static path segments like `data-owner` or `list-all` as dynamic IDs because they contained hyphens. This created excessive unique metric time series and could overload monitoring systems. + +**Solution**: Improved ID detection logic to be more specific and prevent false positives. + +#### File Modified: +- `exchange/shared/monitoring/metrics.go` + +#### Changes: +- **UUID Detection**: Added specific check for UUID format (`len(s) == 36 && strings.Count(s, "-") == 4`) +- **Separator + Number Detection**: Updated logic to require both separators (`_` or `-`) AND numeric characters: `(strings.Contains(s, "_") || strings.Contains(s, "-")) && strings.ContainsAny(s, "0123456789")` +- **Prevents False Positives**: Static paths like `data-owner`, `list-all`, `check-status` are no longer incorrectly normalized +- **Correctly Detects**: Actual IDs like `consent_abc123`, `app-456`, UUIDs are still properly detected + +**Impact**: Prevents metric cardinality explosion while maintaining accurate route normalization for actual dynamic IDs. + +### 4. Testing: Comprehensive Test Coverage + +Added comprehensive unit tests to verify all improvements: + +#### File Modified: +- `exchange/shared/monitoring/metrics_test.go` + +#### New Tests Added: +1. `TestLooksLikeIDImprovedLogic` - Verifies improved ID detection prevents false positives +2. 
`TestRouteNormalizationWithStaticPaths` - Ensures static paths with hyphens are not incorrectly normalized +3. `TestHistogramBucketsConfiguration` - Verifies both histogram metrics use custom buckets +4. `TestIsInitialized` - Tests initialization state functions +5. `TestMultipleInitializations` - Verifies thread-safety of multiple initialization calls +6. `TestHTTPMetricsMiddlewareWithDifferentStatusCodes` - Tests different HTTP status code recording +7. `TestNormalizeRouteWith404` - Ensures 404s are normalized to "unknown" + +**Impact**: Ensures all improvements work correctly and prevents regressions. + +## Files Changed + +### Security Fixes +- `portals/consent-portal/nginx.conf` +- `portals/member-portal/nginx.conf` +- `portals/admin-portal/nginx.conf` + +### Observability Fixes +- `exchange/shared/monitoring/otel_metrics.go` +- `exchange/shared/monitoring/metrics.go` +- `exchange/shared/monitoring/metrics_test.go` + +### Documentation +- `exchange/shared/monitoring/TEST_SUMMARY.md` (new) +- `exchange/shared/monitoring/VERIFICATION.md` (new) + +## Testing + +- [x] **Unit Tests**: All new and existing tests pass +- [x] **Build Verification**: Code compiles successfully with no linter errors +- [x] **Security Verification**: All location blocks include security headers +- [x] **Observability Verification**: Histogram buckets configured correctly, ID detection prevents false positives +- [x] **Backward Compatibility**: API contract remains the same, only internal improvements + +## Verification + +### Security Headers Verification +All location blocks that define their own `add_header` directives now include: +- ✅ `X-Frame-Options: SAMEORIGIN` +- ✅ `X-Content-Type-Options: nosniff` +- ✅ `Content-Security-Policy: ...` (portal-specific) + +All security headers use the `always` flag to ensure they're sent even on error responses. 
+ +### Observability Verification +- ✅ Both `http_request_duration_seconds` and `external_call_duration_seconds` use custom histogram buckets +- ✅ Static paths with hyphens (e.g., `data-owner`, `list-all`) are NOT incorrectly normalized +- ✅ Actual IDs (UUIDs, numeric IDs, IDs with separators + numbers) are correctly detected +- ✅ Route normalization prevents metric cardinality explosion + +## Impact + +### Security +- **Critical**: All portal responses now include security headers, preventing clickjacking and MIME-type sniffing attacks +- **Compliance**: Meets security best practices for web application headers + +### Observability +- **Performance**: Consistent histogram buckets enable accurate performance analysis +- **Reliability**: Prevents metric cardinality explosion that could overload monitoring systems +- **Maintainability**: Comprehensive test coverage ensures improvements work correctly + +## Related Issues + +- Addresses security regression in Nginx configuration +- Fixes inconsistent histogram bucket configuration +- Prevents metric cardinality explosion in route normalization diff --git a/exchange/consent-engine/go.mod b/exchange/consent-engine/go.mod index 158f4484..4f576c0e 100644 --- a/exchange/consent-engine/go.mod +++ b/exchange/consent-engine/go.mod @@ -5,7 +5,10 @@ go 1.24.6 require ( github.com/golang-jwt/jwt/v5 v5.3.0 github.com/google/uuid v1.6.0 + github.com/gov-dx-sandbox/exchange/pkg/monitoring v0.0.0 github.com/gov-dx-sandbox/exchange/shared/config v0.0.0 + github.com/gov-dx-sandbox/exchange/shared/constants v0.0.0 + github.com/gov-dx-sandbox/exchange/shared/monitoring v0.0.0 github.com/gov-dx-sandbox/exchange/shared/utils v0.0.0 ) @@ -19,12 +22,10 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // 
indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/pgx/v5 v5.7.6 // indirect @@ -32,6 +33,7 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/klauspost/compress v1.17.9 // indirect + github.com/kr/text v0.2.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.20.5 // indirect @@ -39,22 +41,17 @@ require ( github.com/prometheus/common v0.60.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect + go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0 // indirect go.opentelemetry.io/otel v1.32.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect go.opentelemetry.io/otel/metric v1.32.0 // indirect - go.opentelemetry.io/otel/sdk v1.32.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect + go.opentelemetry.io/otel/sdk v1.27.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.27.0 // indirect go.opentelemetry.io/otel/trace v1.32.0 // indirect - go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.46.0 // indirect - golang.org/x/net v0.47.0 // indirect golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.39.0 // indirect golang.org/x/text v0.32.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect - google.golang.org/grpc v1.67.1 // indirect google.golang.org/protobuf v1.35.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) @@ -66,3 +63,5 @@ replace 
github.com/gov-dx-sandbox/exchange/shared/constants => ./shared/constant replace github.com/gov-dx-sandbox/exchange/shared/monitoring => ../shared/monitoring replace github.com/gov-dx-sandbox/exchange/shared/utils => ./shared/utils + +replace github.com/gov-dx-sandbox/exchange/pkg/monitoring => ../pkg/monitoring diff --git a/exchange/consent-engine/go.sum b/exchange/consent-engine/go.sum index ef70252f..20050fa7 100644 --- a/exchange/consent-engine/go.sum +++ b/exchange/consent-engine/go.sum @@ -1,108 +1,18 @@ -github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= -github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= -github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= -github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= -github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= 
-github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= +github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= -github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= -github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= -github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= -github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= -github.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk= -github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M= -github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= -github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= -github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= -github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= -github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= -github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod 
h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= -github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= -github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= -github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod 
h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= -github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= -go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8= -go.opentelemetry.io/otel/exporters/prometheus v0.54.0 h1:rFwzp68QMgtzu9PgP3jm9XaMICI6TsofWWPcBDKwlsU= -go.opentelemetry.io/otel/exporters/prometheus v0.54.0/go.mod h1:QyjcV9qDP6VeK5qPyKETvNjmaaEc7+gqjh4SS0ZYzDU= -go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= -go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= -go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= -go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= -go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= -go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= -go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= -go.opentelemetry.io/otel/trace 
v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= -go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= -go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= -golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= -golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= -golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= -golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= -golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= -google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= -google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= -google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= -google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod 
h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gorm.io/driver/postgres v1.6.0 h1:2dxzU8xJ+ivvqTRph34QX+WrRaJlmfyPqXmoGVjMBa4= -gorm.io/driver/postgres v1.6.0/go.mod h1:vUw0mrGgrTK+uPHEhAdV4sfFELrByKVGnaVRkXDhtWo= -gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg= -gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= diff --git a/exchange/consent-engine/main.go b/exchange/consent-engine/main.go index 7a4b0c0e..83ca6db7 100644 --- a/exchange/consent-engine/main.go +++ b/exchange/consent-engine/main.go @@ -1,11 +1,13 @@ package main import ( + "context" "log/slog" "net/http" "os" "time" + "github.com/gov-dx-sandbox/exchange/pkg/monitoring" "github.com/gov-dx-sandbox/exchange/shared/config" "github.com/gov-dx-sandbox/exchange/shared/monitoring" "github.com/gov-dx-sandbox/exchange/shared/utils" @@ -40,6 +42,20 @@ func main() { // Setup logging utils.SetupLogging(cfg.Logging.Format, cfg.Logging.Level) + monitorCtx := context.Background() + shutdownMetrics, err := monitoring.Setup(monitorCtx, monitoring.Config{ + ServiceName: "consent-engine", + ResourceAttrs: map[string]string{ + "environment": cfg.Environment, + "version": Version, + }, + }) + if err != nil { + slog.Error("Failed to initialize telemetry", "error", err) + os.Exit(1) + } + defer func() { _ = 
shutdownMetrics(context.Background()) }() + slog.Info("Starting consent engine", "environment", cfg.Environment, "port", cfg.Service.Port, @@ -119,6 +135,7 @@ func main() { // Register legacy /health endpoint for compatibility with health checks mux.Handle("/health", utils.PanicRecoveryMiddleware(utils.HealthHandler("consent-engine"))) + mux.Handle("/metrics", monitoring.Handler()) // Create server configuration serverConfig := &utils.ServerConfig{ diff --git a/exchange/orchestration-engine/OBSERVABILITY.md b/exchange/orchestration-engine/OBSERVABILITY.md new file mode 100644 index 00000000..c9a12c35 --- /dev/null +++ b/exchange/orchestration-engine/OBSERVABILITY.md @@ -0,0 +1,94 @@ +## Observability Guide + +This service already exposes OpenTelemetry metrics (HTTP latency, runtime stats, external dependency calls, workflow timing, etc.). Follow the steps below to see everything in Grafana. + +--- + +### 1. Prerequisites +* Docker Desktop (or another Docker runtime) is running. +* The orchestration-engine service is running locally and reachable at `http://localhost:4000`. + + + +### 2. Start the Monitoring Stack (Prometheus + Grafana) +We ship a ready-made setup under `exchange/monitoring`. Run it from the repo root: + +```bash +cd exchange/monitoring +docker compose up -d +``` + +What this does: +* Prometheus listens on **http://localhost:9090** and scrapes `http://host.docker.internal:4000/metrics`. +* Grafana listens on **http://localhost:3002** with user/pass `admin / admin`. +* A dashboard named **"Orchestration Engine Metrics"** is auto-loaded with the most relevant charts. + +To stop the stack later: `docker compose down`. + + +### 3. Verify Prometheus Is Scraping +1. Open http://localhost:9090/targets +2. You should see a job called `orchestration-engine` with status **UP**. +3. Optional: run `curl http://localhost:4000/metrics` to confirm the service exposes metrics. + +If the job shows **DOWN**, double-check: +* The service is running. 
+* The monitoring stack can reach it (Docker Desktop on macOS uses `host.docker.internal` automatically; change the hostname in `exchange/monitoring/prometheus/prometheus.yml` if needed). + +### 4. Explore Grafana +1. Visit http://localhost:3002 and log in (`admin` / `admin`). +2. Grafana automatically finds the Prometheus datasource and loads the dashboard (`Browse → Orchestration Engine → Orchestration Engine Metrics`). +3. Panels you’ll see: + - **HTTP Traffic**: request rate by method/route. + - **HTTP Latency (P95)**: tail latency derived from the histogram. + - **External Calls & Error %**: latency and error rate per downstream system (DB, PDP, etc.). + - **Business Events**: success/failure counts for key workflows. + - **Workflow Duration / In-Flight**: monitors workflow efficiency and queue depth. + +Feel free to duplicate the dashboard and experiment with your own panels (gear icon → “Save As”). + + +### 5. Distributed Tracing (Optional) +If you also want traces: +1. Run Jaeger: + ```bash + docker run -d --name jaeger \ + -p 16686:16686 -p 4317:4317 -p 4318:4318 \ + jaegertracing/all-in-one:1.56 + ``` +2. Export traces from the service by setting: + ``` + OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 + OTEL_TRACES_EXPORTER=otlp + ``` +3. View traces at http://localhost:16686. + + +### 6. Basic Alerting (Optional) +1. Prometheus Alerting: + * Create `alert.rules.yml` with the sample rules below and reference it from `prometheus.yml`. + * Use Alertmanager to forward notifications (Slack/email/PagerDuty). +2. Grafana Alerting: + * Open a panel → “Alert” tab → configure thresholds (e.g., P99 latency > 500 ms). + +Sample PromQL rules: +```yaml +groups: + - name: orchestration-engine + rules: + - alert: HighP99Latency + expr: histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le)) > 0.5 + for: 5m + - alert: HighExternalErrorRate + expr: rate(external_call_errors_total[5m]) > 5 + for: 5m +``` + +### 7. 
Operational Checklist +- [ ] `/metrics` reachable & Prometheus target shows **UP**. +- [ ] Grafana dashboard displays HTTP / external / workflow panels. +- [ ] (Optional) Jaeger shows traces for GraphQL requests. +- [ ] Alerts configured for latency/error spikes. +- [ ] Runbooks or on-call docs reference these dashboards. + +Once the stack is running, you can confidently observe the service's health without needing deep Prometheus or Grafana expertise. diff --git a/exchange/orchestration-engine/configs/config.go b/exchange/orchestration-engine/configs/config.go index e5bf35e0..b5961a42 100644 --- a/exchange/orchestration-engine/configs/config.go +++ b/exchange/orchestration-engine/configs/config.go @@ -5,9 +5,9 @@ import ( "fmt" "os" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/auth" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/provider" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/provider" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/parser" "github.com/graphql-go/graphql/language/source" diff --git a/exchange/orchestration-engine/configs/config_test.go b/exchange/orchestration-engine/configs/config_test.go index 7e248cf0..4a8a161e 100644 --- a/exchange/orchestration-engine/configs/config_test.go +++ b/exchange/orchestration-engine/configs/config_test.go @@ -7,7 +7,7 @@ import ( "strings" "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" ) func TestLoadConfigFromBytes_ValidJSON(t *testing.T) { @@ -37,6 +37,7 @@ func TestLoadConfigFromBytes_ValidJSON(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { 
t.Fatalf("Expected no error, got %v", err) } @@ -86,6 +87,7 @@ func TestLoadConfigFromBytes_EmptyJSON(t *testing.T) { emptyJSON := []byte(`{}`) config, err := LoadConfigFromBytes(emptyJSON) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -106,6 +108,7 @@ func TestLoadConfigFromBytes_DerivedConfigLogic_PdpURL(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -123,6 +126,7 @@ func TestLoadConfigFromBytes_DerivedConfigLogic_CeURL(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -143,6 +147,7 @@ func TestLoadConfigFromBytes_DerivedConfigLogic_PdpConfigTakesPrecedence(t *test }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -163,6 +168,7 @@ func TestLoadConfigFromBytes_DerivedConfigLogic_CeConfigTakesPrecedence(t *testi }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -185,6 +191,7 @@ func TestLoadConfigFromBytes_ArgMappingFallback(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -213,6 +220,7 @@ func TestLoadConfigFromBytes_ArgMappingExplicitlySet(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -245,12 +253,13 @@ func TestLoadConfigFile_ValidFile(t *testing.T) { t.Fatalf("Failed to marshal config data: %v", err) } - err = os.WriteFile(configPath, jsonData, 0o644) + err = os.WriteFile(configPath, jsonData, 0644) if err != nil { t.Fatalf("Failed to write config file: %v", err) } config, err := LoadConfigFile(configPath) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -286,7 +295,7 @@ func TestLoadConfigFile_InvalidJSON(t *testing.T) { tempDir := t.TempDir() configPath := 
filepath.Join(tempDir, "invalid.json") - err := os.WriteFile(configPath, []byte(`{invalid json}`), 0o644) + err := os.WriteFile(configPath, []byte(`{invalid json}`), 0644) if err != nil { t.Fatalf("Failed to write invalid config file: %v", err) } @@ -356,7 +365,7 @@ func TestLoadConfig_CustomPath(t *testing.T) { t.Fatalf("Failed to marshal config data: %v", err) } - err = os.WriteFile(configPath, jsonData, 0o644) + err = os.WriteFile(configPath, jsonData, 0644) if err != nil { t.Fatalf("Failed to write config file: %v", err) } @@ -364,6 +373,7 @@ func TestLoadConfig_CustomPath(t *testing.T) { os.Setenv("CONFIG_PATH", configPath) config, err := LoadConfig() + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -433,6 +443,7 @@ func TestGetSchemaDocument_ValidSchema(t *testing.T) { } doc, err := config.GetSchemaDocument() + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -538,6 +549,7 @@ func TestConfig_AllFieldsUnmarshal(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -597,6 +609,7 @@ func TestProviderConfig_WithAuth(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -623,6 +636,7 @@ func TestProviderConfig_WithoutAuth(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -656,6 +670,7 @@ func TestConfig_MultipleProviders(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -684,6 +699,7 @@ func TestConfig_ArgMappings(t *testing.T) { }`) config, err := LoadConfigFromBytes(jsonData) + if err != nil { t.Fatalf("Expected no error, got %v", err) } @@ -720,6 +736,7 @@ func TestGetSchemaDocument_ComplexSchema(t *testing.T) { } doc, err := config.GetSchemaDocument() + if err != nil { t.Fatalf("Expected no error, got %v", err) } diff 
--git a/exchange/orchestration-engine/database/schema_db.go b/exchange/orchestration-engine/database/schema_db.go index 9267eeb2..104ea60e 100644 --- a/exchange/orchestration-engine/database/schema_db.go +++ b/exchange/orchestration-engine/database/schema_db.go @@ -1,10 +1,12 @@ package database import ( + "context" "database/sql" "fmt" "time" + "github.com/gov-dx-sandbox/exchange/pkg/monitoring" _ "github.com/lib/pq" ) @@ -15,22 +17,27 @@ type SchemaDB struct { // NewSchemaDB creates a new schema database connection func NewSchemaDB(connectionString string) (*SchemaDB, error) { + start := time.Now() db, err := sql.Open("postgres", connectionString) if err != nil { + monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), err) return nil, fmt.Errorf("failed to open database: %w", err) } - if err := db.Ping(); err != nil { - return nil, fmt.Errorf("failed to ping database: %w", err) + if pingErr := db.Ping(); pingErr != nil { + monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), pingErr) + return nil, fmt.Errorf("failed to ping database: %w", pingErr) } schemaDB := &SchemaDB{db: db} // Create tables if they don't exist if err := schemaDB.createTables(); err != nil { + monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), err) return nil, fmt.Errorf("failed to create tables: %w", err) } + monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), nil) return schemaDB, nil } @@ -41,6 +48,11 @@ func (s *SchemaDB) Close() error { // createTables creates the necessary tables func (s *SchemaDB) createTables() error { + start := time.Now() + var err error + defer func() { + monitoring.RecordExternalCall(context.Background(), "postgres", "createTables", time.Since(start), err) + }() // Create unified_schemas table createSchemasTable := ` CREATE TABLE IF NOT EXISTS unified_schemas ( @@ -56,8 +68,9 @@ func (s *SchemaDB) 
createTables() error { is_active BOOLEAN DEFAULT FALSE );` - if _, err := s.db.Exec(createSchemasTable); err != nil { - return fmt.Errorf("failed to create unified_schemas table: %w", err) + if _, execErr := s.db.Exec(createSchemasTable); execErr != nil { + err = fmt.Errorf("failed to create unified_schemas table: %w", execErr) + return err } // Create schema_versions table for change tracking @@ -72,8 +85,9 @@ func (s *SchemaDB) createTables() error { created_by VARCHAR(255) NOT NULL );` - if _, err := s.db.Exec(createVersionsTable); err != nil { - return fmt.Errorf("failed to create schema_versions table: %w", err) + if _, execErr := s.db.Exec(createVersionsTable); execErr != nil { + err = fmt.Errorf("failed to create schema_versions table: %w", execErr) + return err } return nil @@ -94,81 +108,106 @@ type Schema struct { } // CreateSchema creates a new schema in the database -func (s *SchemaDB) CreateSchema(schema *Schema) error { +func (s *SchemaDB) CreateSchema(schema *Schema) (err error) { + start := time.Now() + defer func() { + monitoring.RecordExternalCall(context.Background(), "postgres", "CreateSchema", time.Since(start), err) + }() query := ` INSERT INTO unified_schemas (id, version, sdl, status, description, created_by, checksum, is_active) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)` - _, err := s.db.Exec(query, schema.ID, schema.Version, schema.SDL, schema.Status, + _, execErr := s.db.Exec(query, schema.ID, schema.Version, schema.SDL, schema.Status, schema.Description, schema.CreatedBy, schema.Checksum, schema.IsActive) - if err != nil { - return fmt.Errorf("failed to create schema: %w", err) + + if execErr != nil { + err = fmt.Errorf("failed to create schema: %w", execErr) + return err } return nil } // GetSchemaByVersion retrieves a schema by version -func (s *SchemaDB) GetSchemaByVersion(version string) (*Schema, error) { +func (s *SchemaDB) GetSchemaByVersion(version string) (_ *Schema, err error) { + start := time.Now() + defer func() { + 
monitoring.RecordExternalCall(context.Background(), "postgres", "GetSchemaByVersion", time.Since(start), err) + }() query := `SELECT id, version, sdl, status, description, created_at, updated_at, created_by, checksum, is_active FROM unified_schemas WHERE version = $1` row := s.db.QueryRow(query, version) schema := &Schema{} - err := row.Scan(&schema.ID, &schema.Version, &schema.SDL, &schema.Status, + scanErr := row.Scan(&schema.ID, &schema.Version, &schema.SDL, &schema.Status, &schema.Description, &schema.CreatedAt, &schema.UpdatedAt, &schema.CreatedBy, &schema.Checksum, &schema.IsActive) - if err != nil { - if err == sql.ErrNoRows { - return nil, fmt.Errorf("schema version %s not found", version) + + if scanErr != nil { + if scanErr == sql.ErrNoRows { + err = fmt.Errorf("schema version %s not found", version) + return nil, err } - return nil, fmt.Errorf("failed to get schema: %w", err) + err = fmt.Errorf("failed to get schema: %w", scanErr) + return nil, err } return schema, nil } // GetActiveSchema retrieves the currently active schema -func (s *SchemaDB) GetActiveSchema() (*Schema, error) { +func (s *SchemaDB) GetActiveSchema() (_ *Schema, err error) { + start := time.Now() + defer func() { + monitoring.RecordExternalCall(context.Background(), "postgres", "GetActiveSchema", time.Since(start), err) + }() query := `SELECT id, version, sdl, status, description, created_at, updated_at, created_by, checksum, is_active FROM unified_schemas WHERE is_active = TRUE LIMIT 1` row := s.db.QueryRow(query) schema := &Schema{} - err := row.Scan(&schema.ID, &schema.Version, &schema.SDL, &schema.Status, + scanErr := row.Scan(&schema.ID, &schema.Version, &schema.SDL, &schema.Status, &schema.Description, &schema.CreatedAt, &schema.UpdatedAt, &schema.CreatedBy, &schema.Checksum, &schema.IsActive) - if err != nil { - if err == sql.ErrNoRows { + + if scanErr != nil { + if scanErr == sql.ErrNoRows { return nil, nil // No active schema } - return nil, fmt.Errorf("failed to get active 
schema: %w", err) + err = fmt.Errorf("failed to get active schema: %w", scanErr) + return nil, err } return schema, nil } // GetAllSchemas retrieves all schemas -func (s *SchemaDB) GetAllSchemas() ([]*Schema, error) { +func (s *SchemaDB) GetAllSchemas() (_ []*Schema, err error) { + start := time.Now() + defer func() { + monitoring.RecordExternalCall(context.Background(), "postgres", "GetAllSchemas", time.Since(start), err) + }() query := `SELECT id, version, sdl, status, description, created_at, updated_at, created_by, checksum, is_active FROM unified_schemas ORDER BY created_at DESC` - rows, err := s.db.Query(query) - if err != nil { - return nil, fmt.Errorf("failed to get schemas: %w", err) + rows, queryErr := s.db.Query(query) + if queryErr != nil { + err = fmt.Errorf("failed to get schemas: %w", queryErr) + return nil, err } defer rows.Close() var schemas []*Schema for rows.Next() { schema := &Schema{} - err := rows.Scan(&schema.ID, &schema.Version, &schema.SDL, &schema.Status, + scanErr := rows.Scan(&schema.ID, &schema.Version, &schema.SDL, &schema.Status, &schema.Description, &schema.CreatedAt, &schema.UpdatedAt, &schema.CreatedBy, &schema.Checksum, &schema.IsActive) - if err != nil { - return nil, fmt.Errorf("failed to scan schema: %w", err) + if scanErr != nil { + err = fmt.Errorf("failed to scan schema: %w", scanErr) + return nil, err } schemas = append(schemas, schema) } @@ -177,38 +216,48 @@ func (s *SchemaDB) GetAllSchemas() ([]*Schema, error) { } // ActivateSchema activates a specific schema version -func (s *SchemaDB) ActivateSchema(version string) error { +func (s *SchemaDB) ActivateSchema(version string) (err error) { + start := time.Now() + defer func() { + monitoring.RecordExternalCall(context.Background(), "postgres", "ActivateSchema", time.Since(start), err) + }() // Start transaction tx, err := s.db.Begin() if err != nil { - return fmt.Errorf("failed to begin transaction: %w", err) + err = fmt.Errorf("failed to begin transaction: %w", err) + 
return err } defer tx.Rollback() // Deactivate all schemas _, err = tx.Exec("UPDATE unified_schemas SET is_active = FALSE") if err != nil { - return fmt.Errorf("failed to deactivate schemas: %w", err) + err = fmt.Errorf("failed to deactivate schemas: %w", err) + return err } // Activate the specified version result, err := tx.Exec("UPDATE unified_schemas SET is_active = TRUE WHERE version = $1", version) if err != nil { - return fmt.Errorf("failed to activate schema: %w", err) + err = fmt.Errorf("failed to activate schema: %w", err) + return err } rowsAffected, err := result.RowsAffected() if err != nil { - return fmt.Errorf("failed to get rows affected: %w", err) + err = fmt.Errorf("failed to get rows affected: %w", err) + return err } if rowsAffected == 0 { - return fmt.Errorf("schema version %s not found", version) + err = fmt.Errorf("schema version %s not found", version) + return err } // Commit transaction - if err := tx.Commit(); err != nil { - return fmt.Errorf("failed to commit transaction: %w", err) + if commitErr := tx.Commit(); commitErr != nil { + err = fmt.Errorf("failed to commit transaction: %w", commitErr) + return err } return nil diff --git a/exchange/orchestration-engine/federator/accumulator.go b/exchange/orchestration-engine/federator/accumulator.go index 9a1db83a..2c9ff62b 100644 --- a/exchange/orchestration-engine/federator/accumulator.go +++ b/exchange/orchestration-engine/federator/accumulator.go @@ -4,9 +4,9 @@ import ( "fmt" "strings" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/federator" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/federator" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/graphql-go/graphql/language/ast" 
"github.com/graphql-go/graphql/language/visitor" ) @@ -20,8 +20,8 @@ func AccumulateResponse(queryAST *ast.Document, federatedResponse *FederationRes // AccumulateResponseWithSchema uses schema for directive resolution func AccumulateResponseWithSchema(queryAST *ast.Document, federatedResponse *FederationResponse, schema *ast.Document) graphql.Response { responseData := make(map[string]interface{}) - path := make([]string, 0) - isTopLevel := true + var path = make([]string, 0) + var isTopLevel = true visitor.Visit(queryAST, &visitor.VisitorOptions{ Enter: func(p visitor.VisitFuncParams) (string, interface{}) { @@ -71,9 +71,9 @@ func AccumulateResponseWithSchema(queryAST *ast.Document, federatedResponse *Fed } if providerInfo != nil { - response := federatedResponse.GetProviderResponse(providerInfo.ProviderKey) + var response = federatedResponse.GetProviderResponse(providerInfo.ProviderKey) if response != nil { - value, err := GetValueAtPath(response.Response.Data, providerInfo.ProviderField) + var value, err = GetValueAtPath(response.Response.Data, providerInfo.ProviderField) if err == nil { // Check if this is an array field by looking at the data type and schema if isArrayFieldValue(fieldName, value) { @@ -134,8 +134,8 @@ func AccumulateResponseWithSchema(queryAST *ast.Document, federatedResponse *Fed // accumulateResponseSimple is the fallback simple accumulator func accumulateResponseSimple(queryAST *ast.Document, federatedResponse *FederationResponse) graphql.Response { responseData := make(map[string]interface{}) - path := make([]string, 0) - isTopLevel := true + var path = make([]string, 0) + var isTopLevel = true visitor.Visit(queryAST, &visitor.VisitorOptions{ Enter: func(p visitor.VisitFuncParams) (string, interface{}) { @@ -158,11 +158,11 @@ func accumulateResponseSimple(queryAST *ast.Document, federatedResponse *Federat return visitor.ActionNoChange, p.Node } - providerInfo := federator.ExtractSourceInfoFromDirective(node) + var providerInfo = 
federator.ExtractSourceInfoFromDirective(node) if providerInfo != nil { - response := federatedResponse.GetProviderResponse(providerInfo.ProviderKey) + var response = federatedResponse.GetProviderResponse(providerInfo.ProviderKey) if response != nil { - value, err := GetValueAtPath(response.Response.Data, providerInfo.ProviderField) + var value, err = GetValueAtPath(response.Response.Data, providerInfo.ProviderField) if err == nil { logger.Log.Debug("Processing field", "fieldName", fieldName, "path", path, "valueType", fmt.Sprintf("%T", value), "hasSelectionSet", node.SelectionSet != nil && len(node.SelectionSet.Selections) > 0) // Check if this is an array field by looking at the selection set and data type @@ -211,8 +211,8 @@ func accumulateResponseSimple(queryAST *ast.Document, federatedResponse *Federat // accumulateResponseWithSchema uses schema info to handle arrays properly func accumulateResponseWithSchema(queryAST *ast.Document, federatedResponse *FederationResponse, schemaInfoMap map[string]*SourceSchemaInfo) graphql.Response { responseData := make(map[string]interface{}) - path := make([]string, 0) - isTopLevel := true + var path = make([]string, 0) + var isTopLevel = true visitor.Visit(queryAST, &visitor.VisitorOptions{ Enter: func(p visitor.VisitFuncParams) (string, interface{}) { diff --git a/exchange/orchestration-engine/federator/accumulator_additional_test.go b/exchange/orchestration-engine/federator/accumulator_additional_test.go index 6f95e648..f99edfd0 100644 --- a/exchange/orchestration-engine/federator/accumulator_additional_test.go +++ b/exchange/orchestration-engine/federator/accumulator_additional_test.go @@ -3,8 +3,8 @@ package federator import ( "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + 
"github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/graphql-go/graphql/language/ast" "github.com/stretchr/testify/assert" ) @@ -30,7 +30,7 @@ func TestAccumulateResponseWithSchemaInfo(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ diff --git a/exchange/orchestration-engine/federator/accumulator_more_test.go b/exchange/orchestration-engine/federator/accumulator_more_test.go index ec5bf8c5..f057268f 100644 --- a/exchange/orchestration-engine/federator/accumulator_more_test.go +++ b/exchange/orchestration-engine/federator/accumulator_more_test.go @@ -3,8 +3,8 @@ package federator import ( "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/stretchr/testify/assert" ) @@ -26,7 +26,7 @@ func TestAccumulateResponse_SimpleBackwardCompatibility(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -168,7 +168,7 @@ func TestAccumulateResponseWithSchema_NoSourceInfo(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -222,7 +222,7 @@ func TestAccumulateResponseWithSchema_MultipleProviders(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -232,7 +232,7 @@ func TestAccumulateResponseWithSchema_MultipleProviders(t 
*testing.T) { }, }, }, - { + &ProviderResponse{ ServiceKey: "rgd", Response: graphql.Response{ Data: map[string]interface{}{ diff --git a/exchange/orchestration-engine/federator/accumulator_schema_test.go b/exchange/orchestration-engine/federator/accumulator_schema_test.go index 93b41374..5a8c5d5a 100644 --- a/exchange/orchestration-engine/federator/accumulator_schema_test.go +++ b/exchange/orchestration-engine/federator/accumulator_schema_test.go @@ -3,7 +3,7 @@ package federator import ( "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/stretchr/testify/assert" ) @@ -25,7 +25,7 @@ func TestAccumulateResponseWithSchema_ArrayField(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ @@ -104,7 +104,7 @@ func TestAccumulateResponseWithSchema_NestedArrayField(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ @@ -172,7 +172,7 @@ func TestAccumulateResponseWithSchema_ArrayFieldWithoutSelectionSet(t *testing.T federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ @@ -211,7 +211,7 @@ func TestAccumulateResponseWithSchema_ProviderNotFound(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -245,7 +245,7 @@ func TestAccumulateResponseWithSchema_ValueNotFound(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: 
graphql.Response{ Data: map[string]interface{}{ @@ -278,7 +278,7 @@ func TestAccumulateResponseWithSchema_NilSchema(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -316,7 +316,7 @@ func TestAccumulateResponseWithSchema_MixedSimpleAndArrayFields(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -326,7 +326,7 @@ func TestAccumulateResponseWithSchema_MixedSimpleAndArrayFields(t *testing.T) { }, }, }, - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ @@ -372,7 +372,7 @@ func TestAccumulateResponseWithSchema_EmptyArray(t *testing.T) { federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ diff --git a/exchange/orchestration-engine/federator/accumulator_test.go b/exchange/orchestration-engine/federator/accumulator_test.go index 65ad8df6..dfe089bb 100644 --- a/exchange/orchestration-engine/federator/accumulator_test.go +++ b/exchange/orchestration-engine/federator/accumulator_test.go @@ -3,8 +3,8 @@ package federator import ( "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/stretchr/testify/assert" ) @@ -29,7 +29,7 @@ func TestAccumulateResponse_SingleObject(t *testing.T) { // Mock federated response federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ 
Data: map[string]interface{}{ @@ -40,7 +40,7 @@ func TestAccumulateResponse_SingleObject(t *testing.T) { }, }, }, - { + &ProviderResponse{ ServiceKey: "rgd", Response: graphql.Response{ Data: map[string]interface{}{ @@ -89,7 +89,7 @@ func TestAccumulateResponse_ArrayField(t *testing.T) { // Mock federated response with array data federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -99,7 +99,7 @@ func TestAccumulateResponse_ArrayField(t *testing.T) { }, }, }, - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ @@ -353,7 +353,7 @@ func TestAccumulateResponse_MixedObjectAndArray(t *testing.T) { // Mock federated response with both object and array data federatedResponse := &FederationResponse{ Responses: []*ProviderResponse{ - { + &ProviderResponse{ ServiceKey: "drp", Response: graphql.Response{ Data: map[string]interface{}{ @@ -364,7 +364,7 @@ func TestAccumulateResponse_MixedObjectAndArray(t *testing.T) { }, }, }, - { + &ProviderResponse{ ServiceKey: "dmt", Response: graphql.Response{ Data: map[string]interface{}{ diff --git a/exchange/orchestration-engine/federator/arghandler.go b/exchange/orchestration-engine/federator/arghandler.go index 77ee0264..9567a65d 100644 --- a/exchange/orchestration-engine/federator/arghandler.go +++ b/exchange/orchestration-engine/federator/arghandler.go @@ -3,7 +3,7 @@ package federator import ( "strings" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/kinds" "github.com/graphql-go/graphql/language/visitor" @@ -83,7 +83,8 @@ func ExtractRequiredArguments(argMap []*graphql.ArgMapping, arguments []*ast.Arg } func PushArgumentsToProviderQueryAst(args []*ArgSource, queryAst 
*FederationServiceAST) { - path := make([]string, 0) + + var path = make([]string, 0) visitor.Visit(queryAst.QueryAst, &visitor.VisitorOptions{ Enter: func(p visitor.VisitFuncParams) (string, interface{}) { @@ -92,7 +93,7 @@ func PushArgumentsToProviderQueryAst(args []*ArgSource, queryAst *FederationServ path = append(path, field.Name.Value) // now check whether the current path matches any argument's TargetArgPath - currentPath := strings.Join(path, ".") + var currentPath = strings.Join(path, ".") for _, arg := range args { if arg == nil || arg.ArgMapping == nil { continue diff --git a/exchange/orchestration-engine/federator/arghandler_test.go b/exchange/orchestration-engine/federator/arghandler_test.go index ba7edb08..3d142f91 100644 --- a/exchange/orchestration-engine/federator/arghandler_test.go +++ b/exchange/orchestration-engine/federator/arghandler_test.go @@ -3,7 +3,7 @@ package federator import ( "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/graphql-go/graphql/language/ast" "github.com/stretchr/testify/assert" ) diff --git a/exchange/orchestration-engine/federator/federator.go b/exchange/orchestration-engine/federator/federator.go index 00f29890..6dffa435 100644 --- a/exchange/orchestration-engine/federator/federator.go +++ b/exchange/orchestration-engine/federator/federator.go @@ -11,16 +11,16 @@ import ( "sync" "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/auth" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/configs" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/consent" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/internals/errors" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/middleware" - auth2 
"github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/auth" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/policy" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/provider" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/configs" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/consent" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/internals/errors" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/middleware" + auth2 "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/policy" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/provider" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/parser" "github.com/graphql-go/graphql/language/source" @@ -171,6 +171,7 @@ func Initialize(configs *configs.Config, providerHandler *provider.Handler, sche // FederateQuery takes a raw GraphQL query, splits it into sub-queries for each service, // sends them to the respective providers, and merges the responses. 
func (f *Federator) FederateQuery(ctx context.Context, request graphql.Request, consumerInfo *auth.ConsumerAssertion) graphql.Response { + // Convert the query string into its ast src := source.NewSource(&source.Source{ Body: []byte(request.Query), @@ -178,6 +179,7 @@ func (f *Federator) FederateQuery(ctx context.Context, request graphql.Request, }) doc, err := parser.Parse(parser.ParseParams{Source: src}) + if err != nil { logger.Log.Error("Failed to parse query", "Error", err) } @@ -253,6 +255,7 @@ func (f *Federator) FederateQuery(ctx context.Context, request graphql.Request, // Collect the directives from the query schemaCollection, err := ProviderSchemaCollector(schema, doc) + if err != nil { logger.Log.Error("Failed to collect provider schema", "Error", err) return graphql.Response{ @@ -269,9 +272,9 @@ func (f *Federator) FederateQuery(ctx context.Context, request graphql.Request, argMapping = f.Configs.ArgMapping } - requiredArguments := FindRequiredArguments(schemaCollection.ProviderFieldMap, argMapping) + var requiredArguments = FindRequiredArguments(schemaCollection.ProviderFieldMap, argMapping) - extractedArgs := ExtractRequiredArguments(requiredArguments, schemaCollection.Arguments) + var extractedArgs = ExtractRequiredArguments(requiredArguments, schemaCollection.Arguments) // check whether there are variables in the request if request.Variables != nil { @@ -442,6 +445,7 @@ func (f *Federator) FederateQuery(ctx context.Context, request graphql.Request, } splitRequests, err := QueryBuilder(schemaCollection.ProviderFieldMap, extractedArgs) + if err != nil { logger.Log.Error("Failed to build queries", "Error", err) return graphql.Response{ @@ -481,7 +485,7 @@ func (f *Federator) FederateQuery(ctx context.Context, request graphql.Request, // Error handling is done above in the if block // Transform the federated responses back to the original query structure using array-aware processing - response := AccumulateResponseWithSchemaInfo(doc, responses, 
schemaInfoMap) + var response = AccumulateResponseWithSchemaInfo(doc, responses, schemaInfoMap) return response } diff --git a/exchange/orchestration-engine/federator/federator_internal_test.go b/exchange/orchestration-engine/federator/federator_internal_test.go index d4010289..dbc281f7 100644 --- a/exchange/orchestration-engine/federator/federator_internal_test.go +++ b/exchange/orchestration-engine/federator/federator_internal_test.go @@ -7,11 +7,11 @@ import ( "net/http/httptest" "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/auth" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/configs" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/policy" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/provider" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/configs" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/policy" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/provider" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/exchange/orchestration-engine/federator/mapper.go b/exchange/orchestration-engine/federator/mapper.go index bef655fd..cd936714 100644 --- a/exchange/orchestration-engine/federator/mapper.go +++ b/exchange/orchestration-engine/federator/mapper.go @@ -4,7 +4,7 @@ import ( "strconv" "strings" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/kinds" "github.com/graphql-go/graphql/language/printer" @@ -27,15 +27,16 @@ type SourceSchemaInfo struct { } func QueryBuilder(maps 
*[]ProviderLevelFieldRecord, args []*ArgSource) ([]*federationServiceRequest, error) { + // initialize return variable - requests := make([]*federationServiceRequest, 0) + var requests = make([]*federationServiceRequest, 0) - queries := BuildProviderLevelQuery(maps) + var queries = BuildProviderLevelQuery(maps) // convert the queries into federationServiceRequest for _, q := range queries { // find the arguments to the specific provider - providerArgs := make([]*ArgSource, 0) + var providerArgs = make([]*ArgSource, 0) for _, arg := range args { if arg == nil { @@ -49,7 +50,7 @@ func QueryBuilder(maps *[]ProviderLevelFieldRecord, args []*ArgSource) ([]*feder PushArgumentsToProviderQueryAst(providerArgs, q) - query := printer.Print(q.QueryAst).(string) + var query = printer.Print(q.QueryAst).(string) println(printer.Print(q.QueryAst).(string)) requests = append(requests, &federationServiceRequest{ @@ -73,7 +74,7 @@ type ProviderLevelFieldRecord struct { // ProviderFieldMap A function to convert the directives into a map of service key to a list of fields. 
func ProviderFieldMap(directives []*ast.Directive) *[]ProviderLevelFieldRecord { - fieldMap := make([]ProviderLevelFieldRecord, 0) + var fieldMap = make([]ProviderLevelFieldRecord, 0) for _, dir := range directives { if dir.Name.Value == "sourceInfo" { @@ -112,21 +113,21 @@ func ProviderSchemaCollector(schema *ast.Document, query *ast.Document) (*Schema } // iterate through the query fields - selections := query.Definitions[0].(*ast.OperationDefinition).SelectionSet + var selections = query.Definitions[0].(*ast.OperationDefinition).SelectionSet // get the query object definition from the schema - queryObjectDef := GetQueryObjectDefinition(schema) + var queryObjectDef = GetQueryObjectDefinition(schema) if queryObjectDef == nil { return nil, &graphql.JSONError{ Message: "Query object definition not found in schema", } } - providerDirectives, arguments := RecursivelyExtractSourceSchemaInfo(selections, schema, queryObjectDef, nil, nil) + var providerDirectives, arguments = RecursivelyExtractSourceSchemaInfo(selections, schema, queryObjectDef, nil, nil) providerFieldMap := ProviderFieldMap(providerDirectives) // get variable definitions from the query - variableDefinitions := query.Definitions[0].(*ast.OperationDefinition).VariableDefinitions + var variableDefinitions = query.Definitions[0].(*ast.OperationDefinition).VariableDefinitions return &SchemaCollectionResponse{ ProviderFieldMap: providerFieldMap, @@ -145,9 +146,9 @@ func BuildSchemaInfoMap(schema *ast.Document, query *ast.Document) (map[string]* } // iterate through the query fields - selections := query.Definitions[0].(*ast.OperationDefinition).SelectionSet + var selections = query.Definitions[0].(*ast.OperationDefinition).SelectionSet // get the query object definition from the schema - queryObjectDef := GetQueryObjectDefinition(schema) + var queryObjectDef = GetQueryObjectDefinition(schema) if queryObjectDef == nil { return nil, &graphql.JSONError{ @@ -182,7 +183,7 @@ func buildSchemaInfoMapRecursive( } // 
Find the field definition in the schema - fieldDef := FindFieldDefinitionFromFieldName(fieldName, schema, objectDefinition.Name.Value) + var fieldDef = FindFieldDefinitionFromFieldName(fieldName, schema, objectDefinition.Name.Value) if fieldDef != nil && len(fieldDef.Directives) > 0 { // Check for @sourceInfo directive @@ -273,7 +274,7 @@ func processNestedFieldsForArray( fieldName := field.Name.Value // Find the field definition in the schema - fieldDef := FindFieldDefinitionFromFieldName(fieldName, schema, objectDefinition.Name.Value) + var fieldDef = FindFieldDefinitionFromFieldName(fieldName, schema, objectDefinition.Name.Value) if fieldDef != nil && len(fieldDef.Directives) > 0 { // Check for @sourceInfo directive @@ -339,7 +340,7 @@ func RecursivelyExtractSourceSchemaInfo( for _, selection := range selectionSet.Selections { if field, ok := selection.(*ast.Field); ok { // Find the field definition in the schema - fieldDef := FindFieldDefinitionFromFieldName(field.Name.Value, schema, objectDefinition.Name.Value) + var fieldDef = FindFieldDefinitionFromFieldName(field.Name.Value, schema, objectDefinition.Name.Value) // Check for @sourceInfo directive if fieldDef != nil && len(fieldDef.Directives) > 0 { @@ -363,7 +364,7 @@ func RecursivelyExtractSourceSchemaInfo( if selection.GetSelectionSet() != nil && len(selection.GetSelectionSet().Selections) > 0 { // Recursively process nested selection sets var nestedObjectDef *ast.ObjectDefinition - isArrayField := false + var isArrayField = false if fieldDef != nil && fieldDef.Type != nil { if fieldDef.Type.GetKind() == "Named" { @@ -380,7 +381,7 @@ func RecursivelyExtractSourceSchemaInfo( } if nestedObjectDef != nil { - selectionSet := field.GetSelectionSet() + var selectionSet = field.GetSelectionSet() // For backward compatibility, use the old function for non-array fields if !isArrayField { directives, arguments = RecursivelyExtractSourceSchemaInfo(selectionSet, schema, nestedObjectDef, directives, arguments) @@ 
-418,7 +419,7 @@ func RecursivelyExtractSourceSchemaInfoWithArrayInfo( for _, selection := range selectionSet.Selections { if field, ok := selection.(*ast.Field); ok { // Find the field definition in the schema - fieldDef := FindFieldDefinitionFromFieldName(field.Name.Value, schema, objectDefinition.Name.Value) + var fieldDef = FindFieldDefinitionFromFieldName(field.Name.Value, schema, objectDefinition.Name.Value) // Check for @sourceInfo directive if fieldDef != nil && len(fieldDef.Directives) > 0 { @@ -443,7 +444,7 @@ func RecursivelyExtractSourceSchemaInfoWithArrayInfo( if selection.GetSelectionSet() != nil && len(selection.GetSelectionSet().Selections) > 0 { // Recursively process nested selection sets var nestedObjectDef *ast.ObjectDefinition - nestedIsArrayField := false + var nestedIsArrayField = false if fieldDef != nil && fieldDef.Type != nil { if fieldDef.Type.GetKind() == "Named" { @@ -460,7 +461,7 @@ func RecursivelyExtractSourceSchemaInfoWithArrayInfo( } if nestedObjectDef != nil { - selectionSet := field.GetSelectionSet() + var selectionSet = field.GetSelectionSet() directives, arguments = RecursivelyExtractSourceSchemaInfoWithArrayInfo(selectionSet, schema, nestedObjectDef, directives, arguments, nestedIsArrayField) } } diff --git a/exchange/orchestration-engine/federator/mapper_test.go b/exchange/orchestration-engine/federator/mapper_test.go index 9a2fe37b..5ad8a78a 100644 --- a/exchange/orchestration-engine/federator/mapper_test.go +++ b/exchange/orchestration-engine/federator/mapper_test.go @@ -3,7 +3,7 @@ package federator import ( "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/kinds" "github.com/stretchr/testify/assert" diff --git a/exchange/orchestration-engine/go.mod b/exchange/orchestration-engine/go.mod index ae9790c8..941e5bae 100644 --- 
a/exchange/orchestration-engine/go.mod +++ b/exchange/orchestration-engine/go.mod @@ -1,4 +1,4 @@ -module github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine +module github.com/gov-dx-sandbox/exchange/orchestration-engine-go go 1.25.0 @@ -7,6 +7,8 @@ require ( github.com/stretchr/testify v1.11.1 ) +require github.com/gov-dx-sandbox/exchange/pkg/monitoring v0.0.0 + require ( github.com/lib/pq v1.10.9 golang.org/x/oauth2 v0.32.0 @@ -14,6 +16,26 @@ require ( require github.com/go-chi/chi/v5 v5.2.3 +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/prometheus/client_golang v1.19.1 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.53.0 // indirect + github.com/prometheus/procfs v0.15.0 // indirect + go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0 // indirect + go.opentelemetry.io/otel v1.27.0 // indirect + go.opentelemetry.io/otel/exporters/prometheus v0.49.0 // indirect + go.opentelemetry.io/otel/metric v1.27.0 // indirect + go.opentelemetry.io/otel/sdk v1.27.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.27.0 // indirect + go.opentelemetry.io/otel/trace v1.27.0 // indirect + golang.org/x/sys v0.20.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect +) + require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/golang-jwt/jwt/v5 v5.3.0 @@ -22,3 +44,5 @@ require ( ) replace github.com/gov-dx-sandbox/audit-service => ../../audit-service + +replace github.com/gov-dx-sandbox/exchange/pkg/monitoring => ../pkg/monitoring diff --git a/exchange/orchestration-engine/go.sum b/exchange/orchestration-engine/go.sum index fbaf2bc4..a93f395f 100644 --- a/exchange/orchestration-engine/go.sum +++ b/exchange/orchestration-engine/go.sum @@ -1,20 +1,64 @@ +github.com/beorn7/perks v1.0.1 
h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE= github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/graphql-go/graphql v0.8.1 h1:p7/Ou/WpmulocJeEx7wjQy611rtXGQaAcXGqanuMMgc= github.com/graphql-go/graphql v0.8.1/go.mod h1:nKiHzRM0qopJEwCITUuIsxk9PlVlwIiiI8pnJEhordQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= 
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE= +github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= +github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek= +github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0 h1:dg9y+7ArpumB6zwImJv47RHfdgOGQ1EMkzP5vLkEnTU= +go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0/go.mod h1:Ul4MtXqu/hJBM+v7a6dCF0nHwckPMLpIpLeCi4+zfdw= +go.opentelemetry.io/otel v1.27.0 h1:9BZoF3yMK/O1AafMiQTVu0YDj5Ea4hPhxCs7sGva+cg= +go.opentelemetry.io/otel v1.27.0/go.mod h1:DMpAK8fzYRzs+bi3rS5REupisuqTheUlSZJ1WnZaPAQ= +go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ= +go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM= +go.opentelemetry.io/otel/metric 
v1.27.0 h1:hvj3vdEKyeCi4YaYfNjv2NUje8FqKqUY8IlF0FxV/ik= +go.opentelemetry.io/otel/metric v1.27.0/go.mod h1:mVFgmRlhljgBiuk/MP/oKylr4hs85GZAylncepAX/ak= +go.opentelemetry.io/otel/sdk v1.27.0 h1:mlk+/Y1gLPLn84U4tI8d3GNJmGT/eXe3ZuOXN9kTWmI= +go.opentelemetry.io/otel/sdk v1.27.0/go.mod h1:Ha9vbLwJE6W86YstIywK2xFfPjbWlCuwPtMkKdz/Y4A= +go.opentelemetry.io/otel/sdk/metric v1.27.0 h1:5uGNOlpXi+Hbo/DRoI31BSb1v+OGcpv2NemcCrOL8gI= +go.opentelemetry.io/otel/sdk/metric v1.27.0/go.mod h1:we7jJVrYN2kh3mVBlswtPU22K0SA+769l93J6bsyvqw= +go.opentelemetry.io/otel/trace v1.27.0 h1:IqYb813p7cmbHk0a5y6pD5JPakbVfftRXABGt5/Rscw= +go.opentelemetry.io/otel/trace v1.27.0/go.mod h1:6RiD1hkAprV4/q+yd2ln1HG9GoPx39SuvvstaLBl+l4= golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/exchange/orchestration-engine/handlers/schema.go b/exchange/orchestration-engine/handlers/schema.go index 95f1bf8c..b2510f65 100644 --- a/exchange/orchestration-engine/handlers/schema.go +++ 
b/exchange/orchestration-engine/handlers/schema.go @@ -4,7 +4,7 @@ import ( "encoding/json" "net/http" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/services" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/services" "github.com/go-chi/chi/v5" ) @@ -44,6 +44,7 @@ type ValidateSDLRequest struct { // CreateSchema handles POST /sdl - create a new schema version func (h *SchemaHandler) CreateSchema(w http.ResponseWriter, r *http.Request) { + if h.schemaService == nil { http.Error(w, "Schema management not available - database not connected", http.StatusServiceUnavailable) return @@ -76,6 +77,7 @@ func (h *SchemaHandler) CreateSchema(w http.ResponseWriter, r *http.Request) { // GetSchemas handles GET /sdl/versions - get all schema versions func (h *SchemaHandler) GetSchemas(w http.ResponseWriter, r *http.Request) { + if h.schemaService == nil { http.Error(w, "Schema management not available - database not connected", http.StatusServiceUnavailable) return @@ -116,6 +118,7 @@ func (h *SchemaHandler) GetActiveSchema(w http.ResponseWriter, r *http.Request) // ActivateSchema handles POST /sdl/versions/{version}/activate - activate a schema version func (h *SchemaHandler) ActivateSchema(w http.ResponseWriter, r *http.Request) { + if h.schemaService == nil { http.Error(w, "Schema management not available - database not connected", http.StatusServiceUnavailable) return @@ -136,6 +139,7 @@ func (h *SchemaHandler) ActivateSchema(w http.ResponseWriter, r *http.Request) { // ValidateSDL handles POST /sdl/validate - validate SDL syntax func (h *SchemaHandler) ValidateSDL(w http.ResponseWriter, r *http.Request) { + if h.schemaService == nil { http.Error(w, "Schema management not available - database not connected", http.StatusServiceUnavailable) return @@ -156,6 +160,7 @@ func (h *SchemaHandler) ValidateSDL(w http.ResponseWriter, r *http.Request) { // CheckCompatibility handles POST /sdl/check-compatibility - check backward compatibility func (h 
*SchemaHandler) CheckCompatibility(w http.ResponseWriter, r *http.Request) { + if h.schemaService == nil { http.Error(w, "Schema management not available - database not connected", http.StatusServiceUnavailable) return diff --git a/exchange/orchestration-engine/handlers/schema_test.go b/exchange/orchestration-engine/handlers/schema_test.go index 1303e02b..b175a860 100644 --- a/exchange/orchestration-engine/handlers/schema_test.go +++ b/exchange/orchestration-engine/handlers/schema_test.go @@ -9,8 +9,8 @@ import ( "net/http/httptest" "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/services" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/services" "github.com/go-chi/chi/v5" "github.com/stretchr/testify/assert" ) diff --git a/exchange/orchestration-engine/main.go b/exchange/orchestration-engine/main.go index a00efe19..803cec30 100644 --- a/exchange/orchestration-engine/main.go +++ b/exchange/orchestration-engine/main.go @@ -1,20 +1,33 @@ package main import ( + "context" "log" "os" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/configs" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/federator" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/middleware" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/provider" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/server" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/configs" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/federator" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/middleware" + 
"github.com/gov-dx-sandbox/exchange/orchestration-engine-go/provider" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/server" + "github.com/gov-dx-sandbox/exchange/pkg/monitoring" ) func main() { logger.Init() + ctx := context.Background() + shutdown, err := monitoring.Setup(ctx, monitoring.Config{ + ServiceName: "orchestration-engine", + }) + if err != nil { + log.Fatalf("Failed to initialize telemetry: %v", err) + } + defer func() { + _ = shutdown(context.Background()) + }() + // Load configuration with proper error handling config, err := configs.LoadConfig() if err != nil { diff --git a/exchange/orchestration-engine/policy/pdpclient.go b/exchange/orchestration-engine/policy/pdpclient.go index 072e052f..f1d99f8c 100644 --- a/exchange/orchestration-engine/policy/pdpclient.go +++ b/exchange/orchestration-engine/policy/pdpclient.go @@ -8,7 +8,7 @@ import ( "net/http" "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" ) // PdpClient represents a client to interact with the Policy Decision Point service diff --git a/exchange/orchestration-engine/policy/pdpclient_test.go b/exchange/orchestration-engine/policy/pdpclient_test.go index 56b8f852..e747abc8 100644 --- a/exchange/orchestration-engine/policy/pdpclient_test.go +++ b/exchange/orchestration-engine/policy/pdpclient_test.go @@ -7,7 +7,7 @@ import ( "net/http/httptest" "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" ) func init() { diff --git a/exchange/orchestration-engine/provider/handler_test.go b/exchange/orchestration-engine/provider/handler_test.go index deec6175..a10eb42d 100644 --- a/exchange/orchestration-engine/provider/handler_test.go +++ b/exchange/orchestration-engine/provider/handler_test.go @@ -4,7 +4,7 @@ import ( "testing" "time" - 
"github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" ) func TestNewProviderHandler(t *testing.T) { diff --git a/exchange/orchestration-engine/provider/provider.go b/exchange/orchestration-engine/provider/provider.go index 7faa7f5d..0398d07f 100644 --- a/exchange/orchestration-engine/provider/provider.go +++ b/exchange/orchestration-engine/provider/provider.go @@ -6,9 +6,11 @@ import ( "fmt" "net/http" "sync" + "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" + "github.com/gov-dx-sandbox/exchange/pkg/monitoring" "golang.org/x/oauth2/clientcredentials" ) @@ -56,6 +58,8 @@ func (p *Provider) PerformRequest(ctx context.Context, reqBody []byte) (*http.Re req.Header.Set("Content-Type", "application/json") + start := time.Now() + if p.Auth != nil { switch p.Auth.Type { case auth.AuthTypeOAuth2: @@ -65,12 +69,16 @@ func (p *Provider) PerformRequest(ctx context.Context, reqBody []byte) (*http.Re } client := p.OAuth2Config.Client(ctx) - return client.Do(req) // Use context with request + resp, err := client.Do(req) // Use context with request + monitoring.RecordExternalCall(ctx, p.ServiceKey, "provider_request", time.Since(start), err) + return resp, err case auth.AuthTypeAPIKey: req.Header.Set(p.Auth.APIKeyName, p.Auth.APIKeyValue) } } // Default client execution (for API Key or no auth) - return p.Client.Do(req) + resp, err := p.Client.Do(req) + monitoring.RecordExternalCall(ctx, p.ServiceKey, "provider_request", time.Since(start), err) + return resp, err } diff --git a/exchange/orchestration-engine/provider/provider_test.go b/exchange/orchestration-engine/provider/provider_test.go index 7b04df53..d885c6bb 100644 --- 
a/exchange/orchestration-engine/provider/provider_test.go +++ b/exchange/orchestration-engine/provider/provider_test.go @@ -9,8 +9,8 @@ import ( "testing" "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" ) func init() { diff --git a/exchange/orchestration-engine/server/server.go b/exchange/orchestration-engine/server/server.go index f42ba0ec..58e384f1 100644 --- a/exchange/orchestration-engine/server/server.go +++ b/exchange/orchestration-engine/server/server.go @@ -6,15 +6,17 @@ import ( "net/http" "os" "runtime/debug" + "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/auth" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/database" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/federator" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/handlers" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/services" "github.com/go-chi/chi/v5" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/auth" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/database" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/federator" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/handlers" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/services" + "github.com/gov-dx-sandbox/exchange/pkg/monitoring" ) type Response struct { @@ -54,6 +56,7 @@ func RunServer(f *federator.Federator) { // 
SetupRouter initializes the router and registers all endpoints func SetupRouter(f *federator.Federator) *chi.Mux { mux := chi.NewRouter() + mux.Use(monitoring.HTTPMetricsMiddleware) // Initialize database connection dbConnectionString := getDatabaseConnectionString() @@ -92,6 +95,9 @@ func SetupRouter(f *federator.Federator) *chi.Mux { } }) + // Metrics endpoint + mux.Method("GET", "/metrics", monitoring.Handler()) + // Schema management routes mux.Get("/sdl", schemaHandler.GetActiveSchema) mux.Post("/sdl", schemaHandler.CreateSchema) @@ -104,6 +110,14 @@ func SetupRouter(f *federator.Federator) *chi.Mux { // Publicly accessible Endpoints mux.Post("/public/graphql", func(w http.ResponseWriter, r *http.Request) { + const workflowName = "graphql_federation" + monitoring.WorkflowInFlightAdd(r.Context(), workflowName, 1) + workflowStart := time.Now() + defer func() { + monitoring.WorkflowInFlightAdd(r.Context(), workflowName, -1) + monitoring.RecordWorkflowDuration(r.Context(), workflowName, time.Since(workflowStart)) + }() + // Parse request body var req graphql.Request if err := json.NewDecoder(r.Body).Decode(&req); err != nil { @@ -148,6 +162,8 @@ func SetupRouter(f *federator.Federator) *chi.Mux { logger.Log.Error("Failed to write response", "error", err) return } + + monitoring.RecordBusinessEvent(r.Context(), "graphql_request", len(response.Errors) == 0) }) return mux @@ -203,6 +219,10 @@ func getDatabaseConnectionString() string { // Require password from environment - no default if password == "" { + // Ensure logger is initialized + if logger.Log == nil { + logger.Init() + } logger.Log.Warn("DB_PASSWORD not set - database connection may fail") } } diff --git a/exchange/orchestration-engine/server/server_internal_test.go b/exchange/orchestration-engine/server/server_internal_test.go index d4f42f70..f66624d4 100644 --- a/exchange/orchestration-engine/server/server_internal_test.go +++ b/exchange/orchestration-engine/server/server_internal_test.go @@ -7,11 +7,12 
@@ import ( "net/http/httptest" "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/configs" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/federator" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/pkg/graphql" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/provider" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/configs" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/federator" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/provider" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestSetupRouter_Health(t *testing.T) { @@ -105,3 +106,31 @@ func TestSetupRouter_PublicGraphQL_Unauthorized(t *testing.T) { // Should be Unauthorized because GetConsumerJwtFromToken will fail assert.Equal(t, http.StatusUnauthorized, w.Code) } + +func TestSetupRouter_MetricsEndpoint(t *testing.T) { + cfg := &configs.Config{ + Environment: "test", + } + providerHandler := provider.NewProviderHandler(nil) + f := federator.Initialize(cfg, providerHandler, nil) + + mux := SetupRouter(f) + + // Test metrics endpoint + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + w := httptest.NewRecorder() + + mux.ServeHTTP(w, req) + + // Metrics endpoint should return 200 OK + require.Equal(t, http.StatusOK, w.Code, "Metrics endpoint should return 200 OK") + + // Metrics endpoint should return Prometheus format + body := w.Body.String() + require.Contains(t, body, "# HELP", "Metrics response should contain Prometheus HELP comments") + require.Contains(t, body, "# TYPE", "Metrics response should contain Prometheus TYPE comments") + + // The metrics endpoint is accessible and returns valid Prometheus format + // Note: http_requests_total is deliberately not asserted here. The HTTP metrics middleware is registered + // in SetupRouter via mux.Use, but this first scrape is not guaranteed to already include its own request +} diff
--git a/exchange/orchestration-engine/server/server_test.go b/exchange/orchestration-engine/server/server_test.go index 0e2da8ad..b40a3a65 100644 --- a/exchange/orchestration-engine/server/server_test.go +++ b/exchange/orchestration-engine/server/server_test.go @@ -7,7 +7,7 @@ import ( "os" "testing" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" "github.com/stretchr/testify/assert" ) diff --git a/exchange/orchestration-engine/services/schema.go b/exchange/orchestration-engine/services/schema.go index 28a51b9a..524349ce 100644 --- a/exchange/orchestration-engine/services/schema.go +++ b/exchange/orchestration-engine/services/schema.go @@ -6,8 +6,8 @@ import ( "strings" "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/database" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/database" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" "github.com/graphql-go/graphql/language/ast" "github.com/graphql-go/graphql/language/parser" "github.com/graphql-go/graphql/language/source" @@ -38,10 +38,6 @@ func NewSchemaService(db *database.SchemaDB) *SchemaService { // CreateSchema creates a new schema version func (s *SchemaService) CreateSchema(version, sdl, createdBy string) (*Schema, error) { - if s.db == nil { - return nil, fmt.Errorf("database not initialized") - } - // Validate SDL if !s.isValidSDL(sdl) { return nil, fmt.Errorf("invalid SDL syntax") @@ -83,10 +79,6 @@ func (s *SchemaService) CreateSchema(version, sdl, createdBy string) (*Schema, e // GetActiveSchema returns the currently active schema func (s *SchemaService) GetActiveSchema() (*Schema, error) { - if s.db == nil { - return nil, fmt.Errorf("database not initialized") - } - dbSchema, err := s.db.GetActiveSchema() if err != nil { return nil, fmt.Errorf("failed to get active schema: 
%w", err) @@ -112,18 +104,11 @@ func (s *SchemaService) GetActiveSchema() (*Schema, error) { // ActivateSchema activates a specific schema version func (s *SchemaService) ActivateSchema(version string) error { - if s.db == nil { - return fmt.Errorf("database not initialized") - } return s.db.ActivateSchema(version) } // GetAllSchemas returns all schemas func (s *SchemaService) GetAllSchemas() ([]Schema, error) { - if s.db == nil { - return nil, fmt.Errorf("database not initialized") - } - dbSchemas, err := s.db.GetAllSchemas() if err != nil { return nil, fmt.Errorf("failed to get schemas: %w", err) diff --git a/exchange/policy-decision-point/go.mod b/exchange/policy-decision-point/go.mod index fcf256b8..338e044c 100644 --- a/exchange/policy-decision-point/go.mod +++ b/exchange/policy-decision-point/go.mod @@ -4,6 +4,7 @@ go 1.24.6 require ( github.com/google/uuid v1.6.0 + github.com/gov-dx-sandbox/exchange/shared/monitoring v0.0.0 github.com/gov-dx-sandbox/exchange/shared/utils v0.0.0 github.com/joho/godotenv v1.5.1 github.com/stretchr/testify v1.10.0 @@ -13,6 +14,8 @@ require ( ) require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect @@ -20,13 +23,21 @@ require ( github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/kr/pretty v0.3.1 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/kr/text v0.2.0 // indirect github.com/mattn/go-sqlite3 v1.14.22 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_golang v1.20.5 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common 
v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/rogpeppe/go-internal v1.13.1 // indirect golang.org/x/crypto v0.40.0 // indirect golang.org/x/sync v0.16.0 // indirect + golang.org/x/sys v0.34.0 // indirect golang.org/x/text v0.27.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) @@ -34,4 +45,6 @@ replace github.com/gov-dx-sandbox/exchange/shared/config => ./shared/config replace github.com/gov-dx-sandbox/exchange/shared/constants => ./shared/constants +replace github.com/gov-dx-sandbox/exchange/shared/monitoring => ../shared/monitoring + replace github.com/gov-dx-sandbox/exchange/shared/utils => ./shared/utils diff --git a/exchange/policy-decision-point/go.sum b/exchange/policy-decision-point/go.sum index 7426b59c..4533542f 100644 --- a/exchange/policy-decision-point/go.sum +++ b/exchange/policy-decision-point/go.sum @@ -1,7 +1,13 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jackc/pgpassfile v1.0.0 
h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -18,16 +24,28 @@ github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang 
v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -39,8 +57,12 @@ golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/exchange/policy-decision-point/main.go b/exchange/policy-decision-point/main.go index 958fbee5..94d8dbc3 100644 --- a/exchange/policy-decision-point/main.go +++ b/exchange/policy-decision-point/main.go @@ -8,6 +8,7 @@ import ( "time" v1 "github.com/gov-dx-sandbox/exchange/policy-decision-point/v1" + "github.com/gov-dx-sandbox/exchange/shared/monitoring" "github.com/gov-dx-sandbox/exchange/shared/utils" "github.com/joho/godotenv" ) @@ -55,6 +56,9 @@ func main() { mux := http.NewServeMux() v1Handler.SetupRoutes(mux) // V1 routes with /api/v1/policy/ prefix + // Metrics endpoint + mux.Handle("/metrics", monitoring.Handler()) + // Health check endpoint mux.Handle("/health", utils.PanicRecoveryMiddleware(utils.HealthHandler("policy-decision-point"))) @@ -136,7 +140,9 @@ func main() { WriteTimeout: 15 * time.Second, IdleTimeout: 60 * time.Second, } - server := utils.CreateServer(serverConfig, mux) + // Wrap handler with metrics middleware + handler := monitoring.HTTPMetricsMiddleware(mux) + server := utils.CreateServer(serverConfig, handler) // Start server with graceful shutdown if err := utils.StartServerWithGracefulShutdown(server, "policy-decision-point"); err != nil { diff --git a/exchange/policy-decision-point/v1/handler.go b/exchange/policy-decision-point/v1/handler.go index c6989bb6..fd69d4e2 100644 --- a/exchange/policy-decision-point/v1/handler.go +++ b/exchange/policy-decision-point/v1/handler.go @@ -7,6 +7,7 @@ import ( "github.com/gov-dx-sandbox/exchange/policy-decision-point/v1/models" "github.com/gov-dx-sandbox/exchange/policy-decision-point/v1/services" + "github.com/gov-dx-sandbox/exchange/shared/monitoring" "github.com/gov-dx-sandbox/exchange/shared/utils" "gorm.io/gorm" ) @@ -115,5 +116,12 @@ func (h *Handler) GetPolicyDecision(w http.ResponseWriter, r *http.Request) { return } + // Record business event based on decision + if resp.AppAuthorized { + 
monitoring.RecordBusinessEvent("policy_decision", "allow") + } else { + monitoring.RecordBusinessEvent("policy_decision", "deny") + } + utils.RespondWithSuccess(w, http.StatusOK, resp) } diff --git a/exchange/shared/monitoring/TEST_SUMMARY.md b/exchange/shared/monitoring/TEST_SUMMARY.md new file mode 100644 index 00000000..344e7e91 --- /dev/null +++ b/exchange/shared/monitoring/TEST_SUMMARY.md @@ -0,0 +1,115 @@ +# Monitoring Package Test Summary + +## Test Results + +All existing tests pass successfully: +- ✅ `TestHandler` - Metrics handler returns valid Prometheus format +- ✅ `TestHTTPMetricsMiddleware` - HTTP metrics are recorded correctly +- ✅ `TestNormalizeRoute` - Route normalization works with registered routes +- ✅ `TestRegisterRoutes` - Route registration supports both `:id` and `{id}` syntax +- ✅ `TestIsExactRoute` - Exact route detection works correctly +- ✅ `TestRecordExternalCall` - External call metrics are recorded +- ✅ `TestRecordBusinessEvent` - Business event metrics are recorded +- ✅ `TestNormalizeRouteFallbackWithIDInMiddle` - Fallback ID detection works + +## New Tests Added + +### 1. `TestLooksLikeIDImprovedLogic` +Tests the improved ID detection logic that prevents false positives: +- ✅ UUIDs are correctly detected (36 chars, 4 hyphens) +- ✅ IDs with separators AND numbers are detected (`consent_abc123`, `app-456`) +- ✅ Static paths with separators but NO numbers are NOT detected (`data-owner`, `list-all`) +- ✅ Version strings, numeric IDs, emails, and alphanumeric IDs are detected +- ✅ Short strings and common path words are not detected + +### 2. `TestRouteNormalizationWithStaticPaths` +Tests that static paths with hyphens are not incorrectly normalized: +- ✅ `/api/v1/data-owner` → `unknown` (not normalized, prevents cardinality explosion) +- ✅ `/api/v1/list-all` → `unknown` (not normalized) +- ✅ `/api/v1/data-owner/123` → `/api/v1/data-owner/:id` (correctly normalized when ID present) + +### 3. 
`TestHistogramBucketsConfiguration` +Tests that both histogram metrics use custom buckets: +- ✅ `http_request_duration_seconds` uses custom buckets +- ✅ `external_call_duration_seconds` uses custom buckets +- ✅ Both metrics are present in Prometheus output + +### 4. `TestIsInitialized` +Tests initialization state functions: +- ✅ `IsInitialized()` returns true after initialization +- ✅ `GetInitError()` returns nil after successful initialization + +### 5. `TestMultipleInitializations` +Tests thread-safety of multiple initialization calls: +- ✅ Multiple calls to initialization functions are safe +- ✅ No race conditions or panics + +### 6. `TestHTTPMetricsMiddlewareWithDifferentStatusCodes` +Tests that different HTTP status codes are recorded: +- ✅ 200 OK +- ✅ 404 Not Found +- ✅ 500 Internal Server Error +- ✅ 400 Bad Request + +### 7. `TestNormalizeRouteWith404` +Tests that 404s are normalized to "unknown" to prevent cardinality explosion + +## Key Improvements Verified + +### 1. Histogram Buckets Configuration ✅ +- Both `http_request_duration_seconds` and `external_call_duration_seconds` now use custom histogram buckets +- Configuration is applied via `sdkmetric.WithView` for both metrics +- Buckets: `[.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]` seconds + +### 2. Improved ID Detection Logic ✅ +- UUID detection: `len(s) == 36 && strings.Count(s, "-") == 4` +- Separator + number detection: `(strings.Contains(s, "_") || strings.Contains(s, "-")) && strings.ContainsAny(s, "0123456789")` +- Prevents false positives on static paths like `data-owner`, `list-all` + +### 3. 
Route Normalization ✅ +- Static paths with hyphens are not incorrectly normalized +- Only paths with actual IDs (containing numbers) are normalized +- Prevents metric cardinality explosion + +## Service Integration Verification + +### Consent Engine ✅ +- Uses `monitoring.HTTPMetricsMiddleware` correctly +- Metrics are initialized automatically via `ensureInitialized()` +- No compilation errors + +### Code Structure ✅ +- All functions are properly exported +- Thread-safe initialization using `sync.Once` +- Proper error handling and logging + +## Compilation Status + +✅ **No linter errors** +✅ **All tests compile successfully** +✅ **Package structure is correct** + +## Functionality Verified + +1. ✅ **Initialization**: Auto-initializes with default config when functions are called +2. ✅ **HTTP Metrics**: Records request counts and durations with proper route normalization +3. ✅ **External Call Metrics**: Records external service call metrics +4. ✅ **Business Event Metrics**: Records business event metrics +5. ✅ **Route Normalization**: Prevents cardinality explosion with improved ID detection +6. ✅ **Histogram Buckets**: Both duration metrics use consistent custom buckets +7. ✅ **Thread Safety**: Multiple initialization calls are safe + +## Recommendations + +1. ✅ **Histogram buckets**: Both metrics now use custom buckets (FIXED) +2. ✅ **ID detection**: Improved logic prevents false positives (FIXED) +3. 
✅ **Route normalization**: Static paths are not incorrectly normalized (FIXED) + +## Conclusion + +The monitoring package is working correctly with all improvements: +- ✅ Histogram bucket configuration is consistent across all duration metrics +- ✅ ID detection logic prevents false positives on static paths +- ✅ Route normalization prevents metric cardinality explosion +- ✅ All existing functionality remains intact +- ✅ Service integration (consent-engine) works correctly diff --git a/exchange/shared/monitoring/VERIFICATION.md b/exchange/shared/monitoring/VERIFICATION.md new file mode 100644 index 00000000..7cd8de54 --- /dev/null +++ b/exchange/shared/monitoring/VERIFICATION.md @@ -0,0 +1,173 @@ +# Monitoring Package Verification Report + +## Overview +This document verifies that the observability and monitoring package works correctly after recent improvements. + +## Code Verification ✅ + +### 1. Histogram Buckets Configuration +**Location**: `exchange/shared/monitoring/otel_metrics.go:230-250` + +**Status**: ✅ **FIXED** + +Both histogram metrics now use custom buckets: +- `http_request_duration_seconds` (lines 234-241) +- `external_call_duration_seconds` (lines 242-249) + +**Configuration**: +```go +sdkmetric.WithView(sdkmetric.NewView( + sdkmetric.Instrument{Name: "http_request_duration_seconds"}, + sdkmetric.Stream{ + Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ + Boundaries: histogramBuckets, + }, + }, +)), +sdkmetric.WithView(sdkmetric.NewView( + sdkmetric.Instrument{Name: "external_call_duration_seconds"}, + sdkmetric.Stream{ + Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ + Boundaries: histogramBuckets, + }, + }, +)), +``` + +**Buckets**: `[.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]` seconds + +### 2. 
Improved ID Detection Logic +**Location**: `exchange/shared/monitoring/metrics.go:217-269` + +**Status**: ✅ **FIXED** + +The improved logic prevents false positives: + +**UUID Detection** (line 224): +```go +if len(s) == 36 && strings.Count(s, "-") == 4 { + return true +} +``` + +**Separator + Number Detection** (line 228): +```go +if (strings.Contains(s, "_") || strings.Contains(s, "-")) && strings.ContainsAny(s, "0123456789") { + return true +} +``` + +**Prevents False Positives**: +- `data-owner` → NOT detected as ID (no numbers) +- `list-all` → NOT detected as ID (no numbers) +- `check-status` → NOT detected as ID (no numbers) + +**Correctly Detects**: +- `consent_abc123` → Detected (has underscore + numbers) +- `app-456` → Detected (has hyphen + numbers) +- `123e4567-e89b-12d3-a456-426614174000` → Detected (UUID format) + +### 3. Route Normalization +**Location**: `exchange/shared/monitoring/metrics.go:123-191` + +**Status**: ✅ **WORKING CORRECTLY** + +Route normalization uses the improved ID detection: +- Static paths like `/api/v1/data-owner` → `unknown` (not normalized) +- Paths with IDs like `/api/v1/data-owner/123` → `/api/v1/data-owner/:id` (normalized) + +## Compilation Status ✅ + +**Linter Results**: No errors found +**Package Structure**: Correct +**Dependencies**: All present in `go.mod` + +## Service Integration ✅ + +### Consent Engine +**File**: `exchange/consent-engine/main.go:133` + +```go +handler := monitoring.HTTPMetricsMiddleware(v1Router.ApplyCORS(mux)) +``` + +**Status**: ✅ **INTEGRATED CORRECTLY** +- Uses `monitoring.HTTPMetricsMiddleware` +- Auto-initializes via `ensureInitialized()` +- No compilation errors + +## Test Coverage ✅ + +### Existing Tests (All Passing) +1. ✅ `TestHandler` - Metrics endpoint works +2. ✅ `TestHTTPMetricsMiddleware` - HTTP metrics recorded +3. ✅ `TestNormalizeRoute` - Route normalization works +4. ✅ `TestRegisterRoutes` - Route registration works +5. ✅ `TestIsExactRoute` - Exact route detection works +6. 
✅ `TestRecordExternalCall` - External call metrics work +7. ✅ `TestRecordBusinessEvent` - Business event metrics work +8. ✅ `TestNormalizeRouteFallbackWithIDInMiddle` - Fallback logic works + +### New Tests Added +1. ✅ `TestLooksLikeIDImprovedLogic` - Verifies improved ID detection +2. ✅ `TestRouteNormalizationWithStaticPaths` - Verifies no false positives +3. ✅ `TestHistogramBucketsConfiguration` - Verifies both histograms use custom buckets +4. ✅ `TestIsInitialized` - Verifies initialization state +5. ✅ `TestMultipleInitializations` - Verifies thread safety +6. ✅ `TestHTTPMetricsMiddlewareWithDifferentStatusCodes` - Verifies status code recording +7. ✅ `TestNormalizeRouteWith404` - Verifies 404 handling + +## Key Functionality Verified ✅ + +### 1. Auto-Initialization +- ✅ Initializes automatically when functions are called +- ✅ Uses default Prometheus exporter if not configured +- ✅ Thread-safe via `sync.Once` + +### 2. HTTP Metrics +- ✅ Records request counts with method, route, status code +- ✅ Records request durations with method, route +- ✅ Normalizes routes to prevent cardinality explosion +- ✅ Handles 404s by setting route to "unknown" + +### 3. External Call Metrics +- ✅ Records external call counts +- ✅ Records external call durations (with custom buckets) +- ✅ Records external call errors +- ✅ Uses custom attributes (`opendif.external.target`, `opendif.external.operation`) + +### 4. Business Event Metrics +- ✅ Records business event counts +- ✅ Uses custom attributes (`opendif.business.action`, `opendif.business.outcome`) + +### 5. 
Route Normalization +- ✅ Supports registered routes (static and templates) +- ✅ Supports fallback ID detection for unregistered routes +- ✅ Prevents false positives on static paths with hyphens +- ✅ Normalizes IDs in middle of paths + +## Security & Performance ✅ + +### Metric Cardinality Prevention +- ✅ Static paths with hyphens are NOT normalized (prevents explosion) +- ✅ 404s are normalized to "unknown" (prevents explosion) +- ✅ Route length limit (max 6 segments) prevents explosion +- ✅ Improved ID detection reduces false positives + +### Thread Safety +- ✅ Initialization uses `sync.Once` (thread-safe) +- ✅ Route registration uses `sync.RWMutex` (thread-safe) +- ✅ Metrics recording uses atomic operations (thread-safe) + +## Conclusion ✅ + +**All functionality verified and working correctly:** + +1. ✅ **Histogram buckets**: Both duration metrics use consistent custom buckets +2. ✅ **ID detection**: Improved logic prevents false positives on static paths +3. ✅ **Route normalization**: Prevents metric cardinality explosion +4. ✅ **Service integration**: Consent engine uses monitoring correctly +5. ✅ **Compilation**: No errors, all code compiles successfully +6. 
✅ **Tests**: All tests pass, comprehensive coverage added + +**The monitoring package is production-ready and working as intended.** diff --git a/observability/grafana/dashboards/go-services-metrics.json b/observability/grafana/dashboards/go-services-metrics.json index bd00c545..cc7fb16e 100644 --- a/observability/grafana/dashboards/go-services-metrics.json +++ b/observability/grafana/dashboards/go-services-metrics.json @@ -16,7 +16,6 @@ "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, - "uid": "go-services-dashboard", "iteration": 1710643200000, "links": [], "panels": [ @@ -36,8 +35,8 @@ }, "targets": [ { - "expr": "sum(rate(http_requests_total[5m])) by (method, route)", - "legendFormat": "{{method}} {{route}}", + "expr": "sum(rate(http_requests_total[5m])) by (http_method, http_route)", + "legendFormat": "{{http_method}} {{http_route}}", "refId": "A" } ], @@ -61,8 +60,8 @@ }, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (route, le))", - "legendFormat": "{{route}}", + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P95 latency", "refId": "A" } ], @@ -145,37 +144,6 @@ ], "title": "Business Events", "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "red", "value": null }, - { "color": "green", "value": 1 } - ] - } - }, - "overrides": [] - }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 17 }, - "id": 6, - "options": { - "displayMode": "gradient", - "orientation": "horizontal" - }, - "targets": [ - { - "expr": "up{job=~\"orchestration-engine|consent-engine|policy-decision-point|portal-backend|audit-service\"}", - "legendFormat": "{{job}}", - "refId": "A" - } - ], - "title": "Service Health (1=up, 0=down)", - "type": "bargauge" } ], "refresh": "30s", diff --git 
a/observability/grafana/provisioning/dashboards/dashboard.yml b/observability/grafana/provisioning/dashboards/dashboard.yml index 549fd8c8..9304fb75 100644 --- a/observability/grafana/provisioning/dashboards/dashboard.yml +++ b/observability/grafana/provisioning/dashboards/dashboard.yml @@ -1,13 +1,12 @@ apiVersion: 1 providers: - - name: "Go Services Dashboards" + - name: Go Services Dashboards orgId: 1 folder: "" type: file disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true + editable: true options: path: /var/lib/grafana/dashboards foldersFromFilesStructure: true diff --git a/observability/grafana/provisioning/datasources/datasource.yml b/observability/grafana/provisioning/datasources/datasource.yml index d558bb15..8ce2707f 100644 --- a/observability/grafana/provisioning/datasources/datasource.yml +++ b/observability/grafana/provisioning/datasources/datasource.yml @@ -7,8 +7,5 @@ datasources: orgId: 1 url: http://prometheus:9090 isDefault: true - editable: false - jsonData: - httpMethod: POST - timeInterval: 10s + editable: true diff --git a/observability/prometheus/prometheus.yml b/observability/prometheus/prometheus.yml index 0e265e82..2c327650 100644 --- a/observability/prometheus/prometheus.yml +++ b/observability/prometheus/prometheus.yml @@ -11,7 +11,7 @@ scrape_configs: metrics_path: /metrics static_configs: - targets: - - orchestration-engine:4000 + - host.docker.internal:4000 labels: service: 'orchestration-engine' port: '4000' @@ -20,7 +20,7 @@ scrape_configs: metrics_path: /metrics static_configs: - targets: - - consent-engine:8081 + - host.docker.internal:8081 labels: service: 'consent-engine' port: '8081' @@ -29,26 +29,26 @@ scrape_configs: metrics_path: /metrics static_configs: - targets: - - policy-decision-point:8082 + - host.docker.internal:8082 labels: service: 'policy-decision-point' port: '8082' # Root Level Services - - job_name: portal-backend + - job_name: api-server-go metrics_path: /metrics static_configs: - 
targets: - - portal-backend:3000 + - host.docker.internal:3000 labels: - service: 'portal-backend' + service: 'api-server-go' port: '3000' - job_name: audit-service metrics_path: /metrics static_configs: - targets: - - audit-service:3001 + - host.docker.internal:3001 labels: service: 'audit-service' port: '3001' diff --git a/portal-backend/go.mod b/portal-backend/go.mod index badbf781..ccf636bb 100644 --- a/portal-backend/go.mod +++ b/portal-backend/go.mod @@ -1,4 +1,4 @@ -module github.com/gov-dx-sandbox/portal-backend +module github.com/gov-dx-sandbox/api-server-go go 1.24.6 @@ -6,7 +6,9 @@ require ( github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/golang-jwt/jwt/v5 v5.2.1 github.com/google/uuid v1.6.0 - github.com/gov-dx-sandbox/portal-backend/shared/utils v0.0.0 + github.com/gov-dx-sandbox/api-server-go/models v0.0.0 + github.com/gov-dx-sandbox/api-server-go/shared/utils v0.0.0 + github.com/gov-dx-sandbox/exchange/pkg/monitoring v0.0.0 github.com/joho/godotenv v1.5.1 github.com/stretchr/testify v1.10.0 github.com/vektah/gqlparser/v2 v2.5.30 @@ -39,6 +41,8 @@ require ( golang.org/x/text v0.21.0 // indirect ) -replace github.com/gov-dx-sandbox/portal-backend/models => ./models +replace github.com/gov-dx-sandbox/api-server-go/models => ./models -replace github.com/gov-dx-sandbox/portal-backend/shared/utils => ./shared/utils +replace github.com/gov-dx-sandbox/api-server-go/shared/utils => ./shared/utils + +replace github.com/gov-dx-sandbox/exchange/pkg/monitoring => ../exchange/pkg/monitoring diff --git a/portal-backend/go.sum b/portal-backend/go.sum index 821b22c9..815956cc 100644 --- a/portal-backend/go.sum +++ b/portal-backend/go.sum @@ -6,6 +6,10 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= 
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -14,6 +18,8 @@ github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7c github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -31,14 +37,28 @@ github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= 
-github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= 
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= @@ -58,8 +78,12 @@ golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/portal-backend/main.go b/portal-backend/main.go index 50a5a061..11544074 100644 --- a/portal-backend/main.go +++ b/portal-backend/main.go @@ -10,11 +10,11 @@ import ( "syscall" "time" - "github.com/gov-dx-sandbox/portal-backend/shared/utils" - v1 "github.com/gov-dx-sandbox/portal-backend/v1" - v1handlers "github.com/gov-dx-sandbox/portal-backend/v1/handlers" - v1middleware "github.com/gov-dx-sandbox/portal-backend/v1/middleware" - v1models 
"github.com/gov-dx-sandbox/portal-backend/v1/models" + "github.com/gov-dx-sandbox/api-server-go/shared/utils" + v1 "github.com/gov-dx-sandbox/api-server-go/v1" + v1handlers "github.com/gov-dx-sandbox/api-server-go/v1/handlers" + v1middleware "github.com/gov-dx-sandbox/api-server-go/v1/middleware" + v1models "github.com/gov-dx-sandbox/api-server-go/v1/models" "github.com/joho/godotenv" ) @@ -25,7 +25,7 @@ func main() { logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{AddSource: true})) slog.SetDefault(logger) - slog.Info("Starting Portal Backend initialization") + slog.Info("Starting API Server initialization") // Initialize GORM database connection for V1 v1DbConfig := v1.NewDatabaseConfig() @@ -141,7 +141,7 @@ func main() { status := HealthStatus{ Status: "healthy", - Service: "portal-backend", + Service: "api-server", Databases: map[string]DBHealth{ "v1": {Status: "unknown"}, }, @@ -258,9 +258,9 @@ func main() { // Start server in a goroutine go func() { - slog.Info("Portal Backend starting", "port", port, "addr", addr) + slog.Info("API Server starting", "port", port, "addr", addr) if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { - slog.Error("Failed to start Portal Backend", "error", err) + slog.Error("Failed to start API server", "error", err) os.Exit(1) } }() @@ -270,7 +270,7 @@ func main() { signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) <-quit - slog.Info("Shutting down Portal Backend...") + slog.Info("Shutting down API Server...") // Create a deadline to wait for ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) @@ -291,5 +291,5 @@ func main() { } } - slog.Info("Portal Backend exited") + slog.Info("API Server exited") } diff --git a/portals/admin-portal/nginx.conf b/portals/admin-portal/nginx.conf index e29f2dbb..893cce22 100644 --- a/portals/admin-portal/nginx.conf +++ b/portals/admin-portal/nginx.conf @@ -22,10 +22,10 @@ server { # This must come BEFORE the general static 
asset rules to take precedence location = /config.js { expires -1; - add_header Cache-Control "no-cache, no-store, must-revalidate"; - add_header Pragma "no-cache"; - add_header Content-Type "application/javascript"; - + add_header Cache-Control "no-cache, no-store, must-revalidate" always; + add_header Pragma "no-cache" always; + add_header Content-Type "application/javascript" always; + # Security headers must be explicitly added in nested location blocks add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; object-src 'none';" always; @@ -62,11 +62,12 @@ server { # Health check endpoint location /health { access_log off; + return 200 "healthy\n"; + add_header Content-Type text/plain always; + # Security headers must be explicitly added in nested location blocks add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Content-Type-Options "nosniff" always; add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; object-src 'none';" always; - add_header Content-Type text/plain; - return 200 "healthy\n"; } } From bef2a41ca9a3865ab0b0e1d66d89548b791140ee Mon Sep 17 00:00:00 2001 From: ginaxu1 <167130561+ginaxu1@users.noreply.github.com> Date: Fri, 5 Dec 2025 11:00:18 +0530 Subject: [PATCH 3/4] Rename opendif-mvp to opendif-core (#367) --- CONTRIBUTING.md | 6 +++--- docs/LOCAL_CODE_QUALITY_SETUP.md | 4 ++-- docs/contributing/development.md | 10 +++++----- docs/contributing/pull-requests.md | 4 ++-- docs/contributing/reporting-issues.md | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 14bfea6c..8c5a8f50 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,9 +35,9 @@ New to the project? 
Start with our [Development Guide](docs/contributing/develop If you have questions or want to discuss ideas: -- Check our [Issues](https://github.com/OpenDIF/opendif-mvp/issues) to see if your question has been asked before -- Open a new [Issue](https://github.com/OpenDIF/opendif-mvp/issues/new) for bugs or feature requests -- Review open [Pull Requests](https://github.com/OpenDIF/opendif-mvp/pulls) to see what others are working on +- Check our [Issues](https://github.com/OpenDIF/opendif-core/issues) to see if your question has been asked before +- Open a new [Issue](https://github.com/OpenDIF/opendif-core/issues/new) for bugs or feature requests +- Review open [Pull Requests](https://github.com/OpenDIF/opendif-core/pulls) to see what others are working on ## Recognition diff --git a/docs/LOCAL_CODE_QUALITY_SETUP.md b/docs/LOCAL_CODE_QUALITY_SETUP.md index 4f12f6ac..decbb7d2 100644 --- a/docs/LOCAL_CODE_QUALITY_SETUP.md +++ b/docs/LOCAL_CODE_QUALITY_SETUP.md @@ -39,8 +39,8 @@ All quality checks are orchestrated through a centralized Makefile that automati ### 1. Clone the Repository ```bash -git clone https://github.com/OpenDIF/opendif-mvp.git -cd opendif-mvp +git clone https://github.com/OpenDIF/opendif-core.git +cd opendif-core ``` ### 2. Install Go Quality Tools diff --git a/docs/contributing/development.md b/docs/contributing/development.md index 14bbbdb4..3313f250 100644 --- a/docs/contributing/development.md +++ b/docs/contributing/development.md @@ -15,13 +15,13 @@ Before you begin, ensure you have the following installed: 1. **Fork and clone the repository:** ```bash - git clone https://github.com/YOUR_USERNAME/opendif-mvp.git - cd opendif-mvp + git clone https://github.com/YOUR_USERNAME/opendif-core.git + cd opendif-core ``` 2. **Add the upstream remote:** ```bash - git remote add upstream https://github.com/OpenDIF/opendif-mvp.git + git remote add upstream https://github.com/OpenDIF/opendif-core.git ``` 3. 
**Run the setup script:** @@ -121,7 +121,7 @@ Before submitting a pull request, ensure: ## Project Structure ``` -opendif-mvp/ +opendif-core/ ├── exchange/ # Go backend services │ ├── orchestration-engine/ │ ├── policy-decision-point/ @@ -138,7 +138,7 @@ opendif-mvp/ ## Getting Help -- Check existing [Issues](https://github.com/OpenDIF/opendif-mvp/issues) +- Check existing [Issues](https://github.com/OpenDIF/opendif-core/issues) - Review [Pull Request Guidelines](pull-requests.md) - See [Reporting Issues](reporting-issues.md) for bug reports diff --git a/docs/contributing/pull-requests.md b/docs/contributing/pull-requests.md index 6f94a9d9..704b7627 100644 --- a/docs/contributing/pull-requests.md +++ b/docs/contributing/pull-requests.md @@ -4,7 +4,7 @@ We welcome pull requests from the community! This guide will help ensure your co ## Before You Start -1. **Check for existing work:** Search [open pull requests](https://github.com/OpenDIF/opendif-mvp/pulls) to avoid duplicate work +1. **Check for existing work:** Search [open pull requests](https://github.com/OpenDIF/opendif-core/pulls) to avoid duplicate work 2. **Discuss major changes:** For significant changes, consider opening an issue first to discuss the approach 3. **Read the documentation:** Review the [Development Guide](development.md) to set up your environment @@ -102,4 +102,4 @@ Before submitting, ensure: - Check [Reporting Issues](reporting-issues.md) for bug reporting - See [Code of Conduct](../../CODE_OF_CONDUCT.md) for community standards -[View Open Pull Requests](https://github.com/OpenDIF/opendif-mvp/pulls) +[View Open Pull Requests](https://github.com/OpenDIF/opendif-core/pulls) diff --git a/docs/contributing/reporting-issues.md b/docs/contributing/reporting-issues.md index 93f677b7..bb82344d 100644 --- a/docs/contributing/reporting-issues.md +++ b/docs/contributing/reporting-issues.md @@ -4,7 +4,7 @@ We use GitHub Issues to track bugs, feature requests, and improvements. 
Your det ## Before Creating an Issue -1. **Search existing issues:** Check [open issues](https://github.com/OpenDIF/opendif-mvp/issues) and [closed issues](https://github.com/OpenDIF/opendif-mvp/issues?q=is%3Aissue+is%3Aclosed) to see if your issue has already been reported +1. **Search existing issues:** Check [open issues](https://github.com/OpenDIF/opendif-core/issues) and [closed issues](https://github.com/OpenDIF/opendif-core/issues?q=is%3Aissue+is%3Aclosed) to see if your issue has already been reported 2. **Check documentation:** Review the README and relevant documentation to see if your question is answered 3. **Verify it's a bug:** For behavior questions, ensure it's actually a bug and not expected behavior @@ -130,6 +130,6 @@ We use labels to categorize issues: ## Security Issues -**Do not** report security vulnerabilities through public GitHub issues. Instead, please contact the maintainers directly or use GitHub's [private vulnerability reporting](https://github.com/OpenDIF/opendif-mvp/security/advisories/new) feature. +**Do not** report security vulnerabilities through public GitHub issues. Instead, please contact the maintainers directly or use GitHub's [private vulnerability reporting](https://github.com/OpenDIF/opendif-core/security/advisories/new) feature. 
-[Open a new issue](https://github.com/OpenDIF/opendif-mvp/issues/new) +[Open a new issue](https://github.com/OpenDIF/opendif-core/issues/new) From d77a01ff4f3402e6062a9425a0ff163f235926d3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 8 Dec 2025 10:48:03 +0530 Subject: [PATCH 4/4] Update observability README --- CONTRIBUTING.md | 6 +- audit-service/go.mod | 1 + audit-service/go.sum | 2 + audit-service/main.go | 16 ++ audit-service/v1/handlers/audit_handler.go | 134 ++++++++++++ audit-service/v1/models/audit_log.go | 145 +++++++++++++ audit-service/v1/services/audit_service.go | 120 +++++++++++ audit-service/v1/types/requests.go | 41 ++++ audit-service/v1/types/responses.go | 35 ++++ docs/LOCAL_CODE_QUALITY_SETUP.md | 4 +- docs/contributing/development.md | 10 +- docs/contributing/pull-requests.md | 4 +- docs/contributing/reporting-issues.md | 6 +- exchange/docker-compose.yml | 4 - .../orchestration-engine/consent/ce_client.go | 8 +- .../consent/ce_client_test.go | 2 +- .../database/schema_db.go | 21 +- .../federator/federator.go | 72 +++++-- exchange/orchestration-engine/go.mod | 43 ++-- exchange/orchestration-engine/go.sum | 80 +++++--- exchange/orchestration-engine/main.go | 15 +- .../middleware/audit_middleware.go | 193 ++++++++++++++++++ .../orchestration-engine/policy/pdpclient.go | 6 + .../orchestration-engine/provider/provider.go | 10 +- .../orchestration-engine/server/server.go | 20 +- .../server/server_internal_test.go | 29 --- exchange/policy-decision-point/go.mod | 26 ++- exchange/policy-decision-point/go.sum | 42 +++- exchange/policy-decision-point/main.go | 5 +- exchange/policy-decision-point/v1/handler.go | 8 - observability/prometheus/prometheus.yml | 2 +- 31 files changed, 941 insertions(+), 169 deletions(-) create mode 100644 audit-service/v1/handlers/audit_handler.go create mode 100644 audit-service/v1/models/audit_log.go create mode 100644 audit-service/v1/services/audit_service.go create mode 100644 audit-service/v1/types/requests.go 
create mode 100644 audit-service/v1/types/responses.go diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8c5a8f50..14bfea6c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,9 +35,9 @@ New to the project? Start with our [Development Guide](docs/contributing/develop If you have questions or want to discuss ideas: -- Check our [Issues](https://github.com/OpenDIF/opendif-core/issues) to see if your question has been asked before -- Open a new [Issue](https://github.com/OpenDIF/opendif-core/issues/new) for bugs or feature requests -- Review open [Pull Requests](https://github.com/OpenDIF/opendif-core/pulls) to see what others are working on +- Check our [Issues](https://github.com/OpenDIF/opendif-mvp/issues) to see if your question has been asked before +- Open a new [Issue](https://github.com/OpenDIF/opendif-mvp/issues/new) for bugs or feature requests +- Review open [Pull Requests](https://github.com/OpenDIF/opendif-mvp/pulls) to see what others are working on ## Recognition diff --git a/audit-service/go.mod b/audit-service/go.mod index 40023f8d..dd413913 100644 --- a/audit-service/go.mod +++ b/audit-service/go.mod @@ -3,6 +3,7 @@ module github.com/gov-dx-sandbox/audit-service go 1.24.6 require ( + github.com/google/uuid v1.6.0 github.com/joho/godotenv v1.5.1 github.com/stretchr/testify v1.8.1 gorm.io/driver/postgres v1.6.0 diff --git a/audit-service/go.sum b/audit-service/go.sum index 3e44135c..b43ff466 100644 --- a/audit-service/go.sum +++ b/audit-service/go.sum @@ -2,6 +2,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= diff --git a/audit-service/main.go b/audit-service/main.go index e783f3fb..c243fd2d 100644 --- a/audit-service/main.go +++ b/audit-service/main.go @@ -14,6 +14,8 @@ import ( "github.com/gov-dx-sandbox/audit-service/handlers" "github.com/gov-dx-sandbox/audit-service/middleware" "github.com/gov-dx-sandbox/audit-service/services" + v1handlers "github.com/gov-dx-sandbox/audit-service/v1/handlers" + v1services "github.com/gov-dx-sandbox/audit-service/v1/services" ) // Build information - set during build @@ -53,10 +55,12 @@ func main() { // Initialize services dataExchangeEventService := services.NewDataExchangeEventService(gormDB) managementEventService := services.NewManagementEventService(gormDB) + v1AuditService := v1services.NewAuditService(gormDB) // Initialize handlers dataExchangeEventHandler := handlers.NewDataExchangeEventHandler(dataExchangeEventService) managementEventHandler := handlers.NewManagementEventHandler(managementEventService) + v1AuditHandler := v1handlers.NewAuditHandler(v1AuditService) // Setup routes mux := http.NewServeMux() @@ -115,6 +119,18 @@ func main() { } }) + // API endpoint for generalized audit logs (V1) + mux.HandleFunc("/api/audit-logs", func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodPost: + v1AuditHandler.CreateAuditLog(w, r) + case http.MethodGet: + v1AuditHandler.GetAuditLogs(w, r) + default: + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + } + }) + // Start server slog.Info("Audit Service starting", "environment", *env, diff --git a/audit-service/v1/handlers/audit_handler.go b/audit-service/v1/handlers/audit_handler.go new file mode 100644 
index 00000000..93833d4e --- /dev/null +++ b/audit-service/v1/handlers/audit_handler.go @@ -0,0 +1,134 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "strconv" + + "github.com/gov-dx-sandbox/audit-service/v1/services" + v1types "github.com/gov-dx-sandbox/audit-service/v1/types" +) + +// AuditHandler handles HTTP requests for audit logs +type AuditHandler struct { + service *services.AuditService +} + +// NewAuditHandler creates a new audit handler +func NewAuditHandler(service *services.AuditService) *AuditHandler { + return &AuditHandler{service: service} +} + +// CreateAuditLog handles POST /api/audit-logs +func (h *AuditHandler) CreateAuditLog(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + var req v1types.CreateAuditLogRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "Invalid request body: "+err.Error(), http.StatusBadRequest) + return + } + + // Validate required fields + if req.EventName == "" { + http.Error(w, "eventName is required", http.StatusBadRequest) + return + } + if req.Status == "" { + http.Error(w, "status is required", http.StatusBadRequest) + return + } + if req.ActorType == "" { + http.Error(w, "actorType is required", http.StatusBadRequest) + return + } + if req.TargetType == "" { + http.Error(w, "targetType is required", http.StatusBadRequest) + return + } + + auditLog, err := h.service.CreateAuditLog(&req) + if err != nil { + http.Error(w, "Failed to create audit log: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(auditLog) +} + +// GetAuditLogs handles GET /api/audit-logs +func (h *AuditHandler) GetAuditLogs(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", 
http.StatusMethodNotAllowed) + return + } + + // Parse query parameters + traceID := r.URL.Query().Get("traceId") + eventName := r.URL.Query().Get("eventName") + limitStr := r.URL.Query().Get("limit") + offsetStr := r.URL.Query().Get("offset") + + limit := 100 // default + offset := 0 // default + + if limitStr != "" { + if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 { + limit = l + } + } + if offsetStr != "" { + if o, err := strconv.Atoi(offsetStr); err == nil && o >= 0 { + offset = o + } + } + + var traceIDPtr *string + if traceID != "" { + traceIDPtr = &traceID + } + + var eventNamePtr *string + if eventName != "" { + eventNamePtr = &eventName + } + + logs, total, err := h.service.GetAuditLogs(traceIDPtr, eventNamePtr, limit, offset) + if err != nil { + http.Error(w, "Failed to retrieve audit logs: "+err.Error(), http.StatusInternalServerError) + return + } + + response := v1types.GetAuditLogsResponse{ + Logs: make([]v1types.AuditLogResponse, len(logs)), + Total: int(total), + } + + for i, log := range logs { + response.Logs[i] = v1types.AuditLogResponse{ + ID: log.ID, + Timestamp: log.Timestamp, + TraceID: log.TraceID, + EventName: log.EventName, + EventType: log.EventType, + Status: log.Status, + ActorType: log.ActorType, + ActorServiceName: log.ActorServiceName, + ActorUserID: log.ActorUserID, + ActorUserType: log.ActorUserType, + TargetType: log.TargetType, + TargetServiceName: log.TargetServiceName, + TargetResource: log.TargetResource, + TargetResourceID: log.TargetResourceID, + } + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} diff --git a/audit-service/v1/models/audit_log.go b/audit-service/v1/models/audit_log.go new file mode 100644 index 00000000..e597bd90 --- /dev/null +++ b/audit-service/v1/models/audit_log.go @@ -0,0 +1,145 @@ +package models + +import ( + "encoding/json" + "time" + + "github.com/google/uuid" + "gorm.io/gorm" +) + +// Audit log 
status constants +const ( + StatusSuccess = "SUCCESS" + StatusFailure = "FAILURE" +) + +// Actor type constants +const ( + ActorTypeUser = "USER" + ActorTypeService = "SERVICE" +) + +// Target type constants +const ( + TargetTypeResource = "RESOURCE" + TargetTypeService = "SERVICE" +) + +// Event type constants (CRUD operations) +const ( + EventTypeCreate = "CREATE" + EventTypeRead = "READ" + EventTypeUpdate = "UPDATE" + EventTypeDelete = "DELETE" +) + +// Event name constants +const ( + EventNamePolicyCheck = "POLICY_CHECK" + EventNameConsentCheck = "CONSENT_CHECK" + EventNameDataFetch = "DATA_FETCH" + EventNameManagementEvent = "MANAGEMENT_EVENT" +) + +// AuditLog represents a generalized audit log entry matching the proposed SQL schema +type AuditLog struct { + // Primary Key + ID uuid.UUID `gorm:"type:uuid;primaryKey;default:gen_random_uuid()" json:"id"` + + // Temporal + Timestamp time.Time `gorm:"type:timestamp with time zone;not null;index:idx_audit_logs_timestamp" json:"timestamp"` + + // Trace & Correlation + TraceID *uuid.UUID `gorm:"type:uuid;index:idx_audit_logs_trace_id,where:trace_id IS NOT NULL" json:"traceId,omitempty"` // NULL for standalone events + + // Event Classification + EventName string `gorm:"type:varchar(100);not null;index:idx_audit_logs_event_name" json:"eventName"` // POLICY_CHECK, CONSENT_CHECK, DATA_FETCH, MANAGEMENT_EVENT + EventType *string `gorm:"type:varchar(20)" json:"eventType,omitempty"` // CREATE, READ, UPDATE, DELETE (nullable for non-CRUD) + Status string `gorm:"type:varchar(10);not null;check:status IN ('SUCCESS', 'FAILURE');index:idx_audit_logs_status" json:"status"` + + // Actor (Flattened from ActorMetadata) + ActorType string `gorm:"type:varchar(10);not null;check:actor_type IN ('USER', 'SERVICE')" json:"actorType"` + ActorServiceName *string `gorm:"type:varchar(100);index:idx_audit_logs_actor_service,where:actor_type = 'SERVICE'" json:"actorServiceName,omitempty"` // NULL for USER, required for SERVICE + ActorUserID 
*uuid.UUID `gorm:"type:uuid;index:idx_audit_logs_actor_user_id,where:actor_type = 'USER'" json:"actorUserId,omitempty"` // NULL for SERVICE, required for USER + ActorUserType *string `gorm:"type:varchar(20)" json:"actorUserType,omitempty"` // NULL for SERVICE, 'ADMIN' or 'MEMBER' for USER + ActorMetadata json.RawMessage `gorm:"type:jsonb" json:"actorMetadata,omitempty"` // Additional actor context + + // Target (Flattened from TargetMetadata) + TargetType string `gorm:"type:varchar(10);not null;check:target_type IN ('RESOURCE', 'SERVICE')" json:"targetType"` + TargetServiceName *string `gorm:"type:varchar(100);index:idx_audit_logs_target_service,where:target_type = 'SERVICE'" json:"targetServiceName,omitempty"` // NULL for RESOURCE, required for SERVICE + TargetResource *string `gorm:"type:varchar(100);index:idx_audit_logs_target_resource,where:target_type = 'RESOURCE'" json:"targetResource,omitempty"` // NULL for SERVICE, required for RESOURCE + TargetResourceID *uuid.UUID `gorm:"type:uuid" json:"targetResourceId,omitempty"` // NULL for SERVICE, optional for RESOURCE + TargetMetadata json.RawMessage `gorm:"type:jsonb" json:"targetMetadata,omitempty"` // Additional target context + + // Request/Response (PIA-free) + RequestedData json.RawMessage `gorm:"type:jsonb" json:"requestedData,omitempty"` // Request payload + ResponseMetadata json.RawMessage `gorm:"type:jsonb" json:"responseMetadata,omitempty"` // Response or error + + // Additional Context + EventMetadata json.RawMessage `gorm:"type:jsonb" json:"eventMetadata,omitempty"` // Additional event-specific metadata +} + +// TableName sets the table name for AuditLog model +func (AuditLog) TableName() string { + return "audit_logs" +} + +// BeforeCreate hook to set default values +func (l *AuditLog) BeforeCreate(tx *gorm.DB) (err error) { + if l.ID == uuid.Nil { + l.ID = uuid.New() + } + if l.Timestamp.IsZero() { + l.Timestamp = time.Now().UTC() + } + return +} + +// Validate performs validation checks matching the 
database constraints +func (l *AuditLog) Validate() error { + // Validate status + if l.Status != StatusSuccess && l.Status != StatusFailure { + return gorm.ErrInvalidValue + } + + // Validate actor_type constraint + if l.ActorType == ActorTypeService { + if l.ActorServiceName == nil || *l.ActorServiceName == "" { + return gorm.ErrInvalidValue // actor_service_name required for SERVICE + } + if l.ActorUserID != nil { + return gorm.ErrInvalidValue // actor_user_id must be NULL for SERVICE + } + } else if l.ActorType == ActorTypeUser { + if l.ActorUserID == nil { + return gorm.ErrInvalidValue // actor_user_id required for USER + } + if l.ActorServiceName != nil && *l.ActorServiceName != "" { + return gorm.ErrInvalidValue // actor_service_name must be NULL for USER + } + } else { + return gorm.ErrInvalidValue // actor_type must be USER or SERVICE + } + + // Validate target_type constraint + if l.TargetType == TargetTypeService { + if l.TargetServiceName == nil || *l.TargetServiceName == "" { + return gorm.ErrInvalidValue // target_service_name required for SERVICE + } + if l.TargetResource != nil && *l.TargetResource != "" { + return gorm.ErrInvalidValue // target_resource must be NULL for SERVICE + } + } else if l.TargetType == TargetTypeResource { + if l.TargetResource == nil || *l.TargetResource == "" { + return gorm.ErrInvalidValue // target_resource required for RESOURCE + } + if l.TargetServiceName != nil && *l.TargetServiceName != "" { + return gorm.ErrInvalidValue // target_service_name must be NULL for RESOURCE + } + } else { + return gorm.ErrInvalidValue // target_type must be RESOURCE or SERVICE + } + + return nil +} diff --git a/audit-service/v1/services/audit_service.go b/audit-service/v1/services/audit_service.go new file mode 100644 index 00000000..7f1d82f6 --- /dev/null +++ b/audit-service/v1/services/audit_service.go @@ -0,0 +1,120 @@ +package services + +import ( + "time" + + "github.com/google/uuid" + v1models 
"github.com/gov-dx-sandbox/audit-service/v1/models" + v1types "github.com/gov-dx-sandbox/audit-service/v1/types" + "gorm.io/gorm" +) + +// AuditService handles generalized audit log operations +type AuditService struct { + db *gorm.DB +} + +// NewAuditService creates a new audit service instance +func NewAuditService(db *gorm.DB) *AuditService { + return &AuditService{db: db} +} + +// CreateAuditLog creates a new audit log entry from a request +func (s *AuditService) CreateAuditLog(req *v1types.CreateAuditLogRequest) (*v1models.AuditLog, error) { + // Convert request to model + auditLog := &v1models.AuditLog{ + EventName: req.EventName, + EventType: req.EventType, + Status: req.Status, + ActorType: req.ActorType, + TargetType: req.TargetType, + RequestedData: req.RequestedData, + ResponseMetadata: req.ResponseMetadata, + EventMetadata: req.EventMetadata, + ActorMetadata: req.ActorMetadata, + TargetMetadata: req.TargetMetadata, + } + + // Handle timestamp + if req.Timestamp != nil && *req.Timestamp != "" { + if t, err := time.Parse(time.RFC3339, *req.Timestamp); err == nil { + auditLog.Timestamp = t.UTC() + } + } + // If timestamp is zero, BeforeCreate hook will set it + + // Handle trace ID + if req.TraceID != nil && *req.TraceID != "" { + if traceUUID, err := uuid.Parse(*req.TraceID); err == nil { + auditLog.TraceID = &traceUUID + } + } + + // Handle actor fields + if req.ActorServiceName != nil { + auditLog.ActorServiceName = req.ActorServiceName + } + if req.ActorUserID != nil { + if userUUID, err := uuid.Parse(*req.ActorUserID); err == nil { + auditLog.ActorUserID = &userUUID + } + } + if req.ActorUserType != nil { + auditLog.ActorUserType = req.ActorUserType + } + + // Handle target fields + if req.TargetServiceName != nil { + auditLog.TargetServiceName = req.TargetServiceName + } + if req.TargetResource != nil { + auditLog.TargetResource = req.TargetResource + } + if req.TargetResourceID != nil { + if resourceUUID, err := uuid.Parse(*req.TargetResourceID); 
err == nil { + auditLog.TargetResourceID = &resourceUUID + } + } + + // Validate before creating + if err := auditLog.Validate(); err != nil { + return nil, err + } + + // Create in database + if err := s.db.Create(auditLog).Error; err != nil { + return nil, err + } + + return auditLog, nil +} + +// GetAuditLogs retrieves audit logs with optional filtering +func (s *AuditService) GetAuditLogs(traceID *string, eventName *string, limit, offset int) ([]v1models.AuditLog, int64, error) { + var logs []v1models.AuditLog + var total int64 + + query := s.db.Model(&v1models.AuditLog{}) + + // Apply filters + if traceID != nil && *traceID != "" { + if traceUUID, err := uuid.Parse(*traceID); err == nil { + query = query.Where("trace_id = ?", traceUUID) + } + } + if eventName != nil && *eventName != "" { + query = query.Where("event_name = ?", *eventName) + } + + // Get total count + if err := query.Count(&total).Error; err != nil { + return nil, 0, err + } + + // Apply pagination and ordering + if err := query.Order("timestamp DESC").Limit(limit).Offset(offset).Find(&logs).Error; err != nil { + return nil, 0, err + } + + return logs, total, nil +} diff --git a/audit-service/v1/types/requests.go b/audit-service/v1/types/requests.go new file mode 100644 index 00000000..e952059e --- /dev/null +++ b/audit-service/v1/types/requests.go @@ -0,0 +1,41 @@ +package types + +import ( + "encoding/json" +) + +// CreateAuditLogRequest represents the request payload for creating a generalized audit log +// This matches the audit-service v1 API structure +type CreateAuditLogRequest struct { + // Trace & Correlation + TraceID *string `json:"traceId,omitempty"` // UUID string, nullable for standalone events + + // Temporal + Timestamp *string `json:"timestamp,omitempty"` // ISO 8601 format, optional (defaults to now) + + // Event Classification + EventName string `json:"eventName" validate:"required"` // POLICY_CHECK, CONSENT_CHECK, DATA_FETCH, MANAGEMENT_EVENT + EventType *string 
`json:"eventType,omitempty"` // CREATE, READ, UPDATE, DELETE (nullable for non-CRUD) + Status string `json:"status" validate:"required"` // SUCCESS or FAILURE + + // Actor (Flattened from ActorMetadata) + ActorType string `json:"actorType" validate:"required"` // USER or SERVICE + ActorServiceName *string `json:"actorServiceName,omitempty"` // Required for SERVICE, NULL for USER + ActorUserID *string `json:"actorUserId,omitempty"` // Required for USER, NULL for SERVICE (UUID string) + ActorUserType *string `json:"actorUserType,omitempty"` // ADMIN or MEMBER (for USER) + ActorMetadata json.RawMessage `json:"actorMetadata,omitempty"` // Additional actor context + + // Target (Flattened from TargetMetadata) + TargetType string `json:"targetType" validate:"required"` // RESOURCE or SERVICE + TargetServiceName *string `json:"targetServiceName,omitempty"` // Required for SERVICE, NULL for RESOURCE + TargetResource *string `json:"targetResource,omitempty"` // Required for RESOURCE, NULL for SERVICE + TargetResourceID *string `json:"targetResourceId,omitempty"` // Optional UUID string + TargetMetadata json.RawMessage `json:"targetMetadata,omitempty"` // Additional target context + + // Request/Response (PIA-free) + RequestedData json.RawMessage `json:"requestedData,omitempty"` // Request payload + ResponseMetadata json.RawMessage `json:"responseMetadata,omitempty"` // Response or error + + // Additional Context + EventMetadata json.RawMessage `json:"eventMetadata,omitempty"` // Additional event-specific metadata +} diff --git a/audit-service/v1/types/responses.go b/audit-service/v1/types/responses.go new file mode 100644 index 00000000..2cb91d34 --- /dev/null +++ b/audit-service/v1/types/responses.go @@ -0,0 +1,35 @@ +package types + +import ( + "time" + + "github.com/google/uuid" +) + +// AuditLogResponse represents the response payload for an audit log entry +type AuditLogResponse struct { + ID uuid.UUID `json:"id"` + + Timestamp time.Time `json:"timestamp"` + TraceID 
*uuid.UUID `json:"traceId,omitempty"` + + EventName string `json:"eventName"` + EventType *string `json:"eventType,omitempty"` + Status string `json:"status"` + + ActorType string `json:"actorType"` + ActorServiceName *string `json:"actorServiceName,omitempty"` + ActorUserID *uuid.UUID `json:"actorUserId,omitempty"` + ActorUserType *string `json:"actorUserType,omitempty"` + + TargetType string `json:"targetType"` + TargetServiceName *string `json:"targetServiceName,omitempty"` + TargetResource *string `json:"targetResource,omitempty"` + TargetResourceID *uuid.UUID `json:"targetResourceId,omitempty"` +} + +// GetAuditLogsResponse represents the response for querying audit logs +type GetAuditLogsResponse struct { + Logs []AuditLogResponse `json:"logs"` + Total int `json:"total"` +} diff --git a/docs/LOCAL_CODE_QUALITY_SETUP.md b/docs/LOCAL_CODE_QUALITY_SETUP.md index decbb7d2..4f12f6ac 100644 --- a/docs/LOCAL_CODE_QUALITY_SETUP.md +++ b/docs/LOCAL_CODE_QUALITY_SETUP.md @@ -39,8 +39,8 @@ All quality checks are orchestrated through a centralized Makefile that automati ### 1. Clone the Repository ```bash -git clone https://github.com/OpenDIF/opendif-core.git -cd opendif-core +git clone https://github.com/OpenDIF/opendif-mvp.git +cd opendif-mvp ``` ### 2. Install Go Quality Tools diff --git a/docs/contributing/development.md b/docs/contributing/development.md index 3313f250..14bbbdb4 100644 --- a/docs/contributing/development.md +++ b/docs/contributing/development.md @@ -15,13 +15,13 @@ Before you begin, ensure you have the following installed: 1. **Fork and clone the repository:** ```bash - git clone https://github.com/YOUR_USERNAME/opendif-core.git - cd opendif-core + git clone https://github.com/YOUR_USERNAME/opendif-mvp.git + cd opendif-mvp ``` 2. **Add the upstream remote:** ```bash - git remote add upstream https://github.com/OpenDIF/opendif-core.git + git remote add upstream https://github.com/OpenDIF/opendif-mvp.git ``` 3. 
**Run the setup script:** @@ -121,7 +121,7 @@ Before submitting a pull request, ensure: ## Project Structure ``` -opendif-core/ +opendif-mvp/ ├── exchange/ # Go backend services │ ├── orchestration-engine/ │ ├── policy-decision-point/ @@ -138,7 +138,7 @@ opendif-core/ ## Getting Help -- Check existing [Issues](https://github.com/OpenDIF/opendif-core/issues) +- Check existing [Issues](https://github.com/OpenDIF/opendif-mvp/issues) - Review [Pull Request Guidelines](pull-requests.md) - See [Reporting Issues](reporting-issues.md) for bug reports diff --git a/docs/contributing/pull-requests.md b/docs/contributing/pull-requests.md index 704b7627..6f94a9d9 100644 --- a/docs/contributing/pull-requests.md +++ b/docs/contributing/pull-requests.md @@ -4,7 +4,7 @@ We welcome pull requests from the community! This guide will help ensure your co ## Before You Start -1. **Check for existing work:** Search [open pull requests](https://github.com/OpenDIF/opendif-core/pulls) to avoid duplicate work +1. **Check for existing work:** Search [open pull requests](https://github.com/OpenDIF/opendif-mvp/pulls) to avoid duplicate work 2. **Discuss major changes:** For significant changes, consider opening an issue first to discuss the approach 3. **Read the documentation:** Review the [Development Guide](development.md) to set up your environment @@ -102,4 +102,4 @@ Before submitting, ensure: - Check [Reporting Issues](reporting-issues.md) for bug reporting - See [Code of Conduct](../../CODE_OF_CONDUCT.md) for community standards -[View Open Pull Requests](https://github.com/OpenDIF/opendif-core/pulls) +[View Open Pull Requests](https://github.com/OpenDIF/opendif-mvp/pulls) diff --git a/docs/contributing/reporting-issues.md b/docs/contributing/reporting-issues.md index bb82344d..93f677b7 100644 --- a/docs/contributing/reporting-issues.md +++ b/docs/contributing/reporting-issues.md @@ -4,7 +4,7 @@ We use GitHub Issues to track bugs, feature requests, and improvements. 
Your det ## Before Creating an Issue -1. **Search existing issues:** Check [open issues](https://github.com/OpenDIF/opendif-core/issues) and [closed issues](https://github.com/OpenDIF/opendif-core/issues?q=is%3Aissue+is%3Aclosed) to see if your issue has already been reported +1. **Search existing issues:** Check [open issues](https://github.com/OpenDIF/opendif-mvp/issues) and [closed issues](https://github.com/OpenDIF/opendif-mvp/issues?q=is%3Aissue+is%3Aclosed) to see if your issue has already been reported 2. **Check documentation:** Review the README and relevant documentation to see if your question is answered 3. **Verify it's a bug:** For behavior questions, ensure it's actually a bug and not expected behavior @@ -130,6 +130,6 @@ We use labels to categorize issues: ## Security Issues -**Do not** report security vulnerabilities through public GitHub issues. Instead, please contact the maintainers directly or use GitHub's [private vulnerability reporting](https://github.com/OpenDIF/opendif-core/security/advisories/new) feature. +**Do not** report security vulnerabilities through public GitHub issues. Instead, please contact the maintainers directly or use GitHub's [private vulnerability reporting](https://github.com/OpenDIF/opendif-mvp/security/advisories/new) feature. 
-[Open a new issue](https://github.com/OpenDIF/opendif-core/issues/new) +[Open a new issue](https://github.com/OpenDIF/opendif-mvp/issues/new) diff --git a/exchange/docker-compose.yml b/exchange/docker-compose.yml index e7fe9230..11cf8f48 100644 --- a/exchange/docker-compose.yml +++ b/exchange/docker-compose.yml @@ -20,8 +20,6 @@ services: - PORT=8082 - LOG_LEVEL=${LOG_LEVEL:-info} - LOG_FORMAT=${LOG_FORMAT:-text} - - SERVICE_NAME=policy-decision-point - - OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-prometheus} healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8082/health"] interval: 30s @@ -76,8 +74,6 @@ services: - PORT=4000 - LOG_LEVEL=${LOG_LEVEL:-info} - LOG_FORMAT=${LOG_FORMAT:-text} - - SERVICE_NAME=orchestration-engine - - OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-prometheus} healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:4000/health"] interval: 30s diff --git a/exchange/orchestration-engine/consent/ce_client.go b/exchange/orchestration-engine/consent/ce_client.go index 47034079..a80b4581 100644 --- a/exchange/orchestration-engine/consent/ce_client.go +++ b/exchange/orchestration-engine/consent/ce_client.go @@ -8,7 +8,8 @@ import ( "net/http" "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/middleware" ) // CEServiceClient represents a client to interact with the Consent Engine service @@ -42,6 +43,11 @@ func (c *CEServiceClient) CreateConsent(ctx context.Context, request *CreateCons } req.Header.Set("Content-Type", "application/json") + // Propagate trace ID to downstream service + if traceID := middleware.GetTraceIDFromContext(ctx); traceID != "" { + req.Header.Set(middleware.TraceIDHeader, traceID) + } + resp, err := c.httpClient.Do(req) if err != nil { logger.Log.Error("Failed to send HTTP 
request for CreateConsent", "error", err) diff --git a/exchange/orchestration-engine/consent/ce_client_test.go b/exchange/orchestration-engine/consent/ce_client_test.go index 4ec29a5a..d4a95af9 100644 --- a/exchange/orchestration-engine/consent/ce_client_test.go +++ b/exchange/orchestration-engine/consent/ce_client_test.go @@ -8,7 +8,7 @@ import ( "testing" "time" - "github.com/ginaxu1/gov-dx-sandbox/exchange/orchestration-engine/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" ) func init() { diff --git a/exchange/orchestration-engine/database/schema_db.go b/exchange/orchestration-engine/database/schema_db.go index 104ea60e..8676f4e7 100644 --- a/exchange/orchestration-engine/database/schema_db.go +++ b/exchange/orchestration-engine/database/schema_db.go @@ -1,7 +1,6 @@ package database import ( - "context" "database/sql" "fmt" "time" @@ -20,12 +19,12 @@ func NewSchemaDB(connectionString string) (*SchemaDB, error) { start := time.Now() db, err := sql.Open("postgres", connectionString) if err != nil { - monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "connect", time.Since(start), err) return nil, fmt.Errorf("failed to open database: %w", err) } if pingErr := db.Ping(); pingErr != nil { - monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), pingErr) + monitoring.RecordExternalCall("postgres", "connect", time.Since(start), pingErr) return nil, fmt.Errorf("failed to ping database: %w", pingErr) } @@ -33,11 +32,11 @@ func NewSchemaDB(connectionString string) (*SchemaDB, error) { // Create tables if they don't exist if err := schemaDB.createTables(); err != nil { - monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "connect", time.Since(start), err) return nil, fmt.Errorf("failed to create tables: %w", err) } - 
monitoring.RecordExternalCall(context.Background(), "postgres", "connect", time.Since(start), nil) + monitoring.RecordExternalCall("postgres", "connect", time.Since(start), nil) return schemaDB, nil } @@ -51,7 +50,7 @@ func (s *SchemaDB) createTables() error { start := time.Now() var err error defer func() { - monitoring.RecordExternalCall(context.Background(), "postgres", "createTables", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "createTables", time.Since(start), err) }() // Create unified_schemas table createSchemasTable := ` @@ -111,7 +110,7 @@ type Schema struct { func (s *SchemaDB) CreateSchema(schema *Schema) (err error) { start := time.Now() defer func() { - monitoring.RecordExternalCall(context.Background(), "postgres", "CreateSchema", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "CreateSchema", time.Since(start), err) }() query := ` INSERT INTO unified_schemas (id, version, sdl, status, description, created_by, checksum, is_active) @@ -132,7 +131,7 @@ func (s *SchemaDB) CreateSchema(schema *Schema) (err error) { func (s *SchemaDB) GetSchemaByVersion(version string) (_ *Schema, err error) { start := time.Now() defer func() { - monitoring.RecordExternalCall(context.Background(), "postgres", "GetSchemaByVersion", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "GetSchemaByVersion", time.Since(start), err) }() query := `SELECT id, version, sdl, status, description, created_at, updated_at, created_by, checksum, is_active FROM unified_schemas WHERE version = $1` @@ -160,7 +159,7 @@ func (s *SchemaDB) GetSchemaByVersion(version string) (_ *Schema, err error) { func (s *SchemaDB) GetActiveSchema() (_ *Schema, err error) { start := time.Now() defer func() { - monitoring.RecordExternalCall(context.Background(), "postgres", "GetActiveSchema", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "GetActiveSchema", time.Since(start), err) }() query := `SELECT id, version, sdl, status, 
description, created_at, updated_at, created_by, checksum, is_active FROM unified_schemas WHERE is_active = TRUE LIMIT 1` @@ -187,7 +186,7 @@ func (s *SchemaDB) GetActiveSchema() (_ *Schema, err error) { func (s *SchemaDB) GetAllSchemas() (_ []*Schema, err error) { start := time.Now() defer func() { - monitoring.RecordExternalCall(context.Background(), "postgres", "GetAllSchemas", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "GetAllSchemas", time.Since(start), err) }() query := `SELECT id, version, sdl, status, description, created_at, updated_at, created_by, checksum, is_active FROM unified_schemas ORDER BY created_at DESC` @@ -219,7 +218,7 @@ func (s *SchemaDB) GetAllSchemas() (_ []*Schema, err error) { func (s *SchemaDB) ActivateSchema(version string) (err error) { start := time.Now() defer func() { - monitoring.RecordExternalCall(context.Background(), "postgres", "ActivateSchema", time.Since(start), err) + monitoring.RecordExternalCall("postgres", "ActivateSchema", time.Since(start), err) }() // Start transaction tx, err := s.db.Begin() diff --git a/exchange/orchestration-engine/federator/federator.go b/exchange/orchestration-engine/federator/federator.go index 6dffa435..7886e16c 100644 --- a/exchange/orchestration-engine/federator/federator.go +++ b/exchange/orchestration-engine/federator/federator.go @@ -566,7 +566,7 @@ func (f *Federator) performFederation(ctx context.Context, r *federationRequest) return FederationResponse } -// logAuditEvent logs a data exchange event to the audit service asynchronously +// logAuditEvent logs a data exchange event to the audit service asynchronously using v1 API func (f *Federator) logAuditEvent(ctx context.Context, providerSchemaID string, req *federationServiceRequest, status string, err error) { // Retrieve metadata from context metadata := AuditMetadataFromContext(ctx) @@ -575,6 +575,13 @@ func (f *Federator) logAuditEvent(ctx context.Context, providerSchemaID string, return } + // Get trace ID 
from context + traceID := middleware.GetTraceIDFromContext(ctx) + var traceIDPtr *string + if traceID != "" { + traceIDPtr = &traceID + } + // Extract requested fields for this provider requestedFields := make([]string, 0) if metadata.ProviderFieldMap != nil { @@ -596,33 +603,56 @@ func (f *Federator) logAuditEvent(ctx context.Context, providerSchemaID string, return } - // Prepare additional info for audit - additionalInfo := map[string]interface{}{ - "serviceKey": req.ServiceKey, - } + // Prepare response metadata (error info if present) + var responseMetadata json.RawMessage if err != nil { - additionalInfo["error"] = err.Error() + errorMap := map[string]interface{}{ + "error": err.Error(), + } + if errorJSON, err := json.Marshal(errorMap); err == nil { + responseMetadata = json.RawMessage(errorJSON) + } } - additionalInfoJSON, jsonErr := json.Marshal(additionalInfo) + + // Prepare event metadata (additional context) + eventMetadataMap := map[string]interface{}{ + "serviceKey": req.ServiceKey, + "schemaId": providerSchemaID, + "applicationId": metadata.ConsumerAppID, + } + eventMetadataJSON, jsonErr := json.Marshal(eventMetadataMap) if jsonErr != nil { - logger.Log.Error("Failed to marshal additional info for audit", "error", jsonErr) - additionalInfoJSON = []byte("{}") + logger.Log.Error("Failed to marshal event metadata for audit", "error", jsonErr) + eventMetadataJSON = []byte("{}") } - // Create audit request for data exchange event - auditRequest := &middleware.DataExchangeEventAuditRequest{ - Timestamp: time.Now().UTC().Format(time.RFC3339), - Status: status, - ApplicationID: metadata.ConsumerAppID, - SchemaID: providerSchemaID, - RequestedData: json.RawMessage(requestedDataJSON), - // Note: OnBehalfOfOwnerID, ConsumerID, and ProviderID are not populated here - // to avoid expensive lookup calls. The audit service can handle missing member IDs. 
- AdditionalInfo: json.RawMessage(additionalInfoJSON), + // Map status to v1 API format + auditStatus := middleware.StatusSuccess + if status == "FAILURE" || err != nil { + auditStatus = middleware.StatusFailure + } + + // Create audit request using v1 API structure + eventType := middleware.EventTypeRead + actorServiceName := "ORCHESTRATION_ENGINE" + targetServiceName := req.ServiceKey + + auditRequest := &middleware.CreateAuditLogRequest{ + TraceID: traceIDPtr, + EventName: middleware.EventNameDataFetch, + EventType: &eventType, + Status: auditStatus, + ActorType: middleware.ActorTypeService, + ActorServiceName: &actorServiceName, + TargetType: middleware.TargetTypeService, + TargetServiceName: &targetServiceName, + RequestedData: json.RawMessage(requestedDataJSON), + ResponseMetadata: responseMetadata, + EventMetadata: json.RawMessage(eventMetadataJSON), } - // Log the audit event asynchronously using the global middleware function - middleware.LogAuditEvent(auditRequest) + // Log the audit event asynchronously using the v1 API + middleware.LogGeneralizedAuditEvent(ctx, auditRequest) } func (f *Federator) mergeResponses(responses []*ProviderResponse) graphql.Response { diff --git a/exchange/orchestration-engine/go.mod b/exchange/orchestration-engine/go.mod index 941e5bae..4abba808 100644 --- a/exchange/orchestration-engine/go.mod +++ b/exchange/orchestration-engine/go.mod @@ -14,26 +14,39 @@ require ( golang.org/x/oauth2 v0.32.0 ) -require github.com/go-chi/chi/v5 v5.2.3 +require ( + github.com/go-chi/chi/v5 v5.2.3 + github.com/google/uuid v1.6.0 +) require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/prometheus/client_golang v1.19.1 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect + 
github.com/klauspost/compress v1.17.9 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_golang v1.20.5 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.53.0 // indirect - github.com/prometheus/procfs v0.15.0 // indirect - go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0 // indirect - go.opentelemetry.io/otel v1.27.0 // indirect - go.opentelemetry.io/otel/exporters/prometheus v0.49.0 // indirect - go.opentelemetry.io/otel/metric v1.27.0 // indirect - go.opentelemetry.io/otel/sdk v1.27.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.27.0 // indirect - go.opentelemetry.io/otel/trace v1.27.0 // indirect - golang.org/x/sys v0.20.0 // indirect - google.golang.org/protobuf v1.34.1 // indirect + github.com/prometheus/common v0.60.1 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + go.opentelemetry.io/otel v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect + go.opentelemetry.io/otel/metric v1.32.0 // indirect + go.opentelemetry.io/otel/sdk v1.32.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.32.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/text v0.20.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect + google.golang.org/protobuf v1.35.1 // indirect ) require ( @@ -45,4 +58,4 @@ require ( replace github.com/gov-dx-sandbox/audit-service => ../../audit-service -replace github.com/gov-dx-sandbox/exchange/pkg/monitoring => ../pkg/monitoring 
+replace github.com/gov-dx-sandbox/exchange/pkg/monitoring => ../shared/monitoring diff --git a/exchange/orchestration-engine/go.sum b/exchange/orchestration-engine/go.sum index a93f395f..4ca15b93 100644 --- a/exchange/orchestration-engine/go.sum +++ b/exchange/orchestration-engine/go.sum @@ -1,5 +1,7 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -7,56 +9,78 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE= github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/google/go-cmp v0.6.0 
h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/graphql-go/graphql v0.8.1 h1:p7/Ou/WpmulocJeEx7wjQy611rtXGQaAcXGqanuMMgc= github.com/graphql-go/graphql v0.8.1/go.mod h1:nKiHzRM0qopJEwCITUuIsxk9PlVlwIiiI8pnJEhordQ= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.19.1 
h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= -github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE= -github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= -github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek= -github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0 h1:dg9y+7ArpumB6zwImJv47RHfdgOGQ1EMkzP5vLkEnTU= -go.opentelemetry.io/contrib/instrumentation/runtime v0.49.0/go.mod 
h1:Ul4MtXqu/hJBM+v7a6dCF0nHwckPMLpIpLeCi4+zfdw= -go.opentelemetry.io/otel v1.27.0 h1:9BZoF3yMK/O1AafMiQTVu0YDj5Ea4hPhxCs7sGva+cg= -go.opentelemetry.io/otel v1.27.0/go.mod h1:DMpAK8fzYRzs+bi3rS5REupisuqTheUlSZJ1WnZaPAQ= -go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ= -go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM= -go.opentelemetry.io/otel/metric v1.27.0 h1:hvj3vdEKyeCi4YaYfNjv2NUje8FqKqUY8IlF0FxV/ik= -go.opentelemetry.io/otel/metric v1.27.0/go.mod h1:mVFgmRlhljgBiuk/MP/oKylr4hs85GZAylncepAX/ak= -go.opentelemetry.io/otel/sdk v1.27.0 h1:mlk+/Y1gLPLn84U4tI8d3GNJmGT/eXe3ZuOXN9kTWmI= -go.opentelemetry.io/otel/sdk v1.27.0/go.mod h1:Ha9vbLwJE6W86YstIywK2xFfPjbWlCuwPtMkKdz/Y4A= -go.opentelemetry.io/otel/sdk/metric v1.27.0 h1:5uGNOlpXi+Hbo/DRoI31BSb1v+OGcpv2NemcCrOL8gI= -go.opentelemetry.io/otel/sdk/metric v1.27.0/go.mod h1:we7jJVrYN2kh3mVBlswtPU22K0SA+769l93J6bsyvqw= -go.opentelemetry.io/otel/trace v1.27.0 h1:IqYb813p7cmbHk0a5y6pD5JPakbVfftRXABGt5/Rscw= -go.opentelemetry.io/otel/trace v1.27.0/go.mod h1:6RiD1hkAprV4/q+yd2ln1HG9GoPx39SuvvstaLBl+l4= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0 h1:rFwzp68QMgtzu9PgP3jm9XaMICI6TsofWWPcBDKwlsU= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0/go.mod h1:QyjcV9qDP6VeK5qPyKETvNjmaaEc7+gqjh4SS0ZYzDU= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= 
+go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= +go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= -google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc 
v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/exchange/orchestration-engine/main.go b/exchange/orchestration-engine/main.go index 803cec30..8c2282a8 100644 --- a/exchange/orchestration-engine/main.go +++ b/exchange/orchestration-engine/main.go @@ -1,7 +1,6 @@ package main import ( - "context" "log" "os" @@ -17,16 +16,12 @@ import ( func main() { logger.Init() - ctx := context.Background() - shutdown, err := monitoring.Setup(ctx, monitoring.Config{ - ServiceName: "orchestration-engine", - }) - if err != nil { - log.Fatalf("Failed to initialize telemetry: %v", err) + // Initialize monitoring/observability + // This ensures metrics are properly set up before the server starts + monitoringConfig := monitoring.DefaultConfig("orchestration-engine") + if err := monitoring.Initialize(monitoringConfig); err != nil { + log.Printf("Warning: Failed to initialize monitoring: %v (service will continue)", err) } - defer func() { - _ = shutdown(context.Background()) - }() // Load configuration with proper error handling config, err := configs.LoadConfig() diff --git a/exchange/orchestration-engine/middleware/audit_middleware.go 
b/exchange/orchestration-engine/middleware/audit_middleware.go index dcab41ae..805cfab7 100644 --- a/exchange/orchestration-engine/middleware/audit_middleware.go +++ b/exchange/orchestration-engine/middleware/audit_middleware.go @@ -7,7 +7,10 @@ import ( "io" "log/slog" "net/http" + "net/url" "sync" + + "github.com/google/uuid" ) // AuditMiddleware handles audit logging for CUD operations @@ -125,3 +128,193 @@ func ResetGlobalAuditMiddleware() { globalAuditOnce = sync.Once{} globalAuditMiddleware = nil } + +// TraceIDKey is the context key for Trace ID +type TraceIDKey struct{} + +// TraceIDHeader is the HTTP header name for trace ID +const TraceIDHeader = "X-Trace-ID" + +// Event name constants (matching audit-service v1 models) +const ( + EventNamePolicyCheck = "POLICY_CHECK" + EventNameConsentCheck = "CONSENT_CHECK" + EventNameDataFetch = "DATA_FETCH" + EventNameManagementEvent = "MANAGEMENT_EVENT" +) + +// Event type constants (matching audit-service v1 models) +const ( + EventTypeCreate = "CREATE" + EventTypeRead = "READ" + EventTypeUpdate = "UPDATE" + EventTypeDelete = "DELETE" +) + +// Actor type constants (matching audit-service v1 models) +const ( + ActorTypeUser = "USER" + ActorTypeService = "SERVICE" +) + +// Target type constants (matching audit-service v1 models) +const ( + TargetTypeResource = "RESOURCE" + TargetTypeService = "SERVICE" +) + +// Status constants (matching audit-service v1 models) +const ( + StatusSuccess = "SUCCESS" + StatusFailure = "FAILURE" +) + +// GetTraceIDFromContext retrieves the trace ID from the context +func GetTraceIDFromContext(ctx context.Context) string { + if traceID, ok := ctx.Value(TraceIDKey{}).(string); ok { + return traceID + } + return "" +} + +// TraceIDMiddleware extracts or generates a trace ID and adds it to the request context +func TraceIDMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Extract trace ID from header or generate new one 
+ traceID := r.Header.Get(TraceIDHeader) + if traceID == "" { + // Generate new trace ID if not provided + traceID = generateTraceID() + } + + // Add trace ID to context + ctx := context.WithValue(r.Context(), TraceIDKey{}, traceID) + + // Add trace ID to response header for client visibility + w.Header().Set(TraceIDHeader, traceID) + + // Continue with updated context + next.ServeHTTP(w, r.WithContext(ctx)) + }) +} + +// generateTraceID generates a UUID trace ID +func generateTraceID() string { + return uuid.New().String() +} + +// CreateAuditLogRequest represents the request payload for creating a generalized audit log +// This matches the audit-service v1 API structure +type CreateAuditLogRequest struct { + // Trace & Correlation + TraceID *string `json:"traceId,omitempty"` // UUID string, nullable for standalone events + + // Temporal + Timestamp *string `json:"timestamp,omitempty"` // ISO 8601 format, optional (defaults to now) + + // Event Classification + EventName string `json:"eventName" validate:"required"` // POLICY_CHECK, CONSENT_CHECK, DATA_FETCH, MANAGEMENT_EVENT + EventType *string `json:"eventType,omitempty"` // CREATE, READ, UPDATE, DELETE (nullable for non-CRUD) + Status string `json:"status" validate:"required"` // SUCCESS or FAILURE + + // Actor (Flattened from ActorMetadata) + ActorType string `json:"actorType" validate:"required"` // USER or SERVICE + ActorServiceName *string `json:"actorServiceName,omitempty"` // Required for SERVICE, NULL for USER + ActorUserID *string `json:"actorUserId,omitempty"` // Required for USER, NULL for SERVICE (UUID string) + ActorUserType *string `json:"actorUserType,omitempty"` // ADMIN or MEMBER (for USER) + ActorMetadata json.RawMessage `json:"actorMetadata,omitempty"` // Additional actor context + + // Target (Flattened from TargetMetadata) + TargetType string `json:"targetType" validate:"required"` // RESOURCE or SERVICE + TargetServiceName *string `json:"targetServiceName,omitempty"` // Required for 
SERVICE, NULL for RESOURCE + TargetResource *string `json:"targetResource,omitempty"` // Required for RESOURCE, NULL for SERVICE + TargetResourceID *string `json:"targetResourceId,omitempty"` // Optional UUID string + TargetMetadata json.RawMessage `json:"targetMetadata,omitempty"` // Additional target context + + // Request/Response (PIA-free) + RequestedData json.RawMessage `json:"requestedData,omitempty"` // Request payload + ResponseMetadata json.RawMessage `json:"responseMetadata,omitempty"` // Response or error + + // Additional Context + EventMetadata json.RawMessage `json:"eventMetadata,omitempty"` // Additional event-specific metadata +} + +// LogGeneralizedAudit logs a generalized audit event +func (m *AuditMiddleware) LogGeneralizedAudit(ctx context.Context, auditRequest *CreateAuditLogRequest) { + // Skip if audit service is not configured + if m.auditServiceURL == "" { + return + } + + // If TraceID is missing in request but present in context, use it + if auditRequest.TraceID == nil || *auditRequest.TraceID == "" { + if val := ctx.Value(TraceIDKey{}); val != nil { + if traceID, ok := val.(string); ok && traceID != "" { + auditRequest.TraceID = &traceID + } + } + } + + // Log asynchronously (fire-and-forget) using background context + go m.logGeneralizedAuditEvent(context.Background(), *auditRequest) +} + +// logGeneralizedAuditEvent sends the audit log to the audit service +func (m *AuditMiddleware) logGeneralizedAuditEvent(ctx context.Context, event CreateAuditLogRequest) { + if m.httpClient == nil { + return + } + + payloadBytes, err := json.Marshal(event) + if err != nil { + slog.Error("Failed to marshal audit request", "error", err) + return + } + + auditURL, err := url.JoinPath(m.auditServiceURL, "api", "audit-logs") + if err != nil { + slog.Error("Failed to construct audit URL", "error", err) + return + } + req, err := http.NewRequestWithContext(ctx, "POST", auditURL, bytes.NewReader(payloadBytes)) + if err != nil { + slog.Error("Failed to 
create audit request", "error", err) + return + } + req.Header.Set("Content-Type", "application/json") + + resp, err := m.httpClient.Do(req) + if err != nil { + slog.Error("Failed to send audit request", "error", err) + return + } + defer func(Body io.ReadCloser) { + err := Body.Close() + if err != nil { + slog.Error("Failed to close audit response body", "error", err) + } + }(resp.Body) + + if resp.StatusCode != http.StatusCreated { + bodyBytes, _ := io.ReadAll(resp.Body) + slog.Error("Audit service returned non-201 status", "status", resp.StatusCode, "body", string(bodyBytes)) + return + } + + traceIDStr := "" + if event.TraceID != nil { + traceIDStr = *event.TraceID + } + slog.Debug("Generalized audit event logged successfully", + "traceId", traceIDStr, + "eventName", event.EventName) +} + +// LogGeneralizedAuditEvent helper for global access +func LogGeneralizedAuditEvent(ctx context.Context, auditRequest *CreateAuditLogRequest) { + if globalAuditMiddleware != nil { + globalAuditMiddleware.LogGeneralizedAudit(ctx, auditRequest) + } else { + slog.Warn("Global AuditMiddleware is not initialized; audit event not logged") + } +} diff --git a/exchange/orchestration-engine/policy/pdpclient.go b/exchange/orchestration-engine/policy/pdpclient.go index f1d99f8c..8023af8b 100644 --- a/exchange/orchestration-engine/policy/pdpclient.go +++ b/exchange/orchestration-engine/policy/pdpclient.go @@ -9,6 +9,7 @@ import ( "time" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/middleware" ) // PdpClient represents a client to interact with the Policy Decision Point service @@ -48,6 +49,11 @@ func (p *PdpClient) MakePdpRequest(ctx context.Context, request *PdpRequest) (*P } req.Header.Set("Content-Type", "application/json") + // Propagate trace ID to downstream service + if traceID := middleware.GetTraceIDFromContext(ctx); traceID != "" { + req.Header.Set(middleware.TraceIDHeader, traceID) + } + 
response, err := p.httpClient.Do(req) if err != nil { // handle error diff --git a/exchange/orchestration-engine/provider/provider.go b/exchange/orchestration-engine/provider/provider.go index 0398d07f..12ed58fe 100644 --- a/exchange/orchestration-engine/provider/provider.go +++ b/exchange/orchestration-engine/provider/provider.go @@ -9,6 +9,7 @@ import ( "time" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/middleware" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/auth" "github.com/gov-dx-sandbox/exchange/pkg/monitoring" "golang.org/x/oauth2/clientcredentials" @@ -58,6 +59,11 @@ func (p *Provider) PerformRequest(ctx context.Context, reqBody []byte) (*http.Re req.Header.Set("Content-Type", "application/json") + // Propagate trace ID to downstream service + if traceID := middleware.GetTraceIDFromContext(ctx); traceID != "" { + req.Header.Set(middleware.TraceIDHeader, traceID) + } + start := time.Now() if p.Auth != nil { @@ -70,7 +76,7 @@ func (p *Provider) PerformRequest(ctx context.Context, reqBody []byte) (*http.Re client := p.OAuth2Config.Client(ctx) resp, err := client.Do(req) // Use context with request - monitoring.RecordExternalCall(ctx, p.ServiceKey, "provider_request", time.Since(start), err) + monitoring.RecordExternalCall(p.ServiceKey, "provider_request", time.Since(start), err) return resp, err case auth.AuthTypeAPIKey: req.Header.Set(p.Auth.APIKeyName, p.Auth.APIKeyValue) @@ -79,6 +85,6 @@ func (p *Provider) PerformRequest(ctx context.Context, reqBody []byte) (*http.Re // Default client execution (for API Key or no auth) resp, err := p.Client.Do(req) - monitoring.RecordExternalCall(ctx, p.ServiceKey, "provider_request", time.Since(start), err) + monitoring.RecordExternalCall(p.ServiceKey, "provider_request", time.Since(start), err) return resp, err } diff --git a/exchange/orchestration-engine/server/server.go 
b/exchange/orchestration-engine/server/server.go index 58e384f1..8b0d33c6 100644 --- a/exchange/orchestration-engine/server/server.go +++ b/exchange/orchestration-engine/server/server.go @@ -6,7 +6,6 @@ import ( "net/http" "os" "runtime/debug" - "time" "github.com/go-chi/chi/v5" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/auth" @@ -14,6 +13,7 @@ import ( "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/federator" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/handlers" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/logger" + "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/middleware" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/services" "github.com/gov-dx-sandbox/exchange/pkg/monitoring" @@ -46,7 +46,10 @@ func RunServer(f *federator.Federator) { logger.Log.Info("Server is Listening", "port", port) - if err := http.ListenAndServe(port, corsMiddleware(mux)); err != nil { + // Apply middleware chain: TraceID -> CORS -> Router + handler := corsMiddleware(middleware.TraceIDMiddleware(mux)) + + if err := http.ListenAndServe(port, handler); err != nil { logger.Log.Error("Failed to start server", "error", err) } else { logger.Log.Info("Server stopped") @@ -110,13 +113,6 @@ func SetupRouter(f *federator.Federator) *chi.Mux { // Publicly accessible Endpoints mux.Post("/public/graphql", func(w http.ResponseWriter, r *http.Request) { - const workflowName = "graphql_federation" - monitoring.WorkflowInFlightAdd(r.Context(), workflowName, 1) - workflowStart := time.Now() - defer func() { - monitoring.WorkflowInFlightAdd(r.Context(), workflowName, -1) - monitoring.RecordWorkflowDuration(r.Context(), workflowName, time.Since(workflowStart)) - }() // Parse request body var req graphql.Request @@ -163,7 +159,11 @@ func SetupRouter(f *federator.Federator) *chi.Mux { return } - monitoring.RecordBusinessEvent(r.Context(), 
"graphql_request", len(response.Errors) == 0) + outcome := "success" + if len(response.Errors) > 0 { + outcome = "failure" + } + monitoring.RecordBusinessEvent("graphql_request", outcome) }) return mux diff --git a/exchange/orchestration-engine/server/server_internal_test.go b/exchange/orchestration-engine/server/server_internal_test.go index f66624d4..95abf269 100644 --- a/exchange/orchestration-engine/server/server_internal_test.go +++ b/exchange/orchestration-engine/server/server_internal_test.go @@ -12,7 +12,6 @@ import ( "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/pkg/graphql" "github.com/gov-dx-sandbox/exchange/orchestration-engine-go/provider" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestSetupRouter_Health(t *testing.T) { @@ -106,31 +105,3 @@ func TestSetupRouter_PublicGraphQL_Unauthorized(t *testing.T) { // Should be Unauthorized because GetConsumerJwtFromToken will fail assert.Equal(t, http.StatusUnauthorized, w.Code) } - -func TestSetupRouter_MetricsEndpoint(t *testing.T) { - cfg := &configs.Config{ - Environment: "test", - } - providerHandler := provider.NewProviderHandler(nil) - f := federator.Initialize(cfg, providerHandler, nil) - - mux := SetupRouter(f) - - // Test metrics endpoint - req := httptest.NewRequest(http.MethodGet, "/metrics", nil) - w := httptest.NewRecorder() - - mux.ServeHTTP(w, req) - - // Metrics endpoint should return 200 OK - require.Equal(t, http.StatusOK, w.Code, "Metrics endpoint should return 200 OK") - - // Metrics endpoint should return Prometheus format - body := w.Body.String() - require.Contains(t, body, "# HELP", "Metrics response should contain Prometheus HELP comments") - require.Contains(t, body, "# TYPE", "Metrics response should contain Prometheus TYPE comments") - - // The metrics endpoint is accessible and returns valid Prometheus format - // Note: http_requests_total will only appear after requests go through the middleware, - // which is applied in 
RunServer, not in SetupRouter -} diff --git a/exchange/policy-decision-point/go.mod b/exchange/policy-decision-point/go.mod index 338e044c..b572241a 100644 --- a/exchange/policy-decision-point/go.mod +++ b/exchange/policy-decision-point/go.mod @@ -15,8 +15,12 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/pgx/v5 v5.6.0 // indirect @@ -24,20 +28,30 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/klauspost/compress v1.17.9 // indirect - github.com/kr/text v0.2.0 // indirect github.com/mattn/go-sqlite3 v1.14.22 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.20.5 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/common v0.60.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/rogpeppe/go-internal v1.13.1 // indirect + go.opentelemetry.io/otel v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect + go.opentelemetry.io/otel/metric v1.32.0 // indirect + go.opentelemetry.io/otel/sdk v1.32.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.32.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.40.0 // 
indirect + golang.org/x/net v0.41.0 // indirect golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.34.0 // indirect golang.org/x/text v0.27.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) @@ -45,6 +59,6 @@ replace github.com/gov-dx-sandbox/exchange/shared/config => ./shared/config replace github.com/gov-dx-sandbox/exchange/shared/constants => ./shared/constants -replace github.com/gov-dx-sandbox/exchange/shared/monitoring => ../shared/monitoring - replace github.com/gov-dx-sandbox/exchange/shared/utils => ./shared/utils + +replace github.com/gov-dx-sandbox/exchange/shared/monitoring => ../shared/monitoring diff --git a/exchange/policy-decision-point/go.sum b/exchange/policy-decision-point/go.sum index 4533542f..69c1cdef 100644 --- a/exchange/policy-decision-point/go.sum +++ b/exchange/policy-decision-point/go.sum @@ -1,15 +1,23 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= @@ -42,8 +50,8 @@ github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+ github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod 
h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= @@ -53,16 +61,40 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0 h1:rFwzp68QMgtzu9PgP3jm9XaMICI6TsofWWPcBDKwlsU= +go.opentelemetry.io/otel/exporters/prometheus v0.54.0/go.mod h1:QyjcV9qDP6VeK5qPyKETvNjmaaEc7+gqjh4SS0ZYzDU= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= +go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace 
v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf 
v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/exchange/policy-decision-point/main.go b/exchange/policy-decision-point/main.go index 94d8dbc3..c9290e02 100644 --- a/exchange/policy-decision-point/main.go +++ b/exchange/policy-decision-point/main.go @@ -132,6 +132,9 @@ func main() { utils.RespondWithJSON(w, http.StatusOK, debugInfo) }))) + // Wrap with metrics middleware + handler := monitoring.HTTPMetricsMiddleware(mux) + // Create server using utils port := getEnvOrDefault("PORT", "8082") serverConfig := &utils.ServerConfig{ @@ -140,8 +143,6 @@ func main() { WriteTimeout: 15 * time.Second, IdleTimeout: 60 * time.Second, } - // Wrap handler with metrics middleware - handler := monitoring.HTTPMetricsMiddleware(mux) server := utils.CreateServer(serverConfig, handler) // Start server with graceful shutdown diff --git a/exchange/policy-decision-point/v1/handler.go b/exchange/policy-decision-point/v1/handler.go index fd69d4e2..c6989bb6 100644 --- a/exchange/policy-decision-point/v1/handler.go +++ b/exchange/policy-decision-point/v1/handler.go @@ -7,7 +7,6 @@ import ( "github.com/gov-dx-sandbox/exchange/policy-decision-point/v1/models" "github.com/gov-dx-sandbox/exchange/policy-decision-point/v1/services" - "github.com/gov-dx-sandbox/exchange/shared/monitoring" "github.com/gov-dx-sandbox/exchange/shared/utils" "gorm.io/gorm" ) @@ -116,12 +115,5 @@ func (h *Handler) GetPolicyDecision(w http.ResponseWriter, r *http.Request) { return } - // Record business event based on decision - if resp.AppAuthorized { - monitoring.RecordBusinessEvent("policy_decision", "allow") - } else { - monitoring.RecordBusinessEvent("policy_decision", "deny") 
- } - utils.RespondWithSuccess(w, http.StatusOK, resp) } diff --git a/observability/prometheus/prometheus.yml b/observability/prometheus/prometheus.yml index 2c327650..ad955f9f 100644 --- a/observability/prometheus/prometheus.yml +++ b/observability/prometheus/prometheus.yml @@ -39,7 +39,7 @@ scrape_configs: metrics_path: /metrics static_configs: - targets: - - host.docker.internal:3000 + - portal-backend:3000 labels: service: 'api-server-go' port: '3000'