Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ gem "lograge"

# For distributed tracing and telemetry
gem "opentelemetry-exporter-otlp", "~> 0.34.0"
gem "opentelemetry-exporter-otlp-metrics", "~> 0.10.0"
gem "opentelemetry-instrumentation-all", "~> 0.94.0"
gem "opentelemetry-metrics-sdk", "~> 0.15.0"
gem "opentelemetry-propagator-xray", "~> 0.27.0"
gem "opentelemetry-sdk", "~> 1.12"

Expand Down
20 changes: 20 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,15 @@ GEM
opentelemetry-common (~> 0.20)
opentelemetry-sdk (~> 1.10)
opentelemetry-semantic_conventions
opentelemetry-exporter-otlp-metrics (0.10.0)
google-protobuf (>= 3.18, < 5.0)
googleapis-common-protos-types (~> 1.3)
opentelemetry-api (~> 1.1)
opentelemetry-common (~> 0.20)
opentelemetry-metrics-api (~> 0.2)
opentelemetry-metrics-sdk (~> 0.5)
opentelemetry-sdk (~> 1.2)
opentelemetry-semantic_conventions
opentelemetry-helpers-mysql (0.6.0)
opentelemetry-api (~> 1.7)
opentelemetry-common (~> 0.21)
Expand Down Expand Up @@ -537,6 +546,12 @@ GEM
opentelemetry-helpers-sql-processor
opentelemetry-instrumentation-base (~> 0.25)
opentelemetry-semantic_conventions (>= 1.8.0)
opentelemetry-metrics-api (0.6.0)
opentelemetry-api (~> 1.0)
opentelemetry-metrics-sdk (0.15.0)
opentelemetry-api (~> 1.1)
opentelemetry-metrics-api (~> 0.2)
opentelemetry-sdk (~> 1.2)
opentelemetry-propagator-xray (0.27.0)
opentelemetry-api (~> 1.7)
opentelemetry-registry (0.6.0)
Expand Down Expand Up @@ -838,7 +853,9 @@ DEPENDENCIES
omniauth-auth0
omniauth-rails_csrf_protection
opentelemetry-exporter-otlp (~> 0.34.0)
opentelemetry-exporter-otlp-metrics (~> 0.10.0)
opentelemetry-instrumentation-all (~> 0.94.0)
opentelemetry-metrics-sdk (~> 0.15.0)
opentelemetry-propagator-xray (~> 0.27.0)
opentelemetry-sdk (~> 1.12)
pagy
Expand Down Expand Up @@ -994,6 +1011,7 @@ CHECKSUMS
opentelemetry-api (1.10.0) sha256=99ee7c829b18381c31a817ee9bf6a160d737542d99cb8da55d443336d266bfa9
opentelemetry-common (0.25.0) sha256=73915362e58d337fc92acbe1abfdaee1f725442527125fdb2af1420417f1149d
opentelemetry-exporter-otlp (0.34.0) sha256=3b3cdf4329ba30f4389d849c7f13b8f9f983ecb4a030031c03997dffae1e2a60
opentelemetry-exporter-otlp-metrics (0.10.0) sha256=d8cbff9b8a3391eb61486b8be9b6ad74e3b9306a3c60fb4c906b28bc857167c8
opentelemetry-helpers-mysql (0.6.0) sha256=7eeb5e6950c434775a8cf28b5fde4defc12e8b865c86479ce3119fcf593d9337
opentelemetry-helpers-sql (0.4.0) sha256=b10e8c3a2cca28a98af951bbb3e4efdc59e68b25ba0825e055574af543420afb
opentelemetry-helpers-sql-processor (0.5.0) sha256=b199241bc9451fcbd9f00b2f454830af19d4ca27c2219ea379c9b0d53cd0e0f1
Expand Down Expand Up @@ -1043,6 +1061,8 @@ CHECKSUMS
opentelemetry-instrumentation-sidekiq (0.29.0) sha256=b1d2a0cb9041a5e14239fe7c94d99e3dd07f870e2759460ab63592d7cdd8aadc
opentelemetry-instrumentation-sinatra (0.30.0) sha256=b67301153420f43264a0c68cdb3ca5bd77467cf5054e57b83a2bf891aaaa0361
opentelemetry-instrumentation-trilogy (0.69.0) sha256=0676dd720eeab284abfa52f273967442156fcac7084a1e1411373cf14ec026ad
opentelemetry-metrics-api (0.6.0) sha256=b9300821680a1370684098cb030c18423dd55909ea0206faadfa7bc47362df87
opentelemetry-metrics-sdk (0.15.0) sha256=611a9cd9f473c461095c7401b8c25f9774160d286a1acbfcbf044da2972aeada
opentelemetry-propagator-xray (0.27.0) sha256=753f756c7ad3146f182d428b06041084eecc77769edfd280f365e0bc09b9c4d1
opentelemetry-registry (0.6.0) sha256=5d3ed32ab9eee0fbdb30d4f0d0bb61ad11a4040b267b475ae815b80a8498a728
opentelemetry-sdk (1.12.0) sha256=a224abe0c59023d41cb7ac1c634d9d28843907efcd045ed1ae320796c48b864b
Expand Down
95 changes: 95 additions & 0 deletions app/services/metrics/form_count_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
module Metrics
  # Publishes the number of forms per organisation and metric state as an
  # OpenTelemetry gauge ("FormCount"), then force-flushes the meter provider.
  class FormCountService
    # Raised when flushing the meter provider does not report SUCCESS.
    class ExportError < StandardError; end

    METRIC_NAME = "FormCount".freeze
    METER_NAME = "forms-admin".freeze
    METER_VERSION = "1.0".freeze
    # "Org" attribute used for forms that are not linked to an organisation.
    UNKNOWN_ORG = "Unknown".freeze
    # States that are always reported for an organisation, even with a zero count.
    METRIC_STATES = %w[draft live archived].freeze

    # Records one gauge value per (organisation, state) pair, flushes the
    # meter provider and logs how many data points were recorded.
    #
    # @raise [ExportError] if the flush does not return SUCCESS
    # @raise [StandardError] any error is reported to Sentry and then re-raised
    def publish_form_counts
      totals = form_counts_by_org_and_state

      totals.each do |(org, state), count|
        form_count_gauge.record(count, attributes: metric_attributes(org:, state:))
      end

      export_metrics!

      Rails.logger.info "Published #{totals.size} form count metrics via OpenTelemetry"
    rescue StandardError => e
      Sentry.capture_exception(e)
      raise
    end

    private

    # Counts from the database, padded with zero entries so that every
    # non-internal organisation (and "Unknown", when it occurs at all) has a
    # value for each of METRIC_STATES.
    #
    # @return [Hash{Array(String, String) => Integer}] counts keyed by [org, state]
    def form_counts_by_org_and_state
      totals = counted_form_totals
      organisation_names.each { |org_name| ensure_all_metric_states(totals, org_name) }
      # Keys are [org, state] pairs, so destructure the key itself.
      ensure_all_metric_states(totals, UNKNOWN_ORG) if totals.each_key.any? { |(org, _state)| org == UNKNOWN_ORG }
      totals
    end

    # Counts non-deleted forms grouped by organisation name, form state and the
    # organisation's `internal` flag, then folds the form states into metric
    # states (see #metric_state).
    #
    # @return [Hash{Array(String, String) => Integer}] hash with a default value of 0
    def counted_form_totals
      grouped_counts = Form
        .where.not(state: :deleted)
        .left_joins(group_form: { group: :organisation })
        .group(Organisation.arel_table[:name], Form.arel_table[:state], Organisation.arel_table[:internal])
        .count

      grouped_counts.each_with_object(Hash.new(0)) do |((org_name, state, internal), count), totals|
        # Strict comparison: forms without an organisation (nil from the left
        # join) are kept and reported under UNKNOWN_ORG.
        next if internal == true # Skip internal organisations for metrics

        totals[[org_name || UNKNOWN_ORG, metric_state(state)]] += count
      end
    end

    # @return [Array<String>] names of all organisations that are not internal
    def organisation_names
      Organisation.where(internal: false).pluck(:name)
    end

    # Makes sure `totals` has an entry for every metric state of `org_name`.
    # `+= 0` (rather than `||=`) is used on purpose: `totals` has a default of
    # 0, so reading a missing key is truthy and `||=` would never store it.
    def ensure_all_metric_states(totals, org_name)
      METRIC_STATES.each { |state| totals[[org_name, state]] += 0 }
    end

    # Maps a form state to the state reported in the metric.
    #
    # @return [String, nil] "live", "archived" or "draft"; nil for any other state
    def metric_state(state)
      case state
      when "live", "live_with_draft" then "live"
      when "archived", "archived_with_draft" then "archived"
      when "draft" then "draft"
      end
    end

    # @return [Hash{String => String}] attributes attached to each data point
    def metric_attributes(org:, state:)
      {
        "Environment" => Settings.forms_env.downcase,
        "Org" => org,
        "State" => state,
      }
    end

    # Gauge instrument, created once per service instance.
    def form_count_gauge
      @form_count_gauge ||= meter.create_gauge(
        METRIC_NAME,
        unit: "1",
        description: "Count of forms grouped by organisation and state",
      )
    end

    def meter
      OpenTelemetry.meter_provider.meter(METER_NAME, version: METER_VERSION)
    end

    # Flushes the meter provider and raises unless the flush reports SUCCESS.
    # Does nothing when there is nothing to flush to.
    #
    # @raise [ExportError] if force_flush returns anything other than SUCCESS
    def export_metrics!
      provider = OpenTelemetry.meter_provider

      # NOTE(review): presumably the provider in place when the SDK has not been
      # configured does not expose metric_readers/force_flush - confirm. Guarding
      # treats that case the same as "no readers configured" instead of raising
      # NoMethodError.
      return unless provider.respond_to?(:metric_readers) && provider.respond_to?(:force_flush)
      return if provider.metric_readers.empty?

      result = provider.force_flush
      return if result == OpenTelemetry::SDK::Metrics::Export::SUCCESS

      raise ExportError, "OpenTelemetry metrics export failed with result code #{result}"
    end
  end
end
5 changes: 5 additions & 0 deletions config/initializers/opentelemetry.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
require "opentelemetry/sdk"
require "opentelemetry/instrumentation/all"
require "opentelemetry-metrics-sdk"
require "opentelemetry/exporter/otlp_metrics"

return unless ENV["ENABLE_OTEL"] == "true"

Expand All @@ -15,3 +17,6 @@
# Disable logging for Rake tasks to avoid cluttering output
c.logger = Logger.new(File::NULL) if Rails.const_defined?(:Rake) && Rake.application.top_level_tasks.any?
end

# Metrics are configured automatically by opentelemetry-metrics-sdk via OTEL_METRICS_EXPORTER
# (defaults to "otlp"), pushing to the collector sidecar at OTEL_EXPORTER_OTLP_ENDPOINT.
6 changes: 6 additions & 0 deletions lib/tasks/metrics.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Rake entry point for publishing form-count metrics. The task depends on
# :environment, so the application is loaded before the service runs.
namespace :metrics do
  desc "Export form counts as OpenTelemetry metrics grouped by organisation and state"
  task export_form_counts: [:environment] do
    form_count_service = Metrics::FormCountService.new
    form_count_service.publish_form_counts
  end
end
17 changes: 17 additions & 0 deletions spec/lib/tasks/metrics.rake_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require "rails_helper"

RSpec.describe "metrics.rake", type: :task do
  describe "metrics:export_form_counts" do
    subject(:task) { Rake::Task["metrics:export_form_counts"] }

    # Stand-in for the real service so the task can be checked without publishing anything.
    let(:service) { instance_double(Metrics::FormCountService, publish_form_counts: nil) }

    before do
      allow(Metrics::FormCountService).to receive(:new).and_return(service)
    end

    it "publishes form counts via Metrics::FormCountService" do
      task.invoke

      expect(service).to have_received(:publish_form_counts)
    end
  end
end
134 changes: 134 additions & 0 deletions spec/services/metrics/form_count_service_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
require "rails_helper"
require "opentelemetry-metrics-sdk"

describe Metrics::FormCountService do
  subject(:service) { described_class.new }

  let(:forms_env) { "test" }
  let(:metric_exporter) { OpenTelemetry::SDK::Metrics::Export::InMemoryMetricPullExporter.new }
  let(:organisation) { create(:organisation, name: "Department for Testing") }
  let(:group) { create(:group, organisation:) }

  # Data points expected for `organisation` from the forms created in the
  # "#publish_form_counts" setup: live_with_draft counts as live and
  # archived_with_draft counts as archived.
  let(:organisation_data_points) do
    [
      metric_data_point(org: organisation.name, state: "draft", count: 1),
      metric_data_point(org: organisation.name, state: "live", count: 2),
      metric_data_point(org: organisation.name, state: "archived", count: 2),
    ]
  end

  # Data points expected for the single draft form that has no group.
  let(:unknown_data_points) do
    [
      metric_data_point(org: "Unknown", state: "draft", count: 1),
      metric_data_point(org: "Unknown", state: "live", count: 0),
      metric_data_point(org: "Unknown", state: "archived", count: 0),
    ]
  end

  before do
    allow(Settings).to receive(:forms_env).and_return(forms_env)

    # NOTE(review): the in-memory exporter is wrapped in a periodic reader,
    # presumably so that the service's force_flush pushes data into it - confirm.
    provider = OpenTelemetry::SDK::Metrics::MeterProvider.new
    periodic_reader = OpenTelemetry::SDK::Metrics::Export::PeriodicMetricReader.new(
      export_interval_millis: 60_000,
      exporter: metric_exporter,
    )
    provider.add_metric_reader(periodic_reader)
    OpenTelemetry.meter_provider = provider
  end

  after do
    OpenTelemetry.meter_provider.shutdown
  end

  around do |example|
    travel_to(Time.zone.local(2026, 6, 3, 12, 0, 0)) do
      example.run
    end
  end

  describe "#publish_form_counts" do
    before do
      Form.destroy_all

      # Use explicit states instead of :live/:archived traits — those pull in :with_pages,
      # and each page factory creates its own :form, inflating counts.
      create(:form, :with_group, group:, state: :draft)
      create(:form, :with_group, group:, state: :live, pages: [])
      create(:form, :with_group, group:, state: :live_with_draft, pages: [])
      create(:form, :with_group, group:, state: :archived, pages: [])
      create(:form, :with_group, group:, state: :archived_with_draft, pages: [])
      create(:form, state: :draft)
    end

    it "publishes grouped form counts via OpenTelemetry" do
      service.publish_form_counts

      expect(exported_data_points).to contain_exactly(*organisation_data_points, *unknown_data_points)
    end

    context "when an organisation has no forms" do
      let!(:empty_organisation) { create(:organisation, name: "Empty Org", slug: "empty-org") }

      it "publishes zero counts for each state" do
        service.publish_form_counts

        expect(exported_data_points).to contain_exactly(
          *organisation_data_points,
          metric_data_point(org: empty_organisation.name, state: "draft", count: 0),
          metric_data_point(org: empty_organisation.name, state: "live", count: 0),
          metric_data_point(org: empty_organisation.name, state: "archived", count: 0),
          *unknown_data_points,
        )
      end
    end

    context "when an organisation is internal" do
      let(:internal_organisation) { create(:organisation, name: "Internal Org", slug: "internal-org", internal: true) }
      let(:internal_group) { create(:group, organisation: internal_organisation) }

      before do
        create(:form, :with_group, group: internal_group, state: :draft)
        create(:form, :with_group, group: internal_group, state: :live, pages: [])
      end

      it "excludes forms belonging to internal organisations" do
        service.publish_form_counts

        expect(exported_data_points).to contain_exactly(*organisation_data_points, *unknown_data_points)
      end
    end

    context "when OpenTelemetry export fails" do
      before do
        allow(OpenTelemetry.meter_provider).to receive(:force_flush)
          .and_return(OpenTelemetry::SDK::Metrics::Export::FAILURE)
      end

      it "captures the exception and re-raises" do
        expect(Sentry).to receive(:capture_exception).with(instance_of(Metrics::FormCountService::ExportError))

        expect { service.publish_form_counts }.to raise_error(Metrics::FormCountService::ExportError)
      end
    end
  end

  # All data points held by the in-memory exporter, across every snapshot.
  def exported_data_points
    metric_exporter.metric_snapshots.flat_map(&:data_points)
  end

  # Matcher for a single exported data point with the given attributes and value.
  def metric_data_point(org:, state:, count:)
    have_attributes(
      attributes: {
        "Environment" => forms_env,
        "Org" => org,
        "State" => state,
      },
      value: count,
    )
  end
end