From d08efc34c38a951ff02c42dd007b77e94c809e01 Mon Sep 17 00:00:00 2001 From: Cecylia Bocovich Date: Tue, 9 Sep 2025 12:41:27 -0400 Subject: [PATCH] Add prometheus metric for proxy answer counts This adds a prometheus metric that tracks snowflake proxy answers. If the client has not timed out before the proxy responds with an answer, the proxy type is recorded along with a status of "success". If the client has timed out, the type is left blank and the status is recorded as "timeout". The goal of these metrics is to help us determine how many proxies fail to respond and to help narrow down which proxy implementations are causing client timeouts. --- broker/ipc.go | 2 ++ broker/metrics.go | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/broker/ipc.go b/broker/ipc.go index 728363a..7e2951c 100644 --- a/broker/ipc.go +++ b/broker/ipc.go @@ -253,6 +253,7 @@ func (i *IPC) ProxyAnswers(arg messages.Arg, response *[]byte) error { // The snowflake took too long to respond with an answer, so its client // disappeared / the snowflake is no longer recognized by the Broker. success = false + i.ctx.metrics.promMetrics.ProxyAnswerTotal.With(prometheus.Labels{"type": "", "status": "timeout"}).Inc() } b, err := messages.EncodeAnswerResponse(success) @@ -263,6 +264,7 @@ func (i *IPC) ProxyAnswers(arg messages.Arg, response *[]byte) error { *response = b if success { + i.ctx.metrics.promMetrics.ProxyAnswerTotal.With(prometheus.Labels{"type": snowflake.proxyType, "status": "success"}).Inc() snowflake.answerChannel <- answer } diff --git a/broker/metrics.go b/broker/metrics.go index a09691b..ae1183c 100644 --- a/broker/metrics.go +++ b/broker/metrics.go @@ -290,6 +290,7 @@ type PromMetrics struct { ProxyTotal *prometheus.CounterVec ProxyPollTotal *safeprom.CounterVec ClientPollTotal *safeprom.CounterVec + ProxyAnswerTotal *safeprom.CounterVec AvailableProxies *prometheus.GaugeVec ProxyPollWithRelayURLExtensionTotal *safeprom.CounterVec @@ -331,6 +332,15 @@ func initPrometheus() *PromMetrics { []string{"nat", "status"}, ) + promMetrics.ProxyAnswerTotal = safeprom.NewCounterVec( + prometheus.CounterOpts{ + Namespace: prometheusNamespace, + Name: "rounded_proxy_answer_total", + Help: "The number of snowflake proxy answers, rounded up to a multiple of 8", + }, + []string{"type", "status"}, + ) + promMetrics.ProxyPollWithRelayURLExtensionTotal = safeprom.NewCounterVec( prometheus.CounterOpts{ Namespace: prometheusNamespace, @@ -370,7 +380,7 @@ func initPrometheus() *PromMetrics { // We need to register our metrics so they can be exported. promMetrics.registry.MustRegister( promMetrics.ClientPollTotal, promMetrics.ProxyPollTotal, - promMetrics.ProxyTotal, promMetrics.AvailableProxies, + promMetrics.ProxyTotal, promMetrics.ProxyAnswerTotal, promMetrics.AvailableProxies, promMetrics.ProxyPollWithRelayURLExtensionTotal, promMetrics.ProxyPollWithoutRelayURLExtensionTotal, promMetrics.ProxyPollRejectedForRelayURLExtensionTotal,