Add Prometheus metric for proxy answer counts

This adds a Prometheus metric that tracks snowflake proxy answers. If
the proxy responds with an answer before the client has timed out,
the proxy type is recorded along with a status of "success". If the
client has already timed out, the "type" label is left empty and the
status is recorded as "timeout".

The goal of these metrics is to help us determine how many proxies fail
to respond and to help narrow down which proxy implementations are
causing client timeouts.
This commit is contained in:
Cecylia Bocovich 2025-09-09 12:41:27 -04:00
parent c49a86e5a9
commit d08efc34c3
No known key found for this signature in database
GPG key ID: 009DE379FD9B7B90
2 changed files with 13 additions and 1 deletions

View file

@ -253,6 +253,7 @@ func (i *IPC) ProxyAnswers(arg messages.Arg, response *[]byte) error {
// The snowflake took too long to respond with an answer, so its client // The snowflake took too long to respond with an answer, so its client
// disappeared / the snowflake is no longer recognized by the Broker. // disappeared / the snowflake is no longer recognized by the Broker.
success = false success = false
i.ctx.metrics.promMetrics.ProxyAnswerTotal.With(prometheus.Labels{"type": "", "status": "timeout"}).Inc()
} }
b, err := messages.EncodeAnswerResponse(success) b, err := messages.EncodeAnswerResponse(success)
@ -263,6 +264,7 @@ func (i *IPC) ProxyAnswers(arg messages.Arg, response *[]byte) error {
*response = b *response = b
if success { if success {
i.ctx.metrics.promMetrics.ProxyAnswerTotal.With(prometheus.Labels{"type": snowflake.proxyType, "status": "success"}).Inc()
snowflake.answerChannel <- answer snowflake.answerChannel <- answer
} }

View file

@ -290,6 +290,7 @@ type PromMetrics struct {
ProxyTotal *prometheus.CounterVec ProxyTotal *prometheus.CounterVec
ProxyPollTotal *safeprom.CounterVec ProxyPollTotal *safeprom.CounterVec
ClientPollTotal *safeprom.CounterVec ClientPollTotal *safeprom.CounterVec
ProxyAnswerTotal *safeprom.CounterVec
AvailableProxies *prometheus.GaugeVec AvailableProxies *prometheus.GaugeVec
ProxyPollWithRelayURLExtensionTotal *safeprom.CounterVec ProxyPollWithRelayURLExtensionTotal *safeprom.CounterVec
@ -331,6 +332,15 @@ func initPrometheus() *PromMetrics {
[]string{"nat", "status"}, []string{"nat", "status"},
) )
promMetrics.ProxyAnswerTotal = safeprom.NewCounterVec(
prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "rounded_proxy_answer_total",
Help: "The number of snowflake proxy answers, rounded up to a multiple of 8",
},
[]string{"type", "status"},
)
promMetrics.ProxyPollWithRelayURLExtensionTotal = safeprom.NewCounterVec( promMetrics.ProxyPollWithRelayURLExtensionTotal = safeprom.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: prometheusNamespace, Namespace: prometheusNamespace,
@ -370,7 +380,7 @@ func initPrometheus() *PromMetrics {
// We need to register our metrics so they can be exported. // We need to register our metrics so they can be exported.
promMetrics.registry.MustRegister( promMetrics.registry.MustRegister(
promMetrics.ClientPollTotal, promMetrics.ProxyPollTotal, promMetrics.ClientPollTotal, promMetrics.ProxyPollTotal,
promMetrics.ProxyTotal, promMetrics.AvailableProxies, promMetrics.ProxyTotal, promMetrics.ProxyAnswerTotal, promMetrics.AvailableProxies,
promMetrics.ProxyPollWithRelayURLExtensionTotal, promMetrics.ProxyPollWithRelayURLExtensionTotal,
promMetrics.ProxyPollWithoutRelayURLExtensionTotal, promMetrics.ProxyPollWithoutRelayURLExtensionTotal,
promMetrics.ProxyPollRejectedForRelayURLExtensionTotal, promMetrics.ProxyPollRejectedForRelayURLExtensionTotal,