Add prometheus metric for proxy answer counts

This adds a prometheus metric that tracks snowflake proxy answers. If
the client has not timed out before the proxy responds with an answer,
the proxy type is recorded along with a status of "success". If the
client has timed out, the type is left blank and the status is recorded
as "timeout".

The goal of these metrics is to help us determine how many proxies fail
to respond and to help narrow down which proxy implementations are
causing client timeouts.
This commit is contained in:
Cecylia Bocovich 2025-09-09 12:41:27 -04:00
parent c49a86e5a9
commit d08efc34c3
No known key found for this signature in database
GPG key ID: 009DE379FD9B7B90
2 changed files with 13 additions and 1 deletions

View file

@ -253,6 +253,7 @@ func (i *IPC) ProxyAnswers(arg messages.Arg, response *[]byte) error {
// The snowflake took too long to respond with an answer, so its client
// disappeared / the snowflake is no longer recognized by the Broker.
success = false
i.ctx.metrics.promMetrics.ProxyAnswerTotal.With(prometheus.Labels{"type": "", "status": "timeout"}).Inc()
}
b, err := messages.EncodeAnswerResponse(success)
@ -263,6 +264,7 @@ func (i *IPC) ProxyAnswers(arg messages.Arg, response *[]byte) error {
*response = b
if success {
i.ctx.metrics.promMetrics.ProxyAnswerTotal.With(prometheus.Labels{"type": snowflake.proxyType, "status": "success"}).Inc()
snowflake.answerChannel <- answer
}

View file

@ -290,6 +290,7 @@ type PromMetrics struct {
ProxyTotal *prometheus.CounterVec
ProxyPollTotal *safeprom.CounterVec
ClientPollTotal *safeprom.CounterVec
ProxyAnswerTotal *safeprom.CounterVec
AvailableProxies *prometheus.GaugeVec
ProxyPollWithRelayURLExtensionTotal *safeprom.CounterVec
@ -331,6 +332,15 @@ func initPrometheus() *PromMetrics {
[]string{"nat", "status"},
)
promMetrics.ProxyAnswerTotal = safeprom.NewCounterVec(
prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "rounded_proxy_answer_total",
Help: "The number of snowflake proxy answers, rounded up to a multiple of 8",
},
[]string{"type", "status"},
)
promMetrics.ProxyPollWithRelayURLExtensionTotal = safeprom.NewCounterVec(
prometheus.CounterOpts{
Namespace: prometheusNamespace,
@ -370,7 +380,7 @@ func initPrometheus() *PromMetrics {
// We need to register our metrics so they can be exported.
promMetrics.registry.MustRegister(
promMetrics.ClientPollTotal, promMetrics.ProxyPollTotal,
promMetrics.ProxyTotal, promMetrics.AvailableProxies,
promMetrics.ProxyTotal, promMetrics.ProxyAnswerTotal, promMetrics.AvailableProxies,
promMetrics.ProxyPollWithRelayURLExtensionTotal,
promMetrics.ProxyPollWithoutRelayURLExtensionTotal,
promMetrics.ProxyPollRejectedForRelayURLExtensionTotal,