snowflake/common/safelog/log.go
Arlo Breault e4c818be76
Scrub space separated ip addresses
The issue with ReplaceAllFunc is that it's capturing the leading and
trailing spaces in the regexp, so successive ips don't match.  From the
docstring,

> If 'All' is present, the routine matches successive non-overlapping
> matches of the entire expression.

For #40306
2024-01-08 10:03:35 -05:00

83 lines
2.4 KiB
Go

//Package for a safer logging wrapper around the standard logging package
// import "gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/v2/common/safelog"
package safelog
import (
"bytes"
"io"
"regexp"
"sync"
)
const ipv4Address = `\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`
// %3A and %3a are for matching : in URL-encoded IPv6 addresses
const colon = `(:|%3a|%3A)`
const ipv6Address = `([0-9a-fA-F]{0,4}` + colon + `){5,7}([0-9a-fA-F]{0,4})?`
const ipv6Compressed = `([0-9a-fA-F]{0,4}` + colon + `){0,5}([0-9a-fA-F]{0,4})?(` + colon + `){2}([0-9a-fA-F]{0,4}` + colon + `){0,5}([0-9a-fA-F]{0,4})?`
const ipv6Full = `(` + ipv6Address + `(` + ipv4Address + `))` +
`|(` + ipv6Compressed + `(` + ipv4Address + `))` +
`|(` + ipv6Address + `)` + `|(` + ipv6Compressed + `)`
const optionalPort = `(:\d{1,5})?`
const addressPattern = `((` + ipv4Address + `)|(\[(` + ipv6Full + `)\])|(` + ipv6Full + `))` + optionalPort
const fullAddrPattern = `(?:^|\s|[^\w:])(` + addressPattern + `)(?:\s|(:\s)|[^\w:]|$)`
var scrubberPatterns = []*regexp.Regexp{
regexp.MustCompile(fullAddrPattern),
}
var addressRegexp = regexp.MustCompile(addressPattern)
// An io.Writer that can be used as the output for a logger that first
// sanitizes logs and then writes to the provided io.Writer
type LogScrubber struct {
Output io.Writer
buffer []byte
lock sync.Mutex
}
func (ls *LogScrubber) Lock() { (*ls).lock.Lock() }
func (ls *LogScrubber) Unlock() { (*ls).lock.Unlock() }
func Scrub(b []byte) []byte {
scrubbedBytes := b
for _, pattern := range scrubberPatterns {
// this is a workaround since go does not yet support look ahead or look
// behind for regular expressions.
var newBytes []byte
index := 0
for {
loc := pattern.FindSubmatchIndex(scrubbedBytes[index:])
if loc == nil {
break
}
newBytes = append(newBytes, scrubbedBytes[index:index+loc[2]]...)
newBytes = append(newBytes, []byte("[scrubbed]")...)
index = index + loc[3]
}
scrubbedBytes = append(newBytes, scrubbedBytes[index:]...)
}
return scrubbedBytes
}
func (ls *LogScrubber) Write(b []byte) (n int, err error) {
ls.Lock()
defer ls.Unlock()
n = len(b)
ls.buffer = append(ls.buffer, b...)
for {
i := bytes.LastIndexByte(ls.buffer, '\n')
if i == -1 {
return
}
fullLines := ls.buffer[:i+1]
_, err = ls.Output.Write(Scrub(fullLines))
if err != nil {
return
}
ls.buffer = ls.buffer[i+1:]
}
}