Made regular expressions more precise

Modified regular expressions to not scrub fingerprints, but catch all
instances of IPv4 and IPv6 addresses. Expanded test cases with those
suggested by dcf.
This commit is contained in:
Cecylia Bocovich 2019-03-25 12:56:10 -04:00
parent c6a4a4191f
commit 611cb889c5
2 changed files with 79 additions and 36 deletions

View file

@ -56,13 +56,19 @@ additional HTTP listener on port 80 to work with ACME.
flag.PrintDefaults()
}
const ipv4Address = `\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`
const ipv6Address = `(([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?(` + ipv4Address + `))` +
`|(([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?)`
const optionalPort = `(:\d{1,5})?`
const addressPattern = `((` + ipv4Address + `)|(\[(` + ipv6Address + `)\])|(` + ipv6Address + `))` + optionalPort
const fullAddrPattern = `(^|\s|[^\w:])` + addressPattern + `(\s|(:\s)|[^\w:]|$)`
var scrubberPatterns = []*regexp.Regexp{
/* IPv6 */
regexp.MustCompile(`\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?\]`),
/* IPv4 */
regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`),
regexp.MustCompile(fullAddrPattern),
}
var addressRegexp = regexp.MustCompile(addressPattern)
// An io.Writer that can be used as the output for a logger that first
// sanitizes logs and then writes to the provided io.Writer
type logScrubber struct {
@ -73,7 +79,11 @@ type logScrubber struct {
func scrub(b []byte) []byte {
scrubbedBytes := b
for _, pattern := range scrubberPatterns {
scrubbedBytes = pattern.ReplaceAll(scrubbedBytes, []byte("[scrubbed]"))
// this is a workaround since go does not yet support look ahead or look
// behind for regular expressions.
scrubbedBytes = pattern.ReplaceAllFunc(scrubbedBytes, func(b []byte) []byte {
return addressRegexp.ReplaceAll(b, []byte("[scrubbed]"))
})
}
return scrubbedBytes
}

View file

@ -54,7 +54,7 @@ func TestClientAddr(t *testing.T) {
func TestLogScrubberSplit(t *testing.T) {
input := []byte("test\nhttp2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n")
expected := "test\nhttp2: panic serving [scrubbed]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n"
expected := "test\nhttp2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n"
var buff bytes.Buffer
scrubber := &logScrubber{output: &buff}
@ -93,17 +93,18 @@ func TestLogScrubberSplit(t *testing.T) {
}
func TestLogScrubberFormats(t *testing.T) {
//Test the log scrubber on known problematic log messages
func TestLogScrubberMessages(t *testing.T) {
for _, test := range []struct {
input, expected string
}{
{
"http: TLS handshake error from 129.97.208.23:38310: ",
"http: TLS handshake error from [scrubbed]:38310:\n",
"http: TLS handshake error from [scrubbed]: \n",
},
{
"http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack",
"http2: panic serving [scrubbed]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n",
"http2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n",
},
{
//Make sure it doesn't scrub fingerprint
@ -111,32 +112,9 @@ func TestLogScrubberFormats(t *testing.T) {
"a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74\n",
},
{
"[1::]:58344",
"[scrubbed]:58344\n",
},
{
"[1:2:3:4:5:6::8]",
"[scrubbed]\n",
},
{
"[1::7:8]",
"[scrubbed]\n",
},
{
"[::4:5:6:7:8]",
"[scrubbed]\n",
},
{
"[::255.255.255.255]",
"[scrubbed]\n",
},
{
"[::ffff:0:255.255.255.255]",
"[scrubbed]\n",
},
{
"[2001:db8:3:4::192.0.2.33]",
"[scrubbed]\n",
//try with enclosing parens
"(1:2:3:4:c:d:e:f) {1:2:3:4:c:d:e:f}",
"([scrubbed]) {[scrubbed]}\n",
},
} {
var buff bytes.Buffer
@ -149,3 +127,58 @@ func TestLogScrubberFormats(t *testing.T) {
}
}
func TestLogScrubberGoodFormats(t *testing.T) {
for _, addr := range []string{
// IPv4
"1.2.3.4",
"255.255.255.255",
// IPv4 with port
"1.2.3.4:55",
"255.255.255.255:65535",
// IPv6
"1:2:3:4:c:d:e:f",
"1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF",
// IPv6 with brackets
"[1:2:3:4:c:d:e:f]",
"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]",
// IPv6 with brackets and port
"[1:2:3:4:c:d:e:f]:55",
"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]:65535",
// compressed IPv6
"::d:e:f",
"1:2:3::",
"1:2:3::d:e:f",
"1111:2222:3333::DDDD:EEEE:FFFF",
// compressed IPv6 with brackets
"[::d:e:f]",
"[1:2:3::]",
"[1:2:3::d:e:f]",
"[1111:2222:3333::DDDD:EEEE:FFFF]",
"[1:2:3:4:5:6::8]",
"[1::7:8]",
// compressed IPv6 with brackets and port
"[1::]:58344",
"[::d:e:f]:55",
"[1:2:3::]:55",
"[1:2:3::d:e:f]:55",
"[1111:2222:3333::DDDD:EEEE:FFFF]:65535",
// IPv4-compatible and IPv4-mapped
"::255.255.255.255",
"::ffff:255.255.255.255",
"[::255.255.255.255]",
"[::ffff:255.255.255.255]",
"[::255.255.255.255]:65535",
"[::ffff:255.255.255.255]:65535",
"[::ffff:0:255.255.255.255]",
"[2001:db8:3:4::192.0.2.33]",
} {
var buff bytes.Buffer
log.SetFlags(0) //remove all extra log output for test comparisons
log.SetOutput(&logScrubber{output: &buff})
log.Print(addr)
if buff.String() != "[scrubbed]\n" {
t.Errorf("%q: Got %q, expected %q", addr, buff.String(), "[scrubbed]\n")
}
}
}