Simplified log scrubber

The IPv6 regex didn't need to be that precise, so it has been replaced
with a simpler pattern. Also added more tests for IPv6 edge cases.
This commit is contained in:
Cecylia Bocovich 2019-03-21 10:11:11 -04:00
parent f586a4bab8
commit 5bc8817028
2 changed files with 17 additions and 1 deletions

View file

@ -64,7 +64,8 @@ type logScrubber struct {
func (ls *logScrubber) Write(b []byte) (n int, err error) { func (ls *logScrubber) Write(b []byte) (n int, err error) {
//First scrub the input of IP addresses //First scrub the input of IP addresses
reIPv4 := regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`) reIPv4 := regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`)
reIPv6 := regexp.MustCompile(`(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))`) //Note that for embedded IPv4 address, the previous regex will scrub it
reIPv6 := regexp.MustCompile(`([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?`)
scrubbedBytes := reIPv4.ReplaceAll(b, []byte("X.X.X.X")) scrubbedBytes := reIPv4.ReplaceAll(b, []byte("X.X.X.X"))
scrubbedBytes = reIPv6.ReplaceAll(scrubbedBytes, scrubbedBytes = reIPv6.ReplaceAll(scrubbedBytes,
[]byte("X:X:X:X:X:X:X:X")) []byte("X:X:X:X:X:X:X:X"))

View file

@ -59,6 +59,7 @@ func TestLogScrubber(t *testing.T) {
log.Printf("%s", "http: TLS handshake error from 129.97.208.23:38310:") log.Printf("%s", "http: TLS handshake error from 129.97.208.23:38310:")
//Example IPv4 address that ended up in log
if bytes.Compare(buff.Bytes(), []byte("http: TLS handshake error from X.X.X.X:38310:\n")) != 0 { if bytes.Compare(buff.Bytes(), []byte("http: TLS handshake error from X.X.X.X:38310:\n")) != 0 {
t.Errorf("log scrubber didn't scrub IPv4 address. Output: %s", string(buff.Bytes())) t.Errorf("log scrubber didn't scrub IPv4 address. Output: %s", string(buff.Bytes()))
} }
@ -66,9 +67,23 @@ func TestLogScrubber(t *testing.T) {
log.Printf("%s", "http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack") log.Printf("%s", "http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack")
//Example IPv6 address that ended up in log
if bytes.Compare(buff.Bytes(), []byte("http2: panic serving [X:X:X:X:X:X:X:X]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n")) != 0 { if bytes.Compare(buff.Bytes(), []byte("http2: panic serving [X:X:X:X:X:X:X:X]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack\n")) != 0 {
t.Errorf("log scrubber didn't scrub IPv6 address. Output: %s", string(buff.Bytes())) t.Errorf("log scrubber didn't scrub IPv6 address. Output: %s", string(buff.Bytes()))
} }
buff.Reset() buff.Reset()
//Testing IPv6 edge cases
log.Printf("%s", "[1::]:58344")
log.Printf("%s", "[1:2:3:4:5:6::8]:58344")
log.Printf("%s", "[1::7:8]:58344")
log.Printf("%s", "[::4:5:6:7:8]:58344")
log.Printf("%s", "[::255.255.255.255]:58344")
log.Printf("%s", "[::ffff:0:255.255.255.255]:58344")
log.Printf("%s", "[2001:db8:3:4::192.0.2.33]:58344")
if bytes.Compare(buff.Bytes(), []byte("[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:X]:58344\n[X:X:X:X:X:X:X:XX.X.X.X]:58344\n[X:X:X:X:X:X:X:XX.X.X.X]:58344\n[X:X:X:X:X:X:X:XX.X.X.X]:58344\n")) != 0 {
t.Errorf("log scrubber didn't scrub IPv6 address. Output: %s", string(buff.Bytes()))
}
buff.Reset()
} }