Use tpo geoip library

The geoip implementation has now been moved to its own library so that it
can be shared between projects.
This commit is contained in:
meskio 2021-09-30 12:10:59 +02:00
parent 8c6f0dbae7
commit 4396d505a3
No known key found for this signature in database
GPG key ID: 52B8F5AC97A2DA86
5 changed files with 13 additions and 364 deletions

View file

@ -1,240 +0,0 @@
/*
This code is for loading database data that maps ip addresses to countries
for collecting and presenting statistics on snowflake use that might alert us
to censorship events.
The functions here are heavily based on how tor maintains and searches its
geoip database.
The tables used for geoip data must be structured as follows:
Recognized line format for IPv4 is:
INTIPLOW,INTIPHIGH,CC
where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as big-endian 4-byte unsigned
integers, and CC is a country code.
Note that the IPv4 line format
"INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
is not currently supported.
Recognized line format for IPv6 is:
IPV6LOW,IPV6HIGH,CC
where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
It also recognizes, and skips over, blank lines and lines that start
with '#' (comments).
*/
package main
import (
"bufio"
"bytes"
"crypto/sha1"
"encoding/hex"
"fmt"
"io"
"log"
"net"
"os"
"sort"
"strconv"
"strings"
"sync"
)
// GeoIPTable is the behaviour shared by the IPv4 and IPv6 tables: locking,
// slice-like access to the stored ranges, and parsing of one input line.
type GeoIPTable interface {
	// Lock and Unlock bracket every load of and lookup in the table.
	Lock()
	Unlock()

	// Len reports how many entries the table holds.
	Len() int
	// ElementAt returns the entry stored at the given index.
	ElementAt(int) GeoIPEntry
	// Append adds one entry to the end of the table.
	Append(GeoIPEntry)

	// parseEntry converts one line of a geoip file into an entry. A nil
	// entry returned together with a nil error marks a line without data
	// (for example a comment), which the loader skips.
	parseEntry(string) (*GeoIPEntry, error)
}
// GeoIPEntry is one row of a geoip table: an address range and the country
// that range is attributed to.
type GeoIPEntry struct {
	// ipLow and ipHigh are the first and last address of the range; the
	// lookup treats both ends as part of the range.
	ipLow, ipHigh net.IP
	// country is the country code exactly as read from the geoip file.
	country string
}
// The IPv4 and IPv6 tables share one layout. They are distinct types so that
// each can provide its own parseEntry for its line format.
type (
	// GeoIPv4Table stores the entries loaded from an IPv4 geoip file.
	GeoIPv4Table struct {
		table []GeoIPEntry
		lock  sync.Mutex // synchronization for geoip table accesses and reloads
	}

	// GeoIPv6Table stores the entries loaded from an IPv6 geoip file.
	GeoIPv6Table struct {
		table []GeoIPEntry
		lock  sync.Mutex // synchronization for geoip table accesses and reloads
	}
)
// GeoIPTable accessors for the IPv4 table.

// Len returns the number of entries in the IPv4 table.
func (t *GeoIPv4Table) Len() int { return len(t.table) }

// Append adds entry to the end of the IPv4 table.
func (t *GeoIPv4Table) Append(entry GeoIPEntry) { t.table = append(t.table, entry) }

// ElementAt returns the i-th entry of the IPv4 table.
func (t *GeoIPv4Table) ElementAt(i int) GeoIPEntry { return t.table[i] }

// Lock acquires the IPv4 table's lock.
func (t *GeoIPv4Table) Lock() { t.lock.Lock() }

// Unlock releases the IPv4 table's lock.
func (t *GeoIPv4Table) Unlock() { t.lock.Unlock() }

// GeoIPTable accessors for the IPv6 table.

// Len returns the number of entries in the IPv6 table.
func (t *GeoIPv6Table) Len() int { return len(t.table) }

// Append adds entry to the end of the IPv6 table.
func (t *GeoIPv6Table) Append(entry GeoIPEntry) { t.table = append(t.table, entry) }

// ElementAt returns the i-th entry of the IPv6 table.
func (t *GeoIPv6Table) ElementAt(i int) GeoIPEntry { return t.table[i] }

// Lock acquires the IPv6 table's lock.
func (t *GeoIPv6Table) Lock() { t.lock.Lock() }

// Unlock releases the IPv6 table's lock.
func (t *GeoIPv6Table) Unlock() { t.lock.Unlock() }
// geoipStringToIP converts an IPv4 address written as a base-10 unsigned
// 32-bit integer into a net.IP. The most significant byte of the integer
// becomes the first octet of the address.
//
// If ipStr is not a valid 32-bit unsigned decimal number, it returns 0.0.0.0
// together with an error that wraps the strconv error, so callers can still
// distinguish a syntax error from an out-of-range value.
func geoipStringToIP(ipStr string) (net.IP, error) {
	ip, err := strconv.ParseUint(ipStr, 10, 32)
	if err != nil {
		return net.IPv4(0, 0, 0, 0), fmt.Errorf("error parsing IP %s: %w", ipStr, err)
	}
	// byte() keeps only the low eight bits, so no explicit masking is needed.
	return net.IPv4(byte(ip>>24), byte(ip>>16), byte(ip>>8), byte(ip)), nil
}
// parseEntry parses one line of an IPv4 geoip file of the form
// INTIPLOW,INTIPHIGH,CC into an entry.
//
// Blank lines and lines starting with '#' carry no data; for those it
// returns (nil, nil). A line that does not have exactly three comma-separated
// fields, or whose addresses cannot be converted, yields an error.
func (table *GeoIPv4Table) parseEntry(candidate string) (*GeoIPEntry, error) {
	// Test for the empty string first: indexing candidate[0] on a blank
	// line would panic.
	if candidate == "" || candidate[0] == '#' {
		return nil, nil
	}

	fields := strings.Split(candidate, ",")
	if len(fields) != 3 {
		// Report the line as it appeared in the file, not the split slice.
		return nil, fmt.Errorf("provided geoip file is incorrectly formatted. Could not parse line:\n%s", candidate)
	}

	low, err := geoipStringToIP(fields[0])
	if err != nil {
		return nil, err
	}
	high, err := geoipStringToIP(fields[1])
	if err != nil {
		return nil, err
	}

	return &GeoIPEntry{ipLow: low, ipHigh: high, country: fields[2]}, nil
}
// parseEntry parses one line of an IPv6 geoip file of the form
// IPV6LOW,IPV6HIGH,CC into an entry.
//
// Blank lines and lines starting with '#' carry no data; for those it
// returns (nil, nil). A line that does not have exactly three comma-separated
// fields, or whose addresses net.ParseIP rejects, yields an error that names
// the offending input.
func (table *GeoIPv6Table) parseEntry(candidate string) (*GeoIPEntry, error) {
	// Test for the empty string first: indexing candidate[0] on a blank
	// line would panic.
	if candidate == "" || candidate[0] == '#' {
		return nil, nil
	}

	fields := strings.Split(candidate, ",")
	if len(fields) != 3 {
		return nil, fmt.Errorf("expected 3 comma-separated fields but found %d in line %q", len(fields), candidate)
	}

	low := net.ParseIP(fields[0])
	if low == nil {
		return nil, fmt.Errorf("invalid low address %q in line %q", fields[0], candidate)
	}
	high := net.ParseIP(fields[1])
	if high == nil {
		return nil, fmt.Errorf("invalid high address %q in line %q", fields[1], candidate)
	}

	return &GeoIPEntry{ipLow: low, ipHigh: high, country: fields[2]}, nil
}
// GeoIPLoadFile reads the geoip file at pathname line by line, parses every
// line with table.parseEntry and appends each resulting entry to table.
//
// The table's lock is held while the file is being read. Lines for which
// parseEntry returns a nil entry are skipped. After a successful load, the
// SHA-1 checksum of the file and the table's entry count are logged.
//
// It returns the error from opening or reading the file. For a line that
// cannot be parsed it returns an error that quotes the line and wraps the
// parser's error; entries appended before that line remain in the table.
func GeoIPLoadFile(table GeoIPTable, pathname string) error {
	geoipFile, err := os.Open(pathname)
	if err != nil {
		return err
	}
	defer geoipFile.Close()

	// Every byte the scanner consumes is also fed to the hash, so the
	// checksum is computed in the same pass as the parse.
	hash := sha1.New()
	hashedFile := io.TeeReader(geoipFile, hash)

	table.Lock()
	defer table.Unlock()

	scanner := bufio.NewScanner(hashedFile)
	for scanner.Scan() {
		line := scanner.Text()
		entry, err := table.parseEntry(line)
		if err != nil {
			// Keep the parser's error in the chain instead of dropping it.
			return fmt.Errorf("provided geoip file is incorrectly formatted. Line is: %+q: %w", line, err)
		}
		if entry != nil {
			table.Append(*entry)
		}
	}
	if err := scanner.Err(); err != nil {
		return err
	}

	sha1Hash := hex.EncodeToString(hash.Sum(nil))
	log.Println("Using geoip file ", pathname, " with checksum", sha1Hash)
	log.Println("Loaded ", table.Len(), " entries into table")
	return nil
}
// GetCountryByAddr looks ip up in table and returns the country of the range
// that contains it. The boolean reports whether such a range exists; when it
// is false the returned country is the empty string.
//
// Addresses are compared in their 16-byte form, so the same code serves IPv4
// and IPv6 tables. The table's lock is held for the whole lookup.
//
// NOTE(review): the binary search assumes the entries are stored in ascending
// order of their upper bound; nothing in this function sorts or checks them.
func GetCountryByAddr(table GeoIPTable, ip net.IP) (string, bool) {
	table.Lock()
	defer table.Unlock()

	addr := ip.To16()
	size := table.Len()

	// Find the first range whose upper bound is not below addr.
	pos := sort.Search(size, func(i int) bool {
		return bytes.Compare(addr, table.ElementAt(i).ipHigh.To16()) <= 0
	})
	if pos == size {
		return "", false
	}

	// An address that falls in a gap between ranges (e.g. 127.0.0.0/8)
	// lands on the next higher range, so the bounds of the candidate still
	// have to be verified.
	match := table.ElementAt(pos)
	belowRange := bytes.Compare(addr, match.ipLow.To16()) < 0
	aboveRange := bytes.Compare(addr, match.ipHigh.To16()) > 0
	if belowRange || aboveRange {
		return "", false
	}
	return match.country, true
}

View file

@ -15,6 +15,7 @@ import (
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"gitlab.torproject.org/tpo/anti-censorship/geoip"
) )
const ( const (
@ -38,8 +39,7 @@ type CountryStats struct {
// Implements Observable // Implements Observable
type Metrics struct { type Metrics struct {
logger *log.Logger logger *log.Logger
tablev4 *GeoIPv4Table geoipdb *geoip.Geoip
tablev6 *GeoIPv6Table
countryStats CountryStats countryStats CountryStats
clientRoundtripEstimate time.Duration clientRoundtripEstimate time.Duration
@ -115,19 +115,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri
} }
ip := net.ParseIP(addr) ip := net.ParseIP(addr)
if ip.To4() != nil { if m.geoipdb == nil {
//This is an IPv4 address
if m.tablev4 == nil {
return return
} }
country, ok = GetCountryByAddr(m.tablev4, ip) country, ok = m.geoipdb.GetCountryByAddr(ip)
} else {
if m.tablev6 == nil {
return
}
country, ok = GetCountryByAddr(m.tablev6, ip)
}
if !ok { if !ok {
country = "??" country = "??"
} }
@ -164,24 +155,11 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri
func (m *Metrics) LoadGeoipDatabases(geoipDB string, geoip6DB string) error { func (m *Metrics) LoadGeoipDatabases(geoipDB string, geoip6DB string) error {
// Load geoip databases // Load geoip databases
var err error
log.Println("Loading geoip databases") log.Println("Loading geoip databases")
tablev4 := new(GeoIPv4Table) m.geoipdb, err = geoip.New(geoipDB, geoip6DB)
err := GeoIPLoadFile(tablev4, geoipDB)
if err != nil {
m.tablev4 = nil
return err return err
} }
m.tablev4 = tablev4
tablev6 := new(GeoIPv6Table)
err = GeoIPLoadFile(tablev6, geoip6DB)
if err != nil {
m.tablev6 = nil
return err
}
m.tablev6 = tablev6
return nil
}
func NewMetrics(metricsLogger *log.Logger) (*Metrics, error) { func NewMetrics(metricsLogger *log.Logger) (*Metrics, error) {
m := new(Metrics) m := new(Metrics)

View file

@ -6,7 +6,6 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"log" "log"
"net"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
@ -473,106 +472,15 @@ func TestSnowflakeHeap(t *testing.T) {
}) })
} }
func TestGeoip(t *testing.T) { func TestInvalidGeoipFile(t *testing.T) {
Convey("Geoip", t, func() { Convey("Geoip", t, func() {
tv4 := new(GeoIPv4Table)
err := GeoIPLoadFile(tv4, "test_geoip")
So(err, ShouldEqual, nil)
tv6 := new(GeoIPv6Table)
err = GeoIPLoadFile(tv6, "test_geoip6")
So(err, ShouldEqual, nil)
Convey("IPv4 Country Mapping Tests", func() {
for _, test := range []struct {
addr, cc string
ok bool
}{
{
"129.97.208.23", //uwaterloo
"CA",
true,
},
{
"127.0.0.1",
"",
false,
},
{
"255.255.255.255",
"",
false,
},
{
"0.0.0.0",
"",
false,
},
{
"223.252.127.255", //test high end of range
"JP",
true,
},
{
"223.252.127.255", //test low end of range
"JP",
true,
},
} {
country, ok := GetCountryByAddr(tv4, net.ParseIP(test.addr))
So(country, ShouldEqual, test.cc)
So(ok, ShouldResemble, test.ok)
}
})
Convey("IPv6 Country Mapping Tests", func() {
for _, test := range []struct {
addr, cc string
ok bool
}{
{
"2620:101:f000:0:250:56ff:fe80:168e", //uwaterloo
"CA",
true,
},
{
"fd00:0:0:0:0:0:0:1",
"",
false,
},
{
"0:0:0:0:0:0:0:0",
"",
false,
},
{
"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"",
false,
},
{
"2a07:2e47:ffff:ffff:ffff:ffff:ffff:ffff", //test high end of range
"FR",
true,
},
{
"2a07:2e40::", //test low end of range
"FR",
true,
},
} {
country, ok := GetCountryByAddr(tv6, net.ParseIP(test.addr))
So(country, ShouldEqual, test.cc)
So(ok, ShouldResemble, test.ok)
}
})
// Make sure things behave properly if geoip file fails to load // Make sure things behave properly if geoip file fails to load
ctx := NewBrokerContext(NullLogger()) ctx := NewBrokerContext(NullLogger())
if err := ctx.metrics.LoadGeoipDatabases("invalid_filename", "invalid_filename6"); err != nil { if err := ctx.metrics.LoadGeoipDatabases("invalid_filename", "invalid_filename6"); err != nil {
log.Printf("loading geo ip databases returned error: %v", err) log.Printf("loading geo ip databases returned error: %v", err)
} }
ctx.metrics.UpdateCountryStats("127.0.0.1", "", NATUnrestricted) ctx.metrics.UpdateCountryStats("127.0.0.1", "", NATUnrestricted)
So(ctx.metrics.tablev4, ShouldEqual, nil) So(ctx.metrics.geoipdb, ShouldEqual, nil)
}) })
} }

1
go.mod
View file

@ -16,6 +16,7 @@ require (
github.com/smartystreets/goconvey v1.6.4 github.com/smartystreets/goconvey v1.6.4
github.com/xtaci/kcp-go/v5 v5.6.1 github.com/xtaci/kcp-go/v5 v5.6.1
github.com/xtaci/smux v1.5.15 github.com/xtaci/smux v1.5.15
gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01
golang.org/x/crypto v0.0.0-20210317152858-513c2a44f670 golang.org/x/crypto v0.0.0-20210317152858-513c2a44f670
golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4
golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e // indirect golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e // indirect

2
go.sum
View file

@ -358,6 +358,8 @@ github.com/xtaci/smux v1.5.15 h1:6hMiXswcleXj5oNfcJc+DXS8Vj36XX2LaX98udog6Kc=
github.com/xtaci/smux v1.5.15/go.mod h1:OMlQbT5vcgl2gb49mFkYo6SMf+zP3rcjcwQz7ZU7IGY= github.com/xtaci/smux v1.5.15/go.mod h1:OMlQbT5vcgl2gb49mFkYo6SMf+zP3rcjcwQz7ZU7IGY=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01 h1:4949mHh9Vj2/okk48yG8nhP6TosFWOUfSfSr502sKGE=
gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01/go.mod h1:K3LOI4H8fa6j+7E10ViHeGEQV10304FG4j94ypmKLjY=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg=
go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=