mirror of
https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake.git
synced 2025-10-13 11:11:30 -04:00
amp package.
This package contains a CacheURL function that modifies a URL to be accessed through an AMP cache, and the "AMP armor" data encoding scheme for encoding data into the AMP subset of HTML.
This commit is contained in:
parent
0f34a7778f
commit
c9e0dd287f
8 changed files with 1223 additions and 0 deletions
136
common/amp/armor_decoder.go
Normal file
136
common/amp/armor_decoder.go
Normal file
|
@ -0,0 +1,136 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// ErrUnknownVersion reports that the version indicator (the first character
// inside the element encoding, outside the base64 layer) was something other
// than '0'. The error value is the offending byte.
type ErrUnknownVersion byte

// Error implements the error interface, quoting the unrecognized indicator.
func (e ErrUnknownVersion) Error() string {
	indicator := byte(e)
	return fmt.Sprintf("unknown armor version indicator %+q", indicator)
}
|
||||
|
||||
// isASCIIWhitespace reports whether b is one of the five bytes that the WHATWG
// Infra Standard classifies as ASCII whitespace: TAB, LF, FF, CR, or SPACE.
//
// https://infra.spec.whatwg.org/#ascii-whitespace
func isASCIIWhitespace(b byte) bool {
	return b == '\x09' || b == '\x0a' || b == '\x0c' || b == '\x0d' || b == '\x20'
}

// splitASCIIWhitespace has the signature of a bufio.SplitFunc. It produces
// tokens that are maximal runs of non-whitespace bytes, where "whitespace" is
// decided by isASCIIWhitespace. A run that reaches the end of data without a
// terminating whitespace byte is only returned as a token when atEOF is true;
// otherwise the leading whitespace is consumed and more data is requested.
func splitASCIIWhitespace(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Locate the first byte of the token by stepping over leading whitespace.
	start := 0
	for start < len(data) && isASCIIWhitespace(data[start]) {
		start++
	}

	// Search for the whitespace byte that terminates the token. The
	// terminator itself is consumed along with the token.
	for end := start; end < len(data); end++ {
		if isASCIIWhitespace(data[end]) {
			return end + 1, data[start:end], nil
		}
	}

	// No terminator in sight. At EOF, whatever non-empty remainder we have
	// is the final token.
	if atEOF && start < len(data) {
		return len(data), data[start:], nil
	}

	// Discard the leading whitespace and ask for more data.
	return start, nil, nil
}
|
||||
|
||||
func decodeToWriter(w io.Writer, r io.Reader) (int64, error) {
|
||||
tokenizer := html.NewTokenizer(r)
|
||||
// Set a memory limit on token sizes, otherwise the tokenizer will
|
||||
// buffer text indefinitely if it is not broken up by other token types.
|
||||
tokenizer.SetMaxBuf(elementSizeLimit)
|
||||
active := false
|
||||
total := int64(0)
|
||||
for {
|
||||
tt := tokenizer.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
err := tokenizer.Err()
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
}
|
||||
if err == nil && active {
|
||||
return total, fmt.Errorf("missing </pre> tag")
|
||||
}
|
||||
return total, err
|
||||
case html.TextToken:
|
||||
if active {
|
||||
// Re-join the separate chunks of text and
|
||||
// feed them to the decoder.
|
||||
scanner := bufio.NewScanner(bytes.NewReader(tokenizer.Text()))
|
||||
scanner.Split(splitASCIIWhitespace)
|
||||
for scanner.Scan() {
|
||||
n, err := w.Write(scanner.Bytes())
|
||||
total += int64(n)
|
||||
if err != nil {
|
||||
return total, err
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return total, err
|
||||
}
|
||||
}
|
||||
case html.StartTagToken:
|
||||
tn, _ := tokenizer.TagName()
|
||||
if string(tn) == "pre" {
|
||||
if active {
|
||||
// nesting not allowed
|
||||
return total, fmt.Errorf("unexpected %s", tokenizer.Token())
|
||||
}
|
||||
active = true
|
||||
}
|
||||
case html.EndTagToken:
|
||||
tn, _ := tokenizer.TagName()
|
||||
if string(tn) == "pre" {
|
||||
if !active {
|
||||
// stray end tag
|
||||
return total, fmt.Errorf("unexpected %s", tokenizer.Token())
|
||||
}
|
||||
active = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NewArmorDecoder returns a new AMP armor decoder.
|
||||
func NewArmorDecoder(r io.Reader) (io.Reader, error) {
|
||||
pr, pw := io.Pipe()
|
||||
go func() {
|
||||
_, err := decodeToWriter(pw, r)
|
||||
pw.CloseWithError(err)
|
||||
}()
|
||||
|
||||
// The first byte inside the element encoding is a server–client
|
||||
// protocol version indicator.
|
||||
var version [1]byte
|
||||
_, err := pr.Read(version[:])
|
||||
if err != nil {
|
||||
pr.CloseWithError(err)
|
||||
return nil, err
|
||||
}
|
||||
switch version[0] {
|
||||
case '0':
|
||||
return base64.NewDecoder(base64.StdEncoding, pr), nil
|
||||
default:
|
||||
err := ErrUnknownVersion(version[0])
|
||||
pr.CloseWithError(err)
|
||||
return nil, err
|
||||
}
|
||||
}
|
176
common/amp/armor_encoder.go
Normal file
176
common/amp/armor_encoder.go
Normal file
|
@ -0,0 +1,176 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"io"
|
||||
)
|
||||
|
||||
// The AMP boilerplate that surrounds the encoded payload. References:
//
// https://amp.dev/boilerplate/
// https://amp.dev/documentation/guides-and-tutorials/learn/spec/amp-boilerplate/?format=websites
// https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/?format=websites#the-amp-html-format

// boilerplateStart is the part of the AMP boilerplate that comes before the
// payload: everything from the doctype up to and including the opening body
// tag and its trailing newline.
const boilerplateStart = `<!doctype html>
<html amp>
<head>
<meta charset="utf-8">
<script async src="https://cdn.ampproject.org/v0.js"></script>
<link rel="canonical" href="#">
<meta name="viewport" content="width=device-width">
<style amp-boilerplate>body{-webkit-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-moz-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-ms-animation:-amp-start 8s steps(1,end) 0s 1 normal both;animation:-amp-start 8s steps(1,end) 0s 1 normal both}@-webkit-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-moz-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-ms-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-o-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}</style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
</head>
<body>
`

// boilerplateEnd is the part of the AMP boilerplate that comes after the
// payload, closing the body and html elements.
const boilerplateEnd = `</body>
</html>`
|
||||
|
||||
// elementSizeLimit caps the amount of text that may go inside one HTML
// element, in order to bound how much a decoder may have to buffer.
const elementSizeLimit = 32 << 10

// bytesPerChunk is the length of each whitespace-separated chunk. The payload
// is conceptually one long base64-encoded string, but it is broken into short
// chunks separated by whitespace. This is to protect against modification by
// AMP caches, which reportedly may truncate long words in text:
// https://bugs.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/25985#note_2592348
const bytesPerChunk = 32

// chunksPerElement is the number of chunks placed in one element, chosen so as
// to stay under elementSizeLimit. The calculation assumes 1 byte of whitespace
// after each chunk, plus 1 additional whitespace byte at the beginning of the
// element.
const chunksPerElement = (elementSizeLimit - 1) / (bytesPerChunk + 1)
|
||||
|
||||
// The AMP armor encoder is a chain of a base64 encoder (base64.NewEncoder) and
|
||||
// an HTML element encoder (elementEncoder). A top-level encoder (armorEncoder)
|
||||
// coordinates these two, and handles prepending and appending the AMP
|
||||
// boilerplate. armorEncoder's Write method writes data into the base64 encoder,
|
||||
// where it makes its way through the chain.
|
||||
|
||||
// NewArmorEncoder returns a new AMP armor encoder. Anything written to the
|
||||
// returned io.WriteCloser will be encoded and written to w. The caller must
|
||||
// call Close to flush any partially written data and output the AMP boilerplate
|
||||
// trailer.
|
||||
func NewArmorEncoder(w io.Writer) (io.WriteCloser, error) {
|
||||
// Immediately write the AMP boilerplate header.
|
||||
_, err := w.Write([]byte(boilerplateStart))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
element := &elementEncoder{w: w}
|
||||
// Write a server–client protocol version indicator, outside the base64
|
||||
// layer.
|
||||
_, err = element.Write([]byte{'0'})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
base64 := base64.NewEncoder(base64.StdEncoding, element)
|
||||
return &armorEncoder{
|
||||
w: w,
|
||||
element: element,
|
||||
base64: base64,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type armorEncoder struct {
|
||||
base64 io.WriteCloser
|
||||
element *elementEncoder
|
||||
w io.Writer
|
||||
}
|
||||
|
||||
func (enc *armorEncoder) Write(p []byte) (int, error) {
|
||||
// Write into the chain base64 | element | w.
|
||||
return enc.base64.Write(p)
|
||||
}
|
||||
|
||||
func (enc *armorEncoder) Close() error {
|
||||
// Close the base64 encoder first, to flush out any buffered data and
|
||||
// the final padding.
|
||||
err := enc.base64.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Next, close the element encoder, to close any open elements.
|
||||
err = enc.element.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Finally, output the AMP boilerplate trailer.
|
||||
_, err = enc.w.Write([]byte(boilerplateEnd))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// elementEncoder arranges written data into pre elements, with the text within
|
||||
// separated into chunks. It does no HTML encoding, so data written must not
|
||||
// contain any bytes that are meaningful in HTML.
|
||||
type elementEncoder struct {
|
||||
w io.Writer
|
||||
chunkCounter int
|
||||
elementCounter int
|
||||
}
|
||||
|
||||
func (enc *elementEncoder) Write(p []byte) (n int, err error) {
|
||||
total := 0
|
||||
for len(p) > 0 {
|
||||
if enc.elementCounter == 0 && enc.chunkCounter == 0 {
|
||||
_, err := enc.w.Write([]byte("<pre>\n"))
|
||||
if err != nil {
|
||||
return total, err
|
||||
}
|
||||
}
|
||||
|
||||
n := bytesPerChunk - enc.chunkCounter
|
||||
if n > len(p) {
|
||||
n = len(p)
|
||||
}
|
||||
nn, err := enc.w.Write(p[:n])
|
||||
if err != nil {
|
||||
return total, err
|
||||
}
|
||||
total += nn
|
||||
p = p[n:]
|
||||
|
||||
enc.chunkCounter += n
|
||||
if enc.chunkCounter >= bytesPerChunk {
|
||||
enc.chunkCounter = 0
|
||||
enc.elementCounter += 1
|
||||
nn, err = enc.w.Write([]byte("\n"))
|
||||
if err != nil {
|
||||
return total, err
|
||||
}
|
||||
total += nn
|
||||
}
|
||||
|
||||
if enc.elementCounter >= chunksPerElement {
|
||||
enc.elementCounter = 0
|
||||
nn, err = enc.w.Write([]byte("</pre>\n"))
|
||||
if err != nil {
|
||||
return total, err
|
||||
}
|
||||
total += nn
|
||||
}
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (enc *elementEncoder) Close() error {
|
||||
var err error
|
||||
if !(enc.elementCounter == 0 && enc.chunkCounter == 0) {
|
||||
if enc.chunkCounter == 0 {
|
||||
_, err = enc.w.Write([]byte("</pre>\n"))
|
||||
} else {
|
||||
_, err = enc.w.Write([]byte("\n</pre>\n"))
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
227
common/amp/armor_test.go
Normal file
227
common/amp/armor_test.go
Normal file
|
@ -0,0 +1,227 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func armorDecodeToString(src string) (string, error) {
|
||||
dec, err := NewArmorDecoder(strings.NewReader(src))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
p, err := ioutil.ReadAll(dec)
|
||||
return string(p), err
|
||||
}
|
||||
|
||||
func TestArmorDecoder(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
input string
|
||||
expectedOutput string
|
||||
expectedErr bool
|
||||
}{
|
||||
{`
|
||||
<pre>
|
||||
0
|
||||
</pre>
|
||||
`,
|
||||
"",
|
||||
false,
|
||||
},
|
||||
{`
|
||||
<pre>
|
||||
0aGVsbG8gd29ybGQK
|
||||
</pre>
|
||||
`,
|
||||
"hello world\n",
|
||||
false,
|
||||
},
|
||||
// bad version indicator
|
||||
{`
|
||||
<pre>
|
||||
1aGVsbG8gd29ybGQK
|
||||
</pre>
|
||||
`,
|
||||
"",
|
||||
true,
|
||||
},
|
||||
// text outside <pre> elements
|
||||
{`
|
||||
0aGVsbG8gd29ybGQK
|
||||
blah blah blah
|
||||
<pre>
|
||||
0aGVsbG8gd29ybGQK
|
||||
</pre>
|
||||
0aGVsbG8gd29ybGQK
|
||||
blah blah blah
|
||||
`,
|
||||
"hello world\n",
|
||||
false,
|
||||
},
|
||||
{`
|
||||
<pre>
|
||||
0QUJDREV
|
||||
GR0hJSkt
|
||||
MTU5PUFF
|
||||
SU1RVVld
|
||||
</pre>
|
||||
junk
|
||||
<pre>
|
||||
YWVowMTI
|
||||
zNDU2Nzg
|
||||
5Cg
|
||||
=
|
||||
</pre>
|
||||
<pre>
|
||||
=
|
||||
</pre>
|
||||
`,
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789\n",
|
||||
false,
|
||||
},
|
||||
// no <pre> elements, hence no version indicator
|
||||
{`
|
||||
aGVsbG8gd29ybGQK
|
||||
blah blah blah
|
||||
aGVsbG8gd29ybGQK
|
||||
aGVsbG8gd29ybGQK
|
||||
blah blah blah
|
||||
`,
|
||||
"",
|
||||
true,
|
||||
},
|
||||
// empty <pre> elements, hence no version indicator
|
||||
{`
|
||||
aGVsbG8gd29ybGQK
|
||||
blah blah blah
|
||||
<pre> </pre>
|
||||
aGVsbG8gd29ybGQK
|
||||
aGVsbG8gd29ybGQK<pre></pre>
|
||||
blah blah blah
|
||||
`,
|
||||
"",
|
||||
true,
|
||||
},
|
||||
// other elements inside <pre>
|
||||
{
|
||||
"blah <pre>0aGVsb<p>G8gd29</p>ybGQK</pre>",
|
||||
"hello world\n",
|
||||
false,
|
||||
},
|
||||
// HTML comment
|
||||
{
|
||||
"blah <!-- <pre>aGVsbG8gd29ybGQK</pre> -->",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
// all kinds of ASCII whitespace
|
||||
{
|
||||
"blah <pre>\x200\x09aG\x0aV\x0csb\x0dG8\x20gd29ybGQK</pre>",
|
||||
"hello world\n",
|
||||
false,
|
||||
},
|
||||
|
||||
// bad padding
|
||||
{`
|
||||
<pre>
|
||||
0QUJDREV
|
||||
GR0hJSkt
|
||||
MTU5PUFF
|
||||
SU1RVVld
|
||||
</pre>
|
||||
junk
|
||||
<pre>
|
||||
YWVowMTI
|
||||
zNDU2Nzg
|
||||
5Cg
|
||||
=
|
||||
</pre>
|
||||
`,
|
||||
"",
|
||||
true,
|
||||
},
|
||||
/*
|
||||
// per-chunk base64
|
||||
// test disabled because Go stdlib handles this incorrectly:
|
||||
// https://github.com/golang/go/issues/31626
|
||||
{
|
||||
"<pre>QQ==</pre><pre>Qg==</pre>",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
*/
|
||||
// missing </pre>
|
||||
{
|
||||
"blah <pre></pre><pre>0aGVsbG8gd29ybGQK",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
// nested <pre>
|
||||
{
|
||||
"blah <pre>0aGVsb<pre>G8gd29</pre>ybGQK</pre>",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
} {
|
||||
output, err := armorDecodeToString(test.input)
|
||||
if test.expectedErr && err == nil {
|
||||
t.Errorf("%+q → (%+q, %v), expected error", test.input, output, err)
|
||||
continue
|
||||
}
|
||||
if !test.expectedErr && err != nil {
|
||||
t.Errorf("%+q → (%+q, %v), expected no error", test.input, output, err)
|
||||
continue
|
||||
}
|
||||
if !test.expectedErr && output != test.expectedOutput {
|
||||
t.Errorf("%+q → (%+q, %v), expected (%+q, %v)",
|
||||
test.input, output, err, test.expectedOutput, nil)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func armorRoundTrip(s string) (string, error) {
|
||||
var encoded strings.Builder
|
||||
enc, err := NewArmorEncoder(&encoded)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
_, err = io.Copy(enc, strings.NewReader(s))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
err = enc.Close()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return armorDecodeToString(encoded.String())
|
||||
}
|
||||
|
||||
func TestArmorRoundTrip(t *testing.T) {
|
||||
lengths := make([]int, 0)
|
||||
// Test short strings and lengths around elementSizeLimit thresholds.
|
||||
for i := 0; i < bytesPerChunk*2; i++ {
|
||||
lengths = append(lengths, i)
|
||||
}
|
||||
for i := -10; i < +10; i++ {
|
||||
lengths = append(lengths, elementSizeLimit+i)
|
||||
lengths = append(lengths, 2*elementSizeLimit+i)
|
||||
}
|
||||
for _, n := range lengths {
|
||||
buf := make([]byte, n)
|
||||
rand.Read(buf)
|
||||
input := string(buf)
|
||||
output, err := armorRoundTrip(input)
|
||||
if err != nil {
|
||||
t.Errorf("length %d → error %v", n, err)
|
||||
continue
|
||||
}
|
||||
if output != input {
|
||||
t.Errorf("length %d → %+q", n, output)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
178
common/amp/cache.go
Normal file
178
common/amp/cache.go
Normal file
|
@ -0,0 +1,178 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/base32"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
)
|
||||
|
||||
// domainPrefixBasic does the basic domain prefix conversion. Does not do any
|
||||
// IDNA mapping, such as https://www.unicode.org/reports/tr46/.
|
||||
//
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#basic-algorithm
|
||||
func domainPrefixBasic(domain string) (string, error) {
|
||||
// 1. Punycode Decode the publisher domain.
|
||||
prefix, err := idna.ToUnicode(domain)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// 2. Replace any "-" (hyphen) character in the output of step 1 with
|
||||
// "--" (two hyphens).
|
||||
prefix = strings.Replace(prefix, "-", "--", -1)
|
||||
|
||||
// 3. Replace any "." (dot) character in the output of step 2 with "-"
|
||||
// (hyphen).
|
||||
prefix = strings.Replace(prefix, ".", "-", -1)
|
||||
|
||||
// 4. If the output of step 3 has a "-" (hyphen) at both positions 3 and
|
||||
// 4, then to the output of step 3, add a prefix of "0-" and add a
|
||||
// suffix of "-0".
|
||||
if len(prefix) >= 4 && prefix[2] == '-' && prefix[3] == '-' {
|
||||
prefix = "0-" + prefix + "-0"
|
||||
}
|
||||
|
||||
// 5. Punycode Encode the output of step 3.
|
||||
return idna.ToASCII(prefix)
|
||||
}
|
||||
|
||||
// fallbackBase32Encoding is base32 with a lower-case alphabet and no padding.
var fallbackBase32Encoding = base32.
	NewEncoding("abcdefghijklmnopqrstuvwxyz234567").
	WithPadding(base32.NoPadding)

// domainPrefixFallback implements the fallback domain prefix algorithm for AMP
// cache URLs: the unpadded, lower-case base32 encoding of the SHA-256 digest of
// domain.
//
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#fallback-algorithm
func domainPrefixFallback(domain string) string {
	// The specification leaves open what, exactly, is to be hashed: domain
	// is notionally an abstract Unicode string rather than a sequence of
	// bytes. Although
	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/AmpCurlUrlGenerator.js#L62
	// says "Take the SHA256 of the punycode view of the domain," what the
	// reference implementation really hashes is the UTF-8 encoding of the
	// domain, without Punycode:
	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/AmpCurlUrlGenerator.js#L141
	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/browser/Sha256.js#L24
	// This function follows suit and hashes the raw bytes of domain,
	// presumed to be UTF-8.

	// 1. Hash the publisher's domain using SHA256.
	hasher := sha256.New()
	hasher.Write([]byte(domain))
	digest := hasher.Sum(nil)

	// 2. Base32 Escape the output of step 1.
	// 3. Remove the last 4 characters from the output of step 2, which are
	// always "=" (equals) characters. (The encoding never emits the padding
	// to begin with, so there is nothing to remove.)
	return fallbackBase32Encoding.EncodeToString(digest)
}
|
||||
|
||||
// domainPrefix computes the domain prefix of an AMP cache URL.
|
||||
//
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#domain-name-prefix
|
||||
func domainPrefix(domain string) string {
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#combined-algorithm
|
||||
// 1. Run the Basic Algorithm. If the output is a valid DNS label,
|
||||
// [append the Cache domain suffix and] return. Otherwise continue to
|
||||
// step 2.
|
||||
prefix, err := domainPrefixBasic(domain)
|
||||
// "A domain prefix is not a valid DNS label if it is longer than 63
|
||||
// characters"
|
||||
if err == nil && len(prefix) <= 63 {
|
||||
return prefix
|
||||
}
|
||||
// 2. Run the Fallback Algorithm. [Append the Cache domain suffix and]
|
||||
// return.
|
||||
return domainPrefixFallback(domain)
|
||||
}
|
||||
|
||||
// CacheURL computes the AMP cache URL for the publisher URL pubURL, using the
|
||||
// AMP cache at cacheURL. contentType is a string such as "c" or "i" that
|
||||
// indicates what type of serving the AMP cache is to perform. The Scheme of
|
||||
// pubURL must be "http" or "https". The Port of pubURL, if any, must match the
|
||||
// default for the scheme. cacheURL may not have RawQuery, Fragment, or
|
||||
// RawFragment set, because the resulting URL's query and fragment are taken
|
||||
// from the publisher URL.
|
||||
//
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/
|
||||
func CacheURL(pubURL, cacheURL *url.URL, contentType string) (*url.URL, error) {
|
||||
// The cache URL subdomain, including the domain prefix corresponding to
|
||||
// the publisher URL's domain.
|
||||
resultHost := domainPrefix(pubURL.Hostname()) + "." + cacheURL.Hostname()
|
||||
if cacheURL.Port() != "" {
|
||||
resultHost = net.JoinHostPort(resultHost, cacheURL.Port())
|
||||
}
|
||||
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#url-path
|
||||
// The first part of the path is the cache URL's own path, if any.
|
||||
pathComponents := []string{cacheURL.EscapedPath()}
|
||||
// The next path component is the content type. We cannot encode an
|
||||
// empty content type, because it would result in consecutive path
|
||||
// separators, which would semantically combine into a single separator.
|
||||
if contentType == "" {
|
||||
return nil, fmt.Errorf("invalid content type %+q", contentType)
|
||||
}
|
||||
pathComponents = append(pathComponents, url.PathEscape(contentType))
|
||||
// Then, we add an "s" path component, if the publisher URL scheme is
|
||||
// "https".
|
||||
switch pubURL.Scheme {
|
||||
case "http":
|
||||
// Do nothing.
|
||||
case "https":
|
||||
pathComponents = append(pathComponents, "s")
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid scheme %+q in publisher URL", pubURL.Scheme)
|
||||
}
|
||||
// The next path component is the publisher URL's host. The AMP cache
|
||||
// URL format specification is not clear about whether other
|
||||
// subcomponents of the authority (namely userinfo and port) may appear
|
||||
// here. We adopt a policy of forbidding userinfo, and requiring that
|
||||
// the port be the default for the scheme (and then we omit the port
|
||||
// entirely from the returned URL).
|
||||
if pubURL.User != nil {
|
||||
return nil, fmt.Errorf("publisher URL may not contain userinfo")
|
||||
}
|
||||
if port := pubURL.Port(); port != "" {
|
||||
if !((pubURL.Scheme == "http" && port == "80") || (pubURL.Scheme == "https" && port == "443")) {
|
||||
return nil, fmt.Errorf("publisher URL port %+q is not the default for scheme %+q", port, pubURL.Scheme)
|
||||
}
|
||||
}
|
||||
// As with the content type, we cannot encode an empty host, because
|
||||
// that would result in an empty path component.
|
||||
if pubURL.Hostname() == "" {
|
||||
return nil, fmt.Errorf("invalid host %+q in publisher URL", pubURL.Hostname())
|
||||
}
|
||||
pathComponents = append(pathComponents, url.PathEscape(pubURL.Hostname()))
|
||||
// Finally, we append the remainder of the original escaped path from
|
||||
// the publisher URL.
|
||||
pathComponents = append(pathComponents, pubURL.EscapedPath())
|
||||
|
||||
resultRawPath := path.Join(pathComponents...)
|
||||
resultPath, err := url.PathUnescape(resultRawPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// The query and fragment of the returned URL always come from pubURL.
|
||||
// Any query or fragment of cacheURL would be ignored. Return an error
|
||||
// if either is set.
|
||||
if cacheURL.RawQuery != "" {
|
||||
return nil, fmt.Errorf("cache URL may not contain a query")
|
||||
}
|
||||
if cacheURL.Fragment != "" {
|
||||
return nil, fmt.Errorf("cache URL may not contain a fragment")
|
||||
}
|
||||
|
||||
return &url.URL{
|
||||
Scheme: cacheURL.Scheme,
|
||||
User: cacheURL.User,
|
||||
Host: resultHost,
|
||||
Path: resultPath,
|
||||
RawPath: resultRawPath,
|
||||
RawQuery: pubURL.RawQuery,
|
||||
Fragment: pubURL.Fragment,
|
||||
}, nil
|
||||
}
|
320
common/amp/cache_test.go
Normal file
320
common/amp/cache_test.go
Normal file
|
@ -0,0 +1,320 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
)
|
||||
|
||||
func TestDomainPrefixBasic(t *testing.T) {
|
||||
// Tests expecting no error.
|
||||
for _, test := range []struct {
|
||||
domain, expected string
|
||||
}{
|
||||
{"", ""},
|
||||
{"xn--", ""},
|
||||
{"...", "---"},
|
||||
|
||||
// Should not apply mappings such as case folding and
|
||||
// normalization.
|
||||
{"b\u00fccher.de", "xn--bcher-de-65a"},
|
||||
{"B\u00fccher.de", "xn--Bcher-de-65a"},
|
||||
{"bu\u0308cher.de", "xn--bucher-de-hkf"},
|
||||
|
||||
// Check some that differ between IDNA 2003 and IDNA 2008.
|
||||
// https://unicode.org/reports/tr46/#Deviations
|
||||
// https://util.unicode.org/UnicodeJsps/idna.jsp
|
||||
{"faß.de", "xn--fa-de-mqa"},
|
||||
{"βόλοσ.com", "xn---com-4ld8c2a6a8e"},
|
||||
|
||||
// Lengths of 63 and 64. 64 is too long for a DNS label, but
|
||||
// domainPrefixBasic is not expected to check for that.
|
||||
{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},
|
||||
{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},
|
||||
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#basic-algorithm
|
||||
{"example.com", "example-com"},
|
||||
{"foo.example.com", "foo-example-com"},
|
||||
{"foo-example.com", "foo--example-com"},
|
||||
{"xn--57hw060o.com", "xn---com-p33b41770a"},
|
||||
{"\u26a1\U0001f60a.com", "xn---com-p33b41770a"},
|
||||
{"en-us.example.com", "0-en--us-example-com-0"},
|
||||
} {
|
||||
output, err := domainPrefixBasic(test.domain)
|
||||
if err != nil || output != test.expected {
|
||||
t.Errorf("%+q → (%+q, %v), expected (%+q, %v)",
|
||||
test.domain, output, err, test.expected, nil)
|
||||
}
|
||||
}
|
||||
|
||||
// Tests expecting an error.
|
||||
for _, domain := range []string{
|
||||
"xn---",
|
||||
} {
|
||||
output, err := domainPrefixBasic(domain)
|
||||
if err == nil || output != "" {
|
||||
t.Errorf("%+q → (%+q, %v), expected (%+q, non-nil)",
|
||||
domain, output, err, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDomainPrefixFallback(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
domain, expected string
|
||||
}{
|
||||
{
|
||||
"",
|
||||
"4oymiquy7qobjgx36tejs35zeqt24qpemsnzgtfeswmrw6csxbkq",
|
||||
},
|
||||
{
|
||||
"example.com",
|
||||
"un42n5xov642kxrxrqiyanhcoupgql5lt4wtbkyt2ijflbwodfdq",
|
||||
},
|
||||
|
||||
// These checked against the output of
|
||||
// https://github.com/ampproject/amp-toolbox/tree/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url,
|
||||
// using the widget at
|
||||
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#url-format.
|
||||
{
|
||||
"000000000000000000000000000000000000000000000000000000000000.com",
|
||||
"stejanx4hsijaoj4secyecy4nvqodk56kw72whwcmvdbtucibf5a",
|
||||
},
|
||||
{
|
||||
"00000000000000000000000000000000000000000000000000000000000a.com",
|
||||
"jdcvbsorpnc3hcjrhst56nfm6ymdpovlawdbm2efyxpvlt4cpbya",
|
||||
},
|
||||
{
|
||||
"00000000000000000000000000000000000000000000000000000000000\u03bb.com",
|
||||
"qhzqeumjkfpcpuic3vqruyjswcr7y7gcm3crqyhhywvn3xrhchfa",
|
||||
},
|
||||
} {
|
||||
output := domainPrefixFallback(test.domain)
|
||||
if output != test.expected {
|
||||
t.Errorf("%+q → %+q, expected %+q",
|
||||
test.domain, output, test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that domainPrefix chooses domainPrefixBasic or domainPrefixFallback as
|
||||
// appropriate; i.e., always returns string that is a valid DNS label and is
|
||||
// IDNA-decodable.
|
||||
func TestDomainPrefix(t *testing.T) {
|
||||
// A validating IDNA profile, which checks label length and that the
|
||||
// label contains only certain ASCII characters. It does not do the
|
||||
// ValidateLabels check, because that depends on the input having
|
||||
// certain properties.
|
||||
profile := idna.New(
|
||||
idna.VerifyDNSLength(true),
|
||||
idna.StrictDomainName(true),
|
||||
)
|
||||
for _, domain := range []string{
|
||||
"example.com",
|
||||
"\u0314example.com",
|
||||
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 63 bytes
|
||||
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 64 bytes
|
||||
"xn--57hw060o.com",
|
||||
"a b c",
|
||||
} {
|
||||
output := domainPrefix(domain)
|
||||
if bytes.IndexByte([]byte(output), '.') != -1 {
|
||||
t.Errorf("%+q → %+q contains a dot", domain, output)
|
||||
}
|
||||
_, err := profile.ToUnicode(output)
|
||||
if err != nil {
|
||||
t.Errorf("%+q → error %v", domain, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mustParseURL parses rawurl with url.Parse and returns the result. It panics
// with the parse error if rawurl cannot be parsed.
func mustParseURL(rawurl string) *url.URL {
	parsed, err := url.Parse(rawurl)
	if err == nil {
		return parsed
	}
	panic(err)
}
|
||||
|
||||
func TestCacheURL(t *testing.T) {
|
||||
// Tests expecting no error.
|
||||
for _, test := range []struct {
|
||||
pub string
|
||||
cache string
|
||||
contentType string
|
||||
expected string
|
||||
}{
|
||||
// With or without trailing slash on pubURL.
|
||||
{
|
||||
"http://example.com/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/example.com",
|
||||
},
|
||||
{
|
||||
"http://example.com",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/example.com",
|
||||
},
|
||||
// https pubURL.
|
||||
{
|
||||
"https://example.com/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/s/example.com",
|
||||
},
|
||||
// The content type should be escaped if necessary.
|
||||
{
|
||||
"http://example.com/",
|
||||
"https://amp.cache/",
|
||||
"/",
|
||||
"https://example-com.amp.cache/%2F/example.com",
|
||||
},
|
||||
// Retain pubURL path, query, and fragment, including escaping.
|
||||
{
|
||||
"http://example.com/my%2Fpath/index.html?a=1#fragment",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/example.com/my%2Fpath/index.html?a=1#fragment",
|
||||
},
|
||||
// Retain scheme, userinfo, port, and path of cacheURL, escaping
|
||||
// whatever is necessary.
|
||||
{
|
||||
"http://example.com",
|
||||
"http://cache%2Fuser:cache%40pass@amp.cache:123/with/../../path/..%2f../",
|
||||
"c",
|
||||
"http://cache%2Fuser:cache%40pass@example-com.amp.cache:123/path/..%2f../c/example.com",
|
||||
},
|
||||
// Port numbers in pubURL are allowed, if they're the default
|
||||
// for scheme.
|
||||
{
|
||||
"http://example.com:80/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/example.com",
|
||||
},
|
||||
{
|
||||
"https://example.com:443/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/s/example.com",
|
||||
},
|
||||
// "?" at the end of cacheURL is okay, as long as the query is
|
||||
// empty.
|
||||
{
|
||||
"http://example.com/",
|
||||
"https://amp.cache/?",
|
||||
"c",
|
||||
"https://example-com.amp.cache/c/example.com",
|
||||
},
|
||||
|
||||
// https://developers.google.com/amp/cache/overview#example-requesting-document-using-tls
|
||||
{
|
||||
"https://example.com/amp_document.html",
|
||||
"https://cdn.ampproject.org/",
|
||||
"c",
|
||||
"https://example-com.cdn.ampproject.org/c/s/example.com/amp_document.html",
|
||||
},
|
||||
// https://developers.google.com/amp/cache/overview#example-requesting-image-using-plain-http
|
||||
{
|
||||
"http://example.com/logo.png",
|
||||
"https://cdn.ampproject.org/",
|
||||
"i",
|
||||
"https://example-com.cdn.ampproject.org/i/example.com/logo.png",
|
||||
},
|
||||
// https://developers.google.com/amp/cache/overview#query-parameter-example
|
||||
{
|
||||
"https://example.com/g?value=Hello%20World",
|
||||
"https://cdn.ampproject.org/",
|
||||
"c",
|
||||
"https://example-com.cdn.ampproject.org/c/s/example.com/g?value=Hello%20World",
|
||||
},
|
||||
} {
|
||||
pubURL := mustParseURL(test.pub)
|
||||
cacheURL := mustParseURL(test.cache)
|
||||
outputURL, err := CacheURL(pubURL, cacheURL, test.contentType)
|
||||
if err != nil {
|
||||
t.Errorf("%+q %+q %+q → error %v",
|
||||
test.pub, test.cache, test.contentType, err)
|
||||
continue
|
||||
}
|
||||
if outputURL.String() != test.expected {
|
||||
t.Errorf("%+q %+q %+q → %+q, expected %+q",
|
||||
test.pub, test.cache, test.contentType, outputURL, test.expected)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Tests expecting an error.
|
||||
for _, test := range []struct {
|
||||
pub string
|
||||
cache string
|
||||
contentType string
|
||||
}{
|
||||
// Empty content type.
|
||||
{
|
||||
"http://example.com/",
|
||||
"https://amp.cache/",
|
||||
"",
|
||||
},
|
||||
// Empty host.
|
||||
{
|
||||
"http:///index.html",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
},
|
||||
// Empty scheme.
|
||||
{
|
||||
"//example.com/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
},
|
||||
// Unrecognized scheme.
|
||||
{
|
||||
"ftp://example.com/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
},
|
||||
// Wrong port number for scheme.
|
||||
{
|
||||
"http://example.com:443/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
},
|
||||
// userinfo in pubURL.
|
||||
{
|
||||
"http://user@example.com/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
},
|
||||
{
|
||||
"http://user:pass@example.com/",
|
||||
"https://amp.cache/",
|
||||
"c",
|
||||
},
|
||||
// cacheURL may not contain a query.
|
||||
{
|
||||
"http://example.com/",
|
||||
"https://amp.cache/?a=1",
|
||||
"c",
|
||||
},
|
||||
// cacheURL may not contain a fragment.
|
||||
{
|
||||
"http://example.com/",
|
||||
"https://amp.cache/#fragment",
|
||||
"c",
|
||||
},
|
||||
} {
|
||||
pubURL := mustParseURL(test.pub)
|
||||
cacheURL := mustParseURL(test.cache)
|
||||
outputURL, err := CacheURL(pubURL, cacheURL, test.contentType)
|
||||
if err == nil {
|
||||
t.Errorf("%+q %+q %+q → %+q, expected error",
|
||||
test.pub, test.cache, test.contentType, outputURL)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
88
common/amp/doc.go
Normal file
88
common/amp/doc.go
Normal file
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
Package amp provides functions for working with the AMP (Accelerated Mobile
|
||||
Pages) subset of HTML, and conveying binary data through an AMP cache.
|
||||
|
||||
AMP cache
|
||||
|
||||
The CacheURL function takes a plain URL and converts it to be accessed through a
|
||||
given AMP cache.
|
||||
|
||||
The EncodePath and DecodePath functions provide a way to encode data into the
|
||||
suffix of a URL path. AMP caches do not support HTTP POST, but encoding data
|
||||
into a URL path with GET is an alternative means of sending data to the server.
|
||||
The format of an encoded path is:
|
||||
0<0 or more bytes, including slash>/<base64 of data>
|
||||
That is:
|
||||
* "0", a format version number, which controls the interpretation of the rest of
|
||||
the path. Only the first byte matters as a version indicator (not the whole
|
||||
first path component).
|
||||
* Any number of slash or non-slash bytes. These may be used as padding or to
|
||||
prevent cache collisions in the AMP cache.
|
||||
* A final slash.
|
||||
* base64 encoding of the data, using the URL-safe alphabet (which does not
|
||||
include slash).
|
||||
|
||||
For example, an encoding of the string "This is path-encoded data." is the
|
||||
following. The "lgWHcwhXFjUm" following the format version number is random
|
||||
padding that will be ignored on decoding.
|
||||
0lgWHcwhXFjUm/VGhpcyBpcyBwYXRoLWVuY29kZWQgZGF0YS4
|
||||
|
||||
It is the caller's responsibility to add or remove any directory path prefix
|
||||
before calling EncodePath or DecodePath.
|
||||
|
||||
AMP armor
|
||||
|
||||
AMP armor is a data encoding scheme that satisfies the requirements of the
|
||||
AMP (Accelerated Mobile Pages) subset of HTML, and survives modification by an
|
||||
AMP cache. For the requirements of AMP HTML, see
|
||||
https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/.
|
||||
For modifications that may be made by an AMP cache, see
|
||||
https://github.com/ampproject/amphtml/blob/main/docs/spec/amp-cache-modifications.md.
|
||||
|
||||
The encoding is based on ones created by Ivan Markin. See codec/amp/ in
|
||||
https://github.com/nogoegst/amper and discussion at
|
||||
https://bugs.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/25985.
|
||||
|
||||
The encoding algorithm works as follows. Base64-encode the input. Prepend the
|
||||
base64 output with the byte '0'; this is a protocol version indicator that the decoder
|
||||
can use to determine how to interpret the bytes that follow. Split the base64
|
||||
into fixed-size chunks separated by whitespace. Take up to 1024 chunks at a
|
||||
time, and wrap them in a pre element. Then, situate the markup so far within the
|
||||
body of the AMP HTML boilerplate. The decoding algorithm is to scan the HTML for
|
||||
pre elements, split their text contents on whitespace and concatenate, then
|
||||
base64 decode. The base64 encoding uses the standard alphabet, with normal "="
|
||||
padding (https://tools.ietf.org/html/rfc4648#section-4).
|
||||
|
||||
The reason for splitting the base64 into chunks is that AMP caches reportedly
|
||||
truncate long strings that are not broken by whitespace:
|
||||
https://bugs.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/25985#note_2592348.
|
||||
The characters that may separate the chunks are the ASCII whitespace characters
|
||||
(https://infra.spec.whatwg.org/#ascii-whitespace) "\x09", "\x0a", "\x0c",
|
||||
"\x0d", and "\x20". The reason for separating the chunks into pre elements is to
|
||||
limit the amount of text a decoder may have to buffer while parsing the HTML.
|
||||
Each pre element may contain at most 64 KB of text. pre elements may not be
|
||||
nested.
|
||||
|
||||
Example
|
||||
|
||||
The following is the result of encoding the string
|
||||
"This was encoded with AMP armor.":
|
||||
|
||||
<!doctype html>
|
||||
<html amp>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<script async src="https://cdn.ampproject.org/v0.js"></script>
|
||||
<link rel="canonical" href="#">
|
||||
<meta name="viewport" content="width=device-width">
|
||||
<style amp-boilerplate>body{-webkit-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-moz-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-ms-animation:-amp-start 8s steps(1,end) 0s 1 normal both;animation:-amp-start 8s steps(1,end) 0s 1 normal both}@-webkit-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-moz-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-ms-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-o-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}</style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
|
||||
</head>
|
||||
<body>
|
||||
<pre>
|
||||
0VGhpcyB3YXMgZW5jb2RlZCB3aXRoIEF
|
||||
NUCBhcm1vci4=
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
||||
*/
|
||||
package amp
|
44
common/amp/path.go
Normal file
44
common/amp/path.go
Normal file
|
@ -0,0 +1,44 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// EncodePath returns a path suffix that carries data and is suitable for
// appending to an AMP cache URL. The result consists of the format version
// indicator "0", the unpadded URL-safe base64 encoding of 9 random bytes, a
// slash, and the unpadded URL-safe base64 encoding of data.
//
// EncodePath panics if the system random source returns an error.
func EncodePath(data []byte) string {
	// Random filler placed between the version indicator and the payload so
	// that repeated requests for the same data produce different paths.
	filler := make([]byte, 9)
	if _, err := rand.Read(filler); err != nil {
		panic(err)
	}
	enc := base64.RawURLEncoding
	return "0" + enc.EncodeToString(filler) + "/" + enc.EncodeToString(data)
}
|
||||
|
||||
// DecodePath recovers the data carried by a path suffix produced by
// EncodePath. Any directory prefix (such as might appear in an HTTP request
// path) must already have been removed by the caller, so that the first byte
// of path is the "0" format indicator.
//
// It returns an error if path is empty, if the format indicator is not '0',
// if there is no slash after the indicator, or if the text after the final
// slash is not valid unpadded URL-safe base64.
func DecodePath(path string) ([]byte, error) {
	if path == "" {
		return nil, fmt.Errorf("missing format indicator")
	}
	if indicator := path[0]; indicator != '0' {
		return nil, fmt.Errorf("unknown format indicator %q", indicator)
	}
	// Format 0: everything between the indicator and the final slash is
	// padding and is discarded; at least one slash is required.
	remainder := path[1:]
	slash := strings.LastIndex(remainder, "/")
	if slash < 0 {
		return nil, fmt.Errorf("missing data")
	}
	payload := remainder[slash+1:]
	return base64.RawURLEncoding.DecodeString(payload)
}
|
54
common/amp/path_test.go
Normal file
54
common/amp/path_test.go
Normal file
|
@ -0,0 +1,54 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDecodePath(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
path string
|
||||
expectedData string
|
||||
expectedErrStr string
|
||||
}{
|
||||
{"", "", "missing format indicator"},
|
||||
{"0", "", "missing data"},
|
||||
{"0foobar", "", "missing data"},
|
||||
{"/0/YWJj", "", "unknown format indicator '/'"},
|
||||
|
||||
{"0/", "", ""},
|
||||
{"0foobar/", "", ""},
|
||||
{"0/YWJj", "abc", ""},
|
||||
{"0///YWJj", "abc", ""},
|
||||
{"0foobar/YWJj", "abc", ""},
|
||||
{"0/foobar/YWJj", "abc", ""},
|
||||
} {
|
||||
data, err := DecodePath(test.path)
|
||||
if test.expectedErrStr != "" {
|
||||
if err == nil || err.Error() != test.expectedErrStr {
|
||||
t.Errorf("%+q expected error %+q, got %+q",
|
||||
test.path, test.expectedErrStr, err)
|
||||
}
|
||||
} else if err != nil {
|
||||
t.Errorf("%+q expected no error, got %+q", test.path, err)
|
||||
} else if string(data) != test.expectedData {
|
||||
t.Errorf("%+q expected data %+q, got %+q",
|
||||
test.path, test.expectedData, data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPathRoundTrip(t *testing.T) {
|
||||
for _, data := range []string{
|
||||
"",
|
||||
"\x00",
|
||||
"/",
|
||||
"hello world",
|
||||
} {
|
||||
decoded, err := DecodePath(EncodePath([]byte(data)))
|
||||
if err != nil {
|
||||
t.Errorf("%+q roundtripped with error %v", data, err)
|
||||
} else if string(decoded) != data {
|
||||
t.Errorf("%+q roundtripped to %+q", data, decoded)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue