mirror of
https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake.git
synced 2025-10-13 20:11:19 -04:00
amp package.
This package contains a CacheURL function that modifies a URL to be accessed through an AMP cache, and the "AMP armor" data encoding scheme for encoding data into the AMP subset of HTML.
This commit is contained in:
parent
0f34a7778f
commit
c9e0dd287f
8 changed files with 1223 additions and 0 deletions
136
common/amp/armor_decoder.go
Normal file
136
common/amp/armor_decoder.go
Normal file
|
@ -0,0 +1,136 @@
|
|||
package amp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// ErrUnknownVersion reports that the version indicator, the first character
// inside the element encoding (but outside the base64 encoding), is something
// other than '0'. The value of the error is the offending byte.
type ErrUnknownVersion byte

// Error implements the error interface. The offending byte is rendered as an
// ASCII-only quoted character literal.
func (v ErrUnknownVersion) Error() string {
	const format = "unknown armor version indicator %+q"
	return fmt.Sprintf(format, byte(v))
}
|
||||
|
||||
// isASCIIWhitespace reports whether b is one of the five bytes that count as
// ASCII whitespace: tab, line feed, form feed, carriage return, or space.
//
// https://infra.spec.whatwg.org/#ascii-whitespace
func isASCIIWhitespace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
|
||||
|
||||
func splitASCIIWhitespace(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
var i, j int
|
||||
// Skip initial whitespace.
|
||||
for i = 0; i < len(data); i++ {
|
||||
if !isASCIIWhitespace(data[i]) {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Look for next whitespace.
|
||||
for j = i; j < len(data); j++ {
|
||||
if isASCIIWhitespace(data[j]) {
|
||||
return j + 1, data[i:j], nil
|
||||
}
|
||||
}
|
||||
// We reached the end of data without finding more whitespace. Only
|
||||
// consider it a token if we are at EOF.
|
||||
if atEOF && i < j {
|
||||
return j, data[i:j], nil
|
||||
}
|
||||
// Otherwise, request more data.
|
||||
return i, nil, nil
|
||||
}
|
||||
|
||||
func decodeToWriter(w io.Writer, r io.Reader) (int64, error) {
|
||||
tokenizer := html.NewTokenizer(r)
|
||||
// Set a memory limit on token sizes, otherwise the tokenizer will
|
||||
// buffer text indefinitely if it is not broken up by other token types.
|
||||
tokenizer.SetMaxBuf(elementSizeLimit)
|
||||
active := false
|
||||
total := int64(0)
|
||||
for {
|
||||
tt := tokenizer.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
err := tokenizer.Err()
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
}
|
||||
if err == nil && active {
|
||||
return total, fmt.Errorf("missing </pre> tag")
|
||||
}
|
||||
return total, err
|
||||
case html.TextToken:
|
||||
if active {
|
||||
// Re-join the separate chunks of text and
|
||||
// feed them to the decoder.
|
||||
scanner := bufio.NewScanner(bytes.NewReader(tokenizer.Text()))
|
||||
scanner.Split(splitASCIIWhitespace)
|
||||
for scanner.Scan() {
|
||||
n, err := w.Write(scanner.Bytes())
|
||||
total += int64(n)
|
||||
if err != nil {
|
||||
return total, err
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return total, err
|
||||
}
|
||||
}
|
||||
case html.StartTagToken:
|
||||
tn, _ := tokenizer.TagName()
|
||||
if string(tn) == "pre" {
|
||||
if active {
|
||||
// nesting not allowed
|
||||
return total, fmt.Errorf("unexpected %s", tokenizer.Token())
|
||||
}
|
||||
active = true
|
||||
}
|
||||
case html.EndTagToken:
|
||||
tn, _ := tokenizer.TagName()
|
||||
if string(tn) == "pre" {
|
||||
if !active {
|
||||
// stray end tag
|
||||
return total, fmt.Errorf("unexpected %s", tokenizer.Token())
|
||||
}
|
||||
active = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NewArmorDecoder returns a new AMP armor decoder.
|
||||
func NewArmorDecoder(r io.Reader) (io.Reader, error) {
|
||||
pr, pw := io.Pipe()
|
||||
go func() {
|
||||
_, err := decodeToWriter(pw, r)
|
||||
pw.CloseWithError(err)
|
||||
}()
|
||||
|
||||
// The first byte inside the element encoding is a server–client
|
||||
// protocol version indicator.
|
||||
var version [1]byte
|
||||
_, err := pr.Read(version[:])
|
||||
if err != nil {
|
||||
pr.CloseWithError(err)
|
||||
return nil, err
|
||||
}
|
||||
switch version[0] {
|
||||
case '0':
|
||||
return base64.NewDecoder(base64.StdEncoding, pr), nil
|
||||
default:
|
||||
err := ErrUnknownVersion(version[0])
|
||||
pr.CloseWithError(err)
|
||||
return nil, err
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue