Improve media type detection for ActivityPub types

このコミットが含まれているのは:
Cory Slep 2018-08-01 23:18:41 +02:00
コミット b0c125a7ba
4個のファイルの変更93行の追加288行の削除

ファイルの表示

@ -1,241 +0,0 @@
package pub
import (
"bufio"
"bytes"
"errors"
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
// Sentinel errors produced while tokenizing a media type header value.

// mediaMissingErr is the initial error of a media range; it stays in effect
// until a '/' has been seen.
var mediaMissingErr = errors.New("media type missing /")

// mediaSubtypeMissing is in effect after a '/' until a non-space rune follows.
var mediaSubtypeMissing = errors.New("media subtype missing after /")

// missingEqualsError is the initial error of an optional parameter; it stays
// in effect until an '=' has been seen.
var missingEqualsError = errors.New("media parameter missing equals sign for token")

// tooManyQuotesError describes an optional parameter with too many quotes.
var tooManyQuotesError = errors.New("too many quotes for optional parameter")
// Compile-time checks that both tokenizer states implement decoderState.
var (
	_ decoderState = (*mediaRangeState)(nil)
	_ decoderState = (*mediaRangeOptionalParameterState)(nil)
)
// mediaRangeState is the decoder state that tokenizes the "type/subtype"
// portion of a media range.
type mediaRangeState struct {
	// isEndOfRange records whether the most recent Split finished a whole
	// media range (it stopped at ',' or at the end of input).
	isEndOfRange bool
	// next is the state handed out by Next after the most recent Split.
	next decoderState
}
// Split is a bufio.SplitFunc-shaped tokenizer for the "type/subtype" portion
// of a media range.
//
// Leading whitespace is skipped, then data is scanned up to the first ';' or
// ','. The returned token is the text before that delimiter with surrounding
// optional whitespace removed, and advance includes the delimiter itself so
// the following state starts on fresh input.
//
// A ';' selects a mediaRangeOptionalParameterState as the next state; a ','
// or the end of input marks the end of the current media range and keeps this
// state as the next one.
//
// err is mediaMissingErr when no '/' was seen, mediaSubtypeMissing when
// nothing but whitespace follows the last '/', and nil otherwise. When no
// delimiter is found and atEOF is false, (0, nil, nil) asks for more data.
func (s *mediaRangeState) Split(data []byte, atEOF bool) (advance int, token []byte, err error) {
	err = mediaMissingErr
	s.isEndOfRange = false
	s.next = s

	// Skip any leading optional whitespace.
	start := 0
	for start < len(data) {
		r, width := utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
		start += width
	}

	// Scan until a ';' or ','. Ensure we have seen a '/' followed by a subtype.
	for i := start; i < len(data); {
		r, width := utf8.DecodeRune(data[i:])
		switch {
		case r == ';' || r == ',':
			if r == ';' {
				s.next = &mediaRangeOptionalParameterState{}
			} else {
				s.isEndOfRange = true
			}
			// The token ends at i (an index, not a length) and the
			// delimiter is consumed so it is not re-read by the next state.
			return i + width, bytes.TrimRightFunc(data[start:i], unicode.IsSpace), err
		case r == '/':
			err = mediaSubtypeMissing
		case err == mediaSubtypeMissing && !unicode.IsSpace(r):
			err = nil
		}
		i += width
	}

	if atEOF && len(data) > start {
		s.isEndOfRange = true
		return len(data), bytes.TrimRightFunc(data[start:], unicode.IsSpace), err
	}
	// Need more data.
	return 0, nil, nil
}
// Result stores token as the media range of r and reports whether the most
// recent Split completed the current media range.
func (s *mediaRangeState) Result(token string, r *mediaTypeHeaderResult) bool {
	r.mediaRange = token
	return s.isEndOfRange
}
// Next returns the decoder state chosen by the most recent Split.
func (s *mediaRangeState) Next() decoderState {
	return s.next
}
// mediaRangeOptionalParameterState is the decoder state that tokenizes one
// "token=value" parameter following a media range.
type mediaRangeOptionalParameterState struct {
	// isEndOfRange records whether the most recent Split finished the whole
	// media range (it stopped at ',' or at the end of input).
	isEndOfRange bool
	// isQuoted records whether the value seen by the most recent Split began
	// with a double quote.
	isQuoted bool
	// next is the state handed out by Next after the most recent Split.
	next decoderState
}
// Split is a bufio.SplitFunc-shaped tokenizer for a single "token=value"
// parameter, where value is either a bare token or a double-quoted string.
//
// Leading whitespace is skipped, then data is scanned up to the first
// unquoted ';' or ','. The returned token is the text before that delimiter,
// and advance includes the delimiter itself so the following state starts on
// fresh input. A ';' selects another parameter state as the next state; a ','
// or the end of input marks the end of the media range and selects a
// mediaRangeState.
//
// An error is returned when a delimiter appears before '=', when a second
// '=' or a character that is illegal in a bare token appears, or when
// anything other than whitespace or a delimiter follows a closing quote.
// missingEqualsError is returned with the token when the input ends before
// an '=' was seen. When no delimiter is found and atEOF is false,
// (0, nil, nil) asks for more data.
func (s *mediaRangeOptionalParameterState) Split(data []byte, atEOF bool) (advance int, token []byte, err error) {
	s.isEndOfRange = false
	s.isQuoted = false
	s.next = s

	// Skip any leading optional whitespace.
	start := 0
	for start < len(data) {
		r, width := utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
		start += width
	}

	const (
		tokenState = iota
		tokenOrQuoteState
		quotedStringState
		endState
	)
	state := tokenState
	seenEquals := false

	// Scan until an unquoted ',' or ';'. Make sure we have seen '='.
	for i := start; i < len(data); {
		r, width := utf8.DecodeRune(data[i:])

		// The first rune after '=' decides between a quoted string and a
		// bare token. A bare rune is then validated like any other token
		// rune instead of being accepted blindly.
		if state == tokenOrQuoteState {
			if r == '"' {
				s.isQuoted = true
				state = quotedStringState
				i += width
				continue
			}
			state = tokenState
		}

		switch state {
		case tokenState:
			switch r {
			case '=':
				if seenEquals {
					return 0, nil, fmt.Errorf("illegal value for token: %q", r)
				}
				seenEquals = true
				state = tokenOrQuoteState
			case '(', ')', '/', ':', '<', '>', '?', '@', '[', ']', '\\', '{', '}', '"':
				return 0, nil, fmt.Errorf("illegal value for token: %q", r)
			case ',', ';':
				if !seenEquals {
					return 0, nil, fmt.Errorf("illegal value for token: %q", r)
				}
				s.finishAt(r)
				return i + width, data[start:i], nil
			}
		case quotedStringState:
			if r == '"' {
				state = endState
			}
		case endState:
			switch {
			case r == ',' || r == ';':
				s.finishAt(r)
				return i + width, data[start:i], nil
			case unicode.IsSpace(r):
				// Optional whitespace may separate the closing quote
				// from the next delimiter.
			default:
				return 0, nil, fmt.Errorf("illegal value after quoted string: %q", r)
			}
		}
		i += width
	}

	if atEOF && len(data) > start {
		s.next = &mediaRangeState{}
		s.isEndOfRange = true
		if !seenEquals {
			err = missingEqualsError
		}
		return len(data), data[start:], err
	}
	// Need more data.
	return 0, nil, nil
}

// finishAt records the state transition for the delimiter that ended the
// parameter: ',' closes the media range, ';' introduces another parameter.
func (s *mediaRangeOptionalParameterState) finishAt(delim rune) {
	if delim == ',' {
		s.next = &mediaRangeState{}
		s.isEndOfRange = true
		return
	}
	s.next = &mediaRangeOptionalParameterState{}
}
// Result parses token as "name=value", appends the pair to the media range
// parameters of r, and reports whether the most recent Split completed the
// current media range.
//
// The token is split on its first '=' only, so a value may itself contain
// '='. Surrounding whitespace is trimmed from both halves, and the enclosing
// double quotes are stripped from the value when Split saw a quoted string.
// A token without '=' yields an empty value rather than a panic.
func (s *mediaRangeOptionalParameterState) Result(token string, r *mediaTypeHeaderResult) bool {
	// SplitN with n == 2 yields at most {name, value}; the separator is not
	// kept in either half.
	pair := strings.SplitN(token, "=", 2)
	tok := strings.TrimSpace(pair[0])
	value := ""
	if len(pair) == 2 {
		value = strings.TrimSpace(pair[1])
	}
	if s.isQuoted {
		value = strings.Trim(value, "\"")
	}
	r.mediaRangeParameters = append(r.mediaRangeParameters, parameterResult{
		token: tok,
		value: value,
	})
	return s.isEndOfRange
}
// Next returns the decoder state chosen by the most recent Split.
func (s *mediaRangeOptionalParameterState) Next() decoderState {
	return s.next
}
// parameterResult is one parsed "token=value" media range parameter.
type parameterResult struct {
	// token is the parameter name, the text before the '='.
	token string
	// value is the parameter value, the text after the '='.
	value string
}
// mediaTypeHeaderResult is one parsed media range together with the
// parameters that followed it.
type mediaTypeHeaderResult struct {
	// mediaRange is the media range token as produced by the tokenizer.
	mediaRange string
	// mediaRangeParameters holds the parameters in the order they were
	// appended.
	mediaRangeParameters []parameterResult
}
// decoderState is one state of the media type header tokenizer. For each
// token the decoder calls Split, then Result, then Next.
type decoderState interface {
	// Split extracts the next token from data. It has the same shape as a
	// bufio.SplitFunc.
	Split(data []byte, atEOF bool) (advance int, token []byte, err error)
	// Result applies the token returned by this state to r. It returns
	// true when r is complete and a brand new result should be started.
	Result(token string, r *mediaTypeHeaderResult) bool
	// Next is always called after Split and returns the state that should
	// handle the following token.
	Next() decoderState
}
// mediaHeaderDecoder parses a media type header value by driving a chain of
// decoderState values.
type mediaHeaderDecoder struct {
	// state is the decoderState that handles the next token.
	state decoderState
}
// splitAcceptBoundaries is the bufio.SplitFunc installed by Parse. It
// delegates to the current decoder state, except that exhausted input (atEOF
// with no data left) yields no token without consulting the state.
func (m *mediaHeaderDecoder) splitAcceptBoundaries(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}
	return m.state.Split(data, atEOF)
}
// Parse tokenizes s and returns one result per completed media range, in the
// order they appear.
//
// Decoding always restarts from a fresh mediaRangeState. Each token is
// applied to the pending result by the current state; when that state reports
// the result complete it is appended and a new pending result is started.
// Scanning stops at the first tokenizer error, which is not reported to the
// caller; results completed before that point are still returned.
func (m *mediaHeaderDecoder) Parse(s string) []*mediaTypeHeaderResult {
	m.state = &mediaRangeState{}

	sc := bufio.NewScanner(bytes.NewBufferString(s))
	sc.Split(m.splitAcceptBoundaries)

	var (
		parsed  []*mediaTypeHeaderResult
		pending = &mediaTypeHeaderResult{}
	)
	for sc.Scan() {
		complete := m.state.Result(sc.Text(), pending)
		m.state = m.state.Next()
		if !complete {
			continue
		}
		parsed = append(parsed, pending)
		pending = &mediaTypeHeaderResult{}
	}
	return parsed
}

ファイルの表示

@ -1,22 +0,0 @@
package pub
import (
"testing"
"github.com/go-test/deep"
)
func TestSuccessfulParsing(t *testing.T) {
table := []struct{
name string
input string
expected mediaTypeHeaderResult
}{
{
name: "",
input: "",
expected: mediaTypeHeaderResult{},
},
}
for _, test := range table {
}
}

ファイルの表示

@ -35,9 +35,18 @@ const (
digestDelimiter = "="
)
var alternatives = []string{
"application/activity+json",
"application/ld+json; profile=https://www.w3.org/ns/activitystreams",
// mediaTypes holds the media type strings that are searched for as
// substrings of a header value. It is populated once by init.
var mediaTypes []string

// init fills mediaTypes with the plain ActivityPub type followed by every
// combination of the JSON-LD type, a ';' separator spelled with or without
// surrounding spaces, and the ActivityStreams profile parameter written
// unquoted and quoted.
func init() {
	const jsonLdType = "application/ld+json"
	separators := []string{";", " ;", " ; ", "; "}
	profiles := []string{
		"profile=https://www.w3.org/ns/activitystreams",
		"profile=\"https://www.w3.org/ns/activitystreams\"",
	}

	mediaTypes = []string{"application/activity+json"}
	for _, sep := range separators {
		for _, param := range profiles {
			mediaTypes = append(mediaTypes, fmt.Sprintf("%s%s%s", jsonLdType, sep, param))
		}
	}
}
func trimAll(s []string) []string {
@ -48,36 +57,21 @@ func trimAll(s []string) []string {
return r
}
// NOTE(review): this span appears to interleave the removed
// headerContainsOneOf with the added headerIsActivityPubMediaType from a
// diff rendered without +/- markers; confirm against the repository before
// relying on the exact line order.
//
// headerContainsOneOf splits header on ';', passes the parts through trimAll
// and records them in a set. For each acceptable value it does the same split
// and requires every part to be present in that set, stopping at the first
// acceptable value that matches completely.
func headerContainsOneOf(header string, acceptable []string) bool {
sanitizedHeaderValues := trimAll(strings.Split(header, ";"))
sanitizedHeaderMap := make(map[string]bool, len(sanitizedHeaderValues))
for _, s := range sanitizedHeaderValues {
sanitizedHeaderMap[s] = true
}
found := false
for _, v := range acceptable {
if found {
break
}
// Remove any number of whitespace after ;'s
sanitizedAcceptableValues := trimAll(strings.Split(v, ";"))
found = true
for _, v := range sanitizedAcceptableValues {
if has, ok := sanitizedHeaderMap[v]; !has || !ok {
found = false
break
}
// headerIsActivityPubMediaType reports whether header contains any entry of
// mediaTypes as a substring. The match is a plain strings.Contains, so the
// entry may appear anywhere in the header value.
func headerIsActivityPubMediaType(header string) bool {
for _, mediaType := range mediaTypes {
if strings.Contains(header, mediaType) {
return true
}
}
// NOTE(review): "return found" presumably belongs to the removed
// headerContainsOneOf; "return false" is the no-match result here.
return found
return false
}
// isActivityPubPost reports whether r is a POST request whose
// contentTypeHeader value is recognized as an ActivityPub media type.
// NOTE(review): the two return statements look like the removed and the added
// line of a diff shown without +/- markers; presumably only the second, which
// calls headerIsActivityPubMediaType, is current — confirm.
func isActivityPubPost(r *http.Request) bool {
return r.Method == "POST" && headerContainsOneOf(r.Header.Get(contentTypeHeader), append([]string{postContentTypeHeader}, alternatives...))
return r.Method == "POST" && headerIsActivityPubMediaType(r.Header.Get(contentTypeHeader))
}
// isActivityPubGet reports whether r is a GET request whose acceptHeader
// value is recognized as an ActivityPub media type.
// NOTE(review): the two return statements look like the removed and the added
// line of a diff shown without +/- markers; presumably only the second, which
// calls headerIsActivityPubMediaType, is current — confirm.
func isActivityPubGet(r *http.Request) bool {
return r.Method == "GET" && headerContainsOneOf(r.Header.Get(acceptHeader), append([]string{getAcceptHeader}, alternatives...))
return r.Method == "GET" && headerIsActivityPubMediaType(r.Header.Get(acceptHeader))
}
// isPublic determines if a target is the Public collection as defined in the

74
pub/internal_test.go ノーマルファイル
ファイルの表示

@ -0,0 +1,74 @@
package pub
import (
"testing"
)
// TestHeaderIsActivityPubMediaType checks headerIsActivityPubMediaType
// against the plain ActivityPub type, the JSON-LD type without a profile, and
// the JSON-LD type with the ActivityStreams profile in every supported
// spacing and quoting variant. It stops at the first failing case.
func TestHeaderIsActivityPubMediaType(t *testing.T) {
	type testCase struct {
		name     string
		input    string
		expected bool
	}
	cases := []testCase{
		{
			name:     "Mastodon Accept Header",
			input:    "application/activity+json, application/ld+json",
			expected: true,
		},
		{
			name:     "Plain Type",
			input:    "application/activity+json",
			expected: true,
		},
		{
			name:     "Missing Profile",
			input:    "application/ld+json",
			expected: false,
		},
		{
			name:     "With Profile",
			input:    "application/ld+json ; profile=https://www.w3.org/ns/activitystreams",
			expected: true,
		},
		{
			name:     "With Quoted Profile",
			input:    "application/ld+json ; profile=\"https://www.w3.org/ns/activitystreams\"",
			expected: true,
		},
		{
			name:     "With Profile (End Space)",
			input:    "application/ld+json; profile=https://www.w3.org/ns/activitystreams",
			expected: true,
		},
		{
			name:     "With Quoted Profile (End Space)",
			input:    "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"",
			expected: true,
		},
		{
			name:     "With Profile (Begin Space)",
			input:    "application/ld+json ;profile=https://www.w3.org/ns/activitystreams",
			expected: true,
		},
		{
			name:     "With Quoted Profile (Begin Space)",
			input:    "application/ld+json ;profile=\"https://www.w3.org/ns/activitystreams\"",
			expected: true,
		},
		{
			name:     "With Profile (No Space)",
			input:    "application/ld+json;profile=https://www.w3.org/ns/activitystreams",
			expected: true,
		},
		{
			name:     "With Quoted Profile (No Space)",
			input:    "application/ld+json;profile=\"https://www.w3.org/ns/activitystreams\"",
			expected: true,
		},
	}
	for _, tc := range cases {
		got := headerIsActivityPubMediaType(tc.input)
		if got == tc.expected {
			continue
		}
		t.Fatalf("(%q): expected %v, got %v", tc.name, tc.expected, got)
	}
}