Improve media type detection for ActivityPub types

2018-08-01 23:18:41 +02:00 · 2018-08-01 23:18:41 +02:00 · b0c125a7ba
--- a/pub/headers.go
+++ b/pub/headers.go
@ -1,241 +0,0 @@
-package pub
-
-import (
-	"bufio"
-	"bytes"
-	"errors"
-	"fmt"
-	"strings"
-	"unicode"
-	"unicode/utf8"
-)
-
-var (
-	mediaMissingErr     = errors.New("media type missing /")
-	mediaSubtypeMissing = errors.New("media subtype missing after /")
-	missingEqualsError  = errors.New("media parameter missing equals sign for token")
-	tooManyQuotesError  = errors.New("too many quotes for optional parameter")
-)
-
-var _ decoderState = &mediaRangeState{}
-var _ decoderState = &mediaRangeOptionalParameterState{}
-
-type mediaRangeState struct {
-	isEndOfRange bool
-	next         decoderState
-}
-
-func (s *mediaRangeState) Split(data []byte, atEOF bool) (advance int, token []byte, err error) {
-	err = mediaMissingErr
-	s.isEndOfRange = false
-	s.next = s
-	// Skip any leading optional whitespace
-	start := 0
-	for width := 0; start < len(data); start += width {
-		var r rune
-		r, width = utf8.DecodeRune(data[start:])
-		if !unicode.IsSpace(r) {
-			break
-		}
-	}
-	// Scan until a ';' or ','. Ensure we have seen a '/'.
-	for runeWidth, width, i := 0, 0, start; i < len(data); i, width = i+runeWidth, width+runeWidth {
-		var r rune
-		r, runeWidth = utf8.DecodeRune(data[i:])
-		if r == ';' {
-			s.next = &mediaRangeOptionalParameterState{}
-			return i, data[start:width], err
-		} else if r == ',' {
-			s.isEndOfRange = true
-			return i, data[start:width], err
-		} else if r == '/' {
-			err = mediaSubtypeMissing
-		} else if !unicode.IsSpace(r) && err == mediaSubtypeMissing {
-			err = nil
-		}
-	}
-	if atEOF && len(data) > start {
-		s.isEndOfRange = true
-		return len(data), data[start:], err
-	}
-	// Need more data
-	return 0, nil, nil
-}
-
-func (s *mediaRangeState) Result(token string, r *mediaTypeHeaderResult) bool {
-	r.mediaRange = token
-	return s.isEndOfRange
-}
-
-func (s *mediaRangeState) Next() decoderState {
-	return s.next
-}
-
-type mediaRangeOptionalParameterState struct {
-	isEndOfRange bool
-	isQuoted     bool
-	next         decoderState
-}
-
-func (s *mediaRangeOptionalParameterState) Split(data []byte, atEOF bool) (advance int, token []byte, err error) {
-	err = missingEqualsError
-	s.isEndOfRange = false
-	s.next = s
-	// Skip any leading optional whitespace
-	start := 0
-	for width := 0; start < len(data); start += width {
-		var r rune
-		r, width = utf8.DecodeRune(data[start:])
-		if !unicode.IsSpace(r) {
-			break
-		}
-	}
-	s.isQuoted = false
-	const (
-		tokenState = iota
-		tokenOrQuoteState
-		quotedStringState
-		endState
-	)
-	processingState := tokenState
-	processingFirstToken := true
-	// Scan until an unquoted ',' or ';'. Make sure we have seen '='
-	for runeWidth, width, i := 0, 0, 0; i < len(data); i, width = i+runeWidth, width+runeWidth {
-		var r rune
-		r, runeWidth = utf8.DecodeRune(data[i:])
-		switch processingState {
-		case tokenState:
-			if r == '=' {
-				if processingFirstToken {
-					err = nil
-					processingFirstToken = false
-					processingState = tokenOrQuoteState
-				} else {
-					return 0, nil, fmt.Errorf("illegal value for token: %s", r)
-				}
-			} else if r == ')' ||
-				r == '(' ||
-				r == '/' ||
-				r == ':' ||
-				r == '<' ||
-				r == '>' ||
-				r == '?' ||
-				r == '@' ||
-				r == '[' ||
-				r == ']' ||
-				r == '\\' ||
-				r == '{' ||
-				r == '}' ||
-				r == '"' {
-				return 0, nil, fmt.Errorf("illegal value for token: %s", r)
-			} else if (r == ',' || r == ';') && processingFirstToken {
-				return 0, nil, fmt.Errorf("illegal value for token: %s", r)
-			} else if r == ',' {
-				s.next = &mediaRangeState{}
-				s.isEndOfRange = true
-				return i, data[start:width], err
-			} else if r == ';' {
-				s.next = &mediaRangeOptionalParameterState{}
-				return i, data[start:width], err
-			}
-		case quotedStringState:
-			if r == '"' {
-				processingState = endState
-			}
-		case tokenOrQuoteState:
-			if r == '"' {
-				processingState = quotedStringState
-				s.isQuoted = true
-			} else {
-				processingState = tokenState
-			}
-		case endState:
-			if r == ',' {
-				s.next = &mediaRangeState{}
-				s.isEndOfRange = true
-				return i, data[start:width], err
-			} else if r == ';' {
-				s.next = &mediaRangeOptionalParameterState{}
-				return i, data[start:width], err
-			} else {
-				return 0, nil, fmt.Errorf("illegal value after quoted string: %s", r)
-			}
-		}
-	}
-	if atEOF && len(data) > start {
-		s.next = &mediaRangeState{}
-		s.isEndOfRange = true
-		return len(data), data[start:], err
-	}
-	// Need more data
-	return 0, nil, nil
-}
-
-func (s *mediaRangeOptionalParameterState) Result(token string, r *mediaTypeHeaderResult) bool {
-	// Token is already well-formed, so split on first equals and strip any literal quotes
-	pair := strings.SplitAfterN(token, "=", 1)
-	tok := strings.TrimSpace(pair[0])
-	value := strings.TrimSpace(pair[1])
-	if s.isQuoted {
-		value = strings.Trim(value, "\"")
-	}
-	r.mediaRangeParameters = append(r.mediaRangeParameters, parameterResult{
-		token: tok,
-		value: value,
-	})
-	return s.isEndOfRange
-}
-
-func (s *mediaRangeOptionalParameterState) Next() decoderState {
-	return s.next
-}
-
-type parameterResult struct {
-	token string
-	value string
-}
-
-type mediaTypeHeaderResult struct {
-	mediaRange           string
-	mediaRangeParameters []parameterResult
-}
-
-type decoderState interface {
-	// Split splits the data into a next token.
-	Split(data []byte, atEOF bool) (advance int, token []byte, err error)
-	// Result applies the token returned by this state to the given result.
-	// Returns true if the result is completed and a brand new one should
-	// begin.
-	Result(token string, r *mediaTypeHeaderResult) bool
-	// Next is always called after Split, to obtain the next decoder state.
-	Next() decoderState
-}
-
-type mediaHeaderDecoder struct {
-	state decoderState
-}
-
-func (m *mediaHeaderDecoder) splitAcceptBoundaries(data []byte, atEOF bool) (advance int, token []byte, err error) {
-	if atEOF && len(data) == 0 {
-		return 0, nil, nil
-	}
-	advance, token, err = m.state.Split(data, atEOF)
-	return
-}
-
-func (m *mediaHeaderDecoder) Parse(s string) []*mediaTypeHeaderResult {
-	m.state = &mediaRangeState{}
-	scanner := bufio.NewScanner(bytes.NewBufferString(s))
-	scanner.Split(m.splitAcceptBoundaries)
-	var results []*mediaTypeHeaderResult
-	current := &mediaTypeHeaderResult{}
-	for scanner.Scan() {
-		token := scanner.Text()
-		if m.state.Result(token, current) {
-			results = append(results, current)
-			current = &mediaTypeHeaderResult{}
-		}
-		m.state = m.state.Next()
-	}
-	return results
-}
--- a/pub/headers_test.go
+++ b/pub/headers_test.go
@ -1,22 +0,0 @@
-package pub
-
-import (
-	"testing"
-	"github.com/go-test/deep"
-)
-
-func TestSuccessfulParsing(t *testing.T) {
-	table := []struct{
-		name string
-		input string
-		expected mediaTypeHeaderResult
-	}{
-		{
-			name: "",
-			input: "",
-			expected: mediaTypeHeaderResult{},
-		},
-	}
-	for _, test := range table {
-	}
-}
--- a/pub/internal.go
+++ b/pub/internal.go
@ -35,9 +35,12 @@ const (
 	digestDelimiter           = "="
 )

-var alternatives = []string{
-	"application/activity+json",
-	"application/ld+json; profile=https://www.w3.org/ns/activitystreams",
-}
+// Media types that mark a request as ActivityPub. A JSON-LD body only counts
+// when its "profile" parameter names the ActivityStreams context.
+const (
+	activityJSONMediaType  = "application/activity+json"
+	jsonLDMediaType        = "application/ld+json"
+	activityStreamsProfile = "https://www.w3.org/ns/activitystreams"
+)

 func trimAll(s []string) []string {
@ -48,36 +51,51 @@ func trimAll(s []string) []string {
 	return r
 }

-func headerContainsOneOf(header string, acceptable []string) bool {
-	sanitizedHeaderValues := trimAll(strings.Split(header, ";"))
-	sanitizedHeaderMap := make(map[string]bool, len(sanitizedHeaderValues))
-	for _, s := range sanitizedHeaderValues {
-		sanitizedHeaderMap[s] = true
-	}
-	found := false
-	for _, v := range acceptable {
-		if found {
-			break
-		}
-		// Remove any number of whitespace after ;'s
-		sanitizedAcceptableValues := trimAll(strings.Split(v, ";"))
-		found = true
-		for _, v := range sanitizedAcceptableValues {
-			if has, ok := sanitizedHeaderMap[v]; !has || !ok {
-				found = false
-				break
-			}
+// hasActivityStreamsProfile reports whether params, the ";"-separated pieces
+// that follow a media type, include a profile parameter listing the
+// ActivityStreams context URI. The value may be quoted and, when quoted, may
+// hold several space-separated URIs.
+func hasActivityStreamsProfile(params []string) bool {
+	for _, param := range params {
+		kv := strings.SplitN(param, "=", 2)
+		if len(kv) != 2 || !strings.EqualFold(strings.TrimSpace(kv[0]), "profile") {
+			continue
+		}
+		for _, uri := range strings.Fields(strings.Trim(strings.TrimSpace(kv[1]), `"`)) {
+			if uri == activityStreamsProfile {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// headerIsActivityPubMediaType reports whether header, a Content-Type or
+// Accept value, contains an ActivityPub media type. Each comma-separated
+// entry is examined on its own; type names and parameter names are compared
+// case-insensitively and optional whitespace around ";" and "=" is ignored,
+// so parameter order and extras such as q-values or charset do not matter.
+func headerIsActivityPubMediaType(header string) bool {
+	for _, mediaRange := range strings.Split(header, ",") {
+		params := strings.Split(mediaRange, ";")
+		switch strings.ToLower(strings.TrimSpace(params[0])) {
+		case activityJSONMediaType:
+			return true
+		case jsonLDMediaType:
+			if hasActivityStreamsProfile(params[1:]) {
+				return true
+			}
 		}
 	}
-	return found
+	return false
 }

 func isActivityPubPost(r *http.Request) bool {
-	return r.Method == "POST" && headerContainsOneOf(r.Header.Get(contentTypeHeader), append([]string{postContentTypeHeader}, alternatives...))
+	return r.Method == "POST" && headerIsActivityPubMediaType(r.Header.Get(contentTypeHeader))
 }

 func isActivityPubGet(r *http.Request) bool {
-	return r.Method == "GET" && headerContainsOneOf(r.Header.Get(acceptHeader), append([]string{getAcceptHeader}, alternatives...))
+	return r.Method == "GET" && headerIsActivityPubMediaType(r.Header.Get(acceptHeader))
 }

 // isPublic determines if a target is the Public collection as defined in the
--- a/pub/internal_test.go
+++ b/pub/internal_test.go
@ -0,0 +1,79 @@
+package pub
+
+import (
+	"testing"
+)
+
+// TestHeaderIsActivityPubMediaType feeds Content-Type/Accept style values to
+// headerIsActivityPubMediaType and checks the verdict. Each case runs as its
+// own subtest so a single failure does not hide the remaining cases.
+func TestHeaderIsActivityPubMediaType(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected bool
+	}{
+		{
+			"Mastodon Accept Header",
+			"application/activity+json, application/ld+json",
+			true,
+		},
+		{
+			"Plain Type",
+			"application/activity+json",
+			true,
+		},
+		{
+			"Missing Profile",
+			"application/ld+json",
+			false,
+		},
+		{
+			"With Profile",
+			"application/ld+json ; profile=https://www.w3.org/ns/activitystreams",
+			true,
+		},
+		{
+			"With Quoted Profile",
+			"application/ld+json ; profile=\"https://www.w3.org/ns/activitystreams\"",
+			true,
+		},
+		{
+			"With Profile (End Space)",
+			"application/ld+json; profile=https://www.w3.org/ns/activitystreams",
+			true,
+		},
+		{
+			"With Quoted Profile (End Space)",
+			"application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"",
+			true,
+		},
+		{
+			"With Profile (Begin Space)",
+			"application/ld+json ;profile=https://www.w3.org/ns/activitystreams",
+			true,
+		},
+		{
+			"With Quoted Profile (Begin Space)",
+			"application/ld+json ;profile=\"https://www.w3.org/ns/activitystreams\"",
+			true,
+		},
+		{
+			"With Profile (No Space)",
+			"application/ld+json;profile=https://www.w3.org/ns/activitystreams",
+			true,
+		},
+		{
+			"With Quoted Profile (No Space)",
+			"application/ld+json;profile=\"https://www.w3.org/ns/activitystreams\"",
+			true,
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			if actual := headerIsActivityPubMediaType(test.input); actual != test.expected {
+				t.Errorf("headerIsActivityPubMediaType(%q): expected %v, got %v", test.input, test.expected, actual)
+			}
+		})
+	}
+}