package utils import ( "github.com/microcosm-cc/bluemonday" "regexp" ) // copied from bluemonday's GitHub repostiory, with some adaptations func UGCPolicy() *bluemonday.Policy { p := bluemonday.NewPolicy() /////////////////////// // Global attributes // /////////////////////// // "class" is not permitted as we are not allowing users to style their own // content p.AllowStandardAttributes() ////////////////////////////// // Global URL format policy // ////////////////////////////// p.AllowStandardURLs() //////////////////////////////// // Declarations and structure // //////////////////////////////// // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are // expecting user generated content to be a fragment of HTML and not a full // document. ////////////////////////// // Sectioning root tags // ////////////////////////// // "article" and "aside" are permitted and takes no attributes p.AllowElements("article", "aside") // "body" is not permitted as we are expecting user generated content to be a fragment // of HTML and not a full document. // "details" is permitted, including the "open" attribute which can either // be blank or the value "open". p.AllowAttrs( "open", ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details") // "fieldset" is not permitted as we are not allowing forms to be created. // "figure" is permitted and takes no attributes p.AllowElements("figure") // "nav" is not permitted as it is assumed that the site (and not the user) // has defined navigation elements // "section" is permitted and takes no attributes p.AllowElements("section") // "summary" is permitted and takes no attributes p.AllowElements("summary") ////////////////////////// // Headings and footers // ////////////////////////// // "footer" is not permitted as we expect user content to be a fragment and // not structural to this extent // "h1" through "h6" are permitted and take no attributes p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6") // "header" is not permitted as we expect user content to be a fragment and // not structural to this extent // "hgroup" is permitted and takes no attributes p.AllowElements("hgroup") ///////////////////////////////////// // Content grouping and separating // ///////////////////////////////////// // "blockquote" is permitted, including the "cite" attribute which must be // a standard URL. p.AllowAttrs("cite").OnElements("blockquote") // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes p.AllowElements("br", "div", "hr", "p", "span", "wbr") /////////// // Links // /////////// // "a" is permitted p.AllowAttrs("href").OnElements("a") // "area" is permitted along with the attributes that map image maps work p.AllowAttrs("name").Matching( regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`), ).OnElements("map") p.AllowAttrs("alt").Matching(bluemonday.Paragraph).OnElements("area") p.AllowAttrs("coords").Matching( regexp.MustCompile(`^([0-9]+,)+[0-9]+$`), ).OnElements("area") p.AllowAttrs("href").OnElements("area") p.AllowAttrs("rel").Matching(bluemonday.SpaceSeparatedTokens).OnElements("area") p.AllowAttrs("shape").Matching( regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`), ).OnElements("area") // "img" is permitted p.AllowImages() p.AllowAttrs("usemap").Matching( regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`), ).OnElements("img") p.AllowAttrs("src").Matching( regexp.MustCompile(`(?i)^\/[\p{L}\p{N}\/\.]+$`), ).OnElements("img") p.AllowAttrs("style").OnElements("img") p.AllowAttrs("alt").OnElements("img") // "link" is not permitted ///////////////////// // Phrase elements // ///////////////////// // The following are all inline phrasing elements p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em", "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var") // "q" is permitted and "cite" is a URL and handled by URL policies p.AllowAttrs("cite").OnElements("q") // "time" is permitted p.AllowAttrs("datetime").Matching(bluemonday.ISO8601).OnElements("time") //////////////////// // Style elements // //////////////////// // block and inline elements that impart no semantic meaning but style the // document p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u") // "style" is not permitted as we are not yet sanitising CSS and it is an // XSS attack vector ////////////////////// // HTML5 Formatting // ////////////////////// // "bdi" "bdo" are permitted p.AllowAttrs("dir").Matching(bluemonday.Direction).OnElements("bdi", "bdo") // "rp" "rt" "ruby" are permitted p.AllowElements("rp", "rt", "ruby") /////////////////////////// // HTML5 Change tracking // /////////////////////////// // "del" "ins" are permitted p.AllowAttrs("cite").Matching(bluemonday.Paragraph).OnElements("del", "ins") p.AllowAttrs("datetime").Matching(bluemonday.ISO8601).OnElements("del", "ins") /////////// // Lists // /////////// p.AllowLists() //////////// // Tables // //////////// p.AllowTables() /////////// // Forms // /////////// // By and large, forms are not permitted. However there are some form // elements that can be used to present data, and we do permit those // // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist" // "textarea" "optgroup" "option" are all not permitted // "meter" is permitted p.AllowAttrs( "value", "min", "max", "low", "high", "optimum", ).Matching(bluemonday.Number).OnElements("meter") // "progress" is permitted p.AllowAttrs("value", "max").Matching(bluemonday.Number).OnElements("progress") return p }