Markdown: Sanitizer Configuration (#9075)
* Support custom sanitization policy Allowing the gitea administrator to configure sanitization policy allows them to couple external renders and custom templates to support more markup. In particular, the `pandoc` renderer allows generating KaTeX annotations, wrapping them in `<span>` elements with class `math` and either `inline` or `display` (depending on whether or not inline or block mode was requested). This iteration gives the administrator whitelisting powers; carefully crafted regexes will thus let through only the desired attributes necessary to support their custom markup. Resolves: #9054 Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Document new sanitization configuration - Adds basic documentation to app.ini.sample, - Adds an example to the Configuration Cheat Sheet, and - Adds extended information to External Renderers section. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Drop extraneous length check in newMarkupSanitizer(...) Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Fix plural ELEMENT and ALLOW_ATTR in docs These were left over from their initial names. Make them singular to conform with the current expectations. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
This commit is contained in:
parent
cecc31951c
commit
ee7df7ba8c
5 changed files with 155 additions and 29 deletions
|
@ -877,6 +877,12 @@ SHOW_FOOTER_VERSION = true
|
||||||
; Show template execution time in the footer
|
; Show template execution time in the footer
|
||||||
SHOW_FOOTER_TEMPLATE_LOAD_TIME = true
|
SHOW_FOOTER_TEMPLATE_LOAD_TIME = true
|
||||||
|
|
||||||
|
[markup.sanitizer]
|
||||||
|
; The following keys can be used multiple times to define sanitization policy rules.
|
||||||
|
;ELEMENT = span
|
||||||
|
;ALLOW_ATTR = class
|
||||||
|
;REGEXP = ^(info|warning|error)$
|
||||||
|
|
||||||
[markup.asciidoc]
|
[markup.asciidoc]
|
||||||
ENABLED = false
|
ENABLED = false
|
||||||
; List of file extensions that should be rendered by an external command
|
; List of file extensions that should be rendered by an external command
|
||||||
|
|
|
@ -578,6 +578,24 @@ Two special environment variables are passed to the render command:
|
||||||
- `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
|
- `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
|
||||||
- `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
|
- `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
|
||||||
|
|
||||||
|
|
||||||
|
Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc.
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[markup.sanitizer]
|
||||||
|
; Pandoc renders TeX segments as <span>s with the "math" class, optionally
|
||||||
|
; with "inline" or "display" classes depending on context.
|
||||||
|
ELEMENT = span
|
||||||
|
ALLOW_ATTR = class
|
||||||
|
REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
|
||||||
|
```
|
||||||
|
|
||||||
|
- `ELEMENT`: The element this policy applies to. Must be non-empty.
|
||||||
|
- `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty.
|
||||||
|
- `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute.
|
||||||
|
|
||||||
|
You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry.
|
||||||
|
|
||||||
## Time (`time`)
|
## Time (`time`)
|
||||||
|
|
||||||
- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05
|
- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05
|
||||||
|
|
|
@ -68,4 +68,22 @@ RENDER_COMMAND = rst2html.py
|
||||||
IS_INPUT_FILE = false
|
IS_INPUT_FILE = false
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizer. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/).
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[markup.sanitizer]
|
||||||
|
; Pandoc renders TeX segments as <span>s with the "math" class, optionally
|
||||||
|
; with "inline" or "display" classes depending on context.
|
||||||
|
ELEMENT = span
|
||||||
|
ALLOW_ATTR = class
|
||||||
|
REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
|
||||||
|
|
||||||
|
[markup.markdown]
|
||||||
|
ENABLED = true
|
||||||
|
FILE_EXTENSIONS = .md,.markdown
|
||||||
|
RENDER_COMMAND = pandoc -f markdown -t html --katex
|
||||||
|
```
|
||||||
|
|
||||||
|
You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. All three must be defined, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute.
|
||||||
|
|
||||||
Once your configuration changes have been made, restart Gitea to have changes take effect.
|
Once your configuration changes have been made, restart Gitea to have changes take effect.
|
||||||
|
|
|
@ -50,6 +50,15 @@ func ReplaceSanitizer() {
|
||||||
|
|
||||||
// Allow <kbd> tags for keyboard shortcut styling
|
// Allow <kbd> tags for keyboard shortcut styling
|
||||||
sanitizer.policy.AllowElements("kbd")
|
sanitizer.policy.AllowElements("kbd")
|
||||||
|
|
||||||
|
// Custom keyword markup
|
||||||
|
for _, rule := range setting.ExternalSanitizerRules {
|
||||||
|
if rule.Regexp != nil {
|
||||||
|
sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
|
||||||
|
} else {
|
||||||
|
sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
|
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
|
||||||
|
|
|
@ -9,11 +9,14 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"code.gitea.io/gitea/modules/log"
|
"code.gitea.io/gitea/modules/log"
|
||||||
|
|
||||||
|
"gopkg.in/ini.v1"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExternalMarkupParsers represents the external markup parsers
|
// ExternalMarkupParsers represents the external markup parsers
|
||||||
var (
|
var (
|
||||||
ExternalMarkupParsers []MarkupParser
|
ExternalMarkupParsers []MarkupParser
|
||||||
|
ExternalSanitizerRules []MarkupSanitizerRule
|
||||||
)
|
)
|
||||||
|
|
||||||
// MarkupParser defines the external parser configured in ini
|
// MarkupParser defines the external parser configured in ini
|
||||||
|
@ -25,8 +28,15 @@ type MarkupParser struct {
|
||||||
IsInputFile bool
|
IsInputFile bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MarkupSanitizerRule is a single custom sanitizer policy entry: it
// whitelists one attribute on one HTML element, optionally restricted to
// attribute values that match a regular expression.
type MarkupSanitizerRule struct {
	// Element is the HTML element the rule applies to.
	Element string
	// AllowAttr is the attribute that is permitted on Element.
	AllowAttr string
	// Regexp is the pattern the attribute value is matched against. A nil
	// Regexp allows the attribute without any value matching.
	Regexp *regexp.Regexp
}
|
||||||
|
|
||||||
func newMarkup() {
|
func newMarkup() {
|
||||||
extensionReg := regexp.MustCompile(`\.\w`)
|
|
||||||
for _, sec := range Cfg.Section("markup").ChildSections() {
|
for _, sec := range Cfg.Section("markup").ChildSections() {
|
||||||
name := strings.TrimPrefix(sec.Name(), "markup.")
|
name := strings.TrimPrefix(sec.Name(), "markup.")
|
||||||
if name == "" {
|
if name == "" {
|
||||||
|
@ -34,6 +44,72 @@ func newMarkup() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if name == "sanitizer" {
|
||||||
|
newMarkupSanitizer(name, sec)
|
||||||
|
} else {
|
||||||
|
newMarkupRenderer(name, sec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newMarkupSanitizer(name string, sec *ini.Section) {
|
||||||
|
haveElement := sec.HasKey("ELEMENT")
|
||||||
|
haveAttr := sec.HasKey("ALLOW_ATTR")
|
||||||
|
haveRegexp := sec.HasKey("REGEXP")
|
||||||
|
|
||||||
|
if !haveElement && !haveAttr && !haveRegexp {
|
||||||
|
log.Warn("Skipping empty section: markup.%s.", name)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !haveElement || !haveAttr || !haveRegexp {
|
||||||
|
log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
elements := sec.Key("ELEMENT").ValueWithShadows()
|
||||||
|
allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows()
|
||||||
|
regexps := sec.Key("REGEXP").ValueWithShadows()
|
||||||
|
|
||||||
|
if len(elements) != len(allowAttrs) ||
|
||||||
|
len(elements) != len(regexps) {
|
||||||
|
log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements))
|
||||||
|
|
||||||
|
for index, pattern := range regexps {
|
||||||
|
if pattern == "" {
|
||||||
|
rule := MarkupSanitizerRule{
|
||||||
|
Element: elements[index],
|
||||||
|
AllowAttr: allowAttrs[index],
|
||||||
|
Regexp: nil,
|
||||||
|
}
|
||||||
|
ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate when parsing the config that this is a valid regular
|
||||||
|
// expression. Then we can use regexp.MustCompile(...) later.
|
||||||
|
compiled, err := regexp.Compile(pattern)
|
||||||
|
if err != nil {
|
||||||
|
log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rule := MarkupSanitizerRule{
|
||||||
|
Element: elements[index],
|
||||||
|
AllowAttr: allowAttrs[index],
|
||||||
|
Regexp: compiled,
|
||||||
|
}
|
||||||
|
ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newMarkupRenderer(name string, sec *ini.Section) {
|
||||||
|
extensionReg := regexp.MustCompile(`\.\w`)
|
||||||
|
|
||||||
extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
|
extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
|
||||||
var exts = make([]string, 0, len(extensions))
|
var exts = make([]string, 0, len(extensions))
|
||||||
for _, extension := range extensions {
|
for _, extension := range extensions {
|
||||||
|
@ -46,13 +122,13 @@ func newMarkup() {
|
||||||
|
|
||||||
if len(exts) == 0 {
|
if len(exts) == 0 {
|
||||||
log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
|
log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
|
||||||
continue
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
command := sec.Key("RENDER_COMMAND").MustString("")
|
command := sec.Key("RENDER_COMMAND").MustString("")
|
||||||
if command == "" {
|
if command == "" {
|
||||||
log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
|
log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
|
||||||
continue
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
|
ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
|
||||||
|
@ -62,5 +138,4 @@ func newMarkup() {
|
||||||
Command: command,
|
Command: command,
|
||||||
IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false),
|
IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false),
|
||||||
})
|
})
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue