forkjo/vendor/github.com/yuin/goldmark/parser/html_block.go
zeripath 27757714d0 Change markdown rendering from blackfriday to goldmark (#9533)
* Move to goldmark

Markdown rendering moved from blackfriday to goldmark.

Multiple subtle changes required to the goldmark extensions to keep
current rendering and defaults.

Can go further with goldmark linkify and have this work within markdown
rendering making the link processor unnecessary.

Need to think about how to go about allowing extensions - at present it
seems that these would be hard to do without recompilation.

* linter fixes

Co-authored-by: Lauris BH <lauris@nix.lv>
2019-12-31 03:53:28 +02:00

228 lines
6 KiB
Go

package parser
import (
"bytes"
"regexp"
"strings"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// allowedBlockTags is the set of lower-case tag names for which Open starts a
// type 6 HTML block. Only key presence is consulted by the parser; every
// value is true.
var allowedBlockTags = map[string]bool{
	"address": true, "article": true, "aside": true,
	"base": true, "basefont": true, "blockquote": true, "body": true,
	"caption": true, "center": true, "col": true, "colgroup": true,
	"dd": true, "details": true, "dialog": true, "dir": true, "div": true, "dl": true, "dt": true,
	"fieldset": true, "figcaption": true, "figure": true, "footer": true, "form": true, "frame": true, "frameset": true,
	"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
	"head": true, "header": true, "hr": true, "html": true,
	"iframe": true,
	"legend": true, "li": true, "link": true,
	"main": true, "menu": true, "menuitem": true, "meta": true,
	"nav": true, "noframes": true,
	"ol": true, "optgroup": true, "option": true,
	"p": true, "param": true,
	"section": true, "source": true, "summary": true,
	"table": true, "tbody": true, "td": true, "tfoot": true, "th": true, "thead": true, "title": true, "tr": true, "track": true,
	"ul": true,
}
// Start and end conditions for HTML block types 1 through 6. Every opening
// pattern tolerates up to three leading spaces. Types 2 to 5 are terminated
// by a literal byte sequence rather than by a regular expression.
var (
	// Type 1: <script>, <pre> or <style> (case-insensitive), closed by the
	// matching end tag appearing anywhere on a line.
	htmlBlockType1OpenRegexp  = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`)
	htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style)>.*`)

	// Type 2: comment, "<!--" up to "-->".
	htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
	htmlBlockType2Close      = []byte("-->")

	// Type 3: processing instruction, "<?" up to "?>".
	htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
	htmlBlockType3Close      = []byte("?>")

	// Type 4: declaration, "<!" followed by upper-case letters, up to ">".
	htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`)
	htmlBlockType4Close      = []byte(">")

	// Type 5: CDATA section, "<![CDATA[" up to "]]>".
	htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
	htmlBlockType5Close      = []byte("]]>")

	// Type 6: an open or closing tag whose name (capture group 1) is then
	// looked up in allowedBlockTags by the caller.
	htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.*|>.*|/>.*|)\n?$`)
)
// htmlBlockType7Regexp matches a line that consists solely of one complete
// open or closing tag, preceded by at most three spaces and followed only by
// whitespace.
//
// Capture groups: 1 is the "/" of a closing tag (unset for an open tag), 2 is
// the tag name, and 3 is the run of attributes (empty when there are none).
//
// The tag terminator is a non-capturing `(?:>|/>)`. It was previously spelled
// `(:?>|/>)`, which is a capturing group with an optional literal colon and
// therefore also accepted malformed input such as "<foo:>".
var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `*)(?:>|/>)\s*\n?$`)
// htmlBlockParser is the block parser for raw HTML blocks. It has no fields,
// so one instance can serve every caller.
type htmlBlockParser struct{}

// defaultHTMLBlockParser is the single instance returned by
// NewHTMLBlockParser.
var defaultHTMLBlockParser = new(htmlBlockParser)
// NewHTMLBlockParser returns a BlockParser that can parse HTML blocks.
// No new value is allocated: every call returns the package-level
// defaultHTMLBlockParser instance.
func NewHTMLBlockParser() BlockParser {
return defaultHTMLBlockParser
}
// Trigger returns the bytes that can start an HTML block: only '<'.
func (b *htmlBlockParser) Trigger() []byte {
	return []byte("<")
}
// Open tries to start an HTML block on the current line.
//
// The byte at the block offset reported by pc must be '<'; otherwise no block
// is opened. The line is then tested against the start conditions for types
// 1 through 5 in order. If none applies, the complete-tag pattern is tried:
// a tag listed in allowedBlockTags opens a type 6 block, and any other tag
// except script, style and pre opens a type 7 block, provided the last opened
// block is not a paragraph and the tag is not a closing tag with attributes.
// When still undecided, the looser type 6 pattern gets a final chance.
//
// On success the line's segment is stored on the new node and the node is
// returned with NoChildren; on failure the result is (nil, NoChildren).
func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
	line, segment := reader.PeekLine()
	lastOpened := pc.LastOpenedBlock().Node
	if offset := pc.BlockOffset(); offset < 0 || line[offset] != '<' {
		return nil, NoChildren
	}

	var block *ast.HTMLBlock
	switch {
	case htmlBlockType1OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType1)
	case htmlBlockType2OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType2)
	case htmlBlockType3OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType3)
	case htmlBlockType4OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType4)
	case htmlBlockType5OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType5)
	default:
		loc := htmlBlockType7Regexp.FindSubmatchIndex(line)
		if loc == nil {
			break
		}
		closing := loc[2] > -1 && bytes.Equal(line[loc[2]:loc[3]], []byte("/"))
		withAttrs := loc[6] != loc[7]
		name := strings.ToLower(string(line[loc[4]:loc[5]]))
		if _, known := allowedBlockTags[name]; known {
			block = ast.NewHTMLBlock(ast.HTMLBlockType6)
			break
		}
		switch name {
		case "script", "style", "pre":
			// These names belong to type 1 and never open a type 7 block.
		default:
			// type 7 can not interrupt paragraph
			if !ast.IsParagraph(lastOpened) && (!closing || !withAttrs) {
				block = ast.NewHTMLBlock(ast.HTMLBlockType7)
			}
		}
	}

	if block == nil {
		// Last resort: a line that merely begins with an allowed tag.
		if loc := htmlBlockType6Regexp.FindSubmatchIndex(line); loc != nil {
			name := strings.ToLower(string(line[loc[2]:loc[3]]))
			if _, known := allowedBlockTags[name]; known {
				block = ast.NewHTMLBlock(ast.HTMLBlockType6)
			}
		}
	}

	if block == nil {
		return nil, NoChildren
	}
	// Advance by all but the final byte of the line.
	reader.Advance(segment.Len() - 1)
	block.Lines().Append(segment)
	return block, NoChildren
}
// Continue decides whether the current line still belongs to the open HTML
// block held in node.
//
// Types 1 to 5 end on a line that satisfies the type's end condition: a
// closing script/pre/style tag for type 1, or the literal marker for types 2
// to 5. If the block so far holds only its opening line and that line already
// satisfies the condition, Close is returned without consuming the current
// line. If the current line satisfies it, that line is recorded as the
// block's ClosureLine, consumed, and Close is returned. Types 6 and 7 end on
// a blank line. In every other case the line is appended to the block and
// Continue|NoChildren is returned.
func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
	htmlBlock := node.(*ast.HTMLBlock)
	lines := htmlBlock.Lines()
	line, segment := reader.PeekLine()

	// byMarker builds an end-condition predicate from a literal byte marker.
	byMarker := func(marker []byte) func([]byte) bool {
		return func(data []byte) bool {
			return bytes.Contains(data, marker)
		}
	}

	// closes reports whether a line satisfies the end condition for this
	// block type; it stays nil for types that have no such condition.
	var closes func([]byte) bool
	switch htmlBlock.HTMLBlockType {
	case ast.HTMLBlockType1:
		closes = htmlBlockType1CloseRegexp.Match
	case ast.HTMLBlockType2:
		closes = byMarker(htmlBlockType2Close)
	case ast.HTMLBlockType3:
		closes = byMarker(htmlBlockType3Close)
	case ast.HTMLBlockType4:
		closes = byMarker(htmlBlockType4Close)
	case ast.HTMLBlockType5:
		closes = byMarker(htmlBlockType5Close)
	case ast.HTMLBlockType6, ast.HTMLBlockType7:
		if util.IsBlank(line) {
			return Close
		}
	}

	if closes != nil {
		if lines.Len() == 1 {
			// The opening line may itself contain the end condition.
			opening := lines.At(0)
			if closes(opening.Value(reader.Source())) {
				return Close
			}
		}
		if closes(line) {
			htmlBlock.ClosureLine = segment
			reader.Advance(segment.Len() - 1)
			return Close
		}
	}

	node.Lines().Append(segment)
	reader.Advance(segment.Len() - 1)
	return Continue | NoChildren
}
// Close does nothing for HTML blocks. All per-block state, including the
// closing line, is recorded on the node by Continue.
func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
// nothing to do
}
// CanInterruptParagraph always returns true. Open applies the narrower rule
// itself: it refuses to start a type 7 block when the last opened block is a
// paragraph.
func (b *htmlBlockParser) CanInterruptParagraph() bool {
return true
}
// CanAcceptIndentedLine always returns false.
// NOTE(review): presumably this tells the block-parsing loop not to offer
// indented (code-block level) lines to this parser; confirm against the
// BlockParser interface documentation.
func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
return false
}