Use html.Parse rather than html.ParseFragment (#16223)

* Use html.Parse rather than html.ParseFragment
  html.ParseFragment has caused a few issues, so parse the input as a full document with html.Parse instead.

* Skip the document node returned by html.Parse

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2021-06-21 23:12:22 +01:00 committed by GitHub
parent 36c158bc93
commit d55b5eb0d3
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23

@ -304,27 +304,26 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output
_, _ = res.WriteString("</body></html>")
// parse the HTML
nodes, err := html.ParseFragment(res, nil)
node, err := html.Parse(res)
if err != nil {
return &postProcessError{"invalid HTML", err}
}
for _, node := range nodes {
visitNode(ctx, procs, node, true)
if node.Type == html.DocumentNode {
node = node.FirstChild
}
newNodes := make([]*html.Node, 0, len(nodes))
visitNode(ctx, procs, node, true)
for _, node := range nodes {
if node.Data == "html" {
node = node.FirstChild
for node != nil && node.Data != "body" {
node = node.NextSibling
}
}
if node == nil {
continue
newNodes := make([]*html.Node, 0, 5)
if node.Data == "html" {
node = node.FirstChild
for node != nil && node.Data != "body" {
node = node.NextSibling
}
}
if node != nil {
if node.Data == "body" {
child := node.FirstChild
for child != nil {