|
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
-
- package html
-
- import (
- "bufio"
- "errors"
- "fmt"
- "io"
- "strings"
- )
-
- type writer interface {
- io.Writer
- io.ByteWriter
- WriteString(string) (int, error)
- }
-
- // Render renders the parse tree n to the given writer.
- //
- // Rendering is done on a 'best effort' basis: calling Parse on the output of
- // Render will always result in something similar to the original tree, but it
- // is not necessarily an exact clone unless the original tree was 'well-formed'.
- // 'Well-formed' is not easily specified; the HTML5 specification is
- // complicated.
- //
- // Calling Parse on arbitrary input typically results in a 'well-formed' parse
- // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
- // For example, in a 'well-formed' parse tree, no <a> element is a child of
- // another <a> element: parsing "<a><a>" results in two sibling elements.
- // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
- // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
- // children; the <a> is reparented to the <table>'s parent. However, calling
- // Parse on "<a><table><a>" does not return an error, but the result has an <a>
- // element with an <a> child, and is therefore not 'well-formed'.
- //
- // Programmatically constructed trees are typically also 'well-formed', but it
- // is possible to construct a tree that looks innocuous but, when rendered and
- // re-parsed, results in a different tree. A simple example is that a solitary
- // text node would become a tree containing <html>, <head> and <body> elements.
- // Another example is that the programmatic equivalent of "a<head>b</head>c"
- // becomes "<html><head><head/><body>abc</body></html>".
- func Render(w io.Writer, n *Node) error {
- if x, ok := w.(writer); ok {
- return render(x, n)
- }
- buf := bufio.NewWriter(w)
- if err := render(buf, n); err != nil {
- return err
- }
- return buf.Flush()
- }
-
- // plaintextAbort is returned from render1 when a <plaintext> element
- // has been rendered. No more end tags should be rendered after that.
- var plaintextAbort = errors.New("html: internal error (plaintext abort)")
-
- func render(w writer, n *Node) error {
- err := render1(w, n)
- if err == plaintextAbort {
- err = nil
- }
- return err
- }
-
- func render1(w writer, n *Node) error {
- // Render non-element nodes; these are the easy cases.
- switch n.Type {
- case ErrorNode:
- return errors.New("html: cannot render an ErrorNode node")
- case TextNode:
- return escape(w, n.Data)
- case DocumentNode:
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- if err := render1(w, c); err != nil {
- return err
- }
- }
- return nil
- case ElementNode:
- // No-op.
- case CommentNode:
- if _, err := w.WriteString("<!--"); err != nil {
- return err
- }
- if _, err := w.WriteString(n.Data); err != nil {
- return err
- }
- if _, err := w.WriteString("-->"); err != nil {
- return err
- }
- return nil
- case DoctypeNode:
- if _, err := w.WriteString("<!DOCTYPE "); err != nil {
- return err
- }
- if _, err := w.WriteString(n.Data); err != nil {
- return err
- }
- if n.Attr != nil {
- var p, s string
- for _, a := range n.Attr {
- switch a.Key {
- case "public":
- p = a.Val
- case "system":
- s = a.Val
- }
- }
- if p != "" {
- if _, err := w.WriteString(" PUBLIC "); err != nil {
- return err
- }
- if err := writeQuoted(w, p); err != nil {
- return err
- }
- if s != "" {
- if err := w.WriteByte(' '); err != nil {
- return err
- }
- if err := writeQuoted(w, s); err != nil {
- return err
- }
- }
- } else if s != "" {
- if _, err := w.WriteString(" SYSTEM "); err != nil {
- return err
- }
- if err := writeQuoted(w, s); err != nil {
- return err
- }
- }
- }
- return w.WriteByte('>')
- default:
- return errors.New("html: unknown node type")
- }
-
- // Render the <xxx> opening tag.
- if err := w.WriteByte('<'); err != nil {
- return err
- }
- if _, err := w.WriteString(n.Data); err != nil {
- return err
- }
- for _, a := range n.Attr {
- if err := w.WriteByte(' '); err != nil {
- return err
- }
- if a.Namespace != "" {
- if _, err := w.WriteString(a.Namespace); err != nil {
- return err
- }
- if err := w.WriteByte(':'); err != nil {
- return err
- }
- }
- if _, err := w.WriteString(a.Key); err != nil {
- return err
- }
- if _, err := w.WriteString(`="`); err != nil {
- return err
- }
- if err := escape(w, a.Val); err != nil {
- return err
- }
- if err := w.WriteByte('"'); err != nil {
- return err
- }
- }
- if voidElements[n.Data] {
- if n.FirstChild != nil {
- return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
- }
- _, err := w.WriteString("/>")
- return err
- }
- if err := w.WriteByte('>'); err != nil {
- return err
- }
-
- // Add initial newline where there is danger of a newline beging ignored.
- if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
- switch n.Data {
- case "pre", "listing", "textarea":
- if err := w.WriteByte('\n'); err != nil {
- return err
- }
- }
- }
-
- // Render any child nodes.
- switch n.Data {
- case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- if c.Type == TextNode {
- if _, err := w.WriteString(c.Data); err != nil {
- return err
- }
- } else {
- if err := render1(w, c); err != nil {
- return err
- }
- }
- }
- if n.Data == "plaintext" {
- // Don't render anything else. <plaintext> must be the
- // last element in the file, with no closing tag.
- return plaintextAbort
- }
- default:
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- if err := render1(w, c); err != nil {
- return err
- }
- }
- }
-
- // Render the </xxx> closing tag.
- if _, err := w.WriteString("</"); err != nil {
- return err
- }
- if _, err := w.WriteString(n.Data); err != nil {
- return err
- }
- return w.WriteByte('>')
- }
-
- // writeQuoted writes s to w surrounded by quotes. Normally it will use double
- // quotes, but if s contains a double quote, it will use single quotes.
- // It is used for writing the identifiers in a doctype declaration.
- // In valid HTML, they can't contain both types of quotes.
- func writeQuoted(w writer, s string) error {
- var q byte = '"'
- if strings.Contains(s, `"`) {
- q = '\''
- }
- if err := w.WriteByte(q); err != nil {
- return err
- }
- if _, err := w.WriteString(s); err != nil {
- return err
- }
- if err := w.WriteByte(q); err != nil {
- return err
- }
- return nil
- }
-
- // Section 12.1.2, "Elements", gives this list of void elements. Void elements
- // are those that can't have any contents.
- var voidElements = map[string]bool{
- "area": true,
- "base": true,
- "br": true,
- "col": true,
- "command": true,
- "embed": true,
- "hr": true,
- "img": true,
- "input": true,
- "keygen": true,
- "link": true,
- "meta": true,
- "param": true,
- "source": true,
- "track": true,
- "wbr": true,
- }
|