mirror of https://github.com/ericchiang/pup
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
334 lines
7.5 KiB
334 lines
7.5 KiB
package main |
|
|
|
import ( |
|
"encoding/json" |
|
"fmt" |
|
"regexp" |
|
"strings" |
|
|
|
"github.com/fatih/color" |
|
colorable "github.com/mattn/go-colorable" |
|
"golang.org/x/net/html" |
|
"golang.org/x/net/html/atom" |
|
) |
|
|
|
func init() { |
|
color.Output = colorable.NewColorableStdout() |
|
} |
|
|
|
type Displayer interface { |
|
Display([]*html.Node) |
|
} |
|
|
|
func ParseDisplayer(cmd string) error { |
|
attrRe := regexp.MustCompile(`attr\{([a-zA-Z\-]+)\}`) |
|
if cmd == "text{}" { |
|
pupDisplayer = TextDisplayer{} |
|
} else if cmd == "json{}" { |
|
pupDisplayer = JSONDisplayer{} |
|
} else if match := attrRe.FindAllStringSubmatch(cmd, -1); len(match) == 1 { |
|
pupDisplayer = AttrDisplayer{ |
|
Attr: match[0][1], |
|
} |
|
} else { |
|
return fmt.Errorf("Unknown displayer") |
|
} |
|
return nil |
|
} |
|
|
|
// Is this node a tag with no end tag such as <meta> or <br>? |
|
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements |
|
func isVoidElement(n *html.Node) bool { |
|
switch n.DataAtom { |
|
case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed, |
|
atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link, |
|
atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr: |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
var ( |
|
// Colors |
|
tagColor *color.Color = color.New(color.FgCyan) |
|
tokenColor = color.New(color.FgCyan) |
|
attrKeyColor = color.New(color.FgMagenta) |
|
quoteColor = color.New(color.FgBlue) |
|
commentColor = color.New(color.FgYellow) |
|
) |
|
|
|
type TreeDisplayer struct { |
|
} |
|
|
|
func (t TreeDisplayer) Display(nodes []*html.Node) { |
|
for _, node := range nodes { |
|
t.printNode(node, 0) |
|
} |
|
} |
|
|
|
// The <pre> tag indicates that the text within it should always be formatted |
|
// as is. See https://github.com/ericchiang/pup/issues/33 |
|
func (t TreeDisplayer) printPre(n *html.Node) { |
|
switch n.Type { |
|
case html.TextNode: |
|
s := n.Data |
|
if pupEscapeHTML { |
|
// don't escape javascript |
|
if n.Parent == nil || n.Parent.DataAtom != atom.Script { |
|
s = html.EscapeString(s) |
|
} |
|
} |
|
fmt.Print(s) |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
t.printPre(c) |
|
} |
|
case html.ElementNode: |
|
fmt.Printf("<%s", n.Data) |
|
for _, a := range n.Attr { |
|
val := a.Val |
|
if pupEscapeHTML { |
|
val = html.EscapeString(val) |
|
} |
|
fmt.Printf(` %s="%s"`, a.Key, val) |
|
} |
|
fmt.Print(">") |
|
if !isVoidElement(n) { |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
t.printPre(c) |
|
} |
|
fmt.Printf("</%s>", n.Data) |
|
} |
|
case html.CommentNode: |
|
data := n.Data |
|
if pupEscapeHTML { |
|
data = html.EscapeString(data) |
|
} |
|
fmt.Printf("<!--%s-->\n", data) |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
t.printPre(c) |
|
} |
|
case html.DoctypeNode, html.DocumentNode: |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
t.printPre(c) |
|
} |
|
} |
|
} |
|
|
|
// Print a node and all of it's children to `maxlevel`. |
|
func (t TreeDisplayer) printNode(n *html.Node, level int) { |
|
switch n.Type { |
|
case html.TextNode: |
|
s := n.Data |
|
if pupEscapeHTML { |
|
// don't escape javascript |
|
if n.Parent == nil || n.Parent.DataAtom != atom.Script { |
|
s = html.EscapeString(s) |
|
} |
|
} |
|
s = strings.TrimSpace(s) |
|
if s != "" { |
|
t.printIndent(level) |
|
fmt.Println(s) |
|
} |
|
case html.ElementNode: |
|
t.printIndent(level) |
|
// TODO: allow pre with color |
|
if n.DataAtom == atom.Pre && !pupPrintColor && pupPreformatted { |
|
t.printPre(n) |
|
fmt.Println() |
|
return |
|
} |
|
if pupPrintColor { |
|
tokenColor.Print("<") |
|
tagColor.Printf("%s", n.Data) |
|
} else { |
|
fmt.Printf("<%s", n.Data) |
|
} |
|
for _, a := range n.Attr { |
|
val := a.Val |
|
if pupEscapeHTML { |
|
val = html.EscapeString(val) |
|
} |
|
if pupPrintColor { |
|
fmt.Print(" ") |
|
attrKeyColor.Printf("%s", a.Key) |
|
tokenColor.Print("=") |
|
quoteColor.Printf(`"%s"`, val) |
|
} else { |
|
fmt.Printf(` %s="%s"`, a.Key, val) |
|
} |
|
} |
|
if pupPrintColor { |
|
tokenColor.Println(">") |
|
} else { |
|
fmt.Println(">") |
|
} |
|
if !isVoidElement(n) { |
|
t.printChildren(n, level+1) |
|
t.printIndent(level) |
|
if pupPrintColor { |
|
tokenColor.Print("</") |
|
tagColor.Printf("%s", n.Data) |
|
tokenColor.Println(">") |
|
} else { |
|
fmt.Printf("</%s>\n", n.Data) |
|
} |
|
} |
|
case html.CommentNode: |
|
t.printIndent(level) |
|
data := n.Data |
|
if pupEscapeHTML { |
|
data = html.EscapeString(data) |
|
} |
|
if pupPrintColor { |
|
commentColor.Printf("<!--%s-->\n", data) |
|
} else { |
|
fmt.Printf("<!--%s-->\n", data) |
|
} |
|
t.printChildren(n, level) |
|
case html.DoctypeNode, html.DocumentNode: |
|
t.printChildren(n, level) |
|
} |
|
} |
|
|
|
func (t TreeDisplayer) printChildren(n *html.Node, level int) { |
|
if pupMaxPrintLevel > -1 { |
|
if level >= pupMaxPrintLevel { |
|
t.printIndent(level) |
|
fmt.Println("...") |
|
return |
|
} |
|
} |
|
child := n.FirstChild |
|
for child != nil { |
|
t.printNode(child, level) |
|
child = child.NextSibling |
|
} |
|
} |
|
|
|
func (t TreeDisplayer) printIndent(level int) { |
|
for ; level > 0; level-- { |
|
fmt.Print(pupIndentString) |
|
} |
|
} |
|
|
|
// Print the text of a node |
|
type TextDisplayer struct{} |
|
|
|
func (t TextDisplayer) Display(nodes []*html.Node) { |
|
for _, node := range nodes { |
|
if node.Type == html.TextNode { |
|
data := node.Data |
|
if pupEscapeHTML { |
|
// don't escape javascript |
|
if node.Parent == nil || node.Parent.DataAtom != atom.Script { |
|
data = html.EscapeString(data) |
|
} |
|
} |
|
fmt.Println(data) |
|
} |
|
children := []*html.Node{} |
|
child := node.FirstChild |
|
for child != nil { |
|
children = append(children, child) |
|
child = child.NextSibling |
|
} |
|
t.Display(children) |
|
} |
|
} |
|
|
|
// Print the attribute of a node |
|
type AttrDisplayer struct { |
|
Attr string |
|
} |
|
|
|
func (a AttrDisplayer) Display(nodes []*html.Node) { |
|
for _, node := range nodes { |
|
attributes := node.Attr |
|
for _, attr := range attributes { |
|
if attr.Key == a.Attr { |
|
val := attr.Val |
|
if pupEscapeHTML { |
|
val = html.EscapeString(val) |
|
} |
|
fmt.Printf("%s\n", val) |
|
} |
|
} |
|
} |
|
} |
|
|
|
// Print nodes as a JSON list |
|
type JSONDisplayer struct{} |
|
|
|
// returns a jsonifiable struct |
|
func jsonify(node *html.Node) map[string]interface{} { |
|
vals := map[string]interface{}{} |
|
if len(node.Attr) > 0 { |
|
for _, attr := range node.Attr { |
|
if pupEscapeHTML { |
|
vals[attr.Key] = html.EscapeString(attr.Val) |
|
} else { |
|
vals[attr.Key] = attr.Val |
|
} |
|
} |
|
} |
|
vals["tag"] = node.DataAtom.String() |
|
children := []interface{}{} |
|
for child := node.FirstChild; child != nil; child = child.NextSibling { |
|
switch child.Type { |
|
case html.ElementNode: |
|
children = append(children, jsonify(child)) |
|
case html.TextNode: |
|
text := strings.TrimSpace(child.Data) |
|
if text != "" { |
|
if pupEscapeHTML { |
|
// don't escape javascript |
|
if node.DataAtom != atom.Script { |
|
text = html.EscapeString(text) |
|
} |
|
} |
|
// if there is already text we'll append it |
|
currText, ok := vals["text"] |
|
if ok { |
|
text = fmt.Sprintf("%s %s", currText, text) |
|
} |
|
vals["text"] = text |
|
} |
|
case html.CommentNode: |
|
comment := strings.TrimSpace(child.Data) |
|
if pupEscapeHTML { |
|
comment = html.EscapeString(comment) |
|
} |
|
currComment, ok := vals["comment"] |
|
if ok { |
|
comment = fmt.Sprintf("%s %s", currComment, comment) |
|
} |
|
vals["comment"] = comment |
|
} |
|
} |
|
if len(children) > 0 { |
|
vals["children"] = children |
|
} |
|
return vals |
|
} |
|
|
|
func (j JSONDisplayer) Display(nodes []*html.Node) { |
|
var data []byte |
|
var err error |
|
jsonNodes := []map[string]interface{}{} |
|
for _, node := range nodes { |
|
jsonNodes = append(jsonNodes, jsonify(node)) |
|
} |
|
data, err = json.MarshalIndent(&jsonNodes, "", pupIndentString) |
|
if err != nil { |
|
panic("Could not jsonify nodes") |
|
} |
|
fmt.Printf("%s\n", data) |
|
} |
|
|
|
// Print the number of features returned |
|
type NumDisplayer struct{} |
|
|
|
func (d NumDisplayer) Display(nodes []*html.Node) { |
|
fmt.Println(len(nodes)) |
|
}
|
|
|