mirror of https://github.com/ericchiang/pup
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
152 lines
3.5 KiB
152 lines
3.5 KiB
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"code.google.com/p/go.net/html"
|
|
)
|
|
|
|
type Displayer interface {
|
|
Display(nodes []*html.Node)
|
|
}
|
|
|
|
// TextDisplayer is the Displayer that prints the text content of nodes.
// It carries no configuration, so its zero value is ready to use.
type TextDisplayer struct{}
|
|
|
|
func (t TextDisplayer) Display(nodes []*html.Node) {
|
|
for _, node := range nodes {
|
|
if node.Type == html.TextNode {
|
|
fmt.Println(node.Data)
|
|
}
|
|
children := []*html.Node{}
|
|
child := node.FirstChild
|
|
for child != nil {
|
|
children = append(children, child)
|
|
child = child.NextSibling
|
|
}
|
|
t.Display(children)
|
|
}
|
|
}
|
|
|
|
// AttrDisplayer is the Displayer that prints the value of one attribute.
type AttrDisplayer struct {
	// Attr is the attribute key whose values are printed.
	Attr string
}
|
|
|
|
func (a AttrDisplayer) Display(nodes []*html.Node) {
|
|
for _, node := range nodes {
|
|
attributes := node.Attr
|
|
for _, attr := range attributes {
|
|
if attr.Key == a.Attr {
|
|
val := html.EscapeString(attr.Val)
|
|
fmt.Printf("%s\n", val)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// JSONDisplayer is the Displayer that prints nodes as indented JSON.
// It carries no configuration, so its zero value is ready to use.
type JSONDisplayer struct{}
|
|
|
|
// returns a jsonifiable struct
|
|
func jsonify(node *html.Node) map[string]interface{} {
|
|
vals := map[string]interface{}{}
|
|
if len(node.Attr) > 0 {
|
|
attrs := map[string]string{}
|
|
for _, attr := range node.Attr {
|
|
attrs[attr.Key] = html.EscapeString(attr.Val)
|
|
}
|
|
vals["attrs"] = attrs
|
|
}
|
|
vals["tag"] = node.DataAtom.String()
|
|
children := []interface{}{}
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
switch child.Type {
|
|
case html.ElementNode:
|
|
children = append(children, jsonify(child))
|
|
case html.TextNode:
|
|
text := strings.TrimSpace(child.Data)
|
|
if text != "" {
|
|
// if there is already text we'll append it
|
|
currText, ok := vals["text"]
|
|
if ok {
|
|
text = fmt.Sprintf("%s %s", currText, text)
|
|
}
|
|
vals["text"] = text
|
|
}
|
|
case html.CommentNode:
|
|
comment := strings.TrimSpace(child.Data)
|
|
currComment, ok := vals["comment"]
|
|
if ok {
|
|
comment = fmt.Sprintf("%s %s", currComment, comment)
|
|
}
|
|
vals["comment"] = comment
|
|
}
|
|
}
|
|
if len(children) > 0 {
|
|
vals["children"] = children
|
|
}
|
|
return vals
|
|
}
|
|
|
|
func (j JSONDisplayer) Display(nodes []*html.Node) {
|
|
var data []byte
|
|
var err error
|
|
switch len(nodes) {
|
|
case 1:
|
|
node := nodes[0]
|
|
if node.Type != html.DocumentNode {
|
|
jsonNode := jsonify(nodes[0])
|
|
data, err = json.MarshalIndent(&jsonNode, "", indentString)
|
|
} else {
|
|
children := []*html.Node{}
|
|
child := node.FirstChild
|
|
for child != nil {
|
|
children = append(children, child)
|
|
child = child.NextSibling
|
|
}
|
|
j.Display(children)
|
|
}
|
|
default:
|
|
jsonNodes := []map[string]interface{}{}
|
|
for _, node := range nodes {
|
|
jsonNodes = append(jsonNodes, jsonify(node))
|
|
}
|
|
data, err = json.MarshalIndent(&jsonNodes, "", indentString)
|
|
}
|
|
if err != nil {
|
|
panic("Could not jsonify nodes")
|
|
}
|
|
fmt.Printf("%s", data)
|
|
}
|
|
|
|
// Display function helpers: the patterns NewDisplayFunc uses to recognise a
// display function and decide which Displayer it names.
var (
	// displayMatcher matches any string that ends with a brace pair
	// containing no closing brace, e.g. "x{}" or "x{abc}".
	displayMatcher = regexp.MustCompile(`\{[^\}]*\}$`)
	// textFuncMatcher matches exactly "text{}".
	textFuncMatcher = regexp.MustCompile(`^text\{\}$`)
	// attrFuncMatcher matches "attr{...}"; submatch 1 is the (possibly
	// empty) text between the braces.
	attrFuncMatcher = regexp.MustCompile(`^attr\{([^\}]*)\}$`)
	// jsonFuncMatcher matches "json{...}"; submatch 1 is the (possibly
	// empty) text between the braces.
	jsonFuncMatcher = regexp.MustCompile(`^json\{([^\}]*)\}$`)
)
|
|
|
|
func NewDisplayFunc(text string) (Displayer, error) {
|
|
if !displayMatcher.MatchString(text) {
|
|
return nil, fmt.Errorf("Not a display function")
|
|
}
|
|
switch {
|
|
case textFuncMatcher.MatchString(text):
|
|
return TextDisplayer{}, nil
|
|
case attrFuncMatcher.MatchString(text):
|
|
matches := attrFuncMatcher.FindStringSubmatch(text)
|
|
if len(matches) != 2 {
|
|
return nil, fmt.Errorf("")
|
|
} else {
|
|
return AttrDisplayer{matches[1]}, nil
|
|
}
|
|
case jsonFuncMatcher.MatchString(text):
|
|
return JSONDisplayer{}, nil
|
|
}
|
|
return nil, fmt.Errorf("Not a display function")
|
|
}
|