1
0
mirror of https://github.com/ericchiang/pup synced 2024-11-30 20:08:13 +00:00
pup/selector/selector.go
2014-09-01 12:54:45 -04:00

129 lines
2.4 KiB
Go

package selector
import (
"code.google.com/p/go.net/html"
"fmt"
"regexp"
"strings"
)
// Selector holds the compiled patterns that an HTML element is tested
// against. A nil Class, ID or Name pattern places no constraint on the
// corresponding property.
//
// The field order is relied upon by positional composite literals, so new
// fields must be appended rather than inserted.
type Selector struct {
	// Class is matched against the value of the element's "class" attribute.
	Class *regexp.Regexp
	// ID is matched against the value of the element's "id" attribute.
	ID *regexp.Regexp
	// Name is matched against the lower-cased element (tag) name.
	Name *regexp.Regexp
	// Attrs maps attribute names to patterns. ParseSelector initialises it
	// to an empty map; nothing in this file populates or consults it.
	Attrs map[string]*regexp.Regexp
}
// SelectorType names the element property that a selector fragment
// constrains.
type SelectorType string

// The selector fragment kinds understood by setTypeValue.
const (
	// Class selects on the "class" attribute (introduced by '.').
	Class = SelectorType("class")
	// ID selects on the "id" attribute (introduced by '#').
	ID = SelectorType("id")
	// Name selects on the element name (the leading, unprefixed fragment).
	Name = SelectorType("name")
)
// setTypeValue compiles the fragment v into an anchored regular expression
// and stores it in the field of s selected by a.
//
// An empty v is a no-op and leaves s untouched. Every '*' in v is treated as
// a wildcard and rewritten to '.*' before compilation. A later call for the
// same SelectorType overwrites the pattern stored by an earlier one. A value
// of a other than Class, ID or Name compiles v but stores nothing.
//
// It returns the error from regexp.Compile when v is not a valid pattern.
func setTypeValue(s *Selector, a SelectorType, v string) error {
	if v == "" {
		return nil
	}
	// wildcards become '.*'
	v = strings.Replace(v, "*", ".*", -1)
	// Wrap the fragment in a non-capturing group so that the anchors apply to
	// the whole fragment; without it "a|b" would compile to "^a|b$", which
	// matches any string merely starting with "a" or ending with "b".
	r, err := regexp.Compile(fmt.Sprintf("^(?:%s)$", v))
	if err != nil {
		return err
	}
	switch a {
	case Class:
		s.Class = r
	case ID:
		s.ID = r
	case Name:
		s.Name = r
	}
	return nil
}
// ParseSelector parses a selector string of the form "name.class#id" into a
// Selector.
//
// The text before the first '.' or '#' constrains the element name; text
// following a '.' constrains the class and text following a '#' constrains
// the id. Empty fragments are ignored, and when the same kind of fragment
// appears more than once the last one wins. Each fragment is compiled by
// setTypeValue, so '*' acts as a wildcard.
//
// A non-nil error is returned when any fragment, including the trailing
// one, fails to compile. In that case the returned Selector holds only the
// fragments parsed before the failure and should not be used.
func ParseSelector(s string) (Selector, error) {
	selector := &Selector{Attrs: map[string]*regexp.Regexp{}}
	nextAttr := Name
	start := 0
	for i, c := range s {
		// Decide which kind of fragment the delimiter introduces; any other
		// rune is part of the current fragment.
		var following SelectorType
		switch c {
		case '.':
			following = Class
		case '#':
			following = ID
		default:
			continue
		}
		if err := setTypeValue(selector, nextAttr, s[start:i]); err != nil {
			return *selector, err
		}
		nextAttr = following
		// Both delimiters are single-byte, so the next fragment starts one
		// byte past the delimiter.
		start = i + 1
	}
	// The trailing fragment has no closing delimiter; its compile error must
	// be reported just like the others instead of being dropped.
	if err := setTypeValue(selector, nextAttr, s[start:]); err != nil {
		return *selector, err
	}
	return *selector, nil
}
// FindAllChildren runs FindAll on each direct child of node, in sibling
// order, and returns the concatenated results. node itself is never tested
// against the selector. The result is an empty, non-nil slice when nothing
// is found.
func (sel Selector) FindAllChildren(node *html.Node) []*html.Node {
	found := []*html.Node{}
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		found = append(found, sel.FindAll(c)...)
	}
	return found
}
// FindAll collects the nodes in the tree rooted at node that satisfy the
// selector, visiting children in sibling order.
//
// When node itself matches it is returned alone and its descendants are not
// searched, so matches nested inside another match are not reported
// separately. The result is an empty, non-nil slice when nothing matches.
func (sel Selector) FindAll(node *html.Node) []*html.Node {
	if sel.Match(node) {
		return []*html.Node{node}
	}
	found := []*html.Node{}
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		found = append(found, sel.FindAll(c)...)
	}
	return found
}
// Match reports whether node satisfies every constraint of the selector.
//
// Only element nodes can match. A nil Name, Class or ID pattern imposes no
// constraint. The Name pattern is tested against the lower-cased node.Data.
// The ID pattern is tested against the value of the "id" attribute. The
// Class pattern is satisfied when it matches either the whole value of the
// "class" attribute or any one of its whitespace-separated tokens, so a
// selector for "foo" matches an element with class="foo bar". An element
// lacking an attribute that the selector constrains does not match.
func (sel Selector) Match(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	if sel.Name != nil && !sel.Name.MatchString(strings.ToLower(node.Data)) {
		return false
	}

	// A constraint that is absent counts as already satisfied.
	classMatched := sel.Class == nil
	idMatched := sel.ID == nil
	for _, attr := range node.Attr {
		switch attr.Key {
		case "class":
			if classMatched {
				continue
			}
			// Try the whole attribute value first (keeps patterns that span
			// several class names working), then each individual class name.
			ok := sel.Class.MatchString(attr.Val)
			if !ok {
				for _, token := range strings.Fields(attr.Val) {
					if sel.Class.MatchString(token) {
						ok = true
						break
					}
				}
			}
			if !ok {
				return false
			}
			classMatched = true
		case "id":
			if idMatched {
				continue
			}
			if !sel.ID.MatchString(attr.Val) {
				return false
			}
			idMatched = true
		}
	}
	return classMatched && idMatched
}