mirror of
https://github.com/ericchiang/pup
synced 2024-11-30 20:08:13 +00:00
129 lines
2.4 KiB
Go
129 lines
2.4 KiB
Go
package selector
|
|
|
|
import (
|
|
"code.google.com/p/go.net/html"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
type Selector struct {
|
|
Class, ID, Name *regexp.Regexp
|
|
Attrs map[string]*regexp.Regexp
|
|
}
|
|
|
|
type SelectorType string
|
|
|
|
const (
|
|
Class SelectorType = "class"
|
|
ID SelectorType = "id"
|
|
Name SelectorType = "name"
|
|
)
|
|
|
|
func setTypeValue(s *Selector, a SelectorType, v string) error {
|
|
if v == "" {
|
|
return nil
|
|
}
|
|
// wildcards become '.*'
|
|
v = strings.Replace(v, "*", ".*", -1)
|
|
r, err := regexp.Compile(fmt.Sprintf("^%s$", v))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
switch a {
|
|
case Class:
|
|
s.Class = r
|
|
case ID:
|
|
s.ID = r
|
|
case Name:
|
|
s.Name = r
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func ParseSelector(s string) (Selector, error) {
|
|
attrs := map[string]*regexp.Regexp{}
|
|
selector := &Selector{nil, nil, nil, attrs}
|
|
nextAttr := Name
|
|
start := 0
|
|
for i, c := range s {
|
|
switch c {
|
|
case '.':
|
|
err := setTypeValue(selector, nextAttr, s[start:i])
|
|
if err != nil {
|
|
return *selector, err
|
|
}
|
|
nextAttr = Class
|
|
start = i + 1
|
|
case '#':
|
|
err := setTypeValue(selector, nextAttr, s[start:i])
|
|
if err != nil {
|
|
return *selector, err
|
|
}
|
|
nextAttr = ID
|
|
start = i + 1
|
|
}
|
|
}
|
|
setTypeValue(selector, nextAttr, s[start:])
|
|
return *selector, nil
|
|
}
|
|
|
|
func (sel Selector) FindAllChildren(node *html.Node) []*html.Node {
|
|
selected := []*html.Node{}
|
|
child := node.FirstChild
|
|
for child != nil {
|
|
childSelected := sel.FindAll(child)
|
|
selected = append(selected, childSelected...)
|
|
child = child.NextSibling
|
|
}
|
|
return selected
|
|
}
|
|
|
|
func (sel Selector) FindAll(node *html.Node) []*html.Node {
|
|
selected := []*html.Node{}
|
|
if sel.Match(node) {
|
|
return []*html.Node{node}
|
|
}
|
|
child := node.FirstChild
|
|
for child != nil {
|
|
childSelected := sel.FindAll(child)
|
|
selected = append(selected, childSelected...)
|
|
child = child.NextSibling
|
|
}
|
|
return selected
|
|
}
|
|
|
|
func (sel Selector) Match(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
if sel.Name != nil {
|
|
if !sel.Name.MatchString(strings.ToLower(node.Data)) {
|
|
return false
|
|
}
|
|
}
|
|
classMatched := sel.Class == nil
|
|
idMatched := sel.ID == nil
|
|
for _, attr := range node.Attr {
|
|
switch attr.Key {
|
|
case "class":
|
|
if !classMatched {
|
|
if !sel.Class.MatchString(attr.Val) {
|
|
return false
|
|
} else {
|
|
classMatched = true
|
|
}
|
|
}
|
|
case "id":
|
|
if !idMatched {
|
|
if !sel.ID.MatchString(attr.Val) {
|
|
return false
|
|
} else {
|
|
idMatched = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return classMatched && idMatched
|
|
}
|