mirror of
https://github.com/ericchiang/pup
synced 2025-02-05 20:31:26 +00:00
Enabled multiple attribute matchers for the same attribute.
A selector with multiple attribute selectors on the same attribute used only the last one, because `selector.Attrs[attrname]` was overwritten each time. For example, 'a[href^=https][href$=.zip]' gave the same results as 'a[href$=.zip]'. Now the attribute selectors for each attribute are collected into a list, and a node matches only if all of them match.
This commit is contained in:
parent
681d7bb639
commit
f9e1b3ed7d
31
selector.go
31
selector.go
@ -77,7 +77,7 @@ type PseudoClass func(*html.Node) bool
|
|||||||
|
|
||||||
type CSSSelector struct {
|
type CSSSelector struct {
|
||||||
Tag string
|
Tag string
|
||||||
Attrs map[string]*regexp.Regexp
|
Attrs map[string][]*regexp.Regexp
|
||||||
Pseudo PseudoClass
|
Pseudo PseudoClass
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,15 +90,16 @@ func (s CSSSelector) Match(node *html.Node) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for attrKey, matcher := range s.Attrs {
|
for attrKey, matchers := range s.Attrs {
|
||||||
matched := false
|
matched := false
|
||||||
for _, attr := range node.Attr {
|
for _, attr := range node.Attr {
|
||||||
if attrKey == attr.Key {
|
if attrKey == attr.Key {
|
||||||
if !matcher.MatchString(attr.Val) {
|
for _, matcher := range matchers {
|
||||||
return false
|
if !matcher.MatchString(attr.Val) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
matched = true
|
||||||
}
|
}
|
||||||
matched = true
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !matched {
|
if !matched {
|
||||||
@ -116,7 +117,7 @@ func (s CSSSelector) Match(node *html.Node) bool {
|
|||||||
func ParseSelector(cmd string) (selector CSSSelector, err error) {
|
func ParseSelector(cmd string) (selector CSSSelector, err error) {
|
||||||
selector = CSSSelector{
|
selector = CSSSelector{
|
||||||
Tag: "",
|
Tag: "",
|
||||||
Attrs: map[string]*regexp.Regexp{},
|
Attrs: map[string][]*regexp.Regexp{},
|
||||||
Pseudo: nil,
|
Pseudo: nil,
|
||||||
}
|
}
|
||||||
var s scanner.Scanner
|
var s scanner.Scanner
|
||||||
@ -153,13 +154,21 @@ func ParseTagMatcher(selector *CSSSelector, s scanner.Scanner) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func AddAttrSelector(selector *CSSSelector, attrname string, regex *regexp.Regexp) {
|
||||||
|
if slice, ok := selector.Attrs[attrname]; ok && len(slice) > 0 {
|
||||||
|
selector.Attrs[attrname] = append(selector.Attrs[attrname], regex)
|
||||||
|
} else {
|
||||||
|
selector.Attrs[attrname] = []*regexp.Regexp{regex}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Parse a class matcher
|
// Parse a class matcher
|
||||||
// e.g. `.btn`
|
// e.g. `.btn`
|
||||||
func ParseClassMatcher(selector *CSSSelector, s scanner.Scanner) error {
|
func ParseClassMatcher(selector *CSSSelector, s scanner.Scanner) error {
|
||||||
var class bytes.Buffer
|
var class bytes.Buffer
|
||||||
defer func() {
|
defer func() {
|
||||||
regexpStr := `(\A|\s)` + regexp.QuoteMeta(class.String()) + `(\s|\z)`
|
regexpStr := `(\A|\s)` + regexp.QuoteMeta(class.String()) + `(\s|\z)`
|
||||||
selector.Attrs["class"] = regexp.MustCompile(regexpStr)
|
AddAttrSelector(selector, "class", regexp.MustCompile(regexpStr))
|
||||||
}()
|
}()
|
||||||
for {
|
for {
|
||||||
c := s.Next()
|
c := s.Next()
|
||||||
@ -188,7 +197,7 @@ func ParseIdMatcher(selector *CSSSelector, s scanner.Scanner) error {
|
|||||||
var id bytes.Buffer
|
var id bytes.Buffer
|
||||||
defer func() {
|
defer func() {
|
||||||
regexpStr := `^` + regexp.QuoteMeta(id.String()) + `$`
|
regexpStr := `^` + regexp.QuoteMeta(id.String()) + `$`
|
||||||
selector.Attrs["id"] = regexp.MustCompile(regexpStr)
|
AddAttrSelector(selector, "id", regexp.MustCompile(regexpStr))
|
||||||
}()
|
}()
|
||||||
for {
|
for {
|
||||||
c := s.Next()
|
c := s.Next()
|
||||||
@ -233,9 +242,9 @@ func ParseAttrMatcher(selector *CSSSelector, s scanner.Scanner) error {
|
|||||||
case '~':
|
case '~':
|
||||||
regexpStr = `(\A|\s)` + regexp.QuoteMeta(attrVal.String()) + `(\s|\z)`
|
regexpStr = `(\A|\s)` + regexp.QuoteMeta(attrVal.String()) + `(\s|\z)`
|
||||||
}
|
}
|
||||||
selector.Attrs[attrKey.String()] = regexp.MustCompile(regexpStr)
|
AddAttrSelector(selector, attrKey.String(), regexp.MustCompile(regexpStr))
|
||||||
} else {
|
} else {
|
||||||
selector.Attrs[attrKey.String()] = regexp.MustCompile(`^.*$`)
|
AddAttrSelector(selector, attrKey.String(), regexp.MustCompile(`^.*$`))
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
// After reaching ']' proceed
|
// After reaching ']' proceed
|
||||||
|
Loading…
Reference in New Issue
Block a user