diff --git a/.gitignore b/.gitignore index 5054208..de82825 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ dist/ testpages/* tests/test_results.txt +robots.html diff --git a/selector.go b/selector.go index 75d4307..694f028 100644 --- a/selector.go +++ b/selector.go @@ -378,6 +378,14 @@ func ParsePseudo(selector *CSSSelector, s scanner.Scanner) error { if selector.Pseudo, err = parseNthPseudo(cmd); err != nil { return err } + case strings.HasPrefix(cmd, "not("): + if selector.Pseudo, err = parseNotPseudo(cmd[len("not("):]); err != nil { + return err + } + case strings.HasPrefix(cmd, "parent-of("): + if selector.Pseudo, err = parseParentOfPseudo(cmd[len("parent-of("):]); err != nil { + return err + } default: return fmt.Errorf("%s not a valid pseudo class", cmd) } @@ -583,3 +591,54 @@ func parseContainsPseudo(cmd string) (PseudoClass, error) { } } } + +// Parse a :not(selector) selector +// expects the input to be everything after the open parenthesis +// e.g. for `not(div#id)` the argument would be `div#id)` +func parseNotPseudo(cmd string) (PseudoClass, error) { + if len(cmd) < 2 { + return nil, fmt.Errorf("malformed ':not' selector") + } + endQuote, cmd := cmd[len(cmd)-1], cmd[:len(cmd)-1] + selector, err := ParseSelector(cmd) + if err != nil { + return nil, err + } + if selector.Pseudo != nil { + return nil, fmt.Errorf("selector within ':not' may not contain a pseudo class") + } + if endQuote != ')' { + return nil, fmt.Errorf("unmatched '('") + } + return func(n *html.Node) bool { + return !selector.Match(n) + }, nil +} + +// Parse a :parent-of(selector) selector +// expects the input to be everything after the open parenthesis +// e.g. for `parent-of(div#id)` the argument would be `div#id)` +func parseParentOfPseudo(cmd string) (PseudoClass, error) { + if len(cmd) < 2 { + return nil, fmt.Errorf("malformed ':parent-of' selector") + } + endQuote, cmd := cmd[len(cmd)-1], cmd[:len(cmd)-1] + selector, err := ParseSelector(cmd) + if err != nil { + return nil, err + } + if selector.Pseudo != nil { + return nil, fmt.Errorf("selector within ':parent-of' may not contain a pseudo class") + } + if endQuote != ')' { + return nil, fmt.Errorf("unmatched '('") + } + return func(n *html.Node) bool { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if c.Type == html.ElementNode && selector.Match(c) { + return true + } + } + return false + }, nil +} diff --git a/tests/cmds.txt b/tests/cmds.txt index 56c192d..b05f947 100644 --- a/tests/cmds.txt +++ b/tests/cmds.txt @@ -38,3 +38,4 @@ td:empty #toc li + a + span #toc li + span #toc li > li +li a:not([rel]) diff --git a/tests/expected_output.txt b/tests/expected_output.txt index db4cd5f..4f5dec8 100644 --- a/tests/expected_output.txt +++ b/tests/expected_output.txt @@ -38,3 +38,4 @@ dbc580de40eeb8448f0dbe1b98d74cf799a6868b #toc li + a 0cd687baaf08605bf6a68e3c285c5e8a41e0c9b2 #toc li + a + span da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span 5d6e3ed3cfe310cde185cbfe1bba6aa7ec2a7f8d #toc li > li +87eee1189dd5296d6c010a1ad329fc53c6099d72 li a:not([rel])