diff --git a/README.md b/README.md
index 5655c50..2561881 100644
--- a/README.md
+++ b/README.md
@@ -149,6 +149,22 @@ $ cat robots.html | pup ':parent-of([action="edit"])'
For a complete list, view the [implemented selectors](#Implemented Selectors)
section.
+
+####`+`, `>`, and `,`
+
+There are intermediate characters which declare special instructions. For
+instance, a comma `,` allows pup to specify multiple groups of selectors.
+
+```bash
+$ cat robots.html | pup 'title, h1 span[dir="auto"]'
+
+ Robots exclusion standard - Wikipedia, the free encyclopedia
+
+
+ Robots exclusion standard
+
+```
+
####Chain selectors together
When combining selectors, the HTML nodes selected by the previous selector will
diff --git a/parse.go b/parse.go
index f61a43f..d9d570a 100644
--- a/parse.go
+++ b/parse.go
@@ -34,22 +34,23 @@ Flags
os.Exit(exitCode)
}
-func ParseArgs() []string {
- cmds := ProcessFlags(os.Args[1:])
+func ParseArgs() ([]string, error) {
+ cmds, err := ProcessFlags(os.Args[1:])
+ if err != nil {
+ return []string{}, err
+ }
return ParseCommands(strings.Join(cmds, " "))
}
// Process command arguments and return all non-flags.
-func ProcessFlags(cmds []string) []string {
+func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) {
var i int
- var err error
defer func() {
if r := recover(); r != nil {
- fmt.Fprintf(os.Stderr, "Option '%s' requires an argument", cmds[i])
- os.Exit(2)
+ err = fmt.Errorf("Option '%s' requires an argument", cmds[i])
}
}()
- nonFlagCmds := make([]string, len(cmds))
+ nonFlagCmds = make([]string, len(cmds))
n := 0
for i = 0; i < len(cmds); i++ {
cmd := cmds[i]
@@ -77,8 +78,7 @@ func ProcessFlags(cmds []string) []string {
case "-l", "--limit":
pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1])
if err != nil {
- fmt.Fprintf(os.Stderr, "Argument for '%s' must be numeric\n", cmd)
- os.Exit(2)
+ return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd)
}
i++
case "--version":
@@ -86,18 +86,17 @@ func ProcessFlags(cmds []string) []string {
os.Exit(0)
default:
if cmd[0] == '-' {
- fmt.Fprintf(os.Stderr, "Unrecognized flag '%s'", cmd)
- os.Exit(2)
+ return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd)
}
nonFlagCmds[n] = cmds[i]
n++
}
}
- return nonFlagCmds[:n]
+ return nonFlagCmds[:n], nil
}
-// Split a string with awareness for quoted text
-func ParseCommands(cmdString string) []string {
+// Split a string with awareness of quoted text; an unquoted comma becomes its own "," token
+func ParseCommands(cmdString string) ([]string, error) {
cmds := []string{}
last, next, max := 0, 0, len(cmdString)
for {
@@ -106,7 +105,7 @@ func ParseCommands(cmdString string) []string {
if next > last {
cmds = append(cmds, cmdString[last:next])
}
- return cmds
+ return cmds, nil
}
// evalute a rune
c := cmdString[next]
@@ -116,16 +115,26 @@ func ParseCommands(cmdString string) []string {
cmds = append(cmds, cmdString[last:next])
}
last = next + 1
+ case ',':
+ if next > last {
+ cmds = append(cmds, cmdString[last:next])
+ }
+ cmds = append(cmds, ",")
+ last = next + 1
case '\'', '"':
// for quotes, consume runes until the quote has ended
quoteChar := c
for {
next++
if next == max {
- fmt.Fprintf(os.Stderr, "Unmatched open quote (%c)\n", quoteChar)
- os.Exit(2)
+ return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar)
}
- if cmdString[next] == quoteChar {
+ if cmdString[next] == '\\' {
+ next++
+ if next == max {
+ return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar)
+ }
+ } else if cmdString[next] == quoteChar {
break
}
}
diff --git a/parse_test.go b/parse_test.go
new file mode 100644
index 0000000..c5dea74
--- /dev/null
+++ b/parse_test.go
@@ -0,0 +1,85 @@
+package main
+
+import (
+ "testing"
+)
+
+type parseCmdTest struct {
+ input string
+ split []string
+ ok bool
+}
+
+var parseCmdTests = []parseCmdTest{
+ parseCmdTest{`w1 w2`, []string{`w1`, `w2`}, true},
+ parseCmdTest{`w1 w2 w3`, []string{`w1`, `w2`, `w3`}, true},
+ parseCmdTest{`w1 'w2 w3'`, []string{`w1`, `'w2 w3'`}, true},
+ parseCmdTest{`w1 "w2 w3"`, []string{`w1`, `"w2 w3"`}, true},
+ parseCmdTest{`w1 "w2 w3"`, []string{`w1`, `"w2 w3"`}, true},
+ parseCmdTest{`w1 'w2 w3'`, []string{`w1`, `'w2 w3'`}, true},
+ parseCmdTest{`w1"w2 w3"`, []string{`w1"w2 w3"`}, true},
+ parseCmdTest{`w1'w2 w3'`, []string{`w1'w2 w3'`}, true},
+ parseCmdTest{`w1"w2 'w3"`, []string{`w1"w2 'w3"`}, true},
+ parseCmdTest{`w1'w2 "w3'`, []string{`w1'w2 "w3'`}, true},
+ parseCmdTest{`"w1 w2" "w3"`, []string{`"w1 w2"`, `"w3"`}, true},
+ parseCmdTest{`'w1 w2' "w3"`, []string{`'w1 w2'`, `"w3"`}, true},
+ parseCmdTest{`'w1 \'w2' "w3"`, []string{`'w1 \'w2'`, `"w3"`}, true},
+ parseCmdTest{`'w1 \'w2 "w3"`, []string{}, false},
+ parseCmdTest{`w1 'w2 w3'"`, []string{}, false},
+ parseCmdTest{`w1 "w2 w3"'`, []string{}, false},
+ parseCmdTest{`w1 ' "w2 w3"`, []string{}, false},
+ parseCmdTest{`w1 " 'w2 w3'`, []string{}, false},
+ parseCmdTest{`w1"w2 w3""`, []string{}, false},
+ parseCmdTest{`w1'w2 w3''`, []string{}, false},
+ parseCmdTest{`w1"w2 'w3""`, []string{}, false},
+ parseCmdTest{`w1'w2 "w3''`, []string{}, false},
+ parseCmdTest{`"w1 w2" "w3"'`, []string{}, false},
+ parseCmdTest{`'w1 w2' "w3"'`, []string{}, false},
+ parseCmdTest{`w1,"w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
+ parseCmdTest{`w1,'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
+ parseCmdTest{`w1 , "w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
+ parseCmdTest{`w1 , 'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
+ parseCmdTest{`w1, "w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
+ parseCmdTest{`w1, 'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
+ parseCmdTest{`w1 ,"w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
+ parseCmdTest{`w1 ,'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
+ parseCmdTest{`w1"w2, w3"`, []string{`w1"w2, w3"`}, true},
+ parseCmdTest{`w1'w2, w3'`, []string{`w1'w2, w3'`}, true},
+ parseCmdTest{`w1"w2, 'w3"`, []string{`w1"w2, 'w3"`}, true},
+ parseCmdTest{`w1'w2, "w3'`, []string{`w1'w2, "w3'`}, true},
+ parseCmdTest{`"w1, w2" "w3"`, []string{`"w1, w2"`, `"w3"`}, true},
+ parseCmdTest{`'w1, w2' "w3"`, []string{`'w1, w2'`, `"w3"`}, true},
+ parseCmdTest{`'w1, \'w2' "w3"`, []string{`'w1, \'w2'`, `"w3"`}, true},
+ parseCmdTest{`h1, .article-teaser, .article-content`, []string{
+ `h1`, `,`, `.article-teaser`, `,`, `.article-content`,
+ }, true},
+ parseCmdTest{`h1 ,.article-teaser ,.article-content`, []string{
+ `h1`, `,`, `.article-teaser`, `,`, `.article-content`,
+ }, true},
+ parseCmdTest{`h1 , .article-teaser , .article-content`, []string{
+ `h1`, `,`, `.article-teaser`, `,`, `.article-content`,
+ }, true},
+}
+
+func sliceEq(s1, s2 []string) bool {
+ if len(s1) != len(s2) {
+ return false
+ }
+ for i := range s1 {
+ if s1[i] != s2[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func TestParseCommands(t *testing.T) {
+ for _, test := range parseCmdTests {
+ parsed, err := ParseCommands(test.input)
+ if test.ok != (err == nil) {
+ t.Errorf("`%s`: should have cause error? %v", test.input, !test.ok)
+ } else if !sliceEq(test.split, parsed) {
+ t.Errorf("`%s`: `%s`: `%s`", test.input, test.split, parsed)
+ }
+ }
+}
diff --git a/pup.go b/pup.go
index 1590eb3..43851d0 100644
--- a/pup.go
+++ b/pup.go
@@ -17,11 +17,15 @@ import (
// |/ \_( # |"
// C/ ,--___/
-var VERSION string = "0.3.4"
+var VERSION string = "0.3.5"
func main() {
// process flags and arguments
- cmds := ParseArgs()
+ cmds, err := ParseArgs()
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "%s\n", err.Error())
+ os.Exit(2)
+ }
// Determine the charset of the input
cr, err := charset.NewReader(pupIn, "")
@@ -49,12 +53,14 @@ func main() {
}
}
switch cmd {
- case "*":
+ case "*": // select all
continue
case "+":
funcGenerator = SelectFromChildren
case ">":
funcGenerator = SelectNextSibling
+ case ",": // nil will signify a comma
+ selectorFuncs = append(selectorFuncs, nil)
default:
selector, err := ParseSelector(cmd)
if err != nil {
@@ -66,9 +72,16 @@ func main() {
}
}
+ selectedNodes := []*html.Node{}
currNodes := []*html.Node{root}
for _, selectorFunc := range selectorFuncs {
- currNodes = selectorFunc(currNodes)
+ if selectorFunc == nil { // hit a comma
+ selectedNodes = append(selectedNodes, currNodes...)
+ currNodes = []*html.Node{root}
+ } else {
+ currNodes = selectorFunc(currNodes)
+ }
}
- pupDisplayer.Display(currNodes)
+ selectedNodes = append(selectedNodes, currNodes...)
+ pupDisplayer.Display(selectedNodes)
}
diff --git a/pup.rb b/pup.rb
index 1dc8b5f..3691a32 100644
--- a/pup.rb
+++ b/pup.rb
@@ -2,14 +2,14 @@ require 'formula'
class Pup < Formula
homepage 'https://github.com/EricChiang/pup'
- version '0.3.4'
+ version '0.3.5'
if Hardware.is_64_bit?
- url 'https://github.com/EricChiang/pup/releases/download/v0.3.4/pup_darwin_amd64.zip'
- sha1 '5fec62701a49bfd5eaa4b9c980e9c06dcece78c6'
+ url 'https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_darwin_amd64.zip'
+ sha1 '6991dc9408e02adfa0ed5866eb7e284a94d79a77'
else
- url 'https://github.com/EricChiang/pup/releases/download/v0.3.4/pup_darwin_386.zip'
- sha1 '1eb129c662d7e323c9b1e8f8ed3b8e28ce521434'
+ url 'https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_darwin_386.zip'
+ sha1 'ec58d15a39ab821caa5f903035862690bbeb4dfe'
end
def install
diff --git a/tests/cmds.txt b/tests/cmds.txt
index b05f947..d2a1e0c 100644
--- a/tests/cmds.txt
+++ b/tests/cmds.txt
@@ -39,3 +39,9 @@ td:empty
#toc li + span
#toc li > li
li a:not([rel])
+link, a
+link ,a
+link , a
+link , a sup
+link , a:parent-of(sup)
+link , a:parent-of(sup) sup
diff --git a/tests/expected_output.txt b/tests/expected_output.txt
index 4f5dec8..b1ab3b9 100644
--- a/tests/expected_output.txt
+++ b/tests/expected_output.txt
@@ -39,3 +39,9 @@ dbc580de40eeb8448f0dbe1b98d74cf799a6868b #toc li + a
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span
5d6e3ed3cfe310cde185cbfe1bba6aa7ec2a7f8d #toc li > li
87eee1189dd5296d6c010a1ad329fc53c6099d72 li a:not([rel])
+055f3c98e9160beb13f72f1009ad66b6252a9bba link, a
+055f3c98e9160beb13f72f1009ad66b6252a9bba link ,a
+055f3c98e9160beb13f72f1009ad66b6252a9bba link , a
+0d1f66765d1632c70f8608947890524e78459362 link , a sup
+b6a3d6cccd305fcc3e8bf2743c443743bdaaa02b link , a:parent-of(sup)
+0d1f66765d1632c70f8608947890524e78459362 link , a:parent-of(sup) sup