1
0
mirror of https://github.com/ericchiang/pup synced 2025-01-15 02:00:55 +00:00

Merge pull request #28 from EricChiang/0.3.5

0.3.5
This commit is contained in:
Eric Chiang 2014-11-23 15:21:55 -05:00
commit f407082dd5
7 changed files with 163 additions and 28 deletions

View File

@ -149,6 +149,22 @@ $ cat robots.html | pup ':parent-of([action="edit"])'
For a complete list, view the [implemented selectors](#Implemented Selectors)
section.
####`+`, `>`, and `,`
There are intermediate characters which declare special instructions. For
instance, a comma `,` allows pup to specify multiple groups of selectors.
```bash
$ cat robots.html | pup 'title, h1 span[dir="auto"]'
<title>
Robots exclusion standard - Wikipedia, the free encyclopedia
</title>
<span dir="auto">
Robots exclusion standard
</span>
```
####Chain selectors together
When combining selectors, the HTML nodes selected by the previous selector will

View File

@ -34,22 +34,23 @@ Flags
os.Exit(exitCode)
}
func ParseArgs() []string {
cmds := ProcessFlags(os.Args[1:])
func ParseArgs() ([]string, error) {
cmds, err := ProcessFlags(os.Args[1:])
if err != nil {
return []string{}, err
}
return ParseCommands(strings.Join(cmds, " "))
}
// Process command arguments and return all non-flags.
func ProcessFlags(cmds []string) []string {
func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) {
var i int
var err error
defer func() {
if r := recover(); r != nil {
fmt.Fprintf(os.Stderr, "Option '%s' requires an argument", cmds[i])
os.Exit(2)
err = fmt.Errorf("Option '%s' requires an argument", cmds[i])
}
}()
nonFlagCmds := make([]string, len(cmds))
nonFlagCmds = make([]string, len(cmds))
n := 0
for i = 0; i < len(cmds); i++ {
cmd := cmds[i]
@ -77,8 +78,7 @@ func ProcessFlags(cmds []string) []string {
case "-l", "--limit":
pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1])
if err != nil {
fmt.Fprintf(os.Stderr, "Argument for '%s' must be numeric\n", cmd)
os.Exit(2)
return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd)
}
i++
case "--version":
@ -86,18 +86,17 @@ func ProcessFlags(cmds []string) []string {
os.Exit(0)
default:
if cmd[0] == '-' {
fmt.Fprintf(os.Stderr, "Unrecognized flag '%s'", cmd)
os.Exit(2)
return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd)
}
nonFlagCmds[n] = cmds[i]
n++
}
}
return nonFlagCmds[:n]
return nonFlagCmds[:n], nil
}
// Split a string with awareness for quoted text
func ParseCommands(cmdString string) []string {
// Split a string with awareness for quoted text and commas
func ParseCommands(cmdString string) ([]string, error) {
cmds := []string{}
last, next, max := 0, 0, len(cmdString)
for {
@ -106,7 +105,7 @@ func ParseCommands(cmdString string) []string {
if next > last {
cmds = append(cmds, cmdString[last:next])
}
return cmds
return cmds, nil
}
// evalute a rune
c := cmdString[next]
@ -116,16 +115,26 @@ func ParseCommands(cmdString string) []string {
cmds = append(cmds, cmdString[last:next])
}
last = next + 1
case ',':
if next > last {
cmds = append(cmds, cmdString[last:next])
}
cmds = append(cmds, ",")
last = next + 1
case '\'', '"':
// for quotes, consume runes until the quote has ended
quoteChar := c
for {
next++
if next == max {
fmt.Fprintf(os.Stderr, "Unmatched open quote (%c)\n", quoteChar)
os.Exit(2)
return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar)
}
if cmdString[next] == quoteChar {
if cmdString[next] == '\\' {
next++
if next == max {
return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar)
}
} else if cmdString[next] == quoteChar {
break
}
}

85
parse_test.go Normal file
View File

@ -0,0 +1,85 @@
package main
import (
"testing"
)
// parseCmdTest is a single table-driven case for ParseCommands.
type parseCmdTest struct {
	// input is the raw command string handed to ParseCommands.
	input string
	// split is the token list the parse is expected to produce.
	split []string
	// ok is true when the parse is expected to succeed (nil error).
	ok bool
}
// parseCmdTests is the case table consumed by TestParseCommands. Each entry
// lists the input string, the expected tokens, and whether parsing should
// succeed.
var parseCmdTests = []parseCmdTest{
	// Plain words separated by whitespace.
	{`w1 w2`, []string{`w1`, `w2`}, true},
	{`w1 w2 w3`, []string{`w1`, `w2`, `w3`}, true},

	// Quoted text is kept inside a single token, quote characters included.
	{`w1 'w2 w3'`, []string{`w1`, `'w2 w3'`}, true},
	{`w1 "w2 w3"`, []string{`w1`, `"w2 w3"`}, true},
	{`w1 "w2 w3"`, []string{`w1`, `"w2 w3"`}, true},
	{`w1 'w2 w3'`, []string{`w1`, `'w2 w3'`}, true},
	{`w1"w2 w3"`, []string{`w1"w2 w3"`}, true},
	{`w1'w2 w3'`, []string{`w1'w2 w3'`}, true},
	{`w1"w2 'w3"`, []string{`w1"w2 'w3"`}, true},
	{`w1'w2 "w3'`, []string{`w1'w2 "w3'`}, true},
	{`"w1 w2" "w3"`, []string{`"w1 w2"`, `"w3"`}, true},
	{`'w1 w2' "w3"`, []string{`'w1 w2'`, `"w3"`}, true},
	{`'w1 \'w2' "w3"`, []string{`'w1 \'w2'`, `"w3"`}, true},

	// Unmatched quotes: an error is expected.
	{`'w1 \'w2 "w3"`, []string{}, false},
	{`w1 'w2 w3'"`, []string{}, false},
	{`w1 "w2 w3"'`, []string{}, false},
	{`w1 ' "w2 w3"`, []string{}, false},
	{`w1 " 'w2 w3'`, []string{}, false},
	{`w1"w2 w3""`, []string{}, false},
	{`w1'w2 w3''`, []string{}, false},
	{`w1"w2 'w3""`, []string{}, false},
	{`w1'w2 "w3''`, []string{}, false},
	{`"w1 w2" "w3"'`, []string{}, false},
	{`'w1 w2' "w3"'`, []string{}, false},

	// A comma outside quotes becomes its own token, whatever the spacing.
	{`w1,"w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
	{`w1,'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
	{`w1 , "w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
	{`w1 , 'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
	{`w1, "w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
	{`w1, 'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
	{`w1 ,"w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
	{`w1 ,'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},

	// A comma inside quotes is ordinary text.
	{`w1"w2, w3"`, []string{`w1"w2, w3"`}, true},
	{`w1'w2, w3'`, []string{`w1'w2, w3'`}, true},
	{`w1"w2, 'w3"`, []string{`w1"w2, 'w3"`}, true},
	{`w1'w2, "w3'`, []string{`w1'w2, "w3'`}, true},
	{`"w1, w2" "w3"`, []string{`"w1, w2"`, `"w3"`}, true},
	{`'w1, w2' "w3"`, []string{`'w1, w2'`, `"w3"`}, true},
	{`'w1, \'w2' "w3"`, []string{`'w1, \'w2'`, `"w3"`}, true},

	// Selector-style inputs with several comma-separated groups.
	{
		`h1, .article-teaser, .article-content`,
		[]string{`h1`, `,`, `.article-teaser`, `,`, `.article-content`},
		true,
	},
	{
		`h1 ,.article-teaser ,.article-content`,
		[]string{`h1`, `,`, `.article-teaser`, `,`, `.article-content`},
		true,
	},
	{
		`h1 , .article-teaser , .article-content`,
		[]string{`h1`, `,`, `.article-teaser`, `,`, `.article-content`},
		true,
	},
}
// sliceEq reports whether s1 and s2 have the same length and hold equal
// strings at every index. A nil slice and an empty slice compare equal.
func sliceEq(s1, s2 []string) bool {
	n := len(s1)
	if n != len(s2) {
		return false
	}
	for idx := 0; idx < n; idx++ {
		if s1[idx] != s2[idx] {
			return false
		}
	}
	return true
}
// TestParseCommands runs ParseCommands over every entry of parseCmdTests.
// For each case it first checks that an error was returned exactly when the
// case expects one, and only then compares the returned tokens with the
// expected split.
func TestParseCommands(t *testing.T) {
	for _, test := range parseCmdTests {
		parsed, err := ParseCommands(test.input)
		if test.ok != (err == nil) {
			// Report the actual error so a failing case can be diagnosed
			// without re-running the parser by hand.
			t.Errorf("ParseCommands(%q): got err = %v, want error: %v",
				test.input, err, !test.ok)
			continue
		}
		if !sliceEq(test.split, parsed) {
			t.Errorf("ParseCommands(%q) = %q, want %q",
				test.input, parsed, test.split)
		}
	}
}

21
pup.go
View File

@ -17,11 +17,15 @@ import (
// |/ \_( # |"
// C/ ,--___/
var VERSION string = "0.3.4"
var VERSION string = "0.3.5"
func main() {
// process flags and arguments
cmds := ParseArgs()
cmds, err := ParseArgs()
if err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err.Error())
os.Exit(2)
}
// Determine the charset of the input
cr, err := charset.NewReader(pupIn, "")
@ -49,12 +53,14 @@ func main() {
}
}
switch cmd {
case "*":
case "*": // select all
continue
case "+":
funcGenerator = SelectFromChildren
case ">":
funcGenerator = SelectNextSibling
case ",": // nil will signify a comma
selectorFuncs = append(selectorFuncs, nil)
default:
selector, err := ParseSelector(cmd)
if err != nil {
@ -66,9 +72,16 @@ func main() {
}
}
selectedNodes := []*html.Node{}
currNodes := []*html.Node{root}
for _, selectorFunc := range selectorFuncs {
if selectorFunc == nil { // hit a comma
selectedNodes = append(selectedNodes, currNodes...)
currNodes = []*html.Node{root}
} else {
currNodes = selectorFunc(currNodes)
}
pupDisplayer.Display(currNodes)
}
selectedNodes = append(selectedNodes, currNodes...)
pupDisplayer.Display(selectedNodes)
}

10
pup.rb
View File

@ -2,14 +2,14 @@ require 'formula'
class Pup < Formula
homepage 'https://github.com/EricChiang/pup'
version '0.3.4'
version '0.3.5'
if Hardware.is_64_bit?
url 'https://github.com/EricChiang/pup/releases/download/v0.3.4/pup_darwin_amd64.zip'
sha1 '5fec62701a49bfd5eaa4b9c980e9c06dcece78c6'
url 'https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_darwin_amd64.zip'
sha1 '6991dc9408e02adfa0ed5866eb7e284a94d79a77'
else
url 'https://github.com/EricChiang/pup/releases/download/v0.3.4/pup_darwin_386.zip'
sha1 '1eb129c662d7e323c9b1e8f8ed3b8e28ce521434'
url 'https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_darwin_386.zip'
sha1 'ec58d15a39ab821caa5f903035862690bbeb4dfe'
end
def install

View File

@ -39,3 +39,9 @@ td:empty
#toc li + span
#toc li > li
li a:not([rel])
link, a
link ,a
link , a
link , a sup
link , a:parent-of(sup)
link , a:parent-of(sup) sup

View File

@ -39,3 +39,9 @@ dbc580de40eeb8448f0dbe1b98d74cf799a6868b #toc li + a
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span
5d6e3ed3cfe310cde185cbfe1bba6aa7ec2a7f8d #toc li > li
87eee1189dd5296d6c010a1ad329fc53c6099d72 li a:not([rel])
055f3c98e9160beb13f72f1009ad66b6252a9bba link, a
055f3c98e9160beb13f72f1009ad66b6252a9bba link ,a
055f3c98e9160beb13f72f1009ad66b6252a9bba link , a
0d1f66765d1632c70f8608947890524e78459362 link , a sup
b6a3d6cccd305fcc3e8bf2743c443743bdaaa02b link , a:parent-of(sup)
0d1f66765d1632c70f8608947890524e78459362 link , a:parent-of(sup) sup