mirror of
https://github.com/ericchiang/pup
synced 2025-01-14 17:50:59 +00:00
commit
f407082dd5
16
README.md
16
README.md
@ -149,6 +149,22 @@ $ cat robots.html | pup ':parent-of([action="edit"])'
|
||||
For a complete list, view the [implemented selectors](#implemented-selectors)
|
||||
section.
|
||||
|
||||
|
||||
#### `+`, `>`, and `,`
|
||||
|
||||
There are intermediate characters which declare special instructions. For
|
||||
instance, a comma `,` allows pup to specify multiple groups of selectors.
|
||||
|
||||
```bash
|
||||
$ cat robots.html | pup 'title, h1 span[dir="auto"]'
|
||||
<title>
|
||||
Robots exclusion standard - Wikipedia, the free encyclopedia
|
||||
</title>
|
||||
<span dir="auto">
|
||||
Robots exclusion standard
|
||||
</span>
|
||||
```
|
||||
|
||||
#### Chain selectors together
|
||||
|
||||
When combining selectors, the HTML nodes selected by the previous selector will
|
||||
|
45
parse.go
45
parse.go
@ -34,22 +34,23 @@ Flags
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func ParseArgs() []string {
|
||||
cmds := ProcessFlags(os.Args[1:])
|
||||
func ParseArgs() ([]string, error) {
|
||||
cmds, err := ProcessFlags(os.Args[1:])
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
return ParseCommands(strings.Join(cmds, " "))
|
||||
}
|
||||
|
||||
// Process command arguments and return all non-flags.
|
||||
func ProcessFlags(cmds []string) []string {
|
||||
func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) {
|
||||
var i int
|
||||
var err error
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
fmt.Fprintf(os.Stderr, "Option '%s' requires an argument", cmds[i])
|
||||
os.Exit(2)
|
||||
err = fmt.Errorf("Option '%s' requires an argument", cmds[i])
|
||||
}
|
||||
}()
|
||||
nonFlagCmds := make([]string, len(cmds))
|
||||
nonFlagCmds = make([]string, len(cmds))
|
||||
n := 0
|
||||
for i = 0; i < len(cmds); i++ {
|
||||
cmd := cmds[i]
|
||||
@ -77,8 +78,7 @@ func ProcessFlags(cmds []string) []string {
|
||||
case "-l", "--limit":
|
||||
pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1])
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Argument for '%s' must be numeric\n", cmd)
|
||||
os.Exit(2)
|
||||
return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd)
|
||||
}
|
||||
i++
|
||||
case "--version":
|
||||
@ -86,18 +86,17 @@ func ProcessFlags(cmds []string) []string {
|
||||
os.Exit(0)
|
||||
default:
|
||||
if cmd[0] == '-' {
|
||||
fmt.Fprintf(os.Stderr, "Unrecognized flag '%s'", cmd)
|
||||
os.Exit(2)
|
||||
return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd)
|
||||
}
|
||||
nonFlagCmds[n] = cmds[i]
|
||||
n++
|
||||
}
|
||||
}
|
||||
return nonFlagCmds[:n]
|
||||
return nonFlagCmds[:n], nil
|
||||
}
|
||||
|
||||
// Split a string with awareness for quoted text
|
||||
func ParseCommands(cmdString string) []string {
|
||||
// Split a string with awareness for quoted text and commas
|
||||
func ParseCommands(cmdString string) ([]string, error) {
|
||||
cmds := []string{}
|
||||
last, next, max := 0, 0, len(cmdString)
|
||||
for {
|
||||
@ -106,7 +105,7 @@ func ParseCommands(cmdString string) []string {
|
||||
if next > last {
|
||||
cmds = append(cmds, cmdString[last:next])
|
||||
}
|
||||
return cmds
|
||||
return cmds, nil
|
||||
}
|
||||
// evaluate a rune
|
||||
c := cmdString[next]
|
||||
@ -116,16 +115,26 @@ func ParseCommands(cmdString string) []string {
|
||||
cmds = append(cmds, cmdString[last:next])
|
||||
}
|
||||
last = next + 1
|
||||
case ',':
|
||||
if next > last {
|
||||
cmds = append(cmds, cmdString[last:next])
|
||||
}
|
||||
cmds = append(cmds, ",")
|
||||
last = next + 1
|
||||
case '\'', '"':
|
||||
// for quotes, consume runes until the quote has ended
|
||||
quoteChar := c
|
||||
for {
|
||||
next++
|
||||
if next == max {
|
||||
fmt.Fprintf(os.Stderr, "Unmatched open quote (%c)\n", quoteChar)
|
||||
os.Exit(2)
|
||||
return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar)
|
||||
}
|
||||
if cmdString[next] == quoteChar {
|
||||
if cmdString[next] == '\\' {
|
||||
next++
|
||||
if next == max {
|
||||
return []string{}, fmt.Errorf("Unmatched open quote (%c)", quoteChar)
|
||||
}
|
||||
} else if cmdString[next] == quoteChar {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
85
parse_test.go
Normal file
85
parse_test.go
Normal file
@ -0,0 +1,85 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type parseCmdTest struct {
|
||||
input string
|
||||
split []string
|
||||
ok bool
|
||||
}
|
||||
|
||||
var parseCmdTests = []parseCmdTest{
|
||||
parseCmdTest{`w1 w2`, []string{`w1`, `w2`}, true},
|
||||
parseCmdTest{`w1 w2 w3`, []string{`w1`, `w2`, `w3`}, true},
|
||||
parseCmdTest{`w1 'w2 w3'`, []string{`w1`, `'w2 w3'`}, true},
|
||||
parseCmdTest{`w1 "w2 w3"`, []string{`w1`, `"w2 w3"`}, true},
|
||||
parseCmdTest{`w1 "w2 w3"`, []string{`w1`, `"w2 w3"`}, true},
|
||||
parseCmdTest{`w1 'w2 w3'`, []string{`w1`, `'w2 w3'`}, true},
|
||||
parseCmdTest{`w1"w2 w3"`, []string{`w1"w2 w3"`}, true},
|
||||
parseCmdTest{`w1'w2 w3'`, []string{`w1'w2 w3'`}, true},
|
||||
parseCmdTest{`w1"w2 'w3"`, []string{`w1"w2 'w3"`}, true},
|
||||
parseCmdTest{`w1'w2 "w3'`, []string{`w1'w2 "w3'`}, true},
|
||||
parseCmdTest{`"w1 w2" "w3"`, []string{`"w1 w2"`, `"w3"`}, true},
|
||||
parseCmdTest{`'w1 w2' "w3"`, []string{`'w1 w2'`, `"w3"`}, true},
|
||||
parseCmdTest{`'w1 \'w2' "w3"`, []string{`'w1 \'w2'`, `"w3"`}, true},
|
||||
parseCmdTest{`'w1 \'w2 "w3"`, []string{}, false},
|
||||
parseCmdTest{`w1 'w2 w3'"`, []string{}, false},
|
||||
parseCmdTest{`w1 "w2 w3"'`, []string{}, false},
|
||||
parseCmdTest{`w1 ' "w2 w3"`, []string{}, false},
|
||||
parseCmdTest{`w1 " 'w2 w3'`, []string{}, false},
|
||||
parseCmdTest{`w1"w2 w3""`, []string{}, false},
|
||||
parseCmdTest{`w1'w2 w3''`, []string{}, false},
|
||||
parseCmdTest{`w1"w2 'w3""`, []string{}, false},
|
||||
parseCmdTest{`w1'w2 "w3''`, []string{}, false},
|
||||
parseCmdTest{`"w1 w2" "w3"'`, []string{}, false},
|
||||
parseCmdTest{`'w1 w2' "w3"'`, []string{}, false},
|
||||
parseCmdTest{`w1,"w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
|
||||
parseCmdTest{`w1,'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
|
||||
parseCmdTest{`w1 , "w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
|
||||
parseCmdTest{`w1 , 'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
|
||||
parseCmdTest{`w1, "w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
|
||||
parseCmdTest{`w1, 'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
|
||||
parseCmdTest{`w1 ,"w2 w3"`, []string{`w1`, `,`, `"w2 w3"`}, true},
|
||||
parseCmdTest{`w1 ,'w2 w3'`, []string{`w1`, `,`, `'w2 w3'`}, true},
|
||||
parseCmdTest{`w1"w2, w3"`, []string{`w1"w2, w3"`}, true},
|
||||
parseCmdTest{`w1'w2, w3'`, []string{`w1'w2, w3'`}, true},
|
||||
parseCmdTest{`w1"w2, 'w3"`, []string{`w1"w2, 'w3"`}, true},
|
||||
parseCmdTest{`w1'w2, "w3'`, []string{`w1'w2, "w3'`}, true},
|
||||
parseCmdTest{`"w1, w2" "w3"`, []string{`"w1, w2"`, `"w3"`}, true},
|
||||
parseCmdTest{`'w1, w2' "w3"`, []string{`'w1, w2'`, `"w3"`}, true},
|
||||
parseCmdTest{`'w1, \'w2' "w3"`, []string{`'w1, \'w2'`, `"w3"`}, true},
|
||||
parseCmdTest{`h1, .article-teaser, .article-content`, []string{
|
||||
`h1`, `,`, `.article-teaser`, `,`, `.article-content`,
|
||||
}, true},
|
||||
parseCmdTest{`h1 ,.article-teaser ,.article-content`, []string{
|
||||
`h1`, `,`, `.article-teaser`, `,`, `.article-content`,
|
||||
}, true},
|
||||
parseCmdTest{`h1 , .article-teaser , .article-content`, []string{
|
||||
`h1`, `,`, `.article-teaser`, `,`, `.article-content`,
|
||||
}, true},
|
||||
}
|
||||
|
||||
func sliceEq(s1, s2 []string) bool {
|
||||
if len(s1) != len(s2) {
|
||||
return false
|
||||
}
|
||||
for i := range s1 {
|
||||
if s1[i] != s2[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func TestParseCommands(t *testing.T) {
|
||||
for _, test := range parseCmdTests {
|
||||
parsed, err := ParseCommands(test.input)
|
||||
if test.ok != (err == nil) {
|
||||
t.Errorf("`%s`: should have cause error? %v", test.input, !test.ok)
|
||||
} else if !sliceEq(test.split, parsed) {
|
||||
t.Errorf("`%s`: `%s`: `%s`", test.input, test.split, parsed)
|
||||
}
|
||||
}
|
||||
}
|
23
pup.go
23
pup.go
@ -17,11 +17,15 @@ import (
|
||||
// |/ \_( # |"
|
||||
// C/ ,--___/
|
||||
|
||||
var VERSION string = "0.3.4"
|
||||
var VERSION string = "0.3.5"
|
||||
|
||||
func main() {
|
||||
// process flags and arguments
|
||||
cmds := ParseArgs()
|
||||
cmds, err := ParseArgs()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s\n", err.Error())
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
// Determine the charset of the input
|
||||
cr, err := charset.NewReader(pupIn, "")
|
||||
@ -49,12 +53,14 @@ func main() {
|
||||
}
|
||||
}
|
||||
switch cmd {
|
||||
case "*":
|
||||
case "*": // select all
|
||||
continue
|
||||
case "+":
|
||||
funcGenerator = SelectFromChildren
|
||||
case ">":
|
||||
funcGenerator = SelectNextSibling
|
||||
case ",": // nil will signify a comma
|
||||
selectorFuncs = append(selectorFuncs, nil)
|
||||
default:
|
||||
selector, err := ParseSelector(cmd)
|
||||
if err != nil {
|
||||
@ -66,9 +72,16 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
selectedNodes := []*html.Node{}
|
||||
currNodes := []*html.Node{root}
|
||||
for _, selectorFunc := range selectorFuncs {
|
||||
currNodes = selectorFunc(currNodes)
|
||||
if selectorFunc == nil { // hit a comma
|
||||
selectedNodes = append(selectedNodes, currNodes...)
|
||||
currNodes = []*html.Node{root}
|
||||
} else {
|
||||
currNodes = selectorFunc(currNodes)
|
||||
}
|
||||
}
|
||||
pupDisplayer.Display(currNodes)
|
||||
selectedNodes = append(selectedNodes, currNodes...)
|
||||
pupDisplayer.Display(selectedNodes)
|
||||
}
|
||||
|
10
pup.rb
10
pup.rb
@ -2,14 +2,14 @@ require 'formula'
|
||||
|
||||
class Pup < Formula
|
||||
homepage 'https://github.com/EricChiang/pup'
|
||||
version '0.3.4'
|
||||
version '0.3.5'
|
||||
|
||||
if Hardware.is_64_bit?
|
||||
url 'https://github.com/EricChiang/pup/releases/download/v0.3.4/pup_darwin_amd64.zip'
|
||||
sha1 '5fec62701a49bfd5eaa4b9c980e9c06dcece78c6'
|
||||
url 'https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_darwin_amd64.zip'
|
||||
sha1 '6991dc9408e02adfa0ed5866eb7e284a94d79a77'
|
||||
else
|
||||
url 'https://github.com/EricChiang/pup/releases/download/v0.3.4/pup_darwin_386.zip'
|
||||
sha1 '1eb129c662d7e323c9b1e8f8ed3b8e28ce521434'
|
||||
url 'https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_darwin_386.zip'
|
||||
sha1 'ec58d15a39ab821caa5f903035862690bbeb4dfe'
|
||||
end
|
||||
|
||||
def install
|
||||
|
@ -39,3 +39,9 @@ td:empty
|
||||
#toc li + span
|
||||
#toc li > li
|
||||
li a:not([rel])
|
||||
link, a
|
||||
link ,a
|
||||
link , a
|
||||
link , a sup
|
||||
link , a:parent-of(sup)
|
||||
link , a:parent-of(sup) sup
|
||||
|
@ -39,3 +39,9 @@ dbc580de40eeb8448f0dbe1b98d74cf799a6868b #toc li + a
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span
|
||||
5d6e3ed3cfe310cde185cbfe1bba6aa7ec2a7f8d #toc li > li
|
||||
87eee1189dd5296d6c010a1ad329fc53c6099d72 li a:not([rel])
|
||||
055f3c98e9160beb13f72f1009ad66b6252a9bba link, a
|
||||
055f3c98e9160beb13f72f1009ad66b6252a9bba link ,a
|
||||
055f3c98e9160beb13f72f1009ad66b6252a9bba link , a
|
||||
0d1f66765d1632c70f8608947890524e78459362 link , a sup
|
||||
b6a3d6cccd305fcc3e8bf2743c443743bdaaa02b link , a:parent-of(sup)
|
||||
0d1f66765d1632c70f8608947890524e78459362 link , a:parent-of(sup) sup
|
||||
|
Loading…
Reference in New Issue
Block a user