mirror of
https://github.com/ericchiang/pup
synced 2024-11-24 08:58:08 +00:00
cleaned up code and added comments
This commit is contained in:
parent
957fc30cc1
commit
825f458e22
@ -13,8 +13,7 @@ fast and flexible way of exploring HTML from the terminal.
|
|||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
Download a webpage with `wget`. _Please exercise restraint when using any
|
Download a webpage with `wget`.
|
||||||
automated request tool._
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ wget http://en.wikipedia.org/wiki/Robots_exclusion_standard -O robots.html
|
$ wget http://en.wikipedia.org/wiki/Robots_exclusion_standard -O robots.html
|
||||||
|
27
main.go
27
main.go
@ -6,12 +6,11 @@ import (
|
|||||||
"github.com/ericchiang/pup/selector"
|
"github.com/ericchiang/pup/selector"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
const VERSION = "0.1.0"
|
const VERSION string = "0.1.0"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Flags
|
// Flags
|
||||||
@ -20,20 +19,17 @@ var (
|
|||||||
maxPrintLevel int = -1
|
maxPrintLevel int = -1
|
||||||
printNumber bool = false
|
printNumber bool = false
|
||||||
printColor bool = false
|
printColor bool = false
|
||||||
|
|
||||||
// Helpers
|
|
||||||
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
|
|
||||||
preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`)
|
|
||||||
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Print to stderr and exit
|
||||||
func Fatal(format string, args ...interface{}) {
|
func Fatal(format string, args ...interface{}) {
|
||||||
fmt.Fprintf(os.Stderr, format, args...)
|
fmt.Fprintf(os.Stderr, format, args...)
|
||||||
fmt.Fprintf(os.Stderr, "\n")
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func printHelp() {
|
// Print help to stderr and quit
|
||||||
|
func PrintHelp() {
|
||||||
helpString := `Usage
|
helpString := `Usage
|
||||||
|
|
||||||
pup [list of css selectors]
|
pup [list of css selectors]
|
||||||
@ -54,7 +50,8 @@ Flags
|
|||||||
Fatal(helpString, VERSION)
|
Fatal(helpString, VERSION)
|
||||||
}
|
}
|
||||||
|
|
||||||
func processFlags(cmds []string) []string {
|
// Process command arguments and return all non-flags.
|
||||||
|
func ProcessFlags(cmds []string) []string {
|
||||||
var i int
|
var i int
|
||||||
var err error
|
var err error
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -77,7 +74,7 @@ func processFlags(cmds []string) []string {
|
|||||||
}
|
}
|
||||||
i++
|
i++
|
||||||
case "-h", "--help":
|
case "-h", "--help":
|
||||||
printHelp()
|
PrintHelp()
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
case "-i", "--indent":
|
case "-i", "--indent":
|
||||||
indentLevel, err := strconv.Atoi(cmds[i+1])
|
indentLevel, err := strconv.Atoi(cmds[i+1])
|
||||||
@ -109,8 +106,9 @@ func processFlags(cmds []string) []string {
|
|||||||
return nonFlagCmds[:n]
|
return nonFlagCmds[:n]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pup
|
||||||
func main() {
|
func main() {
|
||||||
cmds := processFlags(os.Args[1:])
|
cmds := ProcessFlags(os.Args[1:])
|
||||||
root, err := html.Parse(inputStream)
|
root, err := html.Parse(inputStream)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, err.Error())
|
fmt.Fprintf(os.Stderr, err.Error())
|
||||||
@ -121,9 +119,9 @@ func main() {
|
|||||||
PrintNode(root, 0)
|
PrintNode(root, 0)
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
selectors := make([]selector.Selector, len(cmds))
|
selectors := make([]*selector.Selector, len(cmds))
|
||||||
for i, cmd := range cmds {
|
for i, cmd := range cmds {
|
||||||
selectors[i], err = selector.ParseSelector(cmd)
|
selectors[i], err = selector.NewSelector(cmd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, err.Error())
|
fmt.Fprintf(os.Stderr, err.Error())
|
||||||
os.Exit(2)
|
os.Exit(2)
|
||||||
@ -134,7 +132,8 @@ func main() {
|
|||||||
for _, selector := range selectors {
|
for _, selector := range selectors {
|
||||||
selected = []*html.Node{}
|
selected = []*html.Node{}
|
||||||
for _, node := range currNodes {
|
for _, node := range currNodes {
|
||||||
selected = append(selected, selector.FindAllChildren(node)...)
|
selected = append(selected,
|
||||||
|
selector.FindAllChildren(node)...)
|
||||||
}
|
}
|
||||||
currNodes = selected
|
currNodes = selected
|
||||||
}
|
}
|
||||||
|
@ -5,13 +5,20 @@ import (
|
|||||||
"code.google.com/p/go.net/html/atom"
|
"code.google.com/p/go.net/html/atom"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/fatih/color"
|
"github.com/fatih/color"
|
||||||
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
// Colors
|
||||||
tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold)
|
tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold)
|
||||||
tokenColor = color.New(color.FgCyan).Add(color.Bold)
|
tokenColor = color.New(color.FgCyan).Add(color.Bold)
|
||||||
attrKeyColor = color.New(color.FgRed)
|
attrKeyColor = color.New(color.FgRed)
|
||||||
quoteColor = color.New(color.FgBlue)
|
quoteColor = color.New(color.FgBlue)
|
||||||
|
|
||||||
|
// Regexp helpers
|
||||||
|
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
|
||||||
|
preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`)
|
||||||
|
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func printIndent(level int) {
|
func printIndent(level int) {
|
||||||
@ -48,6 +55,7 @@ func printChildren(n *html.Node, level int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print a node and all of its children to `maxlevel`.
|
||||||
func PrintNode(n *html.Node, level int) {
|
func PrintNode(n *html.Node, level int) {
|
||||||
switch n.Type {
|
switch n.Type {
|
||||||
case html.TextNode:
|
case html.TextNode:
|
||||||
|
@ -7,20 +7,22 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A CSS Selector
|
||||||
type Selector struct {
|
type Selector struct {
|
||||||
Class, ID, Name *regexp.Regexp
|
Class, ID, Name *regexp.Regexp
|
||||||
Attrs map[string]*regexp.Regexp
|
Attrs map[string]*regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
type SelectorType string
|
type selectorField string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
Class SelectorType = "class"
|
Class selectorField = "class"
|
||||||
ID SelectorType = "id"
|
ID selectorField = "id"
|
||||||
Name SelectorType = "name"
|
Name selectorField = "name"
|
||||||
)
|
)
|
||||||
|
|
||||||
func setTypeValue(s *Selector, a SelectorType, v string) error {
|
// Set a field of this selector.
|
||||||
|
func (s *Selector) setFieldValue(a selectorField, v string) error {
|
||||||
if v == "" {
|
if v == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -41,7 +43,8 @@ func setTypeValue(s *Selector, a SelectorType, v string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseSelector(s string) (Selector, error) {
|
// Convert a string to a selector.
|
||||||
|
func NewSelector(s string) (*Selector, error) {
|
||||||
attrs := map[string]*regexp.Regexp{}
|
attrs := map[string]*regexp.Regexp{}
|
||||||
selector := &Selector{nil, nil, nil, attrs}
|
selector := &Selector{nil, nil, nil, attrs}
|
||||||
nextAttr := Name
|
nextAttr := Name
|
||||||
@ -49,26 +52,30 @@ func ParseSelector(s string) (Selector, error) {
|
|||||||
for i, c := range s {
|
for i, c := range s {
|
||||||
switch c {
|
switch c {
|
||||||
case '.':
|
case '.':
|
||||||
err := setTypeValue(selector, nextAttr, s[start:i])
|
err := selector.setFieldValue(nextAttr, s[start:i])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return *selector, err
|
return selector, err
|
||||||
}
|
}
|
||||||
nextAttr = Class
|
nextAttr = Class
|
||||||
start = i + 1
|
start = i + 1
|
||||||
case '#':
|
case '#':
|
||||||
err := setTypeValue(selector, nextAttr, s[start:i])
|
err := selector.setFieldValue(nextAttr, s[start:i])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return *selector, err
|
return selector, err
|
||||||
}
|
}
|
||||||
nextAttr = ID
|
nextAttr = ID
|
||||||
start = i + 1
|
start = i + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setTypeValue(selector, nextAttr, s[start:])
|
err := selector.setFieldValue(nextAttr, s[start:])
|
||||||
return *selector, nil
|
if err != nil {
|
||||||
|
return selector, err
|
||||||
|
}
|
||||||
|
return selector, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sel Selector) FindAllChildren(node *html.Node) []*html.Node {
|
// Find all nodes which match a selector.
|
||||||
|
func (sel *Selector) FindAllChildren(node *html.Node) []*html.Node {
|
||||||
selected := []*html.Node{}
|
selected := []*html.Node{}
|
||||||
child := node.FirstChild
|
child := node.FirstChild
|
||||||
for child != nil {
|
for child != nil {
|
||||||
@ -79,7 +86,8 @@ func (sel Selector) FindAllChildren(node *html.Node) []*html.Node {
|
|||||||
return selected
|
return selected
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sel Selector) FindAll(node *html.Node) []*html.Node {
|
// Find all nodes which match a selector. May return itself.
|
||||||
|
func (sel *Selector) FindAll(node *html.Node) []*html.Node {
|
||||||
selected := []*html.Node{}
|
selected := []*html.Node{}
|
||||||
if sel.Match(node) {
|
if sel.Match(node) {
|
||||||
return []*html.Node{node}
|
return []*html.Node{node}
|
||||||
@ -93,7 +101,8 @@ func (sel Selector) FindAll(node *html.Node) []*html.Node {
|
|||||||
return selected
|
return selected
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sel Selector) Match(node *html.Node) bool {
|
// Does this selector match a given node?
|
||||||
|
func (sel *Selector) Match(node *html.Node) bool {
|
||||||
if node.Type != html.ElementNode {
|
if node.Type != html.ElementNode {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user