1
0
mirror of https://github.com/ericchiang/pup synced 2024-11-24 08:58:08 +00:00

cleaned up code and add comments

This commit is contained in:
ericchiang 2014-09-01 14:50:10 -04:00
parent 957fc30cc1
commit 825f458e22
4 changed files with 46 additions and 31 deletions

View File

@ -13,8 +13,7 @@ fast and flexible way of exploring HTML from the terminal.
## Examples ## Examples
Download a webpage with `wget`. _Please exercise restraint when using any Download a webpage with `wget`.
automated request tool._
```bash ```bash
$ wget http://en.wikipedia.org/wiki/Robots_exclusion_standard -O robots.html $ wget http://en.wikipedia.org/wiki/Robots_exclusion_standard -O robots.html

27
main.go
View File

@ -6,12 +6,11 @@ import (
"github.com/ericchiang/pup/selector" "github.com/ericchiang/pup/selector"
"io" "io"
"os" "os"
"regexp"
"strconv" "strconv"
"strings" "strings"
) )
const VERSION = "0.1.0" const VERSION string = "0.1.0"
var ( var (
// Flags // Flags
@ -20,20 +19,17 @@ var (
maxPrintLevel int = -1 maxPrintLevel int = -1
printNumber bool = false printNumber bool = false
printColor bool = false printColor bool = false
// Helpers
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`)
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
) )
// Print to stderr and exit
func Fatal(format string, args ...interface{}) { func Fatal(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format, args...) fmt.Fprintf(os.Stderr, format, args...)
fmt.Fprintf(os.Stderr, "\n") fmt.Fprintf(os.Stderr, "\n")
os.Exit(1) os.Exit(1)
} }
func printHelp() { // Print help to stderr and quit
func PrintHelp() {
helpString := `Usage helpString := `Usage
pup [list of css selectors] pup [list of css selectors]
@ -54,7 +50,8 @@ Flags
Fatal(helpString, VERSION) Fatal(helpString, VERSION)
} }
func processFlags(cmds []string) []string { // Process command arguments and return all non-flags.
func ProcessFlags(cmds []string) []string {
var i int var i int
var err error var err error
defer func() { defer func() {
@ -77,7 +74,7 @@ func processFlags(cmds []string) []string {
} }
i++ i++
case "-h", "--help": case "-h", "--help":
printHelp() PrintHelp()
os.Exit(1) os.Exit(1)
case "-i", "--indent": case "-i", "--indent":
indentLevel, err := strconv.Atoi(cmds[i+1]) indentLevel, err := strconv.Atoi(cmds[i+1])
@ -109,8 +106,9 @@ func processFlags(cmds []string) []string {
return nonFlagCmds[:n] return nonFlagCmds[:n]
} }
// pup
func main() { func main() {
cmds := processFlags(os.Args[1:]) cmds := ProcessFlags(os.Args[1:])
root, err := html.Parse(inputStream) root, err := html.Parse(inputStream)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, err.Error()) fmt.Fprintf(os.Stderr, err.Error())
@ -121,9 +119,9 @@ func main() {
PrintNode(root, 0) PrintNode(root, 0)
os.Exit(0) os.Exit(0)
} }
selectors := make([]selector.Selector, len(cmds)) selectors := make([]*selector.Selector, len(cmds))
for i, cmd := range cmds { for i, cmd := range cmds {
selectors[i], err = selector.ParseSelector(cmd) selectors[i], err = selector.NewSelector(cmd)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, err.Error()) fmt.Fprintf(os.Stderr, err.Error())
os.Exit(2) os.Exit(2)
@ -134,7 +132,8 @@ func main() {
for _, selector := range selectors { for _, selector := range selectors {
selected = []*html.Node{} selected = []*html.Node{}
for _, node := range currNodes { for _, node := range currNodes {
selected = append(selected, selector.FindAllChildren(node)...) selected = append(selected,
selector.FindAllChildren(node)...)
} }
currNodes = selected currNodes = selected
} }

View File

@ -5,13 +5,20 @@ import (
"code.google.com/p/go.net/html/atom" "code.google.com/p/go.net/html/atom"
"fmt" "fmt"
"github.com/fatih/color" "github.com/fatih/color"
"regexp"
) )
var ( var (
// Colors
tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold) tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold)
tokenColor = color.New(color.FgCyan).Add(color.Bold) tokenColor = color.New(color.FgCyan).Add(color.Bold)
attrKeyColor = color.New(color.FgRed) attrKeyColor = color.New(color.FgRed)
quoteColor = color.New(color.FgBlue) quoteColor = color.New(color.FgBlue)
// Regexp helpers
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`)
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
) )
func printIndent(level int) { func printIndent(level int) {
@ -48,6 +55,7 @@ func printChildren(n *html.Node, level int) {
} }
} }
// Print a node and all of its children to `maxlevel`.
func PrintNode(n *html.Node, level int) { func PrintNode(n *html.Node, level int) {
switch n.Type { switch n.Type {
case html.TextNode: case html.TextNode:

View File

@ -7,20 +7,22 @@ import (
"strings" "strings"
) )
// A CSS Selector
type Selector struct { type Selector struct {
Class, ID, Name *regexp.Regexp Class, ID, Name *regexp.Regexp
Attrs map[string]*regexp.Regexp Attrs map[string]*regexp.Regexp
} }
type SelectorType string type selectorField string
const ( const (
Class SelectorType = "class" Class selectorField = "class"
ID SelectorType = "id" ID selectorField = "id"
Name SelectorType = "name" Name selectorField = "name"
) )
func setTypeValue(s *Selector, a SelectorType, v string) error { // Set a field of this selector.
func (s *Selector) setFieldValue(a selectorField, v string) error {
if v == "" { if v == "" {
return nil return nil
} }
@ -41,7 +43,8 @@ func setTypeValue(s *Selector, a SelectorType, v string) error {
return nil return nil
} }
func ParseSelector(s string) (Selector, error) { // Convert a string to a selector.
func NewSelector(s string) (*Selector, error) {
attrs := map[string]*regexp.Regexp{} attrs := map[string]*regexp.Regexp{}
selector := &Selector{nil, nil, nil, attrs} selector := &Selector{nil, nil, nil, attrs}
nextAttr := Name nextAttr := Name
@ -49,26 +52,30 @@ func ParseSelector(s string) (Selector, error) {
for i, c := range s { for i, c := range s {
switch c { switch c {
case '.': case '.':
err := setTypeValue(selector, nextAttr, s[start:i]) err := selector.setFieldValue(nextAttr, s[start:i])
if err != nil { if err != nil {
return *selector, err return selector, err
} }
nextAttr = Class nextAttr = Class
start = i + 1 start = i + 1
case '#': case '#':
err := setTypeValue(selector, nextAttr, s[start:i]) err := selector.setFieldValue(nextAttr, s[start:i])
if err != nil { if err != nil {
return *selector, err return selector, err
} }
nextAttr = ID nextAttr = ID
start = i + 1 start = i + 1
} }
} }
setTypeValue(selector, nextAttr, s[start:]) err := selector.setFieldValue(nextAttr, s[start:])
return *selector, nil if err != nil {
return selector, err
}
return selector, nil
} }
func (sel Selector) FindAllChildren(node *html.Node) []*html.Node { // Find all nodes which match a selector.
func (sel *Selector) FindAllChildren(node *html.Node) []*html.Node {
selected := []*html.Node{} selected := []*html.Node{}
child := node.FirstChild child := node.FirstChild
for child != nil { for child != nil {
@ -79,7 +86,8 @@ func (sel Selector) FindAllChildren(node *html.Node) []*html.Node {
return selected return selected
} }
func (sel Selector) FindAll(node *html.Node) []*html.Node { // Find all nodes which match a selector. May return itself.
func (sel *Selector) FindAll(node *html.Node) []*html.Node {
selected := []*html.Node{} selected := []*html.Node{}
if sel.Match(node) { if sel.Match(node) {
return []*html.Node{node} return []*html.Node{node}
@ -93,7 +101,8 @@ func (sel Selector) FindAll(node *html.Node) []*html.Node {
return selected return selected
} }
func (sel Selector) Match(node *html.Node) bool { // Does this selector match a given node?
func (sel *Selector) Match(node *html.Node) bool {
if node.Type != html.ElementNode { if node.Type != html.ElementNode {
return false return false
} }