diff --git a/README.md b/README.md index dac14a8..f04dad4 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,7 @@ fast and flexible way of exploring HTML from the terminal. ## Examples -Download a webpage with `wget`. _Please exercise restraint when using any -automated request tool._ +Download a webpage with `wget`. ```bash $ wget http://en.wikipedia.org/wiki/Robots_exclusion_standard -O robots.html diff --git a/main.go b/main.go index 9550298..e7f2d2d 100644 --- a/main.go +++ b/main.go @@ -6,12 +6,11 @@ import ( "github.com/ericchiang/pup/selector" "io" "os" - "regexp" "strconv" "strings" ) -const VERSION = "0.1.0" +const VERSION string = "0.1.0" var ( // Flags @@ -20,20 +19,17 @@ var ( maxPrintLevel int = -1 printNumber bool = false printColor bool = false - - // Helpers - whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`) - preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`) - postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`) ) +// Print to stderr and exit func Fatal(format string, args ...interface{}) { fmt.Fprintf(os.Stderr, format, args...) fmt.Fprintf(os.Stderr, "\n") os.Exit(1) } -func printHelp() { +// Print help to stderr and quit +func PrintHelp() { helpString := `Usage pup [list of css selectors] @@ -54,7 +50,8 @@ Flags Fatal(helpString, VERSION) } -func processFlags(cmds []string) []string { +// Process command arguments and return all non-flags. 
+func ProcessFlags(cmds []string) []string { var i int var err error defer func() { @@ -77,7 +74,7 @@ func processFlags(cmds []string) []string { } i++ case "-h", "--help": - printHelp() + PrintHelp() os.Exit(1) case "-i", "--indent": indentLevel, err := strconv.Atoi(cmds[i+1]) @@ -109,8 +106,9 @@ func processFlags(cmds []string) []string { return nonFlagCmds[:n] } +// pup func main() { - cmds := processFlags(os.Args[1:]) + cmds := ProcessFlags(os.Args[1:]) root, err := html.Parse(inputStream) if err != nil { fmt.Fprintf(os.Stderr, err.Error()) @@ -121,9 +119,9 @@ func main() { PrintNode(root, 0) os.Exit(0) } - selectors := make([]selector.Selector, len(cmds)) + selectors := make([]*selector.Selector, len(cmds)) for i, cmd := range cmds { - selectors[i], err = selector.ParseSelector(cmd) + selectors[i], err = selector.NewSelector(cmd) if err != nil { fmt.Fprintf(os.Stderr, err.Error()) os.Exit(2) @@ -134,7 +132,8 @@ func main() { for _, selector := range selectors { selected = []*html.Node{} for _, node := range currNodes { - selected = append(selected, selector.FindAllChildren(node)...) + selected = append(selected, + selector.FindAllChildren(node)...) } currNodes = selected } diff --git a/printing.go b/printing.go index ef1ece7..0d4cb70 100644 --- a/printing.go +++ b/printing.go @@ -5,13 +5,20 @@ import ( "code.google.com/p/go.net/html/atom" "fmt" "github.com/fatih/color" + "regexp" ) var ( + // Colors tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold) tokenColor = color.New(color.FgCyan).Add(color.Bold) attrKeyColor = color.New(color.FgRed) quoteColor = color.New(color.FgBlue) + + // Regexp helpers + whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`) + preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`) + postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`) ) func printIndent(level int) { @@ -48,6 +55,7 @@ func printChildren(n *html.Node, level int) { } } +// Print a node and all of its children to `maxlevel`. 
func PrintNode(n *html.Node, level int) { switch n.Type { case html.TextNode: diff --git a/selector/selector.go b/selector/selector.go index d0adf42..bb31246 100644 --- a/selector/selector.go +++ b/selector/selector.go @@ -7,20 +7,22 @@ import ( "strings" ) +// A CSS Selector type Selector struct { Class, ID, Name *regexp.Regexp Attrs map[string]*regexp.Regexp } -type SelectorType string +type selectorField string const ( - Class SelectorType = "class" - ID SelectorType = "id" - Name SelectorType = "name" + Class selectorField = "class" + ID selectorField = "id" + Name selectorField = "name" ) -func setTypeValue(s *Selector, a SelectorType, v string) error { +// Set a field of this selector. +func (s *Selector) setFieldValue(a selectorField, v string) error { if v == "" { return nil } @@ -41,7 +43,8 @@ func setTypeValue(s *Selector, a SelectorType, v string) error { return nil } -func ParseSelector(s string) (Selector, error) { +// Convert a string to a selector. +func NewSelector(s string) (*Selector, error) { attrs := map[string]*regexp.Regexp{} selector := &Selector{nil, nil, nil, attrs} nextAttr := Name @@ -49,26 +52,30 @@ func ParseSelector(s string) (Selector, error) { for i, c := range s { switch c { case '.': - err := setTypeValue(selector, nextAttr, s[start:i]) + err := selector.setFieldValue(nextAttr, s[start:i]) if err != nil { - return *selector, err + return selector, err } nextAttr = Class start = i + 1 case '#': - err := setTypeValue(selector, nextAttr, s[start:i]) + err := selector.setFieldValue(nextAttr, s[start:i]) if err != nil { - return *selector, err + return selector, err } nextAttr = ID start = i + 1 } } - setTypeValue(selector, nextAttr, s[start:]) - return *selector, nil + err := selector.setFieldValue(nextAttr, s[start:]) + if err != nil { + return selector, err + } + return selector, nil } -func (sel Selector) FindAllChildren(node *html.Node) []*html.Node { +// Find all nodes which match a selector. 
+func (sel *Selector) FindAllChildren(node *html.Node) []*html.Node { selected := []*html.Node{} child := node.FirstChild for child != nil { @@ -79,7 +86,8 @@ func (sel Selector) FindAllChildren(node *html.Node) []*html.Node { return selected } -func (sel Selector) FindAll(node *html.Node) []*html.Node { +// Find all nodes which match a selector. May return itself. +func (sel *Selector) FindAll(node *html.Node) []*html.Node { selected := []*html.Node{} if sel.Match(node) { return []*html.Node{node} @@ -93,7 +101,8 @@ func (sel Selector) FindAll(node *html.Node) []*html.Node { return selected } -func (sel Selector) Match(node *html.Node) bool { +// Does this selector match a given node? +func (sel *Selector) Match(node *html.Node) bool { if node.Type != html.ElementNode { return false }