diff --git a/README.md b/README.md
index 780547f..dac14a8 100644
--- a/README.md
+++ b/README.md
@@ -26,13 +26,13 @@ By default, `pup` will fill in missing tags, and properly indent the page.
```bash
$ cat robots.html
-# nasty looking html
-$ cat robots.html | pup
-# cleaned and indented html
+# nasty looking HTML
+$ cat robots.html | pup --color
+# cleaned, indented, and colorful HTML
```
###Filter by tag
-```
+```bash
$ pup < robots.html title
Robots exclusion standard - Wikipedia, the free encyclopedia
@@ -40,7 +40,7 @@ $ pup < robots.html title
```
###Filter by id
-```
+```bash
$ pup < robots.html span#See_also
See also
@@ -49,19 +49,20 @@ $ pup < robots.html span#See_also
###Chain selectors together
-The following two commands are equivalent.
+The following two commands are equivalent. (NOTE: pipes do not work with the
+`--color` flag)
-```
+```bash
$ pup < robots.html table.navbox ul a | tail
```
-```
+```bash
$ pup < robots.html table.navbox | pup ul | pup a | tail
```
Both produce the ouput:
-```
+```bash
Stop words
@@ -75,13 +76,15 @@ Both produce the ouput:
```
###How many nodes are selected by a filter?
-```
+
+```bash
$ pup < robots.html a -n
283
```
###Limit print level
-```
+
+```bash
$ pup < robots.html table -l 2
@@ -105,4 +108,3 @@ $ pup < robots.html table -l 2
* Attribute css selectors.
* Print attribute value rather than html ({href})
* Print result as JSON (--json)
-* Print colorfully
diff --git a/main.go b/main.go
index 2e70f4a..9550298 100644
--- a/main.go
+++ b/main.go
@@ -2,7 +2,6 @@ package main
import (
"code.google.com/p/go.net/html"
- "code.google.com/p/go.net/html/atom"
"fmt"
"github.com/ericchiang/pup/selector"
"io"
@@ -12,12 +11,15 @@ import (
"strings"
)
+const VERSION = "0.1.0"
+
var (
// Flags
inputStream io.ReadCloser = os.Stdin
- sep string = " "
+ indentString string = " "
maxPrintLevel int = -1
printNumber bool = false
+ printColor bool = false
// Helpers
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
@@ -25,67 +27,6 @@ var (
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
)
-func printIndent(level int) {
- for ; level > 0; level-- {
- fmt.Print(sep)
- }
-}
-
-// Is this node a tag with no end tag such as <meta> or <br>?
-// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
-func isVoidElement(n *html.Node) bool {
- switch n.DataAtom {
- case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
- atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
- atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
- return true
- }
- return false
-
-}
-
-func printChildren(n *html.Node, level int) {
- if maxPrintLevel > -1 {
- if level >= maxPrintLevel {
- printIndent(level)
- fmt.Println("...")
- return
- }
- }
- child := n.FirstChild
- for child != nil {
- PrintNode(child, level)
- child = child.NextSibling
- }
-}
-
-func PrintNode(n *html.Node, level int) {
- switch n.Type {
- case html.TextNode:
- s := n.Data
- if !whitespaceRegexp.MatchString(s) {
- s = preWhitespace.ReplaceAllString(s, "")
- s = postWhitespace.ReplaceAllString(s, "")
- printIndent(level)
- fmt.Println(s)
- }
- case html.ElementNode:
- printIndent(level)
- fmt.Printf("<%s", n.Data)
- for _, a := range n.Attr {
- fmt.Printf(` %s="%s"`, a.Key, a.Val)
- }
- fmt.Print(">\n")
- if !isVoidElement(n) {
- printChildren(n, level+1)
- printIndent(level)
- fmt.Printf("</%s>\n", n.Data)
- }
- case html.CommentNode, html.DoctypeNode, html.DocumentNode:
- printChildren(n, level)
- }
-}
-
func Fatal(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format, args...)
fmt.Fprintf(os.Stderr, "\n")
@@ -93,18 +34,24 @@ func Fatal(format string, args ...interface{}) {
}
func printHelp() {
- Fatal(`Usage:
+ helpString := `Usage
pup [list of css selectors]
-Flags:
+Version
+
+ %s
+
+Flags
+ -c --color print result with color
-f --file file to read from
-h --help display this help
-i --indent number of spaces to use for indent or character
-n --number print number of elements selected
- -l --level restrict number of levels printed
-`)
+ -l --limit restrict number of levels printed
+ --version display version`
+ Fatal(helpString, VERSION)
}
func processFlags(cmds []string) []string {
@@ -120,6 +67,8 @@ func processFlags(cmds []string) []string {
for i = 0; i < len(cmds); i++ {
cmd := cmds[i]
switch cmd {
+ case "-c", "--color":
+ printColor = true
case "-f", "--file":
filename := cmds[i+1]
inputStream, err = os.Open(filename)
@@ -133,20 +82,22 @@ func processFlags(cmds []string) []string {
case "-i", "--indent":
indentLevel, err := strconv.Atoi(cmds[i+1])
if err == nil {
- sep = strings.Repeat(" ", indentLevel)
+ indentString = strings.Repeat(" ", indentLevel)
} else {
- sep = cmds[i+1]
+ indentString = cmds[i+1]
}
i++
case "-n", "--number":
printNumber = true
- case "-l", "--level":
+ case "-l", "--limit":
maxPrintLevel, err = strconv.Atoi(cmds[i+1])
if err != nil {
Fatal("Argument for '%s' must be numeric",
cmds)
}
i++
+ case "--version":
+ Fatal(VERSION)
default:
if cmd[0] == '-' {
Fatal("Unrecognized flag '%s'", cmd)
@@ -191,6 +142,7 @@ func main() {
fmt.Println(len(currNodes))
} else {
for _, s := range currNodes {
+ // defined in `printing.go`
PrintNode(s, 0)
}
}
diff --git a/printing.go b/printing.go
new file mode 100644
index 0000000..ef1ece7
--- /dev/null
+++ b/printing.go
@@ -0,0 +1,98 @@
+package main
+
+import (
+ "code.google.com/p/go.net/html"
+ "code.google.com/p/go.net/html/atom"
+ "fmt"
+ "github.com/fatih/color"
+)
+
+var (
+ tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold)
+ tokenColor = color.New(color.FgCyan).Add(color.Bold)
+ attrKeyColor = color.New(color.FgRed)
+ quoteColor = color.New(color.FgBlue)
+)
+
+func printIndent(level int) {
+ for ; level > 0; level-- {
+ fmt.Print(indentString)
+ }
+}
+
+// Is this node a tag with no end tag such as <meta> or <br>?
+// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
+func isVoidElement(n *html.Node) bool {
+ switch n.DataAtom {
+ case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
+ atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
+ atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
+ return true
+ }
+ return false
+
+}
+
+func printChildren(n *html.Node, level int) {
+ if maxPrintLevel > -1 {
+ if level >= maxPrintLevel {
+ printIndent(level)
+ fmt.Println("...")
+ return
+ }
+ }
+ child := n.FirstChild
+ for child != nil {
+ PrintNode(child, level)
+ child = child.NextSibling
+ }
+}
+
+func PrintNode(n *html.Node, level int) {
+ switch n.Type {
+ case html.TextNode:
+ s := n.Data
+ if !whitespaceRegexp.MatchString(s) {
+ s = preWhitespace.ReplaceAllString(s, "")
+ s = postWhitespace.ReplaceAllString(s, "")
+ printIndent(level)
+ fmt.Println(s)
+ }
+ case html.ElementNode:
+ printIndent(level)
+ if printColor {
+ tokenColor.Print("<")
+ tagColor.Printf("%s", n.Data)
+ } else {
+ fmt.Printf("<%s", n.Data)
+ }
+ for _, a := range n.Attr {
+ if printColor {
+ fmt.Print(" ")
+ attrKeyColor.Printf("%s", a.Key)
+ tokenColor.Print("=")
+ quoteColor.Printf(`"%s"`, a.Val)
+ } else {
+ fmt.Printf(` %s="%s"`, a.Key, a.Val)
+ }
+ }
+ if printColor {
+ tokenColor.Println(">")
+ } else {
+ fmt.Print(">\n")
+ }
+ if !isVoidElement(n) {
+ printChildren(n, level+1)
+ printIndent(level)
+ if printColor {
+ tokenColor.Print("</")
+ tagColor.Printf("%s", n.Data)
+ tokenColor.Println(">")
+ } else {
+ fmt.Printf("</%s>\n", n.Data)
+ }
+ }
+ case html.CommentNode, html.DoctypeNode, html.DocumentNode:
+ printChildren(n, level)
+ }
+}