From 957fc30cc1ba6250703164c54f056f414ebc441e Mon Sep 17 00:00:00 2001 From: ericchiang Date: Mon, 1 Sep 2014 14:18:34 -0400 Subject: [PATCH] added colorful printing --- README.md | 26 +++++++------- main.go | 92 ++++++++++++------------------------------------- printing.go | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 82 deletions(-) create mode 100644 printing.go diff --git a/README.md b/README.md index 780547f..dac14a8 100644 --- a/README.md +++ b/README.md @@ -26,13 +26,13 @@ By default, `pup` will fill in missing tags, and properly indent the page. ```bash $ cat robots.html -# nasty looking html -$ cat robots.html | pup -# cleaned and indented html +# nasty looking HTML +$ cat robots.html | pup --color +# cleaned, indented, and colorful HTML ``` ###Filter by tag -``` +```bash $ pup < robots.html title Robots exclusion standard - Wikipedia, the free encyclopedia @@ -40,7 +40,7 @@ $ pup < robots.html title ``` ###Filter by id -``` +```bash $ pup < robots.html span#See_also <span class="mw-headline" id="See_also"> See also @@ -49,19 +49,20 @@ $ pup < robots.html span#See_also ###Chain selectors together -The following two commands are equivalent. +The following two commands are equivalent. (NOTE: pipes do not work with the +`--color` flag) -``` +```bash $ pup < robots.html table.navbox ul a | tail ``` -``` +```bash $ pup < robots.html table.navbox | pup ul | pup a | tail ``` Both produce the ouput: -``` +```bash </a> <a href="/wiki/Stop_words" title="Stop words"> Stop words @@ -75,13 +76,15 @@ Both produce the ouput: ``` ###How many nodes are selected by a filter? -``` + +```bash $ pup < robots.html a -n 283 ``` ###Limit print level -``` + +```bash $ pup < robots.html table -l 2 <table class="metadata plainlinks ambox ambox-content" role="presentation"> <tbody> @@ -105,4 +108,3 @@ $ pup < robots.html table -l 2 * Attribute css selectors. * Print attribute value rather than html ({href}) * Print result as JSON (--json) -* Print colorfully diff --git a/main.go b/main.go index 2e70f4a..9550298 100644 --- a/main.go +++ b/main.go @@ -2,7 +2,6 @@ package main import ( "code.google.com/p/go.net/html" - "code.google.com/p/go.net/html/atom" "fmt" "github.com/ericchiang/pup/selector" "io" @@ -12,12 +11,15 @@ import ( "strings" ) +const VERSION = "0.1.0" + var ( // Flags inputStream io.ReadCloser = os.Stdin - sep string = " " + indentString string = " " maxPrintLevel int = -1 printNumber bool = false + printColor bool = false // Helpers whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`) @@ -25,67 +27,6 @@ var ( postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`) ) -func printIndent(level int) { - for ; level > 0; level-- { - fmt.Print(sep) - } -} - -// Is this node a tag with no end tag such as <meta> or <br>? -// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements -func isVoidElement(n *html.Node) bool { - switch n.DataAtom { - case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed, - atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link, - atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr: - return true - } - return false - -} - -func printChildren(n *html.Node, level int) { - if maxPrintLevel > -1 { - if level >= maxPrintLevel { - printIndent(level) - fmt.Println("...") - return - } - } - child := n.FirstChild - for child != nil { - PrintNode(child, level) - child = child.NextSibling - } -} - -func PrintNode(n *html.Node, level int) { - switch n.Type { - case html.TextNode: - s := n.Data - if !whitespaceRegexp.MatchString(s) { - s = preWhitespace.ReplaceAllString(s, "") - s = postWhitespace.ReplaceAllString(s, "") - printIndent(level) - fmt.Println(s) - } - case html.ElementNode: - printIndent(level) - fmt.Printf("<%s", n.Data) - for _, a := range n.Attr { - fmt.Printf(` %s="%s"`, a.Key, a.Val) - } - fmt.Print(">\n") - if !isVoidElement(n) { - printChildren(n, level+1) - printIndent(level) - fmt.Printf("</%s>\n", n.Data) - } - case html.CommentNode, html.DoctypeNode, html.DocumentNode: - printChildren(n, level) - } -} - func Fatal(format string, args ...interface{}) { fmt.Fprintf(os.Stderr, format, args...) fmt.Fprintf(os.Stderr, "\n") @@ -93,18 +34,24 @@ func Fatal(format string, args ...interface{}) { } func printHelp() { - Fatal(`Usage: + helpString := `Usage pup [list of css selectors] -Flags: +Version + + %s + +Flags + -c --color print result with color -f --file file to read from -h --help display this help -i --indent number of spaces to use for indent or character -n --number print number of elements selected - -l --level restrict number of levels printed -`) + -l --limit restrict number of levels printed + --version display version` + Fatal(helpString, VERSION) } func processFlags(cmds []string) []string { @@ -120,6 +67,8 @@ func processFlags(cmds []string) []string { for i = 0; i < len(cmds); i++ { cmd := cmds[i] switch cmd { + case "-c", "--color": + printColor = true case "-f", "--file": filename := cmds[i+1] inputStream, err = os.Open(filename) @@ -133,20 +82,22 @@ func processFlags(cmds []string) []string { case "-i", "--indent": indentLevel, err := strconv.Atoi(cmds[i+1]) if err == nil { - sep = strings.Repeat(" ", indentLevel) + indentString = strings.Repeat(" ", indentLevel) } else { - sep = cmds[i+1] + indentString = cmds[i+1] } i++ case "-n", "--number": printNumber = true - case "-l", "--level": + case "-l", "--limit": maxPrintLevel, err = strconv.Atoi(cmds[i+1]) if err != nil { Fatal("Argument for '%s' must be numeric", cmds) } i++ + case "--version": + Fatal(VERSION) default: if cmd[0] == '-' { Fatal("Unrecognized flag '%s'", cmd) @@ -191,6 +142,7 @@ func main() { fmt.Println(len(currNodes)) } else { for _, s := range currNodes { + // defined in `printing.go` PrintNode(s, 0) } } diff --git a/printing.go b/printing.go new file mode 100644 index 0000000..ef1ece7 --- /dev/null +++ b/printing.go @@ -0,0 +1,98 @@ +package main + +import ( + "code.google.com/p/go.net/html" + "code.google.com/p/go.net/html/atom" + "fmt" + "github.com/fatih/color" +) + +var ( + tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold) + tokenColor = color.New(color.FgCyan).Add(color.Bold) + attrKeyColor = color.New(color.FgRed) + quoteColor = color.New(color.FgBlue) +) + +func printIndent(level int) { + for ; level > 0; level-- { + fmt.Print(indentString) + } +} + +// Is this node a tag with no end tag such as <meta> or <br>? +// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements +func isVoidElement(n *html.Node) bool { + switch n.DataAtom { + case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed, + atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link, + atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr: + return true + } + return false + +} + +func printChildren(n *html.Node, level int) { + if maxPrintLevel > -1 { + if level >= maxPrintLevel { + printIndent(level) + fmt.Println("...") + return + } + } + child := n.FirstChild + for child != nil { + PrintNode(child, level) + child = child.NextSibling + } +} + +func PrintNode(n *html.Node, level int) { + switch n.Type { + case html.TextNode: + s := n.Data + if !whitespaceRegexp.MatchString(s) { + s = preWhitespace.ReplaceAllString(s, "") + s = postWhitespace.ReplaceAllString(s, "") + printIndent(level) + fmt.Println(s) + } + case html.ElementNode: + printIndent(level) + if printColor { + tokenColor.Print("<") + tagColor.Printf("%s", n.Data) + } else { + fmt.Printf("<%s", n.Data) + } + for _, a := range n.Attr { + if printColor { + fmt.Print(" ") + attrKeyColor.Printf("%s", a.Key) + tokenColor.Print("=") + quoteColor.Printf(`"%s"`, a.Val) + } else { + fmt.Printf(` %s="%s"`, a.Key, a.Val) + } + } + if printColor { + tokenColor.Println(">") + } else { + fmt.Print(">\n") + } + if !isVoidElement(n) { + printChildren(n, level+1) + printIndent(level) + if printColor { + tokenColor.Print("</") + tagColor.Printf("%s", n.Data) + tokenColor.Println(">") + } else { + fmt.Printf("</%s>\n", n.Data) + } + } + case html.CommentNode, html.DoctypeNode, html.DocumentNode: + printChildren(n, level) + } +}