1
0
mirror of https://github.com/ericchiang/pup synced 2025-01-28 16:41:32 +00:00

added colorful printing

This commit is contained in:
ericchiang 2014-09-01 14:18:34 -04:00
parent 292afbc5f0
commit 957fc30cc1
3 changed files with 134 additions and 82 deletions

View File

@ -26,13 +26,13 @@ By default, `pup` will fill in missing tags, and properly indent the page.
```bash
$ cat robots.html
# nasty looking html
$ cat robots.html | pup
# cleaned and indented html
# nasty looking HTML
$ cat robots.html | pup --color
# cleaned, indented, and colorful HTML
```
###Filter by tag
```
```bash
$ pup < robots.html title
<title>
Robots exclusion standard - Wikipedia, the free encyclopedia
@ -40,7 +40,7 @@ $ pup < robots.html title
```
###Filter by id
```
```bash
$ pup < robots.html span#See_also
<span class="mw-headline" id="See_also">
See also
@ -49,19 +49,20 @@ $ pup < robots.html span#See_also
###Chain selectors together
The following two commands are equivalent.
The following two commands are equivalent. (NOTE: pipes do not work with the
`--color` flag)
```
```bash
$ pup < robots.html table.navbox ul a | tail
```
```
```bash
$ pup < robots.html table.navbox | pup ul | pup a | tail
```
Both produce the output:
```
```bash
</a>
<a href="/wiki/Stop_words" title="Stop words">
Stop words
@ -75,13 +76,15 @@ Both produce the ouput:
```
###How many nodes are selected by a filter?
```
```bash
$ pup < robots.html a -n
283
```
###Limit print level
```
```bash
$ pup < robots.html table -l 2
<table class="metadata plainlinks ambox ambox-content" role="presentation">
<tbody>
@ -105,4 +108,3 @@ $ pup < robots.html table -l 2
* Attribute css selectors.
* Print attribute value rather than html ({href})
* Print result as JSON (--json)
* Print colorfully

92
main.go
View File

@ -2,7 +2,6 @@ package main
import (
"code.google.com/p/go.net/html"
"code.google.com/p/go.net/html/atom"
"fmt"
"github.com/ericchiang/pup/selector"
"io"
@ -12,12 +11,15 @@ import (
"strings"
)
const VERSION = "0.1.0"
var (
// Flags
inputStream io.ReadCloser = os.Stdin
sep string = " "
indentString string = " "
maxPrintLevel int = -1
printNumber bool = false
printColor bool = false
// Helpers
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
@ -25,67 +27,6 @@ var (
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
)
// printIndent writes the indent unit sep to standard output once per level.
// A level of zero or less prints nothing.
func printIndent(level int) {
	for i := 0; i < level; i++ {
		fmt.Print(sep)
	}
}
// isVoidElement reports whether n is a void element, i.e. a tag such as
// <meta> or <br> that has no end tag.
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
func isVoidElement(n *html.Node) bool {
	switch n.DataAtom {
	case atom.Area,
		atom.Base,
		atom.Br,
		atom.Col,
		atom.Command,
		atom.Embed,
		atom.Hr,
		atom.Img,
		atom.Input,
		atom.Keygen,
		atom.Link,
		atom.Meta,
		atom.Param,
		atom.Source,
		atom.Track,
		atom.Wbr:
		return true
	default:
		return false
	}
}
// printChildren prints each direct child of n, in sibling order, by calling
// PrintNode at the given indent level.
//
// When maxPrintLevel is non-negative and level has reached it, the children
// are not printed; a single indented "..." line is written in their place.
func printChildren(n *html.Node, level int) {
	if maxPrintLevel > -1 && level >= maxPrintLevel {
		printIndent(level)
		fmt.Println("...")
		return
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		PrintNode(c, level)
	}
}
// PrintNode writes n and its subtree to standard output, one tag or text run
// per line, indented to depth level.
//
// Element nodes print their start tag with every attribute; unless the element
// is void, the children follow one level deeper and then the end tag. Text
// nodes that are only whitespace are skipped; other text is passed through the
// preWhitespace and postWhitespace replacements and printed on its own line.
// Comment, doctype and document nodes print nothing themselves; only their
// children are printed, at the same level.
func PrintNode(n *html.Node, level int) {
	switch n.Type {
	case html.ElementNode:
		printIndent(level)
		fmt.Printf("<%s", n.Data)
		for i := range n.Attr {
			fmt.Printf(` %s="%s"`, n.Attr[i].Key, n.Attr[i].Val)
		}
		fmt.Print(">\n")
		if isVoidElement(n) {
			// Void elements have neither children nor an end tag.
			return
		}
		printChildren(n, level+1)
		printIndent(level)
		fmt.Printf("</%s>\n", n.Data)
	case html.TextNode:
		if whitespaceRegexp.MatchString(n.Data) {
			return
		}
		text := postWhitespace.ReplaceAllString(
			preWhitespace.ReplaceAllString(n.Data, ""), "")
		printIndent(level)
		fmt.Println(text)
	case html.CommentNode, html.DoctypeNode, html.DocumentNode:
		printChildren(n, level)
	}
}
func Fatal(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format, args...)
fmt.Fprintf(os.Stderr, "\n")
@ -93,18 +34,24 @@ func Fatal(format string, args ...interface{}) {
}
func printHelp() {
Fatal(`Usage:
helpString := `Usage
pup [list of css selectors]
Flags:
Version
%s
Flags
-c --color print result with color
-f --file file to read from
-h --help display this help
-i --indent number of spaces to use for indent or character
-n --number print number of elements selected
-l --level restrict number of levels printed
`)
-l --limit restrict number of levels printed
--version display version`
Fatal(helpString, VERSION)
}
func processFlags(cmds []string) []string {
@ -120,6 +67,8 @@ func processFlags(cmds []string) []string {
for i = 0; i < len(cmds); i++ {
cmd := cmds[i]
switch cmd {
case "-c", "--color":
printColor = true
case "-f", "--file":
filename := cmds[i+1]
inputStream, err = os.Open(filename)
@ -133,20 +82,22 @@ func processFlags(cmds []string) []string {
case "-i", "--indent":
indentLevel, err := strconv.Atoi(cmds[i+1])
if err == nil {
sep = strings.Repeat(" ", indentLevel)
indentString = strings.Repeat(" ", indentLevel)
} else {
sep = cmds[i+1]
indentString = cmds[i+1]
}
i++
case "-n", "--number":
printNumber = true
case "-l", "--level":
case "-l", "--limit":
maxPrintLevel, err = strconv.Atoi(cmds[i+1])
if err != nil {
Fatal("Argument for '%s' must be numeric",
cmds)
}
i++
case "--version":
Fatal(VERSION)
default:
if cmd[0] == '-' {
Fatal("Unrecognized flag '%s'", cmd)
@ -191,6 +142,7 @@ func main() {
fmt.Println(len(currNodes))
} else {
for _, s := range currNodes {
// defined in `printing.go`
PrintNode(s, 0)
}
}

98
printing.go Normal file
View File

@ -0,0 +1,98 @@
package main
import (
"code.google.com/p/go.net/html"
"code.google.com/p/go.net/html/atom"
"fmt"
"github.com/fatih/color"
)
// Colors used by PrintNode when printColor is set.
var (
	// tagColor styles element names in start and end tags.
	tagColor = color.New(color.FgYellow).Add(color.Bold)
	// tokenColor styles markup punctuation: "<", "</", ">" and "=".
	tokenColor = color.New(color.FgCyan).Add(color.Bold)
	// attrKeyColor styles attribute names.
	attrKeyColor = color.New(color.FgRed)
	// quoteColor styles attribute values together with their quotes.
	quoteColor = color.New(color.FgBlue)
)
// printIndent writes indentString to standard output once per level.
// A level of zero or less prints nothing.
func printIndent(level int) {
	for i := 0; i < level; i++ {
		fmt.Print(indentString)
	}
}
// isVoidElement reports whether n is a void element, i.e. a tag such as
// <meta> or <br> that has no end tag.
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
func isVoidElement(n *html.Node) bool {
	switch n.DataAtom {
	case atom.Area,
		atom.Base,
		atom.Br,
		atom.Col,
		atom.Command,
		atom.Embed,
		atom.Hr,
		atom.Img,
		atom.Input,
		atom.Keygen,
		atom.Link,
		atom.Meta,
		atom.Param,
		atom.Source,
		atom.Track,
		atom.Wbr:
		return true
	default:
		return false
	}
}
// printChildren prints each direct child of n, in sibling order, by calling
// PrintNode at the given indent level.
//
// When maxPrintLevel is non-negative and level has reached it, the children
// are not printed; a single indented "..." line is written in their place.
func printChildren(n *html.Node, level int) {
	if maxPrintLevel > -1 && level >= maxPrintLevel {
		printIndent(level)
		fmt.Println("...")
		return
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		PrintNode(c, level)
	}
}
// inRawTextElement reports whether the text node n is a direct child of an
// element whose content is written verbatim rather than escaped. The element
// list mirrors the raw-text set used by the html package's Render.
func inRawTextElement(n *html.Node) bool {
	if n.Parent == nil {
		return false
	}
	switch n.Parent.DataAtom {
	case atom.Iframe, atom.Noembed, atom.Noframes, atom.Noscript,
		atom.Plaintext, atom.Script, atom.Style, atom.Xmp:
		return true
	}
	return false
}

// PrintNode writes n and its subtree to standard output, one tag or text run
// per line, indented to depth level. When printColor is set, tag punctuation,
// tag names, attribute names and attribute values are written through their
// respective colors; otherwise plain fmt output is used.
//
// Element nodes print their start tag with every attribute; unless the element
// is void, the children follow one level deeper and then the end tag. Text
// nodes that are only whitespace are skipped; other text is passed through the
// preWhitespace and postWhitespace replacements and printed on its own line.
// Comment, doctype and document nodes print nothing themselves; only their
// children are printed, at the same level.
//
// Attribute values and text are HTML-escaped before printing. The html parser
// hands back entity-decoded strings, so writing them raw would turn a value
// containing `"` or text containing `<` / `&` into different markup, which
// then reparses incorrectly (for example when chaining pup into pup). Text
// directly inside raw-text elements such as <script> and <style> is left
// untouched.
func PrintNode(n *html.Node, level int) {
	switch n.Type {
	case html.TextNode:
		s := n.Data
		if whitespaceRegexp.MatchString(s) {
			return
		}
		s = preWhitespace.ReplaceAllString(s, "")
		s = postWhitespace.ReplaceAllString(s, "")
		if !inRawTextElement(n) {
			s = html.EscapeString(s)
		}
		printIndent(level)
		fmt.Println(s)
	case html.ElementNode:
		printIndent(level)
		if printColor {
			tokenColor.Print("<")
			tagColor.Printf("%s", n.Data)
		} else {
			fmt.Printf("<%s", n.Data)
		}
		for _, a := range n.Attr {
			// Escape so that quotes or ampersands in the value cannot
			// terminate the attribute early or form a bogus entity.
			val := html.EscapeString(a.Val)
			if printColor {
				fmt.Print(" ")
				attrKeyColor.Printf("%s", a.Key)
				tokenColor.Print("=")
				quoteColor.Printf(`"%s"`, val)
			} else {
				fmt.Printf(` %s="%s"`, a.Key, val)
			}
		}
		if printColor {
			tokenColor.Println(">")
		} else {
			fmt.Print(">\n")
		}
		if isVoidElement(n) {
			// Void elements have neither children nor an end tag.
			return
		}
		printChildren(n, level+1)
		printIndent(level)
		if printColor {
			tokenColor.Print("</")
			tagColor.Printf("%s", n.Data)
			tokenColor.Println(">")
		} else {
			fmt.Printf("</%s>\n", n.Data)
		}
	case html.CommentNode, html.DoctypeNode, html.DocumentNode:
		printChildren(n, level)
	}
}