mirror of
https://github.com/ericchiang/pup
synced 2024-11-28 02:48:16 +00:00
added colorful printing
This commit is contained in:
parent
292afbc5f0
commit
957fc30cc1
26
README.md
26
README.md
@ -26,13 +26,13 @@ By default, `pup` will fill in missing tags, and properly indent the page.
|
||||
|
||||
```bash
|
||||
$ cat robots.html
|
||||
# nasty looking html
|
||||
$ cat robots.html | pup
|
||||
# cleaned and indented html
|
||||
# nasty looking HTML
|
||||
$ cat robots.html | pup --color
|
||||
# cleaned, indented, and colorful HTML
|
||||
```
|
||||
|
||||
### Filter by tag
|
||||
```
|
||||
```bash
|
||||
$ pup < robots.html title
|
||||
<title>
|
||||
Robots exclusion standard - Wikipedia, the free encyclopedia
|
||||
@ -40,7 +40,7 @@ $ pup < robots.html title
|
||||
```
|
||||
|
||||
### Filter by id
|
||||
```
|
||||
```bash
|
||||
$ pup < robots.html span#See_also
|
||||
<span class="mw-headline" id="See_also">
|
||||
See also
|
||||
@ -49,19 +49,20 @@ $ pup < robots.html span#See_also
|
||||
|
||||
### Chain selectors together
|
||||
|
||||
The following two commands are equivalent.
|
||||
The following two commands are equivalent. (NOTE: pipes do not work with the
|
||||
`--color` flag)
|
||||
|
||||
```
|
||||
```bash
|
||||
$ pup < robots.html table.navbox ul a | tail
|
||||
```
|
||||
|
||||
```
|
||||
```bash
|
||||
$ pup < robots.html table.navbox | pup ul | pup a | tail
|
||||
```
|
||||
|
||||
Both produce the output:
|
||||
|
||||
```
|
||||
```bash
|
||||
</a>
|
||||
<a href="/wiki/Stop_words" title="Stop words">
|
||||
Stop words
|
||||
@ -75,13 +76,15 @@ Both produce the ouput:
|
||||
```
|
||||
|
||||
### How many nodes are selected by a filter?
|
||||
```
|
||||
|
||||
```bash
|
||||
$ pup < robots.html a -n
|
||||
283
|
||||
```
|
||||
|
||||
### Limit print level
|
||||
```
|
||||
|
||||
```bash
|
||||
$ pup < robots.html table -l 2
|
||||
<table class="metadata plainlinks ambox ambox-content" role="presentation">
|
||||
<tbody>
|
||||
@ -105,4 +108,3 @@ $ pup < robots.html table -l 2
|
||||
* Attribute css selectors.
|
||||
* Print attribute value rather than html ({href})
|
||||
* Print result as JSON (--json)
|
||||
* Print colorfully
|
||||
|
92
main.go
92
main.go
@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"code.google.com/p/go.net/html"
|
||||
"code.google.com/p/go.net/html/atom"
|
||||
"fmt"
|
||||
"github.com/ericchiang/pup/selector"
|
||||
"io"
|
||||
@ -12,12 +11,15 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
const VERSION = "0.1.0"
|
||||
|
||||
var (
|
||||
// Flags
|
||||
inputStream io.ReadCloser = os.Stdin
|
||||
sep string = " "
|
||||
indentString string = " "
|
||||
maxPrintLevel int = -1
|
||||
printNumber bool = false
|
||||
printColor bool = false
|
||||
|
||||
// Helpers
|
||||
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
|
||||
@ -25,67 +27,6 @@ var (
|
||||
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
|
||||
)
|
||||
|
||||
func printIndent(level int) {
|
||||
for ; level > 0; level-- {
|
||||
fmt.Print(sep)
|
||||
}
|
||||
}
|
||||
|
||||
// Is this node a tag with no end tag such as <meta> or <br>?
|
||||
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
|
||||
func isVoidElement(n *html.Node) bool {
|
||||
switch n.DataAtom {
|
||||
case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
|
||||
atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
|
||||
atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
func printChildren(n *html.Node, level int) {
|
||||
if maxPrintLevel > -1 {
|
||||
if level >= maxPrintLevel {
|
||||
printIndent(level)
|
||||
fmt.Println("...")
|
||||
return
|
||||
}
|
||||
}
|
||||
child := n.FirstChild
|
||||
for child != nil {
|
||||
PrintNode(child, level)
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
func PrintNode(n *html.Node, level int) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
s := n.Data
|
||||
if !whitespaceRegexp.MatchString(s) {
|
||||
s = preWhitespace.ReplaceAllString(s, "")
|
||||
s = postWhitespace.ReplaceAllString(s, "")
|
||||
printIndent(level)
|
||||
fmt.Println(s)
|
||||
}
|
||||
case html.ElementNode:
|
||||
printIndent(level)
|
||||
fmt.Printf("<%s", n.Data)
|
||||
for _, a := range n.Attr {
|
||||
fmt.Printf(` %s="%s"`, a.Key, a.Val)
|
||||
}
|
||||
fmt.Print(">\n")
|
||||
if !isVoidElement(n) {
|
||||
printChildren(n, level+1)
|
||||
printIndent(level)
|
||||
fmt.Printf("</%s>\n", n.Data)
|
||||
}
|
||||
case html.CommentNode, html.DoctypeNode, html.DocumentNode:
|
||||
printChildren(n, level)
|
||||
}
|
||||
}
|
||||
|
||||
func Fatal(format string, args ...interface{}) {
|
||||
fmt.Fprintf(os.Stderr, format, args...)
|
||||
fmt.Fprintf(os.Stderr, "\n")
|
||||
@ -93,18 +34,24 @@ func Fatal(format string, args ...interface{}) {
|
||||
}
|
||||
|
||||
func printHelp() {
|
||||
Fatal(`Usage:
|
||||
helpString := `Usage
|
||||
|
||||
pup [list of css selectors]
|
||||
|
||||
Flags:
|
||||
Version
|
||||
|
||||
%s
|
||||
|
||||
Flags
|
||||
|
||||
-c --color print result with color
|
||||
-f --file file to read from
|
||||
-h --help display this help
|
||||
-i --indent number of spaces to use for indent or character
|
||||
-n --number print number of elements selected
|
||||
-l --level restrict number of levels printed
|
||||
`)
|
||||
-l --limit restrict number of levels printed
|
||||
--version display version`
|
||||
Fatal(helpString, VERSION)
|
||||
}
|
||||
|
||||
func processFlags(cmds []string) []string {
|
||||
@ -120,6 +67,8 @@ func processFlags(cmds []string) []string {
|
||||
for i = 0; i < len(cmds); i++ {
|
||||
cmd := cmds[i]
|
||||
switch cmd {
|
||||
case "-c", "--color":
|
||||
printColor = true
|
||||
case "-f", "--file":
|
||||
filename := cmds[i+1]
|
||||
inputStream, err = os.Open(filename)
|
||||
@ -133,20 +82,22 @@ func processFlags(cmds []string) []string {
|
||||
case "-i", "--indent":
|
||||
indentLevel, err := strconv.Atoi(cmds[i+1])
|
||||
if err == nil {
|
||||
sep = strings.Repeat(" ", indentLevel)
|
||||
indentString = strings.Repeat(" ", indentLevel)
|
||||
} else {
|
||||
sep = cmds[i+1]
|
||||
indentString = cmds[i+1]
|
||||
}
|
||||
i++
|
||||
case "-n", "--number":
|
||||
printNumber = true
|
||||
case "-l", "--level":
|
||||
case "-l", "--limit":
|
||||
maxPrintLevel, err = strconv.Atoi(cmds[i+1])
|
||||
if err != nil {
|
||||
Fatal("Argument for '%s' must be numeric",
|
||||
cmds)
|
||||
}
|
||||
i++
|
||||
case "--version":
|
||||
Fatal(VERSION)
|
||||
default:
|
||||
if cmd[0] == '-' {
|
||||
Fatal("Unrecognized flag '%s'", cmd)
|
||||
@ -191,6 +142,7 @@ func main() {
|
||||
fmt.Println(len(currNodes))
|
||||
} else {
|
||||
for _, s := range currNodes {
|
||||
// defined in `printing.go`
|
||||
PrintNode(s, 0)
|
||||
}
|
||||
}
|
||||
|
98
printing.go
Normal file
98
printing.go
Normal file
@ -0,0 +1,98 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"code.google.com/p/go.net/html"
|
||||
"code.google.com/p/go.net/html/atom"
|
||||
"fmt"
|
||||
"github.com/fatih/color"
|
||||
)
|
||||
|
||||
var (
|
||||
tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold)
|
||||
tokenColor = color.New(color.FgCyan).Add(color.Bold)
|
||||
attrKeyColor = color.New(color.FgRed)
|
||||
quoteColor = color.New(color.FgBlue)
|
||||
)
|
||||
|
||||
func printIndent(level int) {
|
||||
for ; level > 0; level-- {
|
||||
fmt.Print(indentString)
|
||||
}
|
||||
}
|
||||
|
||||
// Is this node a tag with no end tag such as <meta> or <br>?
|
||||
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
|
||||
func isVoidElement(n *html.Node) bool {
|
||||
switch n.DataAtom {
|
||||
case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
|
||||
atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
|
||||
atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
func printChildren(n *html.Node, level int) {
|
||||
if maxPrintLevel > -1 {
|
||||
if level >= maxPrintLevel {
|
||||
printIndent(level)
|
||||
fmt.Println("...")
|
||||
return
|
||||
}
|
||||
}
|
||||
child := n.FirstChild
|
||||
for child != nil {
|
||||
PrintNode(child, level)
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
func PrintNode(n *html.Node, level int) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
s := n.Data
|
||||
if !whitespaceRegexp.MatchString(s) {
|
||||
s = preWhitespace.ReplaceAllString(s, "")
|
||||
s = postWhitespace.ReplaceAllString(s, "")
|
||||
printIndent(level)
|
||||
fmt.Println(s)
|
||||
}
|
||||
case html.ElementNode:
|
||||
printIndent(level)
|
||||
if printColor {
|
||||
tokenColor.Print("<")
|
||||
tagColor.Printf("%s", n.Data)
|
||||
} else {
|
||||
fmt.Printf("<%s", n.Data)
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
if printColor {
|
||||
fmt.Print(" ")
|
||||
attrKeyColor.Printf("%s", a.Key)
|
||||
tokenColor.Print("=")
|
||||
quoteColor.Printf(`"%s"`, a.Val)
|
||||
} else {
|
||||
fmt.Printf(` %s="%s"`, a.Key, a.Val)
|
||||
}
|
||||
}
|
||||
if printColor {
|
||||
tokenColor.Println(">")
|
||||
} else {
|
||||
fmt.Print(">\n")
|
||||
}
|
||||
if !isVoidElement(n) {
|
||||
printChildren(n, level+1)
|
||||
printIndent(level)
|
||||
if printColor {
|
||||
tokenColor.Print("</")
|
||||
tagColor.Printf("%s", n.Data)
|
||||
tokenColor.Println(">")
|
||||
} else {
|
||||
fmt.Printf("</%s>\n", n.Data)
|
||||
}
|
||||
}
|
||||
case html.CommentNode, html.DoctypeNode, html.DocumentNode:
|
||||
printChildren(n, level)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user