mirror of
https://github.com/ericchiang/pup
synced 2024-11-24 08:58:08 +00:00
added colorful printing
This commit is contained in:
parent
292afbc5f0
commit
957fc30cc1
26
README.md
26
README.md
@ -26,13 +26,13 @@ By default, `pup` will fill in missing tags, and properly indent the page.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ cat robots.html
|
$ cat robots.html
|
||||||
# nasty looking html
|
# nasty looking HTML
|
||||||
$ cat robots.html | pup
|
$ cat robots.html | pup --color
|
||||||
# cleaned and indented html
|
# cleaned, indented, and colorful HTML
|
||||||
```
|
```
|
||||||
|
|
||||||
###Filter by tag
|
###Filter by tag
|
||||||
```
|
```bash
|
||||||
$ pup < robots.html title
|
$ pup < robots.html title
|
||||||
<title>
|
<title>
|
||||||
Robots exclusion standard - Wikipedia, the free encyclopedia
|
Robots exclusion standard - Wikipedia, the free encyclopedia
|
||||||
@ -40,7 +40,7 @@ $ pup < robots.html title
|
|||||||
```
|
```
|
||||||
|
|
||||||
###Filter by id
|
###Filter by id
|
||||||
```
|
```bash
|
||||||
$ pup < robots.html span#See_also
|
$ pup < robots.html span#See_also
|
||||||
<span class="mw-headline" id="See_also">
|
<span class="mw-headline" id="See_also">
|
||||||
See also
|
See also
|
||||||
@ -49,19 +49,20 @@ $ pup < robots.html span#See_also
|
|||||||
|
|
||||||
###Chain selectors together
|
###Chain selectors together
|
||||||
|
|
||||||
The following two commands are equivalent.
|
The following two commands are equivalent. (NOTE: pipes do not work with the
|
||||||
|
`--color` flag)
|
||||||
|
|
||||||
```
|
```bash
|
||||||
$ pup < robots.html table.navbox ul a | tail
|
$ pup < robots.html table.navbox ul a | tail
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
```bash
|
||||||
$ pup < robots.html table.navbox | pup ul | pup a | tail
|
$ pup < robots.html table.navbox | pup ul | pup a | tail
|
||||||
```
|
```
|
||||||
|
|
||||||
Both produce the output:
|
Both produce the output:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
</a>
|
</a>
|
||||||
<a href="/wiki/Stop_words" title="Stop words">
|
<a href="/wiki/Stop_words" title="Stop words">
|
||||||
Stop words
|
Stop words
|
||||||
@ -75,13 +76,15 @@ Both produce the ouput:
|
|||||||
```
|
```
|
||||||
|
|
||||||
###How many nodes are selected by a filter?
|
###How many nodes are selected by a filter?
|
||||||
```
|
|
||||||
|
```bash
|
||||||
$ pup < robots.html a -n
|
$ pup < robots.html a -n
|
||||||
283
|
283
|
||||||
```
|
```
|
||||||
|
|
||||||
###Limit print level
|
###Limit print level
|
||||||
```
|
|
||||||
|
```bash
|
||||||
$ pup < robots.html table -l 2
|
$ pup < robots.html table -l 2
|
||||||
<table class="metadata plainlinks ambox ambox-content" role="presentation">
|
<table class="metadata plainlinks ambox ambox-content" role="presentation">
|
||||||
<tbody>
|
<tbody>
|
||||||
@ -105,4 +108,3 @@ $ pup < robots.html table -l 2
|
|||||||
* Attribute css selectors.
|
* Attribute css selectors.
|
||||||
* Print attribute value rather than html ({href})
|
* Print attribute value rather than html ({href})
|
||||||
* Print result as JSON (--json)
|
* Print result as JSON (--json)
|
||||||
* Print colorfully
|
|
||||||
|
92
main.go
92
main.go
@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"code.google.com/p/go.net/html"
|
"code.google.com/p/go.net/html"
|
||||||
"code.google.com/p/go.net/html/atom"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/ericchiang/pup/selector"
|
"github.com/ericchiang/pup/selector"
|
||||||
"io"
|
"io"
|
||||||
@ -12,12 +11,15 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const VERSION = "0.1.0"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Flags
|
// Flags
|
||||||
inputStream io.ReadCloser = os.Stdin
|
inputStream io.ReadCloser = os.Stdin
|
||||||
sep string = " "
|
indentString string = " "
|
||||||
maxPrintLevel int = -1
|
maxPrintLevel int = -1
|
||||||
printNumber bool = false
|
printNumber bool = false
|
||||||
|
printColor bool = false
|
||||||
|
|
||||||
// Helpers
|
// Helpers
|
||||||
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
|
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
|
||||||
@ -25,67 +27,6 @@ var (
|
|||||||
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
|
postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func printIndent(level int) {
|
|
||||||
for ; level > 0; level-- {
|
|
||||||
fmt.Print(sep)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Is this node a tag with no end tag such as <meta> or <br>?
|
|
||||||
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
|
|
||||||
func isVoidElement(n *html.Node) bool {
|
|
||||||
switch n.DataAtom {
|
|
||||||
case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
|
|
||||||
atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
|
|
||||||
atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func printChildren(n *html.Node, level int) {
|
|
||||||
if maxPrintLevel > -1 {
|
|
||||||
if level >= maxPrintLevel {
|
|
||||||
printIndent(level)
|
|
||||||
fmt.Println("...")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
child := n.FirstChild
|
|
||||||
for child != nil {
|
|
||||||
PrintNode(child, level)
|
|
||||||
child = child.NextSibling
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func PrintNode(n *html.Node, level int) {
|
|
||||||
switch n.Type {
|
|
||||||
case html.TextNode:
|
|
||||||
s := n.Data
|
|
||||||
if !whitespaceRegexp.MatchString(s) {
|
|
||||||
s = preWhitespace.ReplaceAllString(s, "")
|
|
||||||
s = postWhitespace.ReplaceAllString(s, "")
|
|
||||||
printIndent(level)
|
|
||||||
fmt.Println(s)
|
|
||||||
}
|
|
||||||
case html.ElementNode:
|
|
||||||
printIndent(level)
|
|
||||||
fmt.Printf("<%s", n.Data)
|
|
||||||
for _, a := range n.Attr {
|
|
||||||
fmt.Printf(` %s="%s"`, a.Key, a.Val)
|
|
||||||
}
|
|
||||||
fmt.Print(">\n")
|
|
||||||
if !isVoidElement(n) {
|
|
||||||
printChildren(n, level+1)
|
|
||||||
printIndent(level)
|
|
||||||
fmt.Printf("</%s>\n", n.Data)
|
|
||||||
}
|
|
||||||
case html.CommentNode, html.DoctypeNode, html.DocumentNode:
|
|
||||||
printChildren(n, level)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Fatal(format string, args ...interface{}) {
|
func Fatal(format string, args ...interface{}) {
|
||||||
fmt.Fprintf(os.Stderr, format, args...)
|
fmt.Fprintf(os.Stderr, format, args...)
|
||||||
fmt.Fprintf(os.Stderr, "\n")
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
@ -93,18 +34,24 @@ func Fatal(format string, args ...interface{}) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func printHelp() {
|
func printHelp() {
|
||||||
Fatal(`Usage:
|
helpString := `Usage
|
||||||
|
|
||||||
pup [list of css selectors]
|
pup [list of css selectors]
|
||||||
|
|
||||||
Flags:
|
Version
|
||||||
|
|
||||||
|
%s
|
||||||
|
|
||||||
|
Flags
|
||||||
|
|
||||||
|
-c --color print result with color
|
||||||
-f --file file to read from
|
-f --file file to read from
|
||||||
-h --help display this help
|
-h --help display this help
|
||||||
-i --indent number of spaces to use for indent or character
|
-i --indent number of spaces to use for indent or character
|
||||||
-n --number print number of elements selected
|
-n --number print number of elements selected
|
||||||
-l --level restrict number of levels printed
|
-l --limit restrict number of levels printed
|
||||||
`)
|
--version display version`
|
||||||
|
Fatal(helpString, VERSION)
|
||||||
}
|
}
|
||||||
|
|
||||||
func processFlags(cmds []string) []string {
|
func processFlags(cmds []string) []string {
|
||||||
@ -120,6 +67,8 @@ func processFlags(cmds []string) []string {
|
|||||||
for i = 0; i < len(cmds); i++ {
|
for i = 0; i < len(cmds); i++ {
|
||||||
cmd := cmds[i]
|
cmd := cmds[i]
|
||||||
switch cmd {
|
switch cmd {
|
||||||
|
case "-c", "--color":
|
||||||
|
printColor = true
|
||||||
case "-f", "--file":
|
case "-f", "--file":
|
||||||
filename := cmds[i+1]
|
filename := cmds[i+1]
|
||||||
inputStream, err = os.Open(filename)
|
inputStream, err = os.Open(filename)
|
||||||
@ -133,20 +82,22 @@ func processFlags(cmds []string) []string {
|
|||||||
case "-i", "--indent":
|
case "-i", "--indent":
|
||||||
indentLevel, err := strconv.Atoi(cmds[i+1])
|
indentLevel, err := strconv.Atoi(cmds[i+1])
|
||||||
if err == nil {
|
if err == nil {
|
||||||
sep = strings.Repeat(" ", indentLevel)
|
indentString = strings.Repeat(" ", indentLevel)
|
||||||
} else {
|
} else {
|
||||||
sep = cmds[i+1]
|
indentString = cmds[i+1]
|
||||||
}
|
}
|
||||||
i++
|
i++
|
||||||
case "-n", "--number":
|
case "-n", "--number":
|
||||||
printNumber = true
|
printNumber = true
|
||||||
case "-l", "--level":
|
case "-l", "--limit":
|
||||||
maxPrintLevel, err = strconv.Atoi(cmds[i+1])
|
maxPrintLevel, err = strconv.Atoi(cmds[i+1])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Fatal("Argument for '%s' must be numeric",
|
Fatal("Argument for '%s' must be numeric",
|
||||||
cmds)
|
cmds)
|
||||||
}
|
}
|
||||||
i++
|
i++
|
||||||
|
case "--version":
|
||||||
|
Fatal(VERSION)
|
||||||
default:
|
default:
|
||||||
if cmd[0] == '-' {
|
if cmd[0] == '-' {
|
||||||
Fatal("Unrecognized flag '%s'", cmd)
|
Fatal("Unrecognized flag '%s'", cmd)
|
||||||
@ -191,6 +142,7 @@ func main() {
|
|||||||
fmt.Println(len(currNodes))
|
fmt.Println(len(currNodes))
|
||||||
} else {
|
} else {
|
||||||
for _, s := range currNodes {
|
for _, s := range currNodes {
|
||||||
|
// defined in `printing.go`
|
||||||
PrintNode(s, 0)
|
PrintNode(s, 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
98
printing.go
Normal file
98
printing.go
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"code.google.com/p/go.net/html"
|
||||||
|
"code.google.com/p/go.net/html/atom"
|
||||||
|
"fmt"
|
||||||
|
"github.com/fatih/color"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
tagColor *color.Color = color.New(color.FgYellow).Add(color.Bold)
|
||||||
|
tokenColor = color.New(color.FgCyan).Add(color.Bold)
|
||||||
|
attrKeyColor = color.New(color.FgRed)
|
||||||
|
quoteColor = color.New(color.FgBlue)
|
||||||
|
)
|
||||||
|
|
||||||
|
func printIndent(level int) {
|
||||||
|
for ; level > 0; level-- {
|
||||||
|
fmt.Print(indentString)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is this node a tag with no end tag such as <meta> or <br>?
|
||||||
|
// http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
|
||||||
|
func isVoidElement(n *html.Node) bool {
|
||||||
|
switch n.DataAtom {
|
||||||
|
case atom.Area, atom.Base, atom.Br, atom.Col, atom.Command, atom.Embed,
|
||||||
|
atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
|
||||||
|
atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func printChildren(n *html.Node, level int) {
|
||||||
|
if maxPrintLevel > -1 {
|
||||||
|
if level >= maxPrintLevel {
|
||||||
|
printIndent(level)
|
||||||
|
fmt.Println("...")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
child := n.FirstChild
|
||||||
|
for child != nil {
|
||||||
|
PrintNode(child, level)
|
||||||
|
child = child.NextSibling
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func PrintNode(n *html.Node, level int) {
|
||||||
|
switch n.Type {
|
||||||
|
case html.TextNode:
|
||||||
|
s := n.Data
|
||||||
|
if !whitespaceRegexp.MatchString(s) {
|
||||||
|
s = preWhitespace.ReplaceAllString(s, "")
|
||||||
|
s = postWhitespace.ReplaceAllString(s, "")
|
||||||
|
printIndent(level)
|
||||||
|
fmt.Println(s)
|
||||||
|
}
|
||||||
|
case html.ElementNode:
|
||||||
|
printIndent(level)
|
||||||
|
if printColor {
|
||||||
|
tokenColor.Print("<")
|
||||||
|
tagColor.Printf("%s", n.Data)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("<%s", n.Data)
|
||||||
|
}
|
||||||
|
for _, a := range n.Attr {
|
||||||
|
if printColor {
|
||||||
|
fmt.Print(" ")
|
||||||
|
attrKeyColor.Printf("%s", a.Key)
|
||||||
|
tokenColor.Print("=")
|
||||||
|
quoteColor.Printf(`"%s"`, a.Val)
|
||||||
|
} else {
|
||||||
|
fmt.Printf(` %s="%s"`, a.Key, a.Val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if printColor {
|
||||||
|
tokenColor.Println(">")
|
||||||
|
} else {
|
||||||
|
fmt.Print(">\n")
|
||||||
|
}
|
||||||
|
if !isVoidElement(n) {
|
||||||
|
printChildren(n, level+1)
|
||||||
|
printIndent(level)
|
||||||
|
if printColor {
|
||||||
|
tokenColor.Print("</")
|
||||||
|
tagColor.Printf("%s", n.Data)
|
||||||
|
tokenColor.Println(">")
|
||||||
|
} else {
|
||||||
|
fmt.Printf("</%s>\n", n.Data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case html.CommentNode, html.DoctypeNode, html.DocumentNode:
|
||||||
|
printChildren(n, level)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user