2014-11-06 03:31:28 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2014-12-14 04:52:41 +00:00
|
|
|
|
2016-07-23 04:42:23 +00:00
|
|
|
"golang.org/x/net/html"
|
|
|
|
"golang.org/x/net/html/charset"
|
|
|
|
"golang.org/x/text/transform"
|
2014-11-06 03:31:28 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
pupIn io.ReadCloser = os.Stdin
|
2014-12-14 04:52:41 +00:00
|
|
|
pupCharset string = ""
|
2014-11-06 03:31:28 +00:00
|
|
|
pupMaxPrintLevel int = -1
|
2015-04-05 19:02:03 +00:00
|
|
|
pupPreformatted bool = false
|
2014-11-06 03:31:28 +00:00
|
|
|
pupPrintColor bool = false
|
2015-01-20 05:10:39 +00:00
|
|
|
pupEscapeHTML bool = true
|
2014-11-06 03:31:28 +00:00
|
|
|
pupIndentString string = " "
|
|
|
|
pupDisplayer Displayer = TreeDisplayer{}
|
|
|
|
)
|
|
|
|
|
2014-12-14 04:52:41 +00:00
|
|
|
// Parse the html while handling the charset
|
|
|
|
func ParseHTML(r io.Reader, cs string) (*html.Node, error) {
|
|
|
|
var err error
|
|
|
|
if cs == "" {
|
|
|
|
// attempt to guess the charset of the HTML document
|
|
|
|
r, err = charset.NewReader(r, "")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// let the user specify the charset
|
|
|
|
e, name := charset.Lookup(cs)
|
|
|
|
if name == "" {
|
|
|
|
return nil, fmt.Errorf("'%s' is not a valid charset", cs)
|
|
|
|
}
|
|
|
|
r = transform.NewReader(r, e.NewDecoder())
|
|
|
|
}
|
|
|
|
return html.Parse(r)
|
|
|
|
}
|
|
|
|
|
2014-11-06 03:31:28 +00:00
|
|
|
func PrintHelp(w io.Writer, exitCode int) {
|
|
|
|
helpString := `Usage
|
|
|
|
pup [flags] [selectors] [optional display function]
|
|
|
|
Version
|
|
|
|
%s
|
|
|
|
Flags
|
|
|
|
-c --color print result with color
|
|
|
|
-f --file file to read from
|
|
|
|
-h --help display this help
|
|
|
|
-i --indent number of spaces to use for indent or character
|
|
|
|
-n --number print number of elements selected
|
|
|
|
-l --limit restrict number of levels printed
|
2015-02-01 18:52:02 +00:00
|
|
|
-p --plain don't escape html
|
2015-04-05 19:02:03 +00:00
|
|
|
--pre preserve preformatted text
|
2014-12-14 04:52:41 +00:00
|
|
|
--charset specify the charset for pup to use
|
2014-11-06 03:31:28 +00:00
|
|
|
--version display version
|
|
|
|
`
|
|
|
|
fmt.Fprintf(w, helpString, VERSION)
|
|
|
|
os.Exit(exitCode)
|
|
|
|
}
|
|
|
|
|
2014-11-22 15:47:33 +00:00
|
|
|
func ParseArgs() ([]string, error) {
|
|
|
|
cmds, err := ProcessFlags(os.Args[1:])
|
|
|
|
if err != nil {
|
|
|
|
return []string{}, err
|
|
|
|
}
|
2014-11-06 03:31:28 +00:00
|
|
|
return ParseCommands(strings.Join(cmds, " "))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Process command arguments and return all non-flags.
|
2014-11-22 15:47:33 +00:00
|
|
|
func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) {
|
2014-11-06 03:31:28 +00:00
|
|
|
var i int
|
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
2014-11-22 15:47:33 +00:00
|
|
|
err = fmt.Errorf("Option '%s' requires an argument", cmds[i])
|
2014-11-06 03:31:28 +00:00
|
|
|
}
|
|
|
|
}()
|
2014-11-22 15:47:33 +00:00
|
|
|
nonFlagCmds = make([]string, len(cmds))
|
2014-11-06 03:31:28 +00:00
|
|
|
n := 0
|
|
|
|
for i = 0; i < len(cmds); i++ {
|
|
|
|
cmd := cmds[i]
|
|
|
|
switch cmd {
|
|
|
|
case "-c", "--color":
|
|
|
|
pupPrintColor = true
|
2015-02-01 18:52:02 +00:00
|
|
|
case "-p", "--plain":
|
2015-01-20 05:10:39 +00:00
|
|
|
pupEscapeHTML = false
|
2015-04-05 19:02:03 +00:00
|
|
|
case "--pre":
|
|
|
|
pupPreformatted = true
|
2014-11-06 03:31:28 +00:00
|
|
|
case "-f", "--file":
|
|
|
|
filename := cmds[i+1]
|
|
|
|
pupIn, err = os.Open(filename)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Fprintf(os.Stderr, "%s\n", err.Error())
|
|
|
|
os.Exit(2)
|
|
|
|
}
|
|
|
|
i++
|
|
|
|
case "-h", "--help":
|
|
|
|
PrintHelp(os.Stdout, 0)
|
|
|
|
case "-i", "--indent":
|
|
|
|
indentLevel, err := strconv.Atoi(cmds[i+1])
|
|
|
|
if err == nil {
|
|
|
|
pupIndentString = strings.Repeat(" ", indentLevel)
|
|
|
|
} else {
|
|
|
|
pupIndentString = cmds[i+1]
|
|
|
|
}
|
|
|
|
i++
|
|
|
|
case "-l", "--limit":
|
|
|
|
pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1])
|
|
|
|
if err != nil {
|
2014-11-22 15:47:33 +00:00
|
|
|
return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd)
|
2014-11-06 03:31:28 +00:00
|
|
|
}
|
|
|
|
i++
|
2014-12-14 04:52:41 +00:00
|
|
|
case "--charset":
|
|
|
|
pupCharset = cmds[i+1]
|
|
|
|
i++
|
2014-11-06 03:31:28 +00:00
|
|
|
case "--version":
|
|
|
|
fmt.Println(VERSION)
|
|
|
|
os.Exit(0)
|
2015-02-01 18:52:02 +00:00
|
|
|
case "-n", "--number":
|
|
|
|
pupDisplayer = NumDisplayer{}
|
2014-11-06 03:31:28 +00:00
|
|
|
default:
|
|
|
|
if cmd[0] == '-' {
|
2014-11-22 15:47:33 +00:00
|
|
|
return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd)
|
2014-11-06 03:31:28 +00:00
|
|
|
}
|
|
|
|
nonFlagCmds[n] = cmds[i]
|
|
|
|
n++
|
|
|
|
}
|
|
|
|
}
|
2014-11-22 15:47:33 +00:00
|
|
|
return nonFlagCmds[:n], nil
|
2014-11-06 03:31:28 +00:00
|
|
|
}
|
|
|
|
|
2014-11-22 15:47:33 +00:00
|
|
|
// ParseCommands splits cmdString into tokens with awareness of quoted text
// and commas.
//
// Tokens are separated by spaces; runs of spaces produce no empty tokens. A
// comma ends the current token and is emitted as its own "," token. Text
// inside single or double quotes is never split and the quotes stay part of
// the token; within quotes a backslash escapes the following byte.
//
// It returns an empty slice and an error when a quote is opened but never
// closed. The scan is byte-wise, which is safe for UTF-8 input because every
// delimiter is ASCII.
func ParseCommands(cmdString string) ([]string, error) {
	cmds := []string{}
	start, end := 0, len(cmdString) // start is where the pending token begins

	// flush emits the pending token cmdString[start:upTo] unless it is empty.
	flush := func(upTo int) {
		if upTo > start {
			cmds = append(cmds, cmdString[start:upTo])
		}
	}

	for pos := 0; pos < end; pos++ {
		switch c := cmdString[pos]; c {
		case ' ':
			flush(pos)
			start = pos + 1
		case ',':
			flush(pos)
			cmds = append(cmds, ",")
			start = pos + 1
		case '\'', '"':
			// Jump to the closing quote; start is left untouched so the
			// quoted text stays inside the current token.
			closing := closingQuoteIndex(cmdString, pos)
			if closing < 0 {
				return []string{}, fmt.Errorf("Unmatched open quote (%c)", c)
			}
			pos = closing
		}
	}
	flush(end)
	return cmds, nil
}

// closingQuoteIndex returns the index of the quote that closes the one at
// s[open], skipping backslash-escaped bytes, or -1 if the quote is never
// closed.
func closingQuoteIndex(s string, open int) int {
	quote := s[open]
	for i := open + 1; i < len(s); i++ {
		switch s[i] {
		case '\\':
			i++ // skip the escaped byte
		case quote:
			return i
		}
	}
	return -1
}
|