mirror of https://github.com/ericchiang/pup
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
183 lines
4.3 KiB
183 lines
4.3 KiB
package main |
|
|
|
import ( |
|
"fmt" |
|
"io" |
|
"os" |
|
"strconv" |
|
"strings" |
|
|
|
"golang.org/x/net/html" |
|
"golang.org/x/net/html/charset" |
|
"golang.org/x/text/transform" |
|
) |
|
|
|
var ( |
|
pupIn io.ReadCloser = os.Stdin |
|
pupCharset string = "" |
|
pupMaxPrintLevel int = -1 |
|
pupPreformatted bool = false |
|
pupPrintColor bool = false |
|
pupEscapeHTML bool = true |
|
pupIndentString string = " " |
|
pupDisplayer Displayer = TreeDisplayer{} |
|
) |
|
|
|
// Parse the html while handling the charset |
|
func ParseHTML(r io.Reader, cs string) (*html.Node, error) { |
|
var err error |
|
if cs == "" { |
|
// attempt to guess the charset of the HTML document |
|
r, err = charset.NewReader(r, "") |
|
if err != nil { |
|
return nil, err |
|
} |
|
} else { |
|
// let the user specify the charset |
|
e, name := charset.Lookup(cs) |
|
if name == "" { |
|
return nil, fmt.Errorf("'%s' is not a valid charset", cs) |
|
} |
|
r = transform.NewReader(r, e.NewDecoder()) |
|
} |
|
return html.Parse(r) |
|
} |
|
|
|
// PrintHelp writes the usage text (with VERSION substituted in) to w and then
// terminates the process with exitCode. It never returns to the caller.
func PrintHelp(w io.Writer, exitCode int) { |
|
helpString := `Usage |
|
pup [flags] [selectors] [optional display function] |
|
Version |
|
%s |
|
Flags |
|
-c --color print result with color |
|
-f --file file to read from |
|
-h --help display this help |
|
-i --indent number of spaces to use for indent or character |
|
-n --number print number of elements selected |
|
-l --limit restrict number of levels printed |
|
-p --plain don't escape html |
|
--pre preserve preformatted text |
|
--charset specify the charset for pup to use |
|
--version display version |
|
` |
|
// VERSION fills the single %s placeholder in the help text.
fmt.Fprintf(w, helpString, VERSION) |
|
// Exit here so callers do not need to handle a return from the help path.
os.Exit(exitCode) |
|
} |
|
|
|
func ParseArgs() ([]string, error) { |
|
cmds, err := ProcessFlags(os.Args[1:]) |
|
if err != nil { |
|
return []string{}, err |
|
} |
|
return ParseCommands(strings.Join(cmds, " ")) |
|
} |
|
|
|
// Process command arguments and return all non-flags. |
|
func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) { |
|
var i int |
|
defer func() { |
|
if r := recover(); r != nil { |
|
err = fmt.Errorf("Option '%s' requires an argument", cmds[i]) |
|
} |
|
}() |
|
nonFlagCmds = make([]string, len(cmds)) |
|
n := 0 |
|
for i = 0; i < len(cmds); i++ { |
|
cmd := cmds[i] |
|
switch cmd { |
|
case "-c", "--color": |
|
pupPrintColor = true |
|
case "-p", "--plain": |
|
pupEscapeHTML = false |
|
case "--pre": |
|
pupPreformatted = true |
|
case "-f", "--file": |
|
filename := cmds[i+1] |
|
pupIn, err = os.Open(filename) |
|
if err != nil { |
|
fmt.Fprintf(os.Stderr, "%s\n", err.Error()) |
|
os.Exit(2) |
|
} |
|
i++ |
|
case "-h", "--help": |
|
PrintHelp(os.Stdout, 0) |
|
case "-i", "--indent": |
|
indentLevel, err := strconv.Atoi(cmds[i+1]) |
|
if err == nil { |
|
pupIndentString = strings.Repeat(" ", indentLevel) |
|
} else { |
|
pupIndentString = cmds[i+1] |
|
} |
|
i++ |
|
case "-l", "--limit": |
|
pupMaxPrintLevel, err = strconv.Atoi(cmds[i+1]) |
|
if err != nil { |
|
return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd) |
|
} |
|
i++ |
|
case "--charset": |
|
pupCharset = cmds[i+1] |
|
i++ |
|
case "--version": |
|
fmt.Println(VERSION) |
|
os.Exit(0) |
|
case "-n", "--number": |
|
pupDisplayer = NumDisplayer{} |
|
default: |
|
if cmd[0] == '-' { |
|
return []string{}, fmt.Errorf("Unrecognized flag '%s'", cmd) |
|
} |
|
nonFlagCmds[n] = cmds[i] |
|
n++ |
|
} |
|
} |
|
return nonFlagCmds[:n], nil |
|
} |
|
|
|
// ParseCommands splits cmdString into tokens. Spaces separate tokens and are
// dropped; each comma separates tokens and is also emitted as its own ","
// token. Text inside single or double quotes is kept intact (quotes
// included), and a backslash inside quotes protects the byte that follows it.
// An unterminated quote yields an error and an empty slice.
func ParseCommands(cmdString string) ([]string, error) {
	tokens := []string{}
	start := 0

	// flush emits the pending token cmdString[start:end] when it is non-empty.
	flush := func(end int) {
		if end > start {
			tokens = append(tokens, cmdString[start:end])
		}
	}

	for pos := 0; pos < len(cmdString); pos++ {
		switch ch := cmdString[pos]; ch {
		case ' ', ',':
			flush(pos)
			if ch == ',' {
				tokens = append(tokens, ",")
			}
			start = pos + 1
		case '\'', '"':
			// Advance pos to the matching close quote; the quoted text stays
			// part of the pending token.
			closed := false
			for pos++; pos < len(cmdString); pos++ {
				if cmdString[pos] == '\\' {
					pos++ // step over the escaped byte
					continue
				}
				if cmdString[pos] == ch {
					closed = true
					break
				}
			}
			if !closed {
				return []string{}, fmt.Errorf("Unmatched open quote (%c)", ch)
			}
		}
	}

	flush(len(cmdString))
	return tokens, nil
}
|
|
|