mirror of
https://github.com/ericchiang/pup
synced 2025-01-14 17:50:59 +00:00
Allow user to specify charset
This commit is contained in:
parent
d00d65425a
commit
a07991268b
11
README.md
11
README.md
@ -19,16 +19,6 @@ If you're on OS X, use [Brew](http://brew.sh/) to install (no Go required).
|
||||
|
||||
brew install https://raw.githubusercontent.com/EricChiang/pup/master/pup.rb
|
||||
|
||||
For Linux distributions, use the following commands to install under your `PATH`
|
||||
environment variable. You can set `ARCH` to `linux_386` for 32-bit infrastructures.
|
||||
|
||||
ARCH=linux_amd64
|
||||
cd /tmp
|
||||
wget https://github.com/EricChiang/pup/releases/download/v0.3.5/pup_${ARCH}.zip
|
||||
unzip pup_${ARCH}.zip && rm pup_${ARCH}.zip
|
||||
sudo mv pup /usr/local/bin
|
||||
pup --version
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
@ -353,5 +343,6 @@ output of pup into a more consumable format.
|
||||
-i --indent number of spaces to use for indent or character
|
||||
-n --number print number of elements selected
|
||||
-l --limit restrict number of levels printed
|
||||
--charset specify the charset for pup to use
|
||||
--version display version
|
||||
```
|
||||
|
29
parse.go
29
parse.go
@ -6,16 +6,41 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
pupIn io.ReadCloser = os.Stdin
|
||||
pupCharset string = ""
|
||||
pupMaxPrintLevel int = -1
|
||||
pupPrintColor bool = false
|
||||
pupIndentString string = " "
|
||||
pupDisplayer Displayer = TreeDisplayer{}
|
||||
)
|
||||
|
||||
// Parse the html while handling the charset
|
||||
func ParseHTML(r io.Reader, cs string) (*html.Node, error) {
|
||||
var err error
|
||||
if cs == "" {
|
||||
// attempt to guess the charset of the HTML document
|
||||
r, err = charset.NewReader(r, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
// let the user specify the charset
|
||||
e, name := charset.Lookup(cs)
|
||||
if name == "" {
|
||||
return nil, fmt.Errorf("'%s' is not a valid charset", cs)
|
||||
}
|
||||
r = transform.NewReader(r, e.NewDecoder())
|
||||
}
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
func PrintHelp(w io.Writer, exitCode int) {
|
||||
helpString := `Usage
|
||||
pup [flags] [selectors] [optional display function]
|
||||
@ -28,6 +53,7 @@ Flags
|
||||
-i --indent number of spaces to use for indent or character
|
||||
-n --number print number of elements selected
|
||||
-l --limit restrict number of levels printed
|
||||
--charset specify the charset for pup to use
|
||||
--version display version
|
||||
`
|
||||
fmt.Fprintf(w, helpString, VERSION)
|
||||
@ -81,6 +107,9 @@ func ProcessFlags(cmds []string) (nonFlagCmds []string, err error) {
|
||||
return []string{}, fmt.Errorf("Argument for '%s' must be numeric", cmd)
|
||||
}
|
||||
i++
|
||||
case "--charset":
|
||||
pupCharset = cmds[i+1]
|
||||
i++
|
||||
case "--version":
|
||||
fmt.Println(VERSION)
|
||||
os.Exit(0)
|
||||
|
15
pup.go
15
pup.go
@ -5,7 +5,6 @@ import (
|
||||
"os"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// _=,_
|
||||
@ -17,7 +16,7 @@ import (
|
||||
// |/ \_( # |"
|
||||
// C/ ,--___/
|
||||
|
||||
var VERSION string = "0.3.6"
|
||||
var VERSION string = "0.3.7"
|
||||
|
||||
func main() {
|
||||
// process flags and arguments
|
||||
@ -27,19 +26,13 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
// Determine the charset of the input
|
||||
cr, err := charset.NewReader(pupIn, "")
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, err.Error())
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
// Parse the input and get the root node
|
||||
root, err := html.Parse(cr)
|
||||
root, err := ParseHTML(pupIn, pupCharset)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, err.Error())
|
||||
fmt.Fprintf(os.Stderr, "%s\n", err.Error())
|
||||
os.Exit(2)
|
||||
}
|
||||
pupIn.Close()
|
||||
|
||||
// Parse the selectors
|
||||
selectorFuncs := []SelectorFunc{}
|
||||
|
Loading…
Reference in New Issue
Block a user