mirror of
https://github.com/ericchiang/pup
synced 2024-11-24 00:48:36 +00:00
json{} displayer added
This commit is contained in:
parent
dd9e318ca4
commit
dfe4a38973
95
README.md
95
README.md
@ -34,16 +34,22 @@ Ew, HTML. Let's run that through some pup selectors:
|
||||
$ curl -s https://news.ycombinator.com/ | pup 'td.title a[href^=http] attr{href}'
|
||||
```
|
||||
|
||||
Even better, let's grab the titles too:
|
||||
|
||||
```bash
|
||||
$ curl -s https://news.ycombinator.com/ | pup 'td.title a[href^=http] json{}'
|
||||
```
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```bash
|
||||
$ cat index.html | pup [selectors and flags]
|
||||
$ cat index.html | pup [flags] [selectors] [optional display function]
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
$ pup < index.html [selectors and flags]
|
||||
$ pup < index.html [flags] [selectors] [optional display function]
|
||||
```
|
||||
|
||||
## Examples
|
||||
@ -185,7 +191,7 @@ You can mix and match selectors as you wish.
|
||||
cat index.html | pup element#id[attribute=value]
|
||||
```
|
||||
|
||||
## Functions
|
||||
## Display Functions
|
||||
|
||||
Non-HTML selectors which affect the output type are implemented as functions
|
||||
which can be provided as a final argument.
|
||||
@ -231,6 +237,85 @@ $ pup < robots.html a attr{href} | head
|
||||
//en.wikivoyage.org/wiki/
|
||||
```
|
||||
|
||||
#### `json{}`
|
||||
|
||||
Print HTML as JSON.
|
||||
|
||||
```bash
|
||||
$ cat robots.html | pup div#p-namespaces a
|
||||
<a href="/wiki/Robots_exclusion_standard" title="View the content page [c]" accesskey="c">
|
||||
Article
|
||||
</a>
|
||||
<a href="/wiki/Talk:Robots_exclusion_standard" title="Discussion about the content page [t]" accesskey="t">
|
||||
Talk
|
||||
</a>
|
||||
```
|
||||
|
||||
```bash
|
||||
$ cat robots.html | pup div#p-namespaces a json{}
|
||||
[
|
||||
{
|
||||
"attrs": {
|
||||
"accesskey": "c",
|
||||
"href": "/wiki/Robots_exclusion_standard",
|
||||
"title": "View the content page [c]"
|
||||
},
|
||||
"tag": "a",
|
||||
"text": "Article"
|
||||
},
|
||||
{
|
||||
"attrs": {
|
||||
"accesskey": "t",
|
||||
"href": "/wiki/Talk:Robots_exclusion_standard",
|
||||
"title": "Discussion about the content page [t]"
|
||||
},
|
||||
"tag": "a",
|
||||
"text": "Talk"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
Use the `-i` / `--indent` flag to control the indent level.
|
||||
|
||||
```bash
|
||||
$ cat robots.html | pup --indent 4 div#p-namespaces a json{}
|
||||
[
|
||||
{
|
||||
"attrs": {
|
||||
"accesskey": "c",
|
||||
"href": "/wiki/Robots_exclusion_standard",
|
||||
"title": "View the content page [c]"
|
||||
},
|
||||
"tag": "a",
|
||||
"text": "Article"
|
||||
},
|
||||
{
|
||||
"attrs": {
|
||||
"accesskey": "t",
|
||||
"href": "/wiki/Talk:Robots_exclusion_standard",
|
||||
"title": "Discussion about the content page [t]"
|
||||
},
|
||||
"tag": "a",
|
||||
"text": "Talk"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
If the selectors only return one element the results will be printed as a JSON
|
||||
object, not a list.
|
||||
|
||||
```bash
|
||||
$ cat robots.html | pup --indent 4 title json{}
|
||||
{
|
||||
"tag": "title",
|
||||
"text": "Robots exclusion standard - Wikipedia, the free encyclopedia"
|
||||
}
|
||||
```
|
||||
|
||||
Because there is no universal standard for converting HTML/XML to JSON, a
|
||||
method has been chosen which hopefully fits. The goal is simply to get the
|
||||
output of pup into a more consumable format.
|
||||
|
||||
## Flags
|
||||
|
||||
```bash
|
||||
@ -243,6 +328,6 @@ $ pup < robots.html a attr{href} | head
|
||||
--version display version
|
||||
```
|
||||
|
||||
## TODO:
|
||||
## TODO
|
||||
|
||||
* Print as json function `json{}`
|
||||
Add more tests!
|
||||
|
130
display.go
Normal file
130
display.go
Normal file
@ -0,0 +1,130 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"code.google.com/p/go.net/html"
|
||||
)
|
||||
|
||||
type Displayer interface {
|
||||
Display(nodes []*html.Node)
|
||||
}
|
||||
|
||||
type TextDisplayer struct {
|
||||
}
|
||||
|
||||
func (t TextDisplayer) Display(nodes []*html.Node) {
|
||||
for _, node := range nodes {
|
||||
if node.Type == html.TextNode {
|
||||
fmt.Println(node.Data)
|
||||
}
|
||||
children := []*html.Node{}
|
||||
child := node.FirstChild
|
||||
for child != nil {
|
||||
children = append(children, child)
|
||||
child = child.NextSibling
|
||||
}
|
||||
t.Display(children)
|
||||
}
|
||||
}
|
||||
|
||||
type AttrDisplayer struct {
|
||||
Attr string
|
||||
}
|
||||
|
||||
func (a AttrDisplayer) Display(nodes []*html.Node) {
|
||||
for _, node := range nodes {
|
||||
attributes := node.Attr
|
||||
for _, attr := range attributes {
|
||||
if attr.Key == a.Attr {
|
||||
val := html.EscapeString(attr.Val)
|
||||
fmt.Printf("%s\n", val)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// JSONDisplayer prints nodes as JSON. It carries no state.
type JSONDisplayer struct{}
|
||||
|
||||
// returns a jsonifiable struct
|
||||
func jsonify(node *html.Node) map[string]interface{} {
|
||||
vals := map[string]interface{}{}
|
||||
if len(node.Attr) > 0 {
|
||||
attrs := map[string]string{}
|
||||
for _, attr := range node.Attr {
|
||||
attrs[attr.Key] = html.EscapeString(attr.Val)
|
||||
}
|
||||
vals["attrs"] = attrs
|
||||
}
|
||||
vals["tag"] = node.DataAtom.String()
|
||||
children := []interface{}{}
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Type {
|
||||
case html.ElementNode:
|
||||
children = append(children, jsonify(child))
|
||||
case html.TextNode:
|
||||
text := strings.TrimSpace(child.Data)
|
||||
if text != "" {
|
||||
// if there is already text we'll append it
|
||||
currText, ok := vals["text"]
|
||||
if ok {
|
||||
text = fmt.Sprintf("%s %s", currText, text)
|
||||
}
|
||||
vals["text"] = text
|
||||
}
|
||||
}
|
||||
}
|
||||
return vals
|
||||
}
|
||||
|
||||
func (j JSONDisplayer) Display(nodes []*html.Node) {
|
||||
var data []byte
|
||||
var err error
|
||||
switch len(nodes) {
|
||||
case 1:
|
||||
jsonNode := jsonify(nodes[0])
|
||||
data, err = json.MarshalIndent(&jsonNode, "", indentString)
|
||||
default:
|
||||
jsonNodes := []map[string]interface{}{}
|
||||
for _, node := range nodes {
|
||||
jsonNodes = append(jsonNodes, jsonify(node))
|
||||
}
|
||||
data, err = json.MarshalIndent(&jsonNodes, "", indentString)
|
||||
}
|
||||
if err != nil {
|
||||
panic("Could not jsonify nodes")
|
||||
}
|
||||
fmt.Printf("%s\n", data)
|
||||
}
|
||||
|
||||
// Patterns used to recognise a trailing display function argument.
var (
	// displayMatcher matches any argument that ends in a {...} group.
	displayMatcher = regexp.MustCompile(`\{[^\}]*\}$`)
	// textFuncMatcher matches exactly "text{}".
	textFuncMatcher = regexp.MustCompile(`^text\{\}$`)
	// attrFuncMatcher matches "attr{...}" and captures the text in the braces.
	attrFuncMatcher = regexp.MustCompile(`^attr\{([^\}]*)\}$`)
	// jsonFuncMatcher matches "json{...}" and captures the text in the braces.
	jsonFuncMatcher = regexp.MustCompile(`^json\{([^\}]*)\}$`)
)
|
||||
|
||||
func NewDisplayFunc(text string) (Displayer, error) {
|
||||
if !displayMatcher.MatchString(text) {
|
||||
return nil, fmt.Errorf("Not a display function")
|
||||
}
|
||||
switch {
|
||||
case textFuncMatcher.MatchString(text):
|
||||
return TextDisplayer{}, nil
|
||||
case attrFuncMatcher.MatchString(text):
|
||||
matches := attrFuncMatcher.FindStringSubmatch(text)
|
||||
if len(matches) != 2 {
|
||||
return nil, fmt.Errorf("")
|
||||
} else {
|
||||
return AttrDisplayer{matches[1]}, nil
|
||||
}
|
||||
case jsonFuncMatcher.MatchString(text):
|
||||
return JSONDisplayer{}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("Not a display function")
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
package funcs
|
||||
|
||||
import (
|
||||
"code.google.com/p/go.net/html"
|
||||
"fmt"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
type Displayer interface {
|
||||
Display(nodes []*html.Node)
|
||||
}
|
||||
|
||||
type TextDisplayer struct {
|
||||
}
|
||||
|
||||
func (t TextDisplayer) Display(nodes []*html.Node) {
|
||||
for _, node := range nodes {
|
||||
if node.Type == html.TextNode {
|
||||
fmt.Println(node.Data)
|
||||
}
|
||||
children := []*html.Node{}
|
||||
child := node.FirstChild
|
||||
for child != nil {
|
||||
children = append(children, child)
|
||||
child = child.NextSibling
|
||||
}
|
||||
t.Display(children)
|
||||
}
|
||||
}
|
||||
|
||||
type AttrDisplayer struct {
|
||||
Attr string
|
||||
}
|
||||
|
||||
func (a AttrDisplayer) Display(nodes []*html.Node) {
|
||||
for _, node := range nodes {
|
||||
attributes := node.Attr
|
||||
for _, attr := range attributes {
|
||||
if attr.Key == a.Attr {
|
||||
val := html.EscapeString(attr.Val)
|
||||
fmt.Printf("%s\n", val)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Patterns used to recognise a trailing display function argument.
var (
	// displayMatcher matches any argument that ends in a {...} group.
	displayMatcher = regexp.MustCompile(`\{[^\}]*\}$`)
	// textFuncMatcher matches exactly "text{}".
	textFuncMatcher = regexp.MustCompile(`^text\{\}$`)
	// attrFuncMatcher matches "attr{...}" and captures the text in the braces.
	attrFuncMatcher = regexp.MustCompile(`^attr\{([^\}]*)\}$`)
)
|
||||
|
||||
func NewDisplayFunc(text string) (Displayer, error) {
|
||||
if !displayMatcher.MatchString(text) {
|
||||
return nil, fmt.Errorf("Not a display function")
|
||||
}
|
||||
switch {
|
||||
case textFuncMatcher.MatchString(text):
|
||||
return TextDisplayer{}, nil
|
||||
case attrFuncMatcher.MatchString(text):
|
||||
matches := attrFuncMatcher.FindStringSubmatch(text)
|
||||
if len(matches) != 2 {
|
||||
return nil, fmt.Errorf("")
|
||||
} else {
|
||||
return AttrDisplayer{matches[1]}, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("Not a display function")
|
||||
}
|
7
main.go
7
main.go
@ -4,7 +4,6 @@ import (
|
||||
"code.google.com/p/go.net/html"
|
||||
"code.google.com/p/go.net/html/charset"
|
||||
"fmt"
|
||||
"github.com/ericchiang/pup/funcs"
|
||||
"github.com/ericchiang/pup/selector"
|
||||
"io"
|
||||
"os"
|
||||
@ -12,7 +11,7 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
const VERSION string = "0.3.0"
|
||||
const VERSION string = "0.3.1"
|
||||
|
||||
var (
|
||||
// Flags
|
||||
@ -22,7 +21,7 @@ var (
|
||||
maxPrintLevel int = -1
|
||||
printNumber bool = false
|
||||
printColor bool = false
|
||||
displayer funcs.Displayer = nil
|
||||
displayer Displayer = nil
|
||||
)
|
||||
|
||||
// Print to stderr and exit
|
||||
@ -177,7 +176,7 @@ func main() {
|
||||
// if this is the last element, check for a function like
|
||||
// text{} or attr{}
|
||||
if i+1 == len(cmds) {
|
||||
d, err := funcs.NewDisplayFunc(cmd)
|
||||
d, err := NewDisplayFunc(cmd)
|
||||
if err == nil {
|
||||
displayer = d
|
||||
selectors = selectors[0 : len(cmds)-1]
|
||||
|
Loading…
Reference in New Issue
Block a user