diff --git a/README.md b/README.md
index 00c6e02..e1cb584 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Download a webpage with `wget`.
$ wget http://en.wikipedia.org/wiki/Robots_exclusion_standard -O robots.html
```
-###Clean and indent
+#### Clean and indent
By default `pup` will fill in missing tags and properly indent the page.
@@ -42,7 +42,7 @@ $ cat robots.html | pup --color
# cleaned, indented, and colorful HTML
```
-###Filter by tag
+#### Filter by tag
```bash
$ pup < robots.html title
@@ -50,7 +50,7 @@ $ pup < robots.html title
```
-###Filter by id
+#### Filter by id
```bash
$ pup < robots.html span#See_also
@@ -58,7 +58,7 @@ $ pup < robots.html span#See_also
```
-###Chain selectors together
+#### Chain selectors together
The following two commands are equivalent. (NOTE: pipes do not work with the
`--color` flag)
@@ -86,14 +86,14 @@ Both produce the ouput:
```
-###How many nodes are selected by a filter?
+#### How many nodes are selected by a filter?
```bash
$ pup < robots.html a -n
283
```
-###Limit print level
+#### Limit print level
```bash
$ pup < robots.html table -l 2
@@ -134,6 +134,36 @@ You can mix and match selectors as you wish.
cat index.html | pup element#id[attribute=value]
```
+## Functions
+
+Non-HTML selectors which affect the output type are implemented as functions
+which can be provided as a final argument.
+
+As of now, `text{}` is the only implemented function.
+
+#### `text{}`
+
+Print all text from the selected nodes and their children in depth-first order.
+
+```bash
+$ cat robots.html | pup .mw-headline text{}
+History
+About the standard
+Disadvantages
+Alternatives
+Examples
+Nonstandard extensions
+Crawl-delay directive
+Allow directive
+Sitemap
+Host
+Universal "*" match
+Meta tags and headers
+See also
+References
+External links
+```
+
## Flags
```bash
@@ -148,5 +178,6 @@ cat index.html | pup element#id[attribute=value]
## TODO:
-* Print attribute value rather than html ({href})
-* Print result as JSON (--json)
+* Add a function to print attribute values: `attr{attr1, attr2}`
+* Add a function to print the result as JSON: `json{}`
+* Switch `-n` from a flag to a function
diff --git a/funcs/display.go b/funcs/display.go
new file mode 100644
index 0000000..053b4b6
--- /dev/null
+++ b/funcs/display.go
@@ -0,0 +1,49 @@
+package funcs
+
+import (
+ "code.google.com/p/go.net/html"
+ "fmt"
+ "regexp"
+)
+
+type Displayer interface { // Displayer is implemented by output functions that render a set of selected nodes.
+ Display(nodes []*html.Node) // Display emits output for nodes; it returns nothing to the caller.
+}
+
+type TextDisplayer struct { // TextDisplayer is a Displayer with no fields; its Display method prints text-node content.
+}
+
+func (t TextDisplayer) Display(nodes []*html.Node) { // Display prints the Data of every text node found in nodes and their descendants, one per line.
+ for _, node := range nodes {
+ if node.Type == html.TextNode {
+ fmt.Println(node.Data) // printed via fmt.Println before this node's children are visited
+ }
+ children := []*html.Node{} // direct children of node, collected in sibling order
+ child := node.FirstChild
+ for child != nil {
+ children = append(children, child)
+ child = child.NextSibling
+ }
+ t.Display(children) // recurse into the children, making the overall traversal depth first
+ }
+}
+
+var (
+ // Display function helpers
+ displayMatcher *regexp.Regexp = regexp.MustCompile(`\{[^\}]*\}$`) // matches input that ends with a brace group containing no closing brace inside
+ textFuncMatcher = regexp.MustCompile(`^text\{\}$`) // matches only the exact string text{}
+ attrFuncMatcher = regexp.MustCompile(`^attr\{[^\}]*\}$`) // matches attr followed by one brace group spanning the rest of the input
+)
+
+func NewDisplayFunc(text string) (Displayer, error) { // NewDisplayFunc returns the Displayer named by text, or a nil Displayer and an error when text is not a usable display function.
+ if !displayMatcher.MatchString(text) { // pre-check: text must end in a brace group
+ return nil, fmt.Errorf("Not a display function")
+ }
+ switch {
+ case textFuncMatcher.MatchString(text):
+ return TextDisplayer{}, nil
+ case attrFuncMatcher.MatchString(text): // attr is recognized but no Displayer is returned for it; callers get an error
+ return nil, fmt.Errorf("attr")
+ }
+ return nil, fmt.Errorf("Not a display function") // ends in a brace group but matches neither known function pattern
+}
diff --git a/main.go b/main.go
index f5f08a7..7a04e41 100644
--- a/main.go
+++ b/main.go
@@ -3,6 +3,7 @@ package main
import (
"code.google.com/p/go.net/html"
"fmt"
+ "github.com/ericchiang/pup/funcs"
"github.com/ericchiang/pup/selector"
"io"
"os"
@@ -14,11 +15,13 @@ const VERSION string = "0.1.0"
var (
// Flags
- inputStream io.ReadCloser = os.Stdin
- indentString string = " "
- maxPrintLevel int = -1
- printNumber bool = false
- printColor bool = false
+ attributes []string = []string{}
+ inputStream io.ReadCloser = os.Stdin
+ indentString string = " "
+ maxPrintLevel int = -1
+ printNumber bool = false
+ printColor bool = false
+ displayer funcs.Displayer = nil
)
// Print to stderr and exit
@@ -64,6 +67,9 @@ func ProcessFlags(cmds []string) []string {
for i = 0; i < len(cmds); i++ {
cmd := cmds[i]
switch cmd {
+ case "-a", "--attr":
+ attributes = append(attributes, cmds[i+1])
+ i++
case "-c", "--color":
printColor = true
case "-f", "--file":
@@ -121,6 +127,14 @@ func main() {
}
selectors := make([]*selector.Selector, len(cmds))
for i, cmd := range cmds {
+ if i+1 == len(cmds) {
+ d, err := funcs.NewDisplayFunc(cmd)
+ if err == nil {
+ displayer = d
+ selectors = selectors[0 : len(cmds)-1]
+ break
+ }
+ }
selectors[i], err = selector.NewSelector(cmd)
if err != nil {
Fatal("Selector parse error: %s", err)
@@ -136,7 +150,9 @@ func main() {
}
currNodes = selected
}
- if printNumber {
+ if displayer != nil {
+ displayer.Display(currNodes)
+ } else if printNumber {
fmt.Println(len(currNodes))
} else {
for _, s := range currNodes {
diff --git a/printing.go b/printing.go
index 0d4cb70..37a9dee 100644
--- a/printing.go
+++ b/printing.go
@@ -17,8 +17,8 @@ var (
// Regexp helpers
whitespaceRegexp *regexp.Regexp = regexp.MustCompile(`^\s*$`)
- preWhitespace *regexp.Regexp = regexp.MustCompile(`^\s+`)
- postWhitespace *regexp.Regexp = regexp.MustCompile(`\s+$`)
+ preWhitespace = regexp.MustCompile(`^\s+`)
+ postWhitespace = regexp.MustCompile(`\s+$`)
)
func printIndent(level int) {