mirror of
https://github.com/ericchiang/pup
synced 2024-11-24 08:58:08 +00:00
commit
af61952d1c
4
main.go
4
main.go
@ -11,7 +11,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
const VERSION string = "0.1.1"
|
const VERSION string = "0.1.2a"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Flags
|
// Flags
|
||||||
@ -125,7 +125,7 @@ func main() {
|
|||||||
PrintNode(root, 0)
|
PrintNode(root, 0)
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
selectors := make([]*selector.Selector, len(cmds))
|
selectors := make([]selector.Selector, len(cmds))
|
||||||
for i, cmd := range cmds {
|
for i, cmd := range cmds {
|
||||||
if i+1 == len(cmds) {
|
if i+1 == len(cmds) {
|
||||||
d, err := funcs.NewDisplayFunc(cmd)
|
d, err := funcs.NewDisplayFunc(cmd)
|
||||||
|
@ -8,11 +8,20 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// A CSS Selector
|
// A CSS Selector
|
||||||
type Selector struct {
|
type BasicSelector struct {
|
||||||
Name *regexp.Regexp
|
Name *regexp.Regexp
|
||||||
Attrs map[string]*regexp.Regexp
|
Attrs map[string]*regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Selector interface {
|
||||||
|
// Does this selector match a given node?
|
||||||
|
Match(node *html.Node) bool
|
||||||
|
// Find all nodes which match a selector. May return itself.
|
||||||
|
FindAll(node *html.Node) []*html.Node
|
||||||
|
// Find all child nodes which match a selector.
|
||||||
|
FindAllChildren(node *html.Node) []*html.Node
|
||||||
|
}
|
||||||
|
|
||||||
type selectorField int
|
type selectorField int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -63,7 +72,7 @@ func parseAttrField(command string) (attrKey string, matcher *regexp.Regexp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Set a field of this selector.
|
// Set a field of this selector.
|
||||||
func (s *Selector) setFieldValue(f selectorField, v string) error {
|
func (s *BasicSelector) setFieldValue(f selectorField, v string) error {
|
||||||
if v == "" {
|
if v == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -98,9 +107,9 @@ func (s *Selector) setFieldValue(f selectorField, v string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Convert a string to a selector.
|
// Convert a string to a selector.
|
||||||
func NewSelector(s string) (*Selector, error) {
|
func NewSelector(s string) (Selector, error) {
|
||||||
attrs := map[string]*regexp.Regexp{}
|
attrs := map[string]*regexp.Regexp{}
|
||||||
selector := &Selector{nil, attrs}
|
selector := BasicSelector{nil, attrs}
|
||||||
nextField := NameField
|
nextField := NameField
|
||||||
start := 0
|
start := 0
|
||||||
// Parse the selector character by character
|
// Parse the selector character by character
|
||||||
@ -153,7 +162,7 @@ func NewSelector(s string) (*Selector, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Find all nodes which match a selector.
|
// Find all child nodes which match a selector.
|
||||||
func (sel *Selector) FindAllChildren(node *html.Node) []*html.Node {
|
func (sel BasicSelector) FindAllChildren(node *html.Node) []*html.Node {
|
||||||
selected := []*html.Node{}
|
selected := []*html.Node{}
|
||||||
child := node.FirstChild
|
child := node.FirstChild
|
||||||
for child != nil {
|
for child != nil {
|
||||||
@ -165,7 +174,7 @@ func (sel *Selector) FindAllChildren(node *html.Node) []*html.Node {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Find all nodes which match a selector. May return itself.
|
// Find all nodes which match a selector. May return itself.
|
||||||
func (sel *Selector) FindAll(node *html.Node) []*html.Node {
|
func (sel BasicSelector) FindAll(node *html.Node) []*html.Node {
|
||||||
selected := []*html.Node{}
|
selected := []*html.Node{}
|
||||||
if sel.Match(node) {
|
if sel.Match(node) {
|
||||||
return []*html.Node{node}
|
return []*html.Node{node}
|
||||||
@ -180,7 +189,7 @@ func (sel *Selector) FindAll(node *html.Node) []*html.Node {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Does this selector match a given node?
|
// Does this selector match a given node?
|
||||||
func (sel *Selector) Match(node *html.Node) bool {
|
func (sel BasicSelector) Match(node *html.Node) bool {
|
||||||
if node.Type != html.ElementNode {
|
if node.Type != html.ElementNode {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
19
tests/README.md
Normal file
19
tests/README.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Tests
|
||||||
|
|
||||||
|
A simple set of tests to help maintain sanity.
|
||||||
|
|
||||||
|
The tests themselves are written in Python and can be run using the
[nose](https://nose.readthedocs.io/en/latest/) tool.
|
||||||
|
|
||||||
|
Install with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ pip install nose
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the following command from either the base directory or this one to perform
|
||||||
|
the tests:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ nosetests
|
||||||
|
```
|
56
tests/tests.py
Normal file
56
tests/tests.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
|
||||||
|
from subprocess import Popen, PIPE, STDOUT
|
||||||
|
|
||||||
|
example_data = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<div class="nav clearfix">
|
||||||
|
My data
|
||||||
|
</div>
|
||||||
|
<p>Some other data</p>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Run a pup command as a subprocess and return what it wrote to stdout.
def run_pup(args, input_html):
    """Feed ``input_html`` to ``pup`` on stdin and return its stdout as text.

    args       -- list of command-line arguments appended after "pup"
    input_html -- text written to the child's stdin

    The child's stderr is captured and discarded. The ``pup`` executable
    is looked up on PATH.
    """
    cmd = ["pup"]
    cmd.extend(args)
    # universal_newlines=True opens the pipes in text mode, so a str can be
    # written and a str comes back on Python 3 as well as on Python 2.
    p = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE,
              universal_newlines=True)
    # communicate() waits for the child to exit, so no extra wait() is needed.
    stdout_data = p.communicate(input=input_html)[0]
    return stdout_data
|
||||||
|
|
||||||
|
# Count the non-empty lines printed by this pup command.
def run_pup_count(args, input_html):
    """Return how many non-empty lines pup printed for ``args``."""
    output = run_pup(args, input_html)
    non_empty = 0
    for line in output.split("\n"):
        if line:
            non_empty += 1
    return non_empty
|
||||||
|
|
||||||
|
def test_class_selector():
    # The ".nav" class selector is expected to print three non-empty lines.
    line_count = run_pup_count([".nav"], example_data)
    assert line_count == 3
|
||||||
|
|
||||||
|
def test_attr_eq():
    # "[class=nav]" is expected to print nothing: the only class attribute
    # in the fixture is "nav clearfix".
    line_count = run_pup_count(["[class=nav]"], example_data)
    assert line_count == 0
|
||||||
|
|
||||||
|
def test_attr_pre():
    # "^=" is a prefix test: "nav clearfix" starts with "nav" ...
    prefix_hit = run_pup_count(["[class^=nav]"], example_data)
    assert prefix_hit == 3
    # ... but does not start with "clearfix".
    prefix_miss = run_pup_count(["[class^=clearfix]"], example_data)
    assert prefix_miss == 0
|
||||||
|
|
||||||
|
def test_attr_post():
    # "$=" is a suffix test: "nav clearfix" does not end with "nav" ...
    suffix_miss = run_pup_count(["[class$=nav]"], example_data)
    assert suffix_miss == 0
    # ... but does end with "clearfix".
    suffix_hit = run_pup_count(["[class$=clearfix]"], example_data)
    assert suffix_hit == 3
|
||||||
|
|
||||||
|
def test_attr_func():
    # With a single "div" selector, attr{class} is expected to print nothing.
    outer_class = run_pup(["div", "attr{class}"], example_data).strip()
    assert outer_class == ""
    # With "div div", attr{class} is expected to print the nested div's class.
    inner_class = run_pup(["div", "div", "attr{class}"], example_data).strip()
    assert inner_class == "nav clearfix"
|
||||||
|
|
||||||
|
def test_text_func():
    # text{} on the <p> element is expected to print its text content.
    paragraph_text = run_pup(["p", "text{}"], example_data).strip()
    assert paragraph_text == "Some other data"
|
Loading…
Reference in New Issue
Block a user