mirror of
https://github.com/ericchiang/pup
synced 2025-01-15 10:11:16 +00:00
Preserving sibling relationship of all node types
This commit is contained in:
parent
14e452d641
commit
2bb485903c
28
README.md
28
README.md
@@ -272,7 +272,7 @@ $ cat robots.html | pup 'div#p-namespaces a'
|
||||
<a href="/wiki/Robots_exclusion_standard" title="View the content page [c]" accesskey="c">
|
||||
Article
|
||||
</a>
|
||||
<a href="/wiki/Talk:Robots_exclusion_standard" title="Discussion about the content page [t]" accesskey="t">
|
||||
<a href="/wiki/Talk:Robots_exclusion_standard" rel="discussion" title="Discussion about the content page [t]" accesskey="t">
|
||||
Talk
|
||||
</a>
|
||||
```
|
||||
@@ -282,16 +282,25 @@ $ cat robots.html | pup 'div#p-namespaces a json{}'
|
||||
[
|
||||
{
|
||||
"accesskey": "c",
|
||||
"children": [
|
||||
{
|
||||
"text": "Article"
|
||||
}
|
||||
],
|
||||
"href": "/wiki/Robots_exclusion_standard",
|
||||
"tag": "a",
|
||||
"text": "Article",
|
||||
"title": "View the content page [c]"
|
||||
},
|
||||
{
|
||||
"accesskey": "t",
|
||||
"children": [
|
||||
{
|
||||
"text": "Talk"
|
||||
}
|
||||
],
|
||||
"href": "/wiki/Talk:Robots_exclusion_standard",
|
||||
"rel": "discussion",
|
||||
"tag": "a",
|
||||
"text": "Talk",
|
||||
"title": "Discussion about the content page [t]"
|
||||
}
|
||||
]
|
||||
@@ -304,16 +313,25 @@ $ cat robots.html | pup -i 4 'div#p-namespaces a json{}'
|
||||
[
|
||||
{
|
||||
"accesskey": "c",
|
||||
"children": [
|
||||
{
|
||||
"text": "Article"
|
||||
}
|
||||
],
|
||||
"href": "/wiki/Robots_exclusion_standard",
|
||||
"tag": "a",
|
||||
"text": "Article",
|
||||
"title": "View the content page [c]"
|
||||
},
|
||||
{
|
||||
"accesskey": "t",
|
||||
"children": [
|
||||
{
|
||||
"text": "Talk"
|
||||
}
|
||||
],
|
||||
"href": "/wiki/Talk:Robots_exclusion_standard",
|
||||
"rel": "discussion",
|
||||
"tag": "a",
|
||||
"text": "Talk",
|
||||
"title": "Discussion about the content page [t]"
|
||||
}
|
||||
]
|
||||
|
26
display.go
26
display.go
@@ -272,14 +272,11 @@ func jsonify(node *html.Node) map[string]interface{} {
|
||||
}
|
||||
}
|
||||
}
|
||||
vals["tag"] = node.DataAtom.String()
|
||||
children := []interface{}{}
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Type {
|
||||
switch node.Type {
|
||||
case html.ElementNode:
|
||||
children = append(children, jsonify(child))
|
||||
vals["tag"] = node.Data
|
||||
case html.TextNode:
|
||||
text := strings.TrimSpace(child.Data)
|
||||
text := strings.TrimSpace(node.Data)
|
||||
if text != "" {
|
||||
if pupEscapeHTML {
|
||||
// don't escape javascript
|
||||
@@ -287,24 +284,21 @@ func jsonify(node *html.Node) map[string]interface{} {
|
||||
text = html.EscapeString(text)
|
||||
}
|
||||
}
|
||||
// if there is already text we'll append it
|
||||
currText, ok := vals["text"]
|
||||
if ok {
|
||||
text = fmt.Sprintf("%s %s", currText, text)
|
||||
}
|
||||
vals["text"] = text
|
||||
}
|
||||
case html.CommentNode:
|
||||
comment := strings.TrimSpace(child.Data)
|
||||
comment := strings.TrimSpace(node.Data)
|
||||
if pupEscapeHTML {
|
||||
comment = html.EscapeString(comment)
|
||||
}
|
||||
currComment, ok := vals["comment"]
|
||||
if ok {
|
||||
comment = fmt.Sprintf("%s %s", currComment, comment)
|
||||
}
|
||||
vals["comment"] = comment
|
||||
}
|
||||
children := []interface{}{}
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
jChild := jsonify(child)
|
||||
if len(jChild) > 0 {
|
||||
children = append(children, jChild)
|
||||
}
|
||||
}
|
||||
if len(children) > 0 {
|
||||
vals["children"] = children
|
||||
|
@@ -10,7 +10,7 @@ a92e50c09cd56970625ac3b74efbddb83b2731bb table li
|
||||
66950e746590d7f4e9cfe3d1adef42cd0addcf1d table li:last-of-type
|
||||
0a37d612cd4c67a42bd147b1edc5a1128456b017 table a[title="The Practice of Programming"]
|
||||
0d3918d54f868f13110262ffbb88cbb0b083057d table a[title="The Practice of Programming"] text{}
|
||||
ecb542a30fc75c71a0c6380692cbbc4266ccbce4 json{}
|
||||
199188dc8f1522426a628e41d96264bffb8beb0f json{}
|
||||
95ef88ded9dab22ee3206cca47b9c3a376274bda text{}
|
||||
e4f7358fbb7bb1748a296fa2a7e815fa7de0a08b .after-portlet
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 .after
|
||||
@@ -34,7 +34,7 @@ d314e83b059bb876b0e5ee76aa92d54987961f9a .navbox-list li:nth-last-child(1)
|
||||
613bf65ac4042b6ee0a7a47f08732fdbe1b5b06b #toc
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a text{}
|
||||
97d170e1550eee4afc0af065b78cda302a97674c #toc li + a json{}
|
||||
cd0d4cc32346750408f7d4f5e78ec9a6e5b79a0d #toc li + a json{}
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a + span
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span
|
||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li > li
|
||||
|
Loading…
Reference in New Issue
Block a user