mirror of
https://github.com/ericchiang/pup
synced 2025-01-28 16:41:32 +00:00
Preserving sibling relationship of all node types
This commit is contained in:
parent
14e452d641
commit
2bb485903c
28
README.md
28
README.md
@ -272,7 +272,7 @@ $ cat robots.html | pup 'div#p-namespaces a'
|
|||||||
<a href="/wiki/Robots_exclusion_standard" title="View the content page [c]" accesskey="c">
|
<a href="/wiki/Robots_exclusion_standard" title="View the content page [c]" accesskey="c">
|
||||||
Article
|
Article
|
||||||
</a>
|
</a>
|
||||||
<a href="/wiki/Talk:Robots_exclusion_standard" title="Discussion about the content page [t]" accesskey="t">
|
<a href="/wiki/Talk:Robots_exclusion_standard" rel="discussion" title="Discussion about the content page [t]" accesskey="t">
|
||||||
Talk
|
Talk
|
||||||
</a>
|
</a>
|
||||||
```
|
```
|
||||||
@ -282,16 +282,25 @@ $ cat robots.html | pup 'div#p-namespaces a json{}'
|
|||||||
[
|
[
|
||||||
{
|
{
|
||||||
"accesskey": "c",
|
"accesskey": "c",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"text": "Article"
|
||||||
|
}
|
||||||
|
],
|
||||||
"href": "/wiki/Robots_exclusion_standard",
|
"href": "/wiki/Robots_exclusion_standard",
|
||||||
"tag": "a",
|
"tag": "a",
|
||||||
"text": "Article",
|
|
||||||
"title": "View the content page [c]"
|
"title": "View the content page [c]"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"accesskey": "t",
|
"accesskey": "t",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"text": "Talk"
|
||||||
|
}
|
||||||
|
],
|
||||||
"href": "/wiki/Talk:Robots_exclusion_standard",
|
"href": "/wiki/Talk:Robots_exclusion_standard",
|
||||||
|
"rel": "discussion",
|
||||||
"tag": "a",
|
"tag": "a",
|
||||||
"text": "Talk",
|
|
||||||
"title": "Discussion about the content page [t]"
|
"title": "Discussion about the content page [t]"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -304,16 +313,25 @@ $ cat robots.html | pup -i 4 'div#p-namespaces a json{}'
|
|||||||
[
|
[
|
||||||
{
|
{
|
||||||
"accesskey": "c",
|
"accesskey": "c",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"text": "Article"
|
||||||
|
}
|
||||||
|
],
|
||||||
"href": "/wiki/Robots_exclusion_standard",
|
"href": "/wiki/Robots_exclusion_standard",
|
||||||
"tag": "a",
|
"tag": "a",
|
||||||
"text": "Article",
|
|
||||||
"title": "View the content page [c]"
|
"title": "View the content page [c]"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"accesskey": "t",
|
"accesskey": "t",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"text": "Talk"
|
||||||
|
}
|
||||||
|
],
|
||||||
"href": "/wiki/Talk:Robots_exclusion_standard",
|
"href": "/wiki/Talk:Robots_exclusion_standard",
|
||||||
|
"rel": "discussion",
|
||||||
"tag": "a",
|
"tag": "a",
|
||||||
"text": "Talk",
|
|
||||||
"title": "Discussion about the content page [t]"
|
"title": "Discussion about the content page [t]"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
26
display.go
26
display.go
@ -272,14 +272,11 @@ func jsonify(node *html.Node) map[string]interface{} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vals["tag"] = node.DataAtom.String()
|
switch node.Type {
|
||||||
children := []interface{}{}
|
|
||||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
||||||
switch child.Type {
|
|
||||||
case html.ElementNode:
|
case html.ElementNode:
|
||||||
children = append(children, jsonify(child))
|
vals["tag"] = node.Data
|
||||||
case html.TextNode:
|
case html.TextNode:
|
||||||
text := strings.TrimSpace(child.Data)
|
text := strings.TrimSpace(node.Data)
|
||||||
if text != "" {
|
if text != "" {
|
||||||
if pupEscapeHTML {
|
if pupEscapeHTML {
|
||||||
// don't escape javascript
|
// don't escape javascript
|
||||||
@ -287,24 +284,21 @@ func jsonify(node *html.Node) map[string]interface{} {
|
|||||||
text = html.EscapeString(text)
|
text = html.EscapeString(text)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if there is already text we'll append it
|
|
||||||
currText, ok := vals["text"]
|
|
||||||
if ok {
|
|
||||||
text = fmt.Sprintf("%s %s", currText, text)
|
|
||||||
}
|
|
||||||
vals["text"] = text
|
vals["text"] = text
|
||||||
}
|
}
|
||||||
case html.CommentNode:
|
case html.CommentNode:
|
||||||
comment := strings.TrimSpace(child.Data)
|
comment := strings.TrimSpace(node.Data)
|
||||||
if pupEscapeHTML {
|
if pupEscapeHTML {
|
||||||
comment = html.EscapeString(comment)
|
comment = html.EscapeString(comment)
|
||||||
}
|
}
|
||||||
currComment, ok := vals["comment"]
|
|
||||||
if ok {
|
|
||||||
comment = fmt.Sprintf("%s %s", currComment, comment)
|
|
||||||
}
|
|
||||||
vals["comment"] = comment
|
vals["comment"] = comment
|
||||||
}
|
}
|
||||||
|
children := []interface{}{}
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
jChild := jsonify(child)
|
||||||
|
if len(jChild) > 0 {
|
||||||
|
children = append(children, jChild)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if len(children) > 0 {
|
if len(children) > 0 {
|
||||||
vals["children"] = children
|
vals["children"] = children
|
||||||
|
@ -10,7 +10,7 @@ a92e50c09cd56970625ac3b74efbddb83b2731bb table li
|
|||||||
66950e746590d7f4e9cfe3d1adef42cd0addcf1d table li:last-of-type
|
66950e746590d7f4e9cfe3d1adef42cd0addcf1d table li:last-of-type
|
||||||
0a37d612cd4c67a42bd147b1edc5a1128456b017 table a[title="The Practice of Programming"]
|
0a37d612cd4c67a42bd147b1edc5a1128456b017 table a[title="The Practice of Programming"]
|
||||||
0d3918d54f868f13110262ffbb88cbb0b083057d table a[title="The Practice of Programming"] text{}
|
0d3918d54f868f13110262ffbb88cbb0b083057d table a[title="The Practice of Programming"] text{}
|
||||||
ecb542a30fc75c71a0c6380692cbbc4266ccbce4 json{}
|
199188dc8f1522426a628e41d96264bffb8beb0f json{}
|
||||||
95ef88ded9dab22ee3206cca47b9c3a376274bda text{}
|
95ef88ded9dab22ee3206cca47b9c3a376274bda text{}
|
||||||
e4f7358fbb7bb1748a296fa2a7e815fa7de0a08b .after-portlet
|
e4f7358fbb7bb1748a296fa2a7e815fa7de0a08b .after-portlet
|
||||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 .after
|
da39a3ee5e6b4b0d3255bfef95601890afd80709 .after
|
||||||
@ -34,7 +34,7 @@ d314e83b059bb876b0e5ee76aa92d54987961f9a .navbox-list li:nth-last-child(1)
|
|||||||
613bf65ac4042b6ee0a7a47f08732fdbe1b5b06b #toc
|
613bf65ac4042b6ee0a7a47f08732fdbe1b5b06b #toc
|
||||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a
|
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a
|
||||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a text{}
|
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a text{}
|
||||||
97d170e1550eee4afc0af065b78cda302a97674c #toc li + a json{}
|
cd0d4cc32346750408f7d4f5e78ec9a6e5b79a0d #toc li + a json{}
|
||||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a + span
|
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + a + span
|
||||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span
|
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li + span
|
||||||
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li > li
|
da39a3ee5e6b4b0d3255bfef95601890afd80709 #toc li > li
|
||||||
|
Loading…
Reference in New Issue
Block a user