aboutsummaryrefslogtreecommitdiff
path: root/lib/net/html
diff options
context:
space:
mode:
authorShulhan <m.shulhan@gmail.com>2020-04-05 15:13:15 +0700
committerShulhan <m.shulhan@gmail.com>2020-04-05 15:13:15 +0700
commitfe987df87daa808c369b4e2f1c1fa4071e1e80e6 (patch)
tree9d17b735fa007b1a83f119703e856e444fc77142 /lib/net/html
parentbfd340aa12f885202b600a3f33a5c52530a7a89b (diff)
downloadpakakeh.go-fe987df87daa808c369b4e2f1c1fa4071e1e80e6.tar.xz
net/html: new package to simplify the golang.org/x/net/html
The x/net/html package currently provides only bare, low-level functionality for iterating the tree: there is no check for empty nodes and no function to get an attribute by name without looping manually. This package extends it by adding methods to get a node's attribute by name, get the first non-empty child, and get the next non-empty sibling.
Diffstat (limited to 'lib/net/html')
-rw-r--r--lib/net/html/html.go33
-rw-r--r--lib/net/html/node.go87
2 files changed, 120 insertions, 0 deletions
diff --git a/lib/net/html/html.go b/lib/net/html/html.go
new file mode 100644
index 00000000..0b002fe2
--- /dev/null
+++ b/lib/net/html/html.go
@@ -0,0 +1,33 @@
+// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package html extends the golang.org/x/net/html by providing simplified
+// methods to Node.
+//
+// The x/net/html package currently provides only bare, low-level
+// functionality for iterating the tree: there is no check for empty nodes
+// and no function to get an attribute by name without looping manually.
+//
+// This package extends it by adding methods to get a node's attribute by
+// name, get the first non-empty child, and get the next non-empty sibling.
+//
+package html
+
+import (
+ "io"
+
+ "golang.org/x/net/html"
+)
+
+//
+// Parse reads HTML from r using html.Parse and returns the resulting parse
+// tree wrapped in a Node.
+// If html.Parse fails, Parse returns nil together with that error, unchanged.
+//
+func Parse(r io.Reader) (*Node, error) {
+	root, err := html.Parse(r)
+	if err != nil {
+		return nil, err
+	}
+
+	wrapped := NewNode(root)
+	return wrapped, nil
+}
diff --git a/lib/net/html/node.go b/lib/net/html/node.go
new file mode 100644
index 00000000..4d7c71d4
--- /dev/null
+++ b/lib/net/html/node.go
@@ -0,0 +1,87 @@
+// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+//
+// Node wraps html.Node by embedding a pointer to it, so the fields of the
+// underlying html.Node (for example Attr, Type, FirstChild, and NextSibling)
+// can be accessed directly on a Node.
+// The methods defined on Node add lookups by attribute name and traversal
+// that skips text nodes containing only white space.
+//
+type Node struct {
+ *html.Node
+}
+
+//
+// NewNode returns a new Node that wraps the html.Node "el".
+// The pointer is stored as is: "el" is neither copied nor checked for nil.
+//
+func NewNode(el *html.Node) *Node {
+	wrapped := new(Node)
+	wrapped.Node = el
+	return wrapped
+}
+
+//
+// GetAttrValue returns the value of the first attribute of node whose key is
+// exactly equal to "key".
+// It returns an empty string if no attribute matches, so a missing attribute
+// cannot be distinguished from one whose value is empty.
+//
+func (node *Node) GetAttrValue(key string) string {
+	attrs := node.Attr
+	for i := range attrs {
+		if attrs[i].Key == key {
+			return attrs[i].Val
+		}
+	}
+	return ""
+}
+
+//
+// GetFirstChild returns the first non-empty child of node, or nil if node
+// has no such child.
+// A child is considered empty only if it is a text node whose data contains
+// nothing but white space; any other node type is returned as is.
+//
+func (node *Node) GetFirstChild() *Node {
+	return firstNonEmpty(node.FirstChild)
+}
+
+//
+// GetNextSibling returns the next non-empty sibling of node, or nil if no
+// such sibling is left.
+// A sibling is considered empty only if it is a text node whose data
+// contains nothing but white space; any other node type is returned as is.
+//
+func (node *Node) GetNextSibling() *Node {
+	return firstNonEmpty(node.NextSibling)
+}
+
+//
+// firstNonEmpty walks the sibling chain starting at "el" (inclusive) and
+// returns the first node that is not a white-space-only text node, wrapped
+// in a Node.
+// It returns nil if "el" is nil or if the chain ends before such a node is
+// found.
+//
+func firstNonEmpty(el *html.Node) *Node {
+	for ; el != nil; el = el.NextSibling {
+		// Only text nodes can be "empty"; everything else is a match.
+		if el.Type != html.TextNode || strings.TrimSpace(el.Data) != "" {
+			return NewNode(el)
+		}
+	}
+	return nil
+}
+
+//
+// IsElement reports whether the type of node is html.ElementNode.
+//
+func (node *Node) IsElement() bool {
+	isElement := node.Type == html.ElementNode
+	return isElement
+}