diff options
| author | Shulhan <m.shulhan@gmail.com> | 2020-04-05 15:13:15 +0700 |
|---|---|---|
| committer | Shulhan <m.shulhan@gmail.com> | 2020-04-05 15:13:15 +0700 |
| commit | fe987df87daa808c369b4e2f1c1fa4071e1e80e6 (patch) | |
| tree | 9d17b735fa007b1a83f119703e856e444fc77142 /lib/net/html | |
| parent | bfd340aa12f885202b600a3f33a5c52530a7a89b (diff) | |
| download | pakakeh.go-fe987df87daa808c369b4e2f1c1fa4071e1e80e6.tar.xz | |
net/html: new package to simplify the golang.org/x/net/html
The x/net/html package currently provides only bare, raw functionality
for iterating the tree: there is no check for empty nodes, and no function
to get an attribute by name without looping over the attributes manually.
This package extends x/net/html by adding methods to get a node's attribute
by name, get the first non-empty child, and get the next non-empty sibling.
Diffstat (limited to 'lib/net/html')
| -rw-r--r-- | lib/net/html/html.go | 33 | ||||
| -rw-r--r-- | lib/net/html/node.go | 87 |
2 files changed, 120 insertions, 0 deletions
```diff
diff --git a/lib/net/html/html.go b/lib/net/html/html.go
new file mode 100644
index 00000000..0b002fe2
--- /dev/null
+++ b/lib/net/html/html.go
@@ -0,0 +1,33 @@
+// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package html extends the golang.org/x/net/html by providing simplified
+// methods to Node.
+//
+// The x/net/html package currently only provide bare raw functionalities
+// to iterate tree, there is no check for empty node, and no function to
+// get attribute by name without looping it manually.
+//
+// This package extends the package by adding methods to get node's attribute
+// by name, get the first non-empty child, and get the next non-empty sibling
+//
+package html
+
+import (
+	"io"
+
+	"golang.org/x/net/html"
+)
+
+//
+// Parse returns the parse tree for the HTML from the given Reader.
+//
+func Parse(r io.Reader) (*Node, error) {
+	node, err := html.Parse(r)
+	if err != nil {
+		return nil, err
+	}
+	return NewNode(node), nil
+}
diff --git a/lib/net/html/node.go b/lib/net/html/node.go
new file mode 100644
index 00000000..4d7c71d4
--- /dev/null
+++ b/lib/net/html/node.go
@@ -0,0 +1,87 @@
+// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+//
+// Node extends the html.Node.
+//
+type Node struct {
+	*html.Node
+}
+
+//
+// NewNode create new node by embedding html.Node "el".
+//
+func NewNode(el *html.Node) *Node {
+	return &Node{Node: el}
+}
+
+//
+// GetAttrValue get the value of node's attribute with specific key or empty
+// if key not found.
+//
+func (node *Node) GetAttrValue(key string) string {
+	for _, attr := range node.Attr {
+		if key == attr.Key {
+			return attr.Val
+		}
+	}
+	return ""
+}
+
+//
+// GetFirstChild get the first non-empty child of node or nil if no child
+// left.
+//
+func (node *Node) GetFirstChild() *Node {
+	el := node.FirstChild
+	for el != nil {
+		if el.Type == html.TextNode {
+			if len(strings.TrimSpace(el.Data)) == 0 {
+				el = el.NextSibling
+				continue
+			}
+		}
+		break
+	}
+	if el == nil {
+		return nil
+	}
+	return NewNode(el)
+}
+
+//
+// GetNextSibling get the next non-empty sibling of node or nil if no more
+// sibling left.
+//
+func (node *Node) GetNextSibling() *Node {
+	el := node.NextSibling
+	for el != nil {
+		if el.Type == html.TextNode {
+			if len(strings.TrimSpace(el.Data)) == 0 {
+				el = el.NextSibling
+				continue
+			}
+		}
+		break
+	}
+	if el == nil {
+		return nil
+	}
+	return NewNode(el)
+}
+
+//
+// IsElement will return true if node type is html.ElementNode.
+//
+func (node *Node) IsElement() bool {
+	return node.Type == html.ElementNode
+}
```
