diff options
| author | Shulhan <ms@kilabit.info> | 2024-03-06 03:42:00 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2024-03-09 01:10:17 +0700 |
| commit | e730418289d80985c5d48946353961a357a4b532 (patch) | |
| tree | 9e573bf4fca975775eb25f32448f755a325880a8 /lib/html/node_iterator.go | |
| parent | 1e7cb99f42bcd41e98326bd9406d3cecfb2a4542 (diff) | |
| download | pakakeh.go-e730418289d80985c5d48946353961a357a4b532.tar.xz | |
lib: move package "net/html" to "lib/html"
Putting "html" under "net" package make no sense.
Another reason is to make the package flat under "lib/" directory.
Diffstat (limited to 'lib/html/node_iterator.go')
| -rw-r--r-- | lib/html/node_iterator.go | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/lib/html/node_iterator.go b/lib/html/node_iterator.go new file mode 100644 index 00000000..ca1819d4 --- /dev/null +++ b/lib/html/node_iterator.go @@ -0,0 +1,93 @@ +// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "io" + "strings" + + "golang.org/x/net/html" +) + +// NodeIterator simplify iterating each node from top to bottom. +type NodeIterator struct { + current *Node + previous *Node + next *html.Node + hasNext bool +} + +// Parse returns the NodeIterator to iterate through HTML tree. +func Parse(r io.Reader) (iter *NodeIterator, err error) { + node, err := html.Parse(r) + if err != nil { + return nil, err + } + iter = &NodeIterator{ + current: NewNode(node), + previous: NewNode(nil), + } + return iter, nil +} + +// Next return the first child or the next sibling of current node. +// If no more node in the tree, it will return nil. +func (iter *NodeIterator) Next() *Node { + if iter.hasNext { + iter.current.Node = iter.next + iter.next = nil + iter.hasNext = false + return iter.current + } + if iter.current.Node == nil { + return nil + } + + for { + switch { + case iter.current.FirstChild != nil && + iter.current.FirstChild != iter.previous.Node && + iter.current.LastChild != iter.previous.Node: + iter.current.Node = iter.current.FirstChild + case iter.current.NextSibling != nil: + iter.current.Node = iter.current.NextSibling + default: + iter.previous.Node = iter.current.Node + iter.current.Node = iter.current.Parent + } + if iter.current.Node == nil { + return nil + } + + // Skip empty text node. + if iter.current.Type != html.TextNode { + break + } + text := strings.TrimSpace(iter.current.Data) + if len(text) != 0 { + break + } + } + return iter.current +} + +// SetNext set the node for iteration to Node "el" only if its not nil. +func (iter *NodeIterator) SetNext(el *Node) { + if el == nil { + return + } + iter.hasNext = true + iter.next = el.Node +} + +// SetNextNode set the next iteration node to html.Node "el" only if its not +// nil. +func (iter *NodeIterator) SetNextNode(el *html.Node) { + if el == nil { + return + } + iter.hasNext = true + iter.next = el +} |
