diff options
| author | Shulhan <m.shulhan@gmail.com> | 2020-04-05 15:19:10 +0700 |
|---|---|---|
| committer | Shulhan <m.shulhan@gmail.com> | 2020-04-05 15:19:10 +0700 |
| commit | 826f56cf7ea4ca350078538d39fd6fee6f05bf9a (patch) | |
| tree | 2211e5051c97369557811b9e290edd381375ffdb /kata.go | |
| parent | 3061f6561c746f216586c271a10219515dcdb2d7 (diff) | |
| download | kamusku-826f56cf7ea4ca350078538d39fd6fee6f05bf9a.tar.xz | |
all: simplify html parser using github.com/shuLhan/share/lib/net/html
Diffstat (limited to 'kata.go')
| -rw-r--r-- | kata.go | 55 |
1 files changed, 26 insertions, 29 deletions
```diff
@@ -7,7 +7,7 @@ package kbbi
 
 import (
 	"bytes"
 
-	"golang.org/x/net/html"
+	"github.com/shuLhan/share/lib/net/html"
 )
 
 //
@@ -36,24 +36,24 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) {
 		return err
 	}
 
-	var prev *html.Node
+	prev := html.NewNode(nil)
 	for {
 		switch {
-		case node.FirstChild != nil && node.FirstChild != prev &&
-			node.LastChild != prev:
-			node = node.FirstChild
+		case node.FirstChild != nil && node.FirstChild != prev.Node &&
+			node.LastChild != prev.Node:
+			node.Node = node.FirstChild
 		case node.NextSibling != nil:
-			node = node.NextSibling
+			node.Node = node.NextSibling
 		default:
-			prev = node
-			node = node.Parent
+			prev.Node = node.Node
+			node.Node = node.Parent
 		}
 
-		if node == nil {
+		if node.Node == nil {
 			break
 		}
 
-		if node.Type != html.ElementNode {
+		if !node.IsElement() {
 			continue
 		}
 
@@ -62,16 +62,16 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) {
 			kata.parseKataDasar(node)
 
 		case tagNameOrderedList, tagNameUnorderedList:
-			li := getFirstChild(node)
+			li := node.GetFirstChild()
 			for li != nil {
 				defKata := parseDefinisiKata(li)
 				if defKata == nil {
 					break
 				}
 				kata.Definisi = append(kata.Definisi, defKata)
-				li = getNextSibling(li)
+				li = li.GetNextSibling()
 			}
-			node = node.NextSibling
+			node.Node = node.NextSibling
 
 		default:
 			continue
@@ -86,24 +86,21 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) {
 // return true; otherwise it will return false.
 //
 func (kata *Kata) parseKataDasar(h2 *html.Node) bool {
-	el := getFirstChild(h2)
+	el := h2.GetFirstChild()
 	if el.Data != tagNameSpan {
 		return false
 	}
-	for _, attr := range el.Attr {
-		if attr.Key != attrNameClass {
-			continue
-		}
-		if attr.Val != attrValueRootWord {
-			continue
-		}
-		el = getFirstChild(el)
-		if el.Data != tagNameAnchor {
-			return false
-		}
-		el = getFirstChild(el)
-		kata.Dasar = el.Data
-		return true
+	v := el.GetAttrValue(attrNameClass)
+	if v != attrValueRootWord {
+		return false
 	}
-	return false
+
+	el = el.GetFirstChild()
+	if el.Data != tagNameAnchor {
+		return false
+	}
+	el = el.GetFirstChild()
+	kata.Dasar = el.Data
+
+	return true
 }
```
