about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- definisi_kata.go 36
-rw-r--r-- direct_client.go 64
-rw-r--r-- go.mod 4
-rw-r--r-- go.sum 13
-rw-r--r-- kata.go 55
-rw-r--r-- parser.go 51
6 files changed, 81 insertions, 142 deletions
diff --git a/definisi_kata.go b/definisi_kata.go
index ac8ba5d..95f1292 100644
--- a/definisi_kata.go
+++ b/definisi_kata.go
@@ -7,8 +7,8 @@ package kbbi
import (
"strings"
+ "github.com/shuLhan/share/lib/net/html"
libstrings "github.com/shuLhan/share/lib/strings"
- "golang.org/x/net/html"
)
//
@@ -22,29 +22,31 @@ type DefinisiKata struct {
}
func parseDefinisiKata(li *html.Node) (defKata *DefinisiKata) {
- elFont := getFirstChild(li)
- if elFont.Data != tagNameFont {
+ elFont := li.GetFirstChild()
+ if elFont == nil || elFont.Data != tagNameFont {
return nil
}
- elItalic := getFirstChild(elFont)
- if elItalic.Data != tagNameItalic {
+ elItalic := elFont.GetFirstChild()
+ if elItalic == nil || elItalic.Data != tagNameItalic {
return nil
}
defKata = &DefinisiKata{}
- elSpan := getFirstChild(elItalic)
+ elSpan := elItalic.GetFirstChild()
for elSpan != nil && elSpan.Data == tagNameSpan {
- for _, attr := range elSpan.Attr {
- if attr.Key != attrNameTitle {
- continue
- }
- defKata.Kelas = append(defKata.Kelas, attr.Val)
+ kelas := elSpan.GetAttrValue(attrNameTitle)
+ if len(kelas) > 0 {
+ defKata.Kelas = append(defKata.Kelas, kelas)
}
- elSpan = getNextSibling(elSpan)
+ elSpan = elSpan.GetNextSibling()
+ }
+
+ el := elFont.GetNextSibling()
+ if el == nil {
+ return defKata
}
- el := getNextSibling(elFont)
defKata.Isi = strings.TrimSpace(libstrings.SingleSpace(el.Data))
if defKata.Isi[len(defKata.Isi)-1] != ':' {
@@ -54,23 +56,23 @@ func parseDefinisiKata(li *html.Node) (defKata *DefinisiKata) {
defKata.Isi = defKata.Isi[:len(defKata.Isi)-1]
// Parse the example of kata in the next sibling.
- el = getNextSibling(el)
+ el = el.GetNextSibling()
for el != nil {
if el.Data != tagNameFont {
break
}
- elItalic = getFirstChild(el)
+ elItalic = el.GetFirstChild()
if elItalic.Data != tagNameItalic {
break
}
- elText := getFirstChild(elItalic)
+ elText := elItalic.GetFirstChild()
if elText != nil {
defKata.Contoh = append(defKata.Contoh, elText.Data)
}
- el = getNextSibling(el)
+ el = el.GetNextSibling()
}
return defKata
diff --git a/direct_client.go b/direct_client.go
index 7c944e0..8fa19b9 100644
--- a/direct_client.go
+++ b/direct_client.go
@@ -16,7 +16,7 @@ import (
"strings"
"github.com/shuLhan/share/lib/debug"
- "golang.org/x/net/html"
+ "github.com/shuLhan/share/lib/net/html"
"golang.org/x/net/publicsuffix"
)
@@ -231,39 +231,34 @@ func (cl *directClient) parseHTMLKataDasar(htmlBody []byte) (
kataDasar = make(DaftarKata)
- var prev *html.Node
+ prev := html.NewNode(nil)
for {
switch {
- case node.FirstChild != nil && node.FirstChild != prev &&
- node.LastChild != prev:
- node = node.FirstChild
+ case node.FirstChild != nil && node.FirstChild != prev.Node &&
+ node.LastChild != prev.Node:
+ node.Node = node.FirstChild
case node.NextSibling != nil:
- node = node.NextSibling
+ node.Node = node.NextSibling
default:
- prev = node
- node = node.Parent
+ prev.Node = node.Node
+ node.Node = node.Parent
}
- if node == nil {
+ if node.Node == nil {
break
}
-
- if node.Type != html.ElementNode {
+ if !node.IsElement() {
continue
}
if node.Data != tagNameAnchor {
continue
}
- for _, attr := range node.Attr {
- if attr.Key != attrNameHref {
- continue
- }
- if !strings.HasPrefix(attr.Val, entriPath) {
- continue
- }
- k := strings.TrimSpace(node.FirstChild.Data)
- kataDasar[k] = struct{}{}
+ hrefValue := node.GetAttrValue(attrNameHref)
+ if !strings.HasPrefix(hrefValue, entriPath) {
+ continue
}
+ k := strings.TrimSpace(node.FirstChild.Data)
+ kataDasar[k] = struct{}{}
}
return kataDasar, nil
@@ -280,38 +275,33 @@ func (cl *directClient) parseHTMLLogin(htmlBody []byte) (
return "", err
}
- var prev *html.Node
+ prev := html.NewNode(nil)
for {
switch {
- case node.FirstChild != nil && node.FirstChild != prev &&
- node.LastChild != prev:
- node = node.FirstChild
+ case node.FirstChild != nil && node.FirstChild != prev.Node &&
+ node.LastChild != prev.Node:
+ node.Node = node.FirstChild
case node.NextSibling != nil:
- node = node.NextSibling
+ node.Node = node.NextSibling
default:
- prev = node
- node = node.Parent
+ prev.Node = node.Node
+ node.Node = node.Parent
}
- if node == nil {
+ if node.Node == nil {
break
}
- if node.Type != html.ElementNode {
+ if !node.IsElement() {
continue
}
if node.Data != tagNameInput {
continue
}
- for _, attr := range node.Attr {
- if attr.Key != attrNameName {
- continue
- }
- token = getAttrValue(node.Attr)
- if len(token) > 0 {
- return token, nil
- }
+ token := node.GetAttrValue(attrNameName)
+ if len(token) > 0 {
+ return token, nil
}
}
diff --git a/go.mod b/go.mod
index b8f3599..aa96a2f 100644
--- a/go.mod
+++ b/go.mod
@@ -3,8 +3,8 @@ module github.com/shuLhan/kbbi
go 1.13
require (
- github.com/shuLhan/share v0.13.1-0.20200330125604-7ac43c699173
- golang.org/x/net v0.0.0-20200320220750-118fecf932d8
+ github.com/shuLhan/share v0.14.1-0.20200405081315-fe987df87daa
+ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
)
//replace github.com/shuLhan/share => ../share
diff --git a/go.sum b/go.sum
index d479867..7ff9b47 100644
--- a/go.sum
+++ b/go.sum
@@ -1,11 +1,12 @@
-github.com/shuLhan/share v0.13.1-0.20200330125604-7ac43c699173 h1:lhiuIUynM8i0EdntUiy0gnyBcvRmkyrXkdQBPDf0iJw=
-github.com/shuLhan/share v0.13.1-0.20200330125604-7ac43c699173/go.mod h1:uG1C5VfU81bI4iQ48VbWRm5c7mkvpr4huuUO54PKK1o=
+github.com/shuLhan/share v0.14.1-0.20200405081315-fe987df87daa h1:PUSymJV6kPNjsziMhdHGjw2trQHknJPq29Is6MDYGcs=
+github.com/shuLhan/share v0.14.1-0.20200405081315-fe987df87daa/go.mod h1:mpa0ub5qmuko/muUlOROOqLCSHKU76GzuAR/sUaSwRo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20200320181102-891825fb96df/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20200320220750-118fecf932d8 h1:1+zQlQqEEhUeStBTi653GZAnAuivZq/2hz+Iz+OP7rg=
-golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
+golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200321134203-328b4cd54aae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff --git a/kata.go b/kata.go
index 94b23b4..d6502f0 100644
--- a/kata.go
+++ b/kata.go
@@ -7,7 +7,7 @@ package kbbi
import (
"bytes"
- "golang.org/x/net/html"
+ "github.com/shuLhan/share/lib/net/html"
)
//
@@ -36,24 +36,24 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) {
return err
}
- var prev *html.Node
+ prev := html.NewNode(nil)
for {
switch {
- case node.FirstChild != nil && node.FirstChild != prev &&
- node.LastChild != prev:
- node = node.FirstChild
+ case node.FirstChild != nil && node.FirstChild != prev.Node &&
+ node.LastChild != prev.Node:
+ node.Node = node.FirstChild
case node.NextSibling != nil:
- node = node.NextSibling
+ node.Node = node.NextSibling
default:
- prev = node
- node = node.Parent
+ prev.Node = node.Node
+ node.Node = node.Parent
}
- if node == nil {
+ if node.Node == nil {
break
}
- if node.Type != html.ElementNode {
+ if !node.IsElement() {
continue
}
@@ -62,16 +62,16 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) {
kata.parseKataDasar(node)
case tagNameOrderedList, tagNameUnorderedList:
- li := getFirstChild(node)
+ li := node.GetFirstChild()
for li != nil {
defKata := parseDefinisiKata(li)
if defKata == nil {
break
}
kata.Definisi = append(kata.Definisi, defKata)
- li = getNextSibling(li)
+ li = li.GetNextSibling()
}
- node = node.NextSibling
+ node.Node = node.NextSibling
default:
continue
@@ -86,24 +86,21 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) {
// return true; otherwise it will return false.
//
func (kata *Kata) parseKataDasar(h2 *html.Node) bool {
- el := getFirstChild(h2)
+ el := h2.GetFirstChild()
if el.Data != tagNameSpan {
return false
}
- for _, attr := range el.Attr {
- if attr.Key != attrNameClass {
- continue
- }
- if attr.Val != attrValueRootWord {
- continue
- }
- el = getFirstChild(el)
- if el.Data != tagNameAnchor {
- return false
- }
- el = getFirstChild(el)
- kata.Dasar = el.Data
- return true
+ v := el.GetAttrValue(attrNameClass)
+ if v != attrValueRootWord {
+ return false
}
- return false
+
+ el = el.GetFirstChild()
+ if el.Data != tagNameAnchor {
+ return false
+ }
+ el = el.GetFirstChild()
+ kata.Dasar = el.Data
+
+ return true
}
diff --git a/parser.go b/parser.go
deleted file mode 100644
index 8a66e6f..0000000
--- a/parser.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package kbbi
-
-import (
- "strings"
-
- "golang.org/x/net/html"
-)
-
-func getAttrValue(attrs []html.Attribute) string {
- for _, attr := range attrs {
- if attr.Key == attrNameValue {
- return attr.Val
- }
- }
- return ""
-}
-
-//
-// getFirstChild get the first non-empty child.
-//
-func getFirstChild(node *html.Node) *html.Node {
- el := node.FirstChild
- for el != nil {
- if el.Type == html.TextNode {
- if len(strings.TrimSpace(el.Data)) == 0 {
- el = el.NextSibling
- continue
- }
- }
- break
- }
- return el
-}
-
-func getNextSibling(node *html.Node) *html.Node {
- el := node.NextSibling
- for el != nil {
- if el.Type == html.TextNode {
- if len(strings.TrimSpace(el.Data)) == 0 {
- el = el.NextSibling
- continue
- }
- }
- break
- }
- return el
-}