diff options
| -rw-r--r-- | definisi_kata.go | 36 | ||||
| -rw-r--r-- | direct_client.go | 64 | ||||
| -rw-r--r-- | go.mod | 4 | ||||
| -rw-r--r-- | go.sum | 13 | ||||
| -rw-r--r-- | kata.go | 55 | ||||
| -rw-r--r-- | parser.go | 51 |
6 files changed, 81 insertions, 142 deletions
diff --git a/definisi_kata.go b/definisi_kata.go index ac8ba5d..95f1292 100644 --- a/definisi_kata.go +++ b/definisi_kata.go @@ -7,8 +7,8 @@ package kbbi import ( "strings" + "github.com/shuLhan/share/lib/net/html" libstrings "github.com/shuLhan/share/lib/strings" - "golang.org/x/net/html" ) // @@ -22,29 +22,31 @@ type DefinisiKata struct { } func parseDefinisiKata(li *html.Node) (defKata *DefinisiKata) { - elFont := getFirstChild(li) - if elFont.Data != tagNameFont { + elFont := li.GetFirstChild() + if elFont == nil || elFont.Data != tagNameFont { return nil } - elItalic := getFirstChild(elFont) - if elItalic.Data != tagNameItalic { + elItalic := elFont.GetFirstChild() + if elItalic == nil || elItalic.Data != tagNameItalic { return nil } defKata = &DefinisiKata{} - elSpan := getFirstChild(elItalic) + elSpan := elItalic.GetFirstChild() for elSpan != nil && elSpan.Data == tagNameSpan { - for _, attr := range elSpan.Attr { - if attr.Key != attrNameTitle { - continue - } - defKata.Kelas = append(defKata.Kelas, attr.Val) + kelas := elSpan.GetAttrValue(attrNameTitle) + if len(kelas) > 0 { + defKata.Kelas = append(defKata.Kelas, kelas) } - elSpan = getNextSibling(elSpan) + elSpan = elSpan.GetNextSibling() + } + + el := elFont.GetNextSibling() + if el == nil { + return defKata } - el := getNextSibling(elFont) defKata.Isi = strings.TrimSpace(libstrings.SingleSpace(el.Data)) if defKata.Isi[len(defKata.Isi)-1] != ':' { @@ -54,23 +56,23 @@ func parseDefinisiKata(li *html.Node) (defKata *DefinisiKata) { defKata.Isi = defKata.Isi[:len(defKata.Isi)-1] // Parse the example of kata in the next sibling. - el = getNextSibling(el) + el = el.GetNextSibling() for el != nil { if el.Data != tagNameFont { break } - elItalic = getFirstChild(el) + elItalic = el.GetFirstChild() if elItalic.Data != tagNameItalic { break } - elText := getFirstChild(elItalic) + elText := elItalic.GetFirstChild() if elText != nil { defKata.Contoh = append(defKata.Contoh, elText.Data) } - el = getNextSibling(el) + el = el.GetNextSibling() } return defKata diff --git a/direct_client.go b/direct_client.go index 7c944e0..8fa19b9 100644 --- a/direct_client.go +++ b/direct_client.go @@ -16,7 +16,7 @@ import ( "strings" "github.com/shuLhan/share/lib/debug" - "golang.org/x/net/html" + "github.com/shuLhan/share/lib/net/html" "golang.org/x/net/publicsuffix" ) @@ -231,39 +231,34 @@ func (cl *directClient) parseHTMLKataDasar(htmlBody []byte) ( kataDasar = make(DaftarKata) - var prev *html.Node + prev := html.NewNode(nil) for { switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild + case node.FirstChild != nil && node.FirstChild != prev.Node && + node.LastChild != prev.Node: + node.Node = node.FirstChild case node.NextSibling != nil: - node = node.NextSibling + node.Node = node.NextSibling default: - prev = node - node = node.Parent + prev.Node = node.Node + node.Node = node.Parent } - if node == nil { + if node.Node == nil { break } - - if node.Type != html.ElementNode { + if !node.IsElement() { continue } if node.Data != tagNameAnchor { continue } - for _, attr := range node.Attr { - if attr.Key != attrNameHref { - continue - } - if !strings.HasPrefix(attr.Val, entriPath) { - continue - } - k := strings.TrimSpace(node.FirstChild.Data) - kataDasar[k] = struct{}{} + hrefValue := node.GetAttrValue(attrNameHref) + if !strings.HasPrefix(hrefValue, entriPath) { + continue } + k := strings.TrimSpace(node.FirstChild.Data) + kataDasar[k] = struct{}{} } return kataDasar, nil @@ -280,38 +275,33 @@ func (cl *directClient) parseHTMLLogin(htmlBody []byte) ( return "", err } - var prev *html.Node + prev := html.NewNode(nil) for { switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild + case node.FirstChild != nil && node.FirstChild != prev.Node && + node.LastChild != prev.Node: + node.Node = node.FirstChild case node.NextSibling != nil: - node = node.NextSibling + node.Node = node.NextSibling default: - prev = node - node = node.Parent + prev.Node = node.Node + node.Node = node.Parent } - if node == nil { + if node.Node == nil { break } - if node.Type != html.ElementNode { + if !node.IsElement() { continue } if node.Data != tagNameInput { continue } - for _, attr := range node.Attr { - if attr.Key != attrNameName { - continue - } - token = getAttrValue(node.Attr) - if len(token) > 0 { - return token, nil - } + token := node.GetAttrValue(attrNameName) + if len(token) > 0 { + return token, nil } } @@ -3,8 +3,8 @@ module github.com/shuLhan/kbbi go 1.13 require ( - github.com/shuLhan/share v0.13.1-0.20200330125604-7ac43c699173 - golang.org/x/net v0.0.0-20200320220750-118fecf932d8 + github.com/shuLhan/share v0.14.1-0.20200405081315-fe987df87daa + golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e ) //replace github.com/shuLhan/share => ../share @@ -1,11 +1,12 @@ -github.com/shuLhan/share v0.13.1-0.20200330125604-7ac43c699173 h1:lhiuIUynM8i0EdntUiy0gnyBcvRmkyrXkdQBPDf0iJw= -github.com/shuLhan/share v0.13.1-0.20200330125604-7ac43c699173/go.mod h1:uG1C5VfU81bI4iQ48VbWRm5c7mkvpr4huuUO54PKK1o= +github.com/shuLhan/share v0.14.1-0.20200405081315-fe987df87daa h1:PUSymJV6kPNjsziMhdHGjw2trQHknJPq29Is6MDYGcs= +github.com/shuLhan/share v0.14.1-0.20200405081315-fe987df87daa/go.mod h1:mpa0ub5qmuko/muUlOROOqLCSHKU76GzuAR/sUaSwRo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200320181102-891825fb96df/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200320220750-118fecf932d8 h1:1+zQlQqEEhUeStBTi653GZAnAuivZq/2hz+Iz+OP7rg= -golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200321134203-328b4cd54aae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -7,7 +7,7 @@ package kbbi import ( "bytes" - "golang.org/x/net/html" + "github.com/shuLhan/share/lib/net/html" ) // @@ -36,24 +36,24 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) { return err } - var prev *html.Node + prev := html.NewNode(nil) for { switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild + case node.FirstChild != nil && node.FirstChild != prev.Node && + node.LastChild != prev.Node: + node.Node = node.FirstChild case node.NextSibling != nil: - node = node.NextSibling + node.Node = node.NextSibling default: - prev = node - node = node.Parent + prev.Node = node.Node + node.Node = node.Parent } - if node == nil { + if node.Node == nil { break } - if node.Type != html.ElementNode { + if !node.IsElement() { continue } @@ -62,16 +62,16 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) { kata.parseKataDasar(node) case tagNameOrderedList, tagNameUnorderedList: - li := getFirstChild(node) + li := node.GetFirstChild() for li != nil { defKata := parseDefinisiKata(li) if defKata == nil { break } kata.Definisi = append(kata.Definisi, defKata) - li = getNextSibling(li) + li = li.GetNextSibling() } - node = node.NextSibling + node.Node = node.NextSibling default: continue @@ -86,24 +86,21 @@ func (kata *Kata) parseHTMLEntri(htmlBody []byte) (err error) { // return true; otherwise it will return false. // func (kata *Kata) parseKataDasar(h2 *html.Node) bool { - el := getFirstChild(h2) + el := h2.GetFirstChild() if el.Data != tagNameSpan { return false } - for _, attr := range el.Attr { - if attr.Key != attrNameClass { - continue - } - if attr.Val != attrValueRootWord { - continue - } - el = getFirstChild(el) - if el.Data != tagNameAnchor { - return false - } - el = getFirstChild(el) - kata.Dasar = el.Data - return true + v := el.GetAttrValue(attrNameClass) + if v != attrValueRootWord { + return false } - return false + + el = el.GetFirstChild() + if el.Data != tagNameAnchor { + return false + } + el = el.GetFirstChild() + kata.Dasar = el.Data + + return true } diff --git a/parser.go b/parser.go deleted file mode 100644 index 8a66e6f..0000000 --- a/parser.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package kbbi - -import ( - "strings" - - "golang.org/x/net/html" -) - -func getAttrValue(attrs []html.Attribute) string { - for _, attr := range attrs { - if attr.Key == attrNameValue { - return attr.Val - } - } - return "" -} - -// -// getFirstChild get the first non-empty child. -// -func getFirstChild(node *html.Node) *html.Node { - el := node.FirstChild - for el != nil { - if el.Type == html.TextNode { - if len(strings.TrimSpace(el.Data)) == 0 { - el = el.NextSibling - continue - } - } - break - } - return el -} - -func getNextSibling(node *html.Node) *html.Node { - el := node.NextSibling - for el != nil { - if el.Type == html.TextNode { - if len(strings.TrimSpace(el.Data)) == 0 { - el = el.NextSibling - continue - } - } - break - } - return el -} |
