diff options
| author | Shulhan <m.shulhan@gmail.com> | 2020-03-06 00:57:07 +0700 |
|---|---|---|
| committer | Shulhan <m.shulhan@gmail.com> | 2020-03-16 01:17:35 +0700 |
| commit | 23a277ba6c3513346e29d39c5b2226219981fce0 (patch) | |
| tree | fa332a5486e4b2ee2e78baa57384fa40c0a346d1 /client.go | |
| parent | bf9240a2b91ec855a772c5fdc98ddd89ab28b6e5 (diff) | |
| download | kamusku-23a277ba6c3513346e29d39c5b2226219981fce0.tar.xz | |
kbbi: tambah fitur pencarian definisi kata
Diffstat (limited to 'client.go')
| -rw-r--r-- | client.go | 100 |
1 files changed, 86 insertions, 14 deletions
@@ -16,6 +16,7 @@ import ( "strings" "time" + "github.com/shuLhan/share/lib/debug" "golang.org/x/net/html" "golang.org/x/net/publicsuffix" ) @@ -67,6 +68,35 @@ func New(cookies []*http.Cookie) (cl *Client, err error) { } // +// CariDefinisi requests baseURL+entriPath+kata with an HTTP GET, reads the whole response body, and returns the definitions that parseHTMLEntri extracts from it; every failure is wrapped as "Cari <kata>: ...", and the body is printed when debug.Value >= 2. NOTE(review): kata is concatenated into the URL unescaped and res.StatusCode is never checked -- confirm this is intended. +// +func (cl Client) CariDefinisi(kata string) (defKata []*DefinisiKata, err error) { + entriURL := baseURL + entriPath + kata + res, err := cl.httpc.Get(entriURL) + if err != nil { + return nil, fmt.Errorf("Cari %q: %w", kata, err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("Cari %q: %w", kata, err) + } + + if debug.Value >= 2 { + fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) + } + + defKata, err = parseHTMLEntri(body) + if err != nil { + return nil, fmt.Errorf("Cari %q: %w", kata, err) + } + + return defKata, nil +} + +// // ListKataDasar get list of kata dasar // func (cl Client) ListKataDasar() (kataDasar daftarKata, err error) { @@ -174,6 +204,55 @@ func (cl *Client) SetCookies(cookies []*http.Cookie) { } } +// +// parseHTMLEntri parses the HTML body of an "/entri/<kata>" page: it walks the node tree and, for every element whose tag equals tagNameOrderedList, passes the items obtained through getFirstChild/getNextSibling to parseDefinisiKata, +// appending each non-nil result and stopping that list at the first nil. NOTE(review): after a list is handled, "node = node.NextSibling" can leave node nil, and the next iteration reads node.FirstChild before the nil check -- confirm. 
+// +func parseHTMLEntri(htmlBody []byte) (daftarDefinisi []*DefinisiKata, err error) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameOrderedList { + continue + } + + li := getFirstChild(node) + for li != nil { + defKata := parseDefinisiKata(li) + if defKata == nil { + break + } + daftarDefinisi = append(daftarDefinisi, defKata) + li = getNextSibling(li) + } + node = node.NextSibling + } + + return daftarDefinisi, nil +} + func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err error) { node, err := html.Parse(bytes.NewReader(htmlBody)) if err != nil { @@ -186,7 +265,8 @@ func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err for { switch { - case node.FirstChild != nil && node.FirstChild != prev && node.LastChild != prev: + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: node = node.FirstChild case node.NextSibling != nil: node = node.NextSibling @@ -201,14 +281,14 @@ func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err if node.Type != html.ElementNode { continue } - if node.Data != elementTypeAnchor { + if node.Data != tagNameAnchor { continue } for _, attr := range node.Attr { if attr.Key != attrNameHref { continue } - if !strings.HasPrefix(attr.Val, entriURL) { + if !strings.HasPrefix(attr.Val, entriPath) { continue } k := strings.TrimSpace(node.FirstChild.Data) @@ -232,7 +312,8 @@ func (cl Client) parseHTMLLogin(htmlBody []byte) (token string, err error) { for { switch { - case node.FirstChild != nil && node.FirstChild != prev && 
node.LastChild != prev: + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: node = node.FirstChild case node.NextSibling != nil: node = node.NextSibling @@ -247,7 +328,7 @@ func (cl Client) parseHTMLLogin(htmlBody []byte) (token string, err error) { if node.Type != html.ElementNode { continue } - if node.Data != elementTypeInput { + if node.Data != tagNameInput { continue } for _, attr := range node.Attr { @@ -293,12 +374,3 @@ func (cl *Client) preLogin() (token string, err error) { return token, nil } - -func getAttrValue(attrs []html.Attribute) string { - for _, attr := range attrs { - if attr.Key == attrNameValue { - return attr.Val - } - } - return "" -} |
