diff options
Diffstat (limited to 'direct_client.go')
| -rw-r--r-- | direct_client.go | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/direct_client.go b/direct_client.go new file mode 100644 index 0000000..3123829 --- /dev/null +++ b/direct_client.go @@ -0,0 +1,350 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/http/cookiejar" + "net/url" + "strconv" + "strings" + "time" + + "github.com/shuLhan/share/lib/debug" + "golang.org/x/net/html" + "golang.org/x/net/publicsuffix" +) + +const ( + maxPageNumber = 501 + defTimeout = 20 * time.Second +) + +// +// directClient for KBBI web using HTTP. +// +type directClient struct { + cookieURL *url.URL + httpc *http.Client +} + +// +// newDirectClient create and initialize new client that connect directly to +// KBBI official website. +// +func newDirectClient(cookies []*http.Cookie) (cl *directClient, err error) { + cookieURL, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + jarOpt := &cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + + jar, err := cookiejar.New(jarOpt) + if err != nil { + return nil, err + } + + if cookies != nil { + jar.SetCookies(cookieURL, cookies) + } + + cl = &directClient{ + cookieURL: cookieURL, + httpc: &http.Client{ + Jar: jar, + Timeout: defTimeout, + }, + } + + return cl, nil +} + +// +// CariDefinisi dari daftar kata. +// +func (cl *directClient) CariDefinisi(ins []string) ( + res DefinisiResponse, err error, +) { + res = make(DefinisiResponse, len(ins)) + + for _, in := range ins { + _, ok := res[in] + if ok { + continue + } + + kata := &Kata{} + res[in] = kata + + entriURL := baseURL + entriPath + in + httpRes, err := cl.httpc.Get(entriURL) + if err != nil { + kata.err = err + continue + } + + defer httpRes.Body.Close() + + body, err := ioutil.ReadAll(httpRes.Body) + if err != nil { + kata.err = err + continue + } + + if debug.Value >= 2 { + fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) + } + + err = kata.parseHTMLEntri(body) + if err != nil { + kata.err = err + } + } + + return res, nil +} + +// +// ListKataDasar get list of kata dasar +// +func (cl *directClient) ListKataDasar() (kataDasar daftarKata, err error) { + params := url.Values{ + paramNameMasukan: []string{paramValueDasar}, + paramNameMasukanLengkap: []string{paramValueDasar}, + } + + urlPage := baseURL + "/Cari/Jenis?" + + kataDasar = make(daftarKata) + + for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { + params.Set(paramNamePage, strconv.Itoa(pageNumber)) + + req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) + if err != nil { + return kataDasar, err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + got, err := cl.parseHTMLKataDasar(body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + if len(got) == 0 { + break + } + + kataDasar.merge(got) + + log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", + pageNumber, len(got), len(kataDasar)) + } + + return kataDasar, nil +} + +// +// Login authenticate the client using username and password. +// +func (cl *directClient) login(user, pass string) ( + cookies []*http.Cookie, err error, +) { + tokenLogin, err := cl.preLogin() + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + params := url.Values{ + paramNameRequestVerificationToken: []string{tokenLogin}, + paramNamePosel: []string{user}, + paramNameKataSandi: []string{pass}, + paramNameIngatSaya: []string{paramValueFalse}, + } + + reqBody := strings.NewReader(params.Encode()) + + req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + req.Header.Set(headerNameContentType, headerValueContentType) + + res, err := cl.httpc.Do(req) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + defer res.Body.Close() + + resBody, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + if res.StatusCode >= http.StatusBadRequest { + return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + } + + cookies = cl.httpc.Jar.Cookies(cl.cookieURL) + + return cookies, nil +} + +// +// setCookies for HTTP request that need an authentication. +// +func (cl *directClient) setCookies(cookies []*http.Cookie) { + if len(cookies) > 0 { + cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) + } +} + +func (cl *directClient) parseHTMLKataDasar(htmlBody []byte) ( + kataDasar daftarKata, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + kataDasar = make(daftarKata) + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameAnchor { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameHref { + continue + } + if !strings.HasPrefix(attr.Val, entriPath) { + continue + } + k := strings.TrimSpace(node.FirstChild.Data) + kataDasar[k] = struct{}{} + } + } + + return kataDasar, nil +} + +// +// parseHTMLLogin get the token at the form login. +// +func (cl *directClient) parseHTMLLogin(htmlBody []byte) ( + token string, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return "", err + } + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameInput { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameName { + continue + } + + token = getAttrValue(node.Attr) + if len(token) > 0 { + return token, nil + } + } + } + + return "", fmt.Errorf("token login not found") +} + +// +// preLogin initialize the client to get the first cookie. +// +func (cl *directClient) preLogin() (token string, err error) { + req, err := http.NewRequest(http.MethodGet, loginURL, nil) + if err != nil { + return "", err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return "", err + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + + token, err = cl.parseHTMLLogin(body) + if err != nil { + return "", err + } + + return token, nil +} |
