diff options
Diffstat (limited to 'kbbi_client.go')
| -rw-r--r-- | kbbi_client.go | 417 |
1 files changed, 417 insertions, 0 deletions
// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package kamusku

import (
	"bytes"
	"encoding/gob"
	"errors"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/shuLhan/share/lib/debug"
	libhttp "github.com/shuLhan/share/lib/http"
	"github.com/shuLhan/share/lib/net/html"
	"golang.org/x/net/publicsuffix"
)

const (
	// Base URL of the KBBI website, the URL of its login form, and the
	// path prefix shared by all entry pages.
	kbbiUrlBase   = "https://kbbi.kemdikbud.go.id"
	kbbiUrlLogin  = kbbiUrlBase + "/Account/Login"
	kbbiPathEntri = "/entri/"

	// HTML attribute names.
	attrNameClass = "class"
	attrNameHref  = "href"
	attrNameTitle = "title"
	attrNameValue = "value"

	// HTML attribute value; not referenced in this file, presumably used
	// by the entry parser elsewhere in the package.
	attrValueRootWord = "rootword"

	// Names of the query and form parameters sent to the website: the
	// first two and "page" are used by the root word listing, the others
	// by the login form.
	paramNameMasukan                  = "masukan"
	paramNameMasukanLengkap           = "masukanLengkap"
	paramNameIngatSaya                = "IngatSaya"
	paramNameKataSandi                = "KataSandi"
	paramNamePage                     = "page"
	paramNamePosel                    = "Posel"
	paramNameRequestVerificationToken = "__RequestVerificationToken" //nolint: gosec

	// Fixed parameter values.
	paramValueDasar = "dasar"
	paramValueFalse = "false"

	// HTML tag names.
	tagNameAnchor        = "a"
	tagNameFont          = "font"
	tagNameHeader2       = "h2"
	tagNameInput         = "input"
	tagNameItalic        = "i"
	tagNameOrderedList   = "ol"
	tagNameSpan          = "span"
	tagNameUnorderedList = "ul"

	// cookieFile is the name of the file that stores the cookies, inside
	// the directory configDir under the user configuration directory.
	// defTimeout is the timeout of the HTTP client.
	// maxPageNumber is the last page number requested when listing the
	// root words.
	cookieFile    = "cookie"
	configDir     = "kamusku"
	defTimeout    = 20 * time.Second
	maxPageNumber = 501
)

//
// KbbiClient is a client for the official KBBI website using HTTP.
//
type KbbiClient struct {
	// baseDir is the user configuration directory, as returned by
	// os.UserConfigDir; the cookie file lives below it.
	baseDir string

	// cookieURL is the parsed base URL of the website, used as the key
	// when reading cookies from or writing them to the jar.
	cookieURL *url.URL

	// cookies holds the cookies loaded from the cookie file or taken
	// from the jar after a login; it is empty when not authenticated.
	cookies []*http.Cookie

	// httpc is the HTTP client, with a cookie jar, used for all requests.
	httpc *http.Client
}

//
// NewKbbiClient creates and initializes a new client that connects directly
// to the official KBBI website.
+// +func NewKbbiClient() (cl *KbbiClient, err error) { + cookieURL, err := url.Parse(kbbiUrlBase) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + jarOpt := &cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + + jar, err := cookiejar.New(jarOpt) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + cl = &KbbiClient{ + cookieURL: cookieURL, + httpc: &http.Client{ + Jar: jar, + Timeout: defTimeout, + }, + } + + err = cl.loadCookies() + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + if cl.cookies != nil { + jar.SetCookies(cookieURL, cl.cookies) + } + + return cl, nil +} + +// +// Lookup lookup definition of one or more words. +// +func (cl *KbbiClient) Lookup(ins []string) (res LookupResponse, err error) { + res = make(LookupResponse, len(ins)) + + for _, in := range ins { + _, ok := res[in] + if ok { + continue + } + + kata := &Word{} + res[in] = kata + + entriURL := kbbiUrlBase + kbbiPathEntri + in + httpRes, err := cl.httpc.Get(entriURL) + if err != nil { + kata.err = err + continue + } + + defer httpRes.Body.Close() + + body, err := ioutil.ReadAll(httpRes.Body) + if err != nil { + kata.err = err + continue + } + + if debug.Value >= 3 { + fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) + } + + err = kata.parseHTMLEntri(in, body) + if err != nil { + kata.err = err + } + + if len(kata.Definition) == 0 && len(kata.Message) == 0 { + kata.Message = "Entri tidak ditemukan" + } + } + + return res, nil +} + +// +// ListRootWords list all of the root words in dictionary. +// +func (cl *KbbiClient) ListRootWords() (rootWords Words, err error) { + params := url.Values{ + paramNameMasukan: []string{paramValueDasar}, + paramNameMasukanLengkap: []string{paramValueDasar}, + } + + urlPage := kbbiUrlBase + "/Cari/Jenis?" 
+ + rootWords = make(Words) + + for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { + params.Set(paramNamePage, strconv.Itoa(pageNumber)) + + req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) + if err != nil { + return rootWords, err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return rootWords, fmt.Errorf("ListRootWords: page %d: %w", + pageNumber, err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return rootWords, fmt.Errorf("ListRootWords: page %d: %w", + pageNumber, err) + } + + got, err := cl.parseHTMLRootWords(body) + if err != nil { + return rootWords, fmt.Errorf("ListRootWords: page %d: %w", + pageNumber, err) + } + if len(got) == 0 { + break + } + + rootWords.merge(got) + + log.Printf("ListRootWords: halaman %d, jumlah kata %d, total kata %d", + pageNumber, len(got), len(rootWords)) + } + + return rootWords, nil +} + +// +// IsAuthenticated will return true if the client already login; otherwise it +// will return false. +// +func (cl *KbbiClient) IsAuthenticated() bool { + return len(cl.cookies) > 0 +} + +// +// Login authenticate the client using user email and password. 
+// +func (cl *KbbiClient) Login(email, pass string) (err error) { + tokenLogin, err := cl.preLogin() + if err != nil { + return fmt.Errorf("Login: %w", err) + } + + params := url.Values{ + paramNameRequestVerificationToken: []string{tokenLogin}, + paramNamePosel: []string{email}, + paramNameKataSandi: []string{pass}, + paramNameIngatSaya: []string{paramValueFalse}, + } + + reqBody := strings.NewReader(params.Encode()) + + req, err := http.NewRequest(http.MethodPost, kbbiUrlLogin, reqBody) + if err != nil { + return fmt.Errorf("Login: %w", err) + } + + req.Header.Set(libhttp.HeaderContentType, libhttp.ContentTypeForm) + + res, err := cl.httpc.Do(req) + if err != nil { + return fmt.Errorf("Login: %w", err) + } + + defer res.Body.Close() + + resBody, err := ioutil.ReadAll(res.Body) + if err != nil { + return fmt.Errorf("Login: %w", err) + } + + if res.StatusCode >= http.StatusBadRequest { + return fmt.Errorf("login: %d %s", res.StatusCode, resBody) + } + + cl.cookies = cl.httpc.Jar.Cookies(cl.cookieURL) + cl.setCookies() + cl.saveCookies() + + return nil +} + +// +// setCookies for HTTP request that need an authentication. +// +func (cl *KbbiClient) setCookies() { + cl.httpc.Jar.SetCookies(cl.cookieURL, cl.cookies) +} + +func (cl *KbbiClient) parseHTMLRootWords(htmlBody []byte) ( + rootWords Words, err error, +) { + iter, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + rootWords = make(Words) + + for node := iter.Next(); node != nil; node = iter.Next() { + if !node.IsElement() { + continue + } + if node.Data != tagNameAnchor { + continue + } + hrefValue := node.GetAttrValue(attrNameHref) + if !strings.HasPrefix(hrefValue, kbbiPathEntri) { + continue + } + k := strings.TrimSpace(node.FirstChild.Data) + rootWords[k] = struct{}{} + } + + return rootWords, nil +} + +// +// parseHTMLLogin get the token at the form login. 
+// +func (cl *KbbiClient) parseHTMLLogin(htmlBody []byte) ( + token string, err error, +) { + iter, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return "", err + } + + for node := iter.Next(); node != nil; node = iter.Next() { + if !node.IsElement() { + continue + } + if node.Data != tagNameInput { + continue + } + + token := node.GetAttrValue(attrNameValue) + if len(token) > 0 { + return token, nil + } + } + + return "", fmt.Errorf("token login not found") +} + +// +// preLogin initialize the client to get the first cookie. +// +func (cl *KbbiClient) preLogin() (token string, err error) { + req, err := http.NewRequest(http.MethodGet, kbbiUrlLogin, nil) + if err != nil { + return "", err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return "", err + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + + token, err = cl.parseHTMLLogin(body) + if err != nil { + return "", err + } + + return token, nil +} + +// +// loadCookies load the KBBI cookies from file. +// +func (cl *KbbiClient) loadCookies() (err error) { + cl.baseDir, err = os.UserConfigDir() + if err != nil { + return fmt.Errorf("loadCookies: %w", err) + } + + f := filepath.Join(cl.baseDir, configDir, cookieFile) + + _, err = os.Stat(f) + if errors.Is(err, os.ErrNotExist) { + return nil + } + + body, err := ioutil.ReadFile(f) + if err != nil { + return fmt.Errorf("loadCookies: %w", err) + } + + dec := gob.NewDecoder(bytes.NewReader(body)) + + err = dec.Decode(&cl.cookies) + if err != nil { + return fmt.Errorf("loadCookies: %w", err) + } + + return nil +} + +// +// saveCookies store the client cookies to the file for future use. 
+// +func (cl *KbbiClient) saveCookies() { + err := os.MkdirAll(filepath.Join(cl.baseDir, configDir), 0700) + if err != nil { + log.Println("saveCookies:", err) + } + + f := filepath.Join(cl.baseDir, configDir, cookieFile) + + var buf bytes.Buffer + enc := gob.NewEncoder(&buf) + err = enc.Encode(cl.cookies) + if err != nil { + log.Println("saveCookies: ", err) + } + + err = ioutil.WriteFile(f, buf.Bytes(), 0600) + if err != nil { + log.Println("saveCookies: ", err) + } +} |
