diff options
| author | Shulhan <m.shulhan@gmail.com> | 2020-03-30 23:15:47 +0700 |
|---|---|---|
| committer | Shulhan <m.shulhan@gmail.com> | 2020-03-30 23:15:47 +0700 |
| commit | 84fdfdb6ae4175a125fc67a6aed377476d31ee0e (patch) | |
| tree | 28877c8088bb05b4d7bef8d6b585996128da68e4 | |
| parent | 7d2606dbcaaf3794907fbee185dcb1d78cfdb98c (diff) | |
| download | kamusku-84fdfdb6ae4175a125fc67a6aed377476d31ee0e.tar.xz | |
all: refactoring Client
The client will have two modes: direct or API. The direct mode connects
to the official KBBI website, requests the word page, and parses the HTML to
get the definition. The API mode connects to a server API that provides
caching of the dictionary.
| -rw-r--r-- | active_client.go | 13 | ||||
| -rw-r--r-- | client.go | 336 | ||||
| -rw-r--r-- | cmd/kbbi/main.go | 7 | ||||
| -rw-r--r-- | direct_client.go | 350 | ||||
| -rw-r--r-- | direct_client_test.go (renamed from client_test.go) | 4 |
5 files changed, 416 insertions, 294 deletions
diff --git a/active_client.go b/active_client.go new file mode 100644 index 0000000..061bd1d --- /dev/null +++ b/active_client.go @@ -0,0 +1,13 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +// +// activeClient define an interface for an active client. +// +type activeClient interface { + CariDefinisi(words []string) (res DefinisiResponse, err error) + ListKataDasar() (kataDasar daftarKata, err error) +} @@ -4,338 +4,94 @@ package kbbi -import ( - "bytes" - "fmt" - "io/ioutil" - "log" - "net/http" - "net/http/cookiejar" - "net/url" - "strconv" - "strings" - "time" - - "github.com/shuLhan/share/lib/debug" - "golang.org/x/net/html" - "golang.org/x/net/publicsuffix" -) - -const ( - maxPageNumber = 501 - defTimeout = 20 * time.Second -) +import "net/http" // -// Client for KBBI web using HTTP. +// Client for dictionary API and official KBBI servers. // type Client struct { - cookieURL *url.URL - httpc *http.Client + active activeClient + direct *directClient } // -// New create and initialize new client for KBBI web. +// NewClient create and initialize new client. +// If cookies is not empty, the direct client will be initialized and actived. // -func New(cookies []*http.Cookie) (cl *Client, err error) { - cookieURL, err := url.Parse(baseURL) - if err != nil { - return nil, fmt.Errorf("New: %w", err) - } - - jarOpt := &cookiejar.Options{ - PublicSuffixList: publicsuffix.List, - } - - jar, err := cookiejar.New(jarOpt) - if err != nil { - return nil, err - } +func NewClient(cookies []*http.Cookie) (cl *Client, err error) { + cl = &Client{} if cookies != nil { - jar.SetCookies(cookieURL, cookies) - } - - cl = &Client{ - cookieURL: cookieURL, - httpc: &http.Client{ - Jar: jar, - Timeout: defTimeout, - }, - } - - return cl, nil -} - -// -// CariDefinisi dari daftar kata. 
-// -func (cl Client) CariDefinisi(ins []string) (res DefinisiResponse) { - res = make(DefinisiResponse, len(ins)) - - for _, in := range ins { - _, ok := res[in] - if ok { - continue - } - - kata := &Kata{} - res[in] = kata - - entriURL := baseURL + entriPath + in - httpRes, err := cl.httpc.Get(entriURL) - if err != nil { - kata.err = err - continue - } - - defer httpRes.Body.Close() - - body, err := ioutil.ReadAll(httpRes.Body) - if err != nil { - kata.err = err - continue - } - - if debug.Value >= 2 { - fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) - } - - err = kata.parseHTMLEntri(body) + cl.direct, err = newDirectClient(cookies) if err != nil { - kata.err = err + return nil, err } + cl.active = cl.direct + return cl, nil } - return res + return cl, nil } // -// ListKataDasar get list of kata dasar +// CariDefinisi lookup definition of words. // -func (cl Client) ListKataDasar() (kataDasar daftarKata, err error) { - params := url.Values{ - paramNameMasukan: []string{paramValueDasar}, - paramNameMasukanLengkap: []string{paramValueDasar}, - } - - urlPage := baseURL + "/Cari/Jenis?" 
- - kataDasar = make(daftarKata) - - for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { - params.Set(paramNamePage, strconv.Itoa(pageNumber)) - - req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) - if err != nil { - return kataDasar, err - } - - res, err := cl.httpc.Do(req) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - - got, err := cl.parseHTMLKataDasar(body) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - if len(got) == 0 { - break - } - - kataDasar.merge(got) - - log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", - pageNumber, len(got), len(kataDasar)) +func (cl *Client) CariDefinisi(words []string) ( + res DefinisiResponse, err error, +) { + if cl.active != nil { + return cl.active.CariDefinisi(words) } - return kataDasar, nil -} + // TODO: start with api client first ... -// -// Login authenticate the client using username and password. 
-// -func (cl *Client) Login(user, pass string) (cookies []*http.Cookie, err error) { - tokenLogin, err := cl.preLogin() + cl.direct, err = newDirectClient(nil) if err != nil { - return nil, fmt.Errorf("Login: %w", err) - } - - params := url.Values{ - paramNameRequestVerificationToken: []string{tokenLogin}, - paramNamePosel: []string{user}, - paramNameKataSandi: []string{pass}, - paramNameIngatSaya: []string{paramValueFalse}, - } - - reqBody := strings.NewReader(params.Encode()) - - req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) - if err != nil { - return nil, fmt.Errorf("Login: %w", err) + return nil, err } - req.Header.Set(headerNameContentType, headerValueContentType) + return cl.direct.CariDefinisi(words) +} - res, err := cl.httpc.Do(req) - if err != nil { - return nil, fmt.Errorf("Login: %w", err) +func (cl *Client) ListKataDasar() (kataDasar daftarKata, err error) { + if cl.active != nil { + return cl.active.ListKataDasar() } - defer res.Body.Close() + // TODO: start with api client first ... - resBody, err := ioutil.ReadAll(res.Body) + cl.direct, err = newDirectClient(nil) if err != nil { - return nil, fmt.Errorf("Login: %w", err) - } - - if res.StatusCode >= http.StatusBadRequest { - return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + return nil, err } - cookies = cl.httpc.Jar.Cookies(cl.cookieURL) - - return cookies, nil + return cl.direct.ListKataDasar() } // -// SetCookies for HTTP request that need an authentication. +// Login authenticate the client using username and password to official KBBI +// server. 
// -func (cl *Client) SetCookies(cookies []*http.Cookie) { - if len(cookies) > 0 { - cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) - } -} - -func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err error) { - node, err := html.Parse(bytes.NewReader(htmlBody)) - if err != nil { - return nil, err - } - - kataDasar = make(daftarKata) - - var prev *html.Node - - for { - switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild - case node.NextSibling != nil: - node = node.NextSibling - default: - prev = node - node = node.Parent - } - if node == nil { - break - } - - if node.Type != html.ElementNode { - continue - } - if node.Data != tagNameAnchor { - continue - } - for _, attr := range node.Attr { - if attr.Key != attrNameHref { - continue - } - if !strings.HasPrefix(attr.Val, entriPath) { - continue - } - k := strings.TrimSpace(node.FirstChild.Data) - kataDasar[k] = struct{}{} +func (cl *Client) Login(user, pass string) (cookies []*http.Cookie, err error) { + if cl.direct == nil { + cl.direct, err = newDirectClient(nil) + if err != nil { + return nil, err } } - - return kataDasar, nil -} - -// -// parseHTMLLogin get the token at the form login. 
-// -func (cl Client) parseHTMLLogin(htmlBody []byte) (token string, err error) { - node, err := html.Parse(bytes.NewReader(htmlBody)) + cookies, err = cl.direct.login(user, pass) if err != nil { - return "", err - } - - var prev *html.Node - - for { - switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild - case node.NextSibling != nil: - node = node.NextSibling - default: - prev = node - node = node.Parent - } - if node == nil { - break - } - - if node.Type != html.ElementNode { - continue - } - if node.Data != tagNameInput { - continue - } - for _, attr := range node.Attr { - if attr.Key != attrNameName { - continue - } - - token = getAttrValue(node.Attr) - if len(token) > 0 { - return token, nil - } - } + return nil, err } - - return "", fmt.Errorf("token login not found") + cl.active = cl.direct + return cookies, nil } // -// preLogin initialize the client to get the first cookie. +// SetCookies for HTTP request in direct client. 
// -func (cl *Client) preLogin() (token string, err error) { - req, err := http.NewRequest(http.MethodGet, loginURL, nil) - if err != nil { - return "", err - } - - res, err := cl.httpc.Do(req) - if err != nil { - return "", err - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - return "", err - } - - token, err = cl.parseHTMLLogin(body) - if err != nil { - return "", err +func (cl *Client) SetCookies(cookies []*http.Cookie) { + if cl.direct != nil { + cl.direct.setCookies(cookies) } - - return token, nil } diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go index 24a7b3d..ac9298c 100644 --- a/cmd/kbbi/main.go +++ b/cmd/kbbi/main.go @@ -49,7 +49,7 @@ func main() { flag.Parse() - cl, err := kbbi.New(cookies) + cl, err := kbbi.NewClient(cookies) if err != nil { log.Fatal(err) } @@ -72,7 +72,10 @@ func main() { return } - resDefinisi := cl.CariDefinisi(flag.Args()) + resDefinisi, err := cl.CariDefinisi(flag.Args()) + if err != nil { + log.Fatal(err) + } for k, kata := range resDefinisi { err = kata.Err() diff --git a/direct_client.go b/direct_client.go new file mode 100644 index 0000000..3123829 --- /dev/null +++ b/direct_client.go @@ -0,0 +1,350 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/http/cookiejar" + "net/url" + "strconv" + "strings" + "time" + + "github.com/shuLhan/share/lib/debug" + "golang.org/x/net/html" + "golang.org/x/net/publicsuffix" +) + +const ( + maxPageNumber = 501 + defTimeout = 20 * time.Second +) + +// +// directClient for KBBI web using HTTP. +// +type directClient struct { + cookieURL *url.URL + httpc *http.Client +} + +// +// newDirectClient create and initialize new client that connect directly to +// KBBI official website. 
+// +func newDirectClient(cookies []*http.Cookie) (cl *directClient, err error) { + cookieURL, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + jarOpt := &cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + + jar, err := cookiejar.New(jarOpt) + if err != nil { + return nil, err + } + + if cookies != nil { + jar.SetCookies(cookieURL, cookies) + } + + cl = &directClient{ + cookieURL: cookieURL, + httpc: &http.Client{ + Jar: jar, + Timeout: defTimeout, + }, + } + + return cl, nil +} + +// +// CariDefinisi dari daftar kata. +// +func (cl *directClient) CariDefinisi(ins []string) ( + res DefinisiResponse, err error, +) { + res = make(DefinisiResponse, len(ins)) + + for _, in := range ins { + _, ok := res[in] + if ok { + continue + } + + kata := &Kata{} + res[in] = kata + + entriURL := baseURL + entriPath + in + httpRes, err := cl.httpc.Get(entriURL) + if err != nil { + kata.err = err + continue + } + + defer httpRes.Body.Close() + + body, err := ioutil.ReadAll(httpRes.Body) + if err != nil { + kata.err = err + continue + } + + if debug.Value >= 2 { + fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) + } + + err = kata.parseHTMLEntri(body) + if err != nil { + kata.err = err + } + } + + return res, nil +} + +// +// ListKataDasar get list of kata dasar +// +func (cl *directClient) ListKataDasar() (kataDasar daftarKata, err error) { + params := url.Values{ + paramNameMasukan: []string{paramValueDasar}, + paramNameMasukanLengkap: []string{paramValueDasar}, + } + + urlPage := baseURL + "/Cari/Jenis?" 
+ + kataDasar = make(daftarKata) + + for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { + params.Set(paramNamePage, strconv.Itoa(pageNumber)) + + req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) + if err != nil { + return kataDasar, err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + got, err := cl.parseHTMLKataDasar(body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + if len(got) == 0 { + break + } + + kataDasar.merge(got) + + log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", + pageNumber, len(got), len(kataDasar)) + } + + return kataDasar, nil +} + +// +// Login authenticate the client using username and password. 
+// +func (cl *directClient) login(user, pass string) ( + cookies []*http.Cookie, err error, +) { + tokenLogin, err := cl.preLogin() + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + params := url.Values{ + paramNameRequestVerificationToken: []string{tokenLogin}, + paramNamePosel: []string{user}, + paramNameKataSandi: []string{pass}, + paramNameIngatSaya: []string{paramValueFalse}, + } + + reqBody := strings.NewReader(params.Encode()) + + req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + req.Header.Set(headerNameContentType, headerValueContentType) + + res, err := cl.httpc.Do(req) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + defer res.Body.Close() + + resBody, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + if res.StatusCode >= http.StatusBadRequest { + return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + } + + cookies = cl.httpc.Jar.Cookies(cl.cookieURL) + + return cookies, nil +} + +// +// setCookies for HTTP request that need an authentication. 
+// +func (cl *directClient) setCookies(cookies []*http.Cookie) { + if len(cookies) > 0 { + cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) + } +} + +func (cl *directClient) parseHTMLKataDasar(htmlBody []byte) ( + kataDasar daftarKata, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + kataDasar = make(daftarKata) + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameAnchor { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameHref { + continue + } + if !strings.HasPrefix(attr.Val, entriPath) { + continue + } + k := strings.TrimSpace(node.FirstChild.Data) + kataDasar[k] = struct{}{} + } + } + + return kataDasar, nil +} + +// +// parseHTMLLogin get the token at the form login. +// +func (cl *directClient) parseHTMLLogin(htmlBody []byte) ( + token string, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return "", err + } + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameInput { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameName { + continue + } + + token = getAttrValue(node.Attr) + if len(token) > 0 { + return token, nil + } + } + } + + return "", fmt.Errorf("token login not found") +} + +// +// preLogin initialize the client to get the first cookie. 
+// +func (cl *directClient) preLogin() (token string, err error) { + req, err := http.NewRequest(http.MethodGet, loginURL, nil) + if err != nil { + return "", err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return "", err + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + + token, err = cl.parseHTMLLogin(body) + if err != nil { + return "", err + } + + return token, nil +} diff --git a/client_test.go b/direct_client_test.go index 92f5fd4..894b876 100644 --- a/client_test.go +++ b/direct_client_test.go @@ -9,13 +9,13 @@ import ( "testing" ) -func TestClient_parseHTMLKataDasar(t *testing.T) { +func TestDirectClient_parseHTMLKataDasar(t *testing.T) { htmlBody, err := ioutil.ReadFile("testdata/kbbi_dasar.html") if err != nil { t.Fatal(err) } - cl, err := New(nil) + cl, err := newDirectClient(nil) if err != nil { t.Fatal(err) } |
