From 84fdfdb6ae4175a125fc67a6aed377476d31ee0e Mon Sep 17 00:00:00 2001 From: Shulhan Date: Mon, 30 Mar 2020 23:15:47 +0700 Subject: all: refactoring Client The client will have two mode: direct or API. The direct mode connect to official KBBI website, request the word page, and parse the HTML to get the definition. The API mode connect to server API that provide caching of dictionary. --- active_client.go | 13 ++ client.go | 336 +++++++----------------------------------------- client_test.go | 29 ----- cmd/kbbi/main.go | 7 +- direct_client.go | 350 ++++++++++++++++++++++++++++++++++++++++++++++++++ direct_client_test.go | 29 +++++ 6 files changed, 443 insertions(+), 321 deletions(-) create mode 100644 active_client.go delete mode 100644 client_test.go create mode 100644 direct_client.go create mode 100644 direct_client_test.go diff --git a/active_client.go b/active_client.go new file mode 100644 index 0000000..061bd1d --- /dev/null +++ b/active_client.go @@ -0,0 +1,13 @@ +// Copyright 2020, Shulhan . All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +// +// activeClient define an interface for an active client. +// +type activeClient interface { + CariDefinisi(words []string) (res DefinisiResponse, err error) + ListKataDasar() (kataDasar daftarKata, err error) +} diff --git a/client.go b/client.go index d90eb4a..fabe600 100644 --- a/client.go +++ b/client.go @@ -4,338 +4,94 @@ package kbbi -import ( - "bytes" - "fmt" - "io/ioutil" - "log" - "net/http" - "net/http/cookiejar" - "net/url" - "strconv" - "strings" - "time" - - "github.com/shuLhan/share/lib/debug" - "golang.org/x/net/html" - "golang.org/x/net/publicsuffix" -) - -const ( - maxPageNumber = 501 - defTimeout = 20 * time.Second -) +import "net/http" // -// Client for KBBI web using HTTP. +// Client for dictionary API and official KBBI servers. // type Client struct { - cookieURL *url.URL - httpc *http.Client + active activeClient + direct *directClient } // -// New create and initialize new client for KBBI web. +// NewClient create and initialize new client. +// If cookies is not empty, the direct client will be initialized and actived. // -func New(cookies []*http.Cookie) (cl *Client, err error) { - cookieURL, err := url.Parse(baseURL) - if err != nil { - return nil, fmt.Errorf("New: %w", err) - } - - jarOpt := &cookiejar.Options{ - PublicSuffixList: publicsuffix.List, - } - - jar, err := cookiejar.New(jarOpt) - if err != nil { - return nil, err - } +func NewClient(cookies []*http.Cookie) (cl *Client, err error) { + cl = &Client{} if cookies != nil { - jar.SetCookies(cookieURL, cookies) - } - - cl = &Client{ - cookieURL: cookieURL, - httpc: &http.Client{ - Jar: jar, - Timeout: defTimeout, - }, - } - - return cl, nil -} - -// -// CariDefinisi dari daftar kata. -// -func (cl Client) CariDefinisi(ins []string) (res DefinisiResponse) { - res = make(DefinisiResponse, len(ins)) - - for _, in := range ins { - _, ok := res[in] - if ok { - continue - } - - kata := &Kata{} - res[in] = kata - - entriURL := baseURL + entriPath + in - httpRes, err := cl.httpc.Get(entriURL) - if err != nil { - kata.err = err - continue - } - - defer httpRes.Body.Close() - - body, err := ioutil.ReadAll(httpRes.Body) - if err != nil { - kata.err = err - continue - } - - if debug.Value >= 2 { - fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) - } - - err = kata.parseHTMLEntri(body) + cl.direct, err = newDirectClient(cookies) if err != nil { - kata.err = err + return nil, err } + cl.active = cl.direct + return cl, nil } - return res + return cl, nil } // -// ListKataDasar get list of kata dasar +// CariDefinisi lookup definition of words. // -func (cl Client) ListKataDasar() (kataDasar daftarKata, err error) { - params := url.Values{ - paramNameMasukan: []string{paramValueDasar}, - paramNameMasukanLengkap: []string{paramValueDasar}, - } - - urlPage := baseURL + "/Cari/Jenis?" - - kataDasar = make(daftarKata) - - for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { - params.Set(paramNamePage, strconv.Itoa(pageNumber)) - - req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) - if err != nil { - return kataDasar, err - } - - res, err := cl.httpc.Do(req) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - - got, err := cl.parseHTMLKataDasar(body) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - if len(got) == 0 { - break - } - - kataDasar.merge(got) - - log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", - pageNumber, len(got), len(kataDasar)) +func (cl *Client) CariDefinisi(words []string) ( + res DefinisiResponse, err error, +) { + if cl.active != nil { + return cl.active.CariDefinisi(words) } - return kataDasar, nil -} + // TODO: start with api client first ... -// -// Login authenticate the client using username and password. -// -func (cl *Client) Login(user, pass string) (cookies []*http.Cookie, err error) { - tokenLogin, err := cl.preLogin() + cl.direct, err = newDirectClient(nil) if err != nil { - return nil, fmt.Errorf("Login: %w", err) - } - - params := url.Values{ - paramNameRequestVerificationToken: []string{tokenLogin}, - paramNamePosel: []string{user}, - paramNameKataSandi: []string{pass}, - paramNameIngatSaya: []string{paramValueFalse}, - } - - reqBody := strings.NewReader(params.Encode()) - - req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) - if err != nil { - return nil, fmt.Errorf("Login: %w", err) + return nil, err } - req.Header.Set(headerNameContentType, headerValueContentType) + return cl.direct.CariDefinisi(words) +} - res, err := cl.httpc.Do(req) - if err != nil { - return nil, fmt.Errorf("Login: %w", err) +func (cl *Client) ListKataDasar() (kataDasar daftarKata, err error) { + if cl.active != nil { + return cl.active.ListKataDasar() } - defer res.Body.Close() + // TODO: start with api client first ... - resBody, err := ioutil.ReadAll(res.Body) + cl.direct, err = newDirectClient(nil) if err != nil { - return nil, fmt.Errorf("Login: %w", err) - } - - if res.StatusCode >= http.StatusBadRequest { - return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + return nil, err } - cookies = cl.httpc.Jar.Cookies(cl.cookieURL) - - return cookies, nil + return cl.direct.ListKataDasar() } // -// SetCookies for HTTP request that need an authentication. +// Login authenticate the client using username and password to official KBBI +// server. // -func (cl *Client) SetCookies(cookies []*http.Cookie) { - if len(cookies) > 0 { - cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) - } -} - -func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err error) { - node, err := html.Parse(bytes.NewReader(htmlBody)) - if err != nil { - return nil, err - } - - kataDasar = make(daftarKata) - - var prev *html.Node - - for { - switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild - case node.NextSibling != nil: - node = node.NextSibling - default: - prev = node - node = node.Parent - } - if node == nil { - break - } - - if node.Type != html.ElementNode { - continue - } - if node.Data != tagNameAnchor { - continue - } - for _, attr := range node.Attr { - if attr.Key != attrNameHref { - continue - } - if !strings.HasPrefix(attr.Val, entriPath) { - continue - } - k := strings.TrimSpace(node.FirstChild.Data) - kataDasar[k] = struct{}{} +func (cl *Client) Login(user, pass string) (cookies []*http.Cookie, err error) { + if cl.direct == nil { + cl.direct, err = newDirectClient(nil) + if err != nil { + return nil, err } } - - return kataDasar, nil -} - -// -// parseHTMLLogin get the token at the form login. -// -func (cl Client) parseHTMLLogin(htmlBody []byte) (token string, err error) { - node, err := html.Parse(bytes.NewReader(htmlBody)) + cookies, err = cl.direct.login(user, pass) if err != nil { - return "", err - } - - var prev *html.Node - - for { - switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild - case node.NextSibling != nil: - node = node.NextSibling - default: - prev = node - node = node.Parent - } - if node == nil { - break - } - - if node.Type != html.ElementNode { - continue - } - if node.Data != tagNameInput { - continue - } - for _, attr := range node.Attr { - if attr.Key != attrNameName { - continue - } - - token = getAttrValue(node.Attr) - if len(token) > 0 { - return token, nil - } - } + return nil, err } - - return "", fmt.Errorf("token login not found") + cl.active = cl.direct + return cookies, nil } // -// preLogin initialize the client to get the first cookie. +// SetCookies for HTTP request in direct client. // -func (cl *Client) preLogin() (token string, err error) { - req, err := http.NewRequest(http.MethodGet, loginURL, nil) - if err != nil { - return "", err - } - - res, err := cl.httpc.Do(req) - if err != nil { - return "", err - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - return "", err - } - - token, err = cl.parseHTMLLogin(body) - if err != nil { - return "", err +func (cl *Client) SetCookies(cookies []*http.Cookie) { + if cl.direct != nil { + cl.direct.setCookies(cookies) } - - return token, nil } diff --git a/client_test.go b/client_test.go deleted file mode 100644 index 92f5fd4..0000000 --- a/client_test.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020, Shulhan . All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package kbbi - -import ( - "io/ioutil" - "testing" -) - -func TestClient_parseHTMLKataDasar(t *testing.T) { - htmlBody, err := ioutil.ReadFile("testdata/kbbi_dasar.html") - if err != nil { - t.Fatal(err) - } - - cl, err := New(nil) - if err != nil { - t.Fatal(err) - } - - got, err := cl.parseHTMLKataDasar(htmlBody) - if err != nil { - t.Fatal(err) - } - - t.Logf("Kata dasar: %v", got) -} diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go index 24a7b3d..ac9298c 100644 --- a/cmd/kbbi/main.go +++ b/cmd/kbbi/main.go @@ -49,7 +49,7 @@ func main() { flag.Parse() - cl, err := kbbi.New(cookies) + cl, err := kbbi.NewClient(cookies) if err != nil { log.Fatal(err) } @@ -72,7 +72,10 @@ func main() { return } - resDefinisi := cl.CariDefinisi(flag.Args()) + resDefinisi, err := cl.CariDefinisi(flag.Args()) + if err != nil { + log.Fatal(err) + } for k, kata := range resDefinisi { err = kata.Err() diff --git a/direct_client.go b/direct_client.go new file mode 100644 index 0000000..3123829 --- /dev/null +++ b/direct_client.go @@ -0,0 +1,350 @@ +// Copyright 2020, Shulhan . All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/http/cookiejar" + "net/url" + "strconv" + "strings" + "time" + + "github.com/shuLhan/share/lib/debug" + "golang.org/x/net/html" + "golang.org/x/net/publicsuffix" +) + +const ( + maxPageNumber = 501 + defTimeout = 20 * time.Second +) + +// +// directClient for KBBI web using HTTP. +// +type directClient struct { + cookieURL *url.URL + httpc *http.Client +} + +// +// newDirectClient create and initialize new client that connect directly to +// KBBI official website. +// +func newDirectClient(cookies []*http.Cookie) (cl *directClient, err error) { + cookieURL, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + jarOpt := &cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + + jar, err := cookiejar.New(jarOpt) + if err != nil { + return nil, err + } + + if cookies != nil { + jar.SetCookies(cookieURL, cookies) + } + + cl = &directClient{ + cookieURL: cookieURL, + httpc: &http.Client{ + Jar: jar, + Timeout: defTimeout, + }, + } + + return cl, nil +} + +// +// CariDefinisi dari daftar kata. +// +func (cl *directClient) CariDefinisi(ins []string) ( + res DefinisiResponse, err error, +) { + res = make(DefinisiResponse, len(ins)) + + for _, in := range ins { + _, ok := res[in] + if ok { + continue + } + + kata := &Kata{} + res[in] = kata + + entriURL := baseURL + entriPath + in + httpRes, err := cl.httpc.Get(entriURL) + if err != nil { + kata.err = err + continue + } + + defer httpRes.Body.Close() + + body, err := ioutil.ReadAll(httpRes.Body) + if err != nil { + kata.err = err + continue + } + + if debug.Value >= 2 { + fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) + } + + err = kata.parseHTMLEntri(body) + if err != nil { + kata.err = err + } + } + + return res, nil +} + +// +// ListKataDasar get list of kata dasar +// +func (cl *directClient) ListKataDasar() (kataDasar daftarKata, err error) { + params := url.Values{ + paramNameMasukan: []string{paramValueDasar}, + paramNameMasukanLengkap: []string{paramValueDasar}, + } + + urlPage := baseURL + "/Cari/Jenis?" + + kataDasar = make(daftarKata) + + for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { + params.Set(paramNamePage, strconv.Itoa(pageNumber)) + + req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) + if err != nil { + return kataDasar, err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + got, err := cl.parseHTMLKataDasar(body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + if len(got) == 0 { + break + } + + kataDasar.merge(got) + + log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", + pageNumber, len(got), len(kataDasar)) + } + + return kataDasar, nil +} + +// +// Login authenticate the client using username and password. +// +func (cl *directClient) login(user, pass string) ( + cookies []*http.Cookie, err error, +) { + tokenLogin, err := cl.preLogin() + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + params := url.Values{ + paramNameRequestVerificationToken: []string{tokenLogin}, + paramNamePosel: []string{user}, + paramNameKataSandi: []string{pass}, + paramNameIngatSaya: []string{paramValueFalse}, + } + + reqBody := strings.NewReader(params.Encode()) + + req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + req.Header.Set(headerNameContentType, headerValueContentType) + + res, err := cl.httpc.Do(req) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + defer res.Body.Close() + + resBody, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + if res.StatusCode >= http.StatusBadRequest { + return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + } + + cookies = cl.httpc.Jar.Cookies(cl.cookieURL) + + return cookies, nil +} + +// +// setCookies for HTTP request that need an authentication. +// +func (cl *directClient) setCookies(cookies []*http.Cookie) { + if len(cookies) > 0 { + cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) + } +} + +func (cl *directClient) parseHTMLKataDasar(htmlBody []byte) ( + kataDasar daftarKata, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + kataDasar = make(daftarKata) + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameAnchor { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameHref { + continue + } + if !strings.HasPrefix(attr.Val, entriPath) { + continue + } + k := strings.TrimSpace(node.FirstChild.Data) + kataDasar[k] = struct{}{} + } + } + + return kataDasar, nil +} + +// +// parseHTMLLogin get the token at the form login. +// +func (cl *directClient) parseHTMLLogin(htmlBody []byte) ( + token string, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return "", err + } + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameInput { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameName { + continue + } + + token = getAttrValue(node.Attr) + if len(token) > 0 { + return token, nil + } + } + } + + return "", fmt.Errorf("token login not found") +} + +// +// preLogin initialize the client to get the first cookie. +// +func (cl *directClient) preLogin() (token string, err error) { + req, err := http.NewRequest(http.MethodGet, loginURL, nil) + if err != nil { + return "", err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return "", err + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + + token, err = cl.parseHTMLLogin(body) + if err != nil { + return "", err + } + + return token, nil +} diff --git a/direct_client_test.go b/direct_client_test.go new file mode 100644 index 0000000..894b876 --- /dev/null +++ b/direct_client_test.go @@ -0,0 +1,29 @@ +// Copyright 2020, Shulhan . All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +import ( + "io/ioutil" + "testing" +) + +func TestDirectClient_parseHTMLKataDasar(t *testing.T) { + htmlBody, err := ioutil.ReadFile("testdata/kbbi_dasar.html") + if err != nil { + t.Fatal(err) + } + + cl, err := newDirectClient(nil) + if err != nil { + t.Fatal(err) + } + + got, err := cl.parseHTMLKataDasar(htmlBody) + if err != nil { + t.Fatal(err) + } + + t.Logf("Kata dasar: %v", got) +} -- cgit v1.3