diff options
| -rw-r--r-- | active_client.go | 13 | ||||
| -rw-r--r-- | client.go | 336 | ||||
| -rw-r--r-- | cmd/kbbi/main.go | 7 | ||||
| -rw-r--r-- | direct_client.go | 350 | ||||
| -rw-r--r-- | direct_client_test.go (renamed from client_test.go) | 4 |
5 files changed, 416 insertions, 294 deletions
diff --git a/active_client.go b/active_client.go new file mode 100644 index 0000000..061bd1d --- /dev/null +++ b/active_client.go @@ -0,0 +1,13 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +// +// activeClient define an interface for an active client. +// +type activeClient interface { + CariDefinisi(words []string) (res DefinisiResponse, err error) + ListKataDasar() (kataDasar daftarKata, err error) +} @@ -4,338 +4,94 @@ package kbbi -import ( - "bytes" - "fmt" - "io/ioutil" - "log" - "net/http" - "net/http/cookiejar" - "net/url" - "strconv" - "strings" - "time" - - "github.com/shuLhan/share/lib/debug" - "golang.org/x/net/html" - "golang.org/x/net/publicsuffix" -) - -const ( - maxPageNumber = 501 - defTimeout = 20 * time.Second -) +import "net/http" // -// Client for KBBI web using HTTP. +// Client for dictionary API and official KBBI servers. // type Client struct { - cookieURL *url.URL - httpc *http.Client + active activeClient + direct *directClient } // -// New create and initialize new client for KBBI web. +// NewClient create and initialize new client. +// If cookies is not empty, the direct client will be initialized and actived. // -func New(cookies []*http.Cookie) (cl *Client, err error) { - cookieURL, err := url.Parse(baseURL) - if err != nil { - return nil, fmt.Errorf("New: %w", err) - } - - jarOpt := &cookiejar.Options{ - PublicSuffixList: publicsuffix.List, - } - - jar, err := cookiejar.New(jarOpt) - if err != nil { - return nil, err - } +func NewClient(cookies []*http.Cookie) (cl *Client, err error) { + cl = &Client{} if cookies != nil { - jar.SetCookies(cookieURL, cookies) - } - - cl = &Client{ - cookieURL: cookieURL, - httpc: &http.Client{ - Jar: jar, - Timeout: defTimeout, - }, - } - - return cl, nil -} - -// -// CariDefinisi dari daftar kata. -// -func (cl Client) CariDefinisi(ins []string) (res DefinisiResponse) { - res = make(DefinisiResponse, len(ins)) - - for _, in := range ins { - _, ok := res[in] - if ok { - continue - } - - kata := &Kata{} - res[in] = kata - - entriURL := baseURL + entriPath + in - httpRes, err := cl.httpc.Get(entriURL) - if err != nil { - kata.err = err - continue - } - - defer httpRes.Body.Close() - - body, err := ioutil.ReadAll(httpRes.Body) - if err != nil { - kata.err = err - continue - } - - if debug.Value >= 2 { - fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) - } - - err = kata.parseHTMLEntri(body) + cl.direct, err = newDirectClient(cookies) if err != nil { - kata.err = err + return nil, err } + cl.active = cl.direct + return cl, nil } - return res + return cl, nil } // -// ListKataDasar get list of kata dasar +// CariDefinisi lookup definition of words. // -func (cl Client) ListKataDasar() (kataDasar daftarKata, err error) { - params := url.Values{ - paramNameMasukan: []string{paramValueDasar}, - paramNameMasukanLengkap: []string{paramValueDasar}, - } - - urlPage := baseURL + "/Cari/Jenis?" - - kataDasar = make(daftarKata) - - for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { - params.Set(paramNamePage, strconv.Itoa(pageNumber)) - - req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) - if err != nil { - return kataDasar, err - } - - res, err := cl.httpc.Do(req) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - - got, err := cl.parseHTMLKataDasar(body) - if err != nil { - return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", - pageNumber, err) - } - if len(got) == 0 { - break - } - - kataDasar.merge(got) - - log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", - pageNumber, len(got), len(kataDasar)) +func (cl *Client) CariDefinisi(words []string) ( + res DefinisiResponse, err error, +) { + if cl.active != nil { + return cl.active.CariDefinisi(words) } - return kataDasar, nil -} + // TODO: start with api client first ... -// -// Login authenticate the client using username and password. -// -func (cl *Client) Login(user, pass string) (cookies []*http.Cookie, err error) { - tokenLogin, err := cl.preLogin() + cl.direct, err = newDirectClient(nil) if err != nil { - return nil, fmt.Errorf("Login: %w", err) - } - - params := url.Values{ - paramNameRequestVerificationToken: []string{tokenLogin}, - paramNamePosel: []string{user}, - paramNameKataSandi: []string{pass}, - paramNameIngatSaya: []string{paramValueFalse}, - } - - reqBody := strings.NewReader(params.Encode()) - - req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) - if err != nil { - return nil, fmt.Errorf("Login: %w", err) + return nil, err } - req.Header.Set(headerNameContentType, headerValueContentType) + return cl.direct.CariDefinisi(words) +} - res, err := cl.httpc.Do(req) - if err != nil { - return nil, fmt.Errorf("Login: %w", err) +func (cl *Client) ListKataDasar() (kataDasar daftarKata, err error) { + if cl.active != nil { + return cl.active.ListKataDasar() } - defer res.Body.Close() + // TODO: start with api client first ... - resBody, err := ioutil.ReadAll(res.Body) + cl.direct, err = newDirectClient(nil) if err != nil { - return nil, fmt.Errorf("Login: %w", err) - } - - if res.StatusCode >= http.StatusBadRequest { - return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + return nil, err } - cookies = cl.httpc.Jar.Cookies(cl.cookieURL) - - return cookies, nil + return cl.direct.ListKataDasar() } // -// SetCookies for HTTP request that need an authentication. +// Login authenticate the client using username and password to official KBBI +// server. // -func (cl *Client) SetCookies(cookies []*http.Cookie) { - if len(cookies) > 0 { - cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) - } -} - -func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err error) { - node, err := html.Parse(bytes.NewReader(htmlBody)) - if err != nil { - return nil, err - } - - kataDasar = make(daftarKata) - - var prev *html.Node - - for { - switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild - case node.NextSibling != nil: - node = node.NextSibling - default: - prev = node - node = node.Parent - } - if node == nil { - break - } - - if node.Type != html.ElementNode { - continue - } - if node.Data != tagNameAnchor { - continue - } - for _, attr := range node.Attr { - if attr.Key != attrNameHref { - continue - } - if !strings.HasPrefix(attr.Val, entriPath) { - continue - } - k := strings.TrimSpace(node.FirstChild.Data) - kataDasar[k] = struct{}{} +func (cl *Client) Login(user, pass string) (cookies []*http.Cookie, err error) { + if cl.direct == nil { + cl.direct, err = newDirectClient(nil) + if err != nil { + return nil, err } } - - return kataDasar, nil -} - -// -// parseHTMLLogin get the token at the form login. -// -func (cl Client) parseHTMLLogin(htmlBody []byte) (token string, err error) { - node, err := html.Parse(bytes.NewReader(htmlBody)) + cookies, err = cl.direct.login(user, pass) if err != nil { - return "", err - } - - var prev *html.Node - - for { - switch { - case node.FirstChild != nil && node.FirstChild != prev && - node.LastChild != prev: - node = node.FirstChild - case node.NextSibling != nil: - node = node.NextSibling - default: - prev = node - node = node.Parent - } - if node == nil { - break - } - - if node.Type != html.ElementNode { - continue - } - if node.Data != tagNameInput { - continue - } - for _, attr := range node.Attr { - if attr.Key != attrNameName { - continue - } - - token = getAttrValue(node.Attr) - if len(token) > 0 { - return token, nil - } - } + return nil, err } - - return "", fmt.Errorf("token login not found") + cl.active = cl.direct + return cookies, nil } // -// preLogin initialize the client to get the first cookie. +// SetCookies for HTTP request in direct client. // -func (cl *Client) preLogin() (token string, err error) { - req, err := http.NewRequest(http.MethodGet, loginURL, nil) - if err != nil { - return "", err - } - - res, err := cl.httpc.Do(req) - if err != nil { - return "", err - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - return "", err - } - - token, err = cl.parseHTMLLogin(body) - if err != nil { - return "", err +func (cl *Client) SetCookies(cookies []*http.Cookie) { + if cl.direct != nil { + cl.direct.setCookies(cookies) } - - return token, nil } diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go index 24a7b3d..ac9298c 100644 --- a/cmd/kbbi/main.go +++ b/cmd/kbbi/main.go @@ -49,7 +49,7 @@ func main() { flag.Parse() - cl, err := kbbi.New(cookies) + cl, err := kbbi.NewClient(cookies) if err != nil { log.Fatal(err) } @@ -72,7 +72,10 @@ func main() { return } - resDefinisi := cl.CariDefinisi(flag.Args()) + resDefinisi, err := cl.CariDefinisi(flag.Args()) + if err != nil { + log.Fatal(err) + } for k, kata := range resDefinisi { err = kata.Err() diff --git a/direct_client.go b/direct_client.go new file mode 100644 index 0000000..3123829 --- /dev/null +++ b/direct_client.go @@ -0,0 +1,350 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/http/cookiejar" + "net/url" + "strconv" + "strings" + "time" + + "github.com/shuLhan/share/lib/debug" + "golang.org/x/net/html" + "golang.org/x/net/publicsuffix" +) + +const ( + maxPageNumber = 501 + defTimeout = 20 * time.Second +) + +// +// directClient for KBBI web using HTTP. +// +type directClient struct { + cookieURL *url.URL + httpc *http.Client +} + +// +// newDirectClient create and initialize new client that connect directly to +// KBBI official website. +// +func newDirectClient(cookies []*http.Cookie) (cl *directClient, err error) { + cookieURL, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } + + jarOpt := &cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + + jar, err := cookiejar.New(jarOpt) + if err != nil { + return nil, err + } + + if cookies != nil { + jar.SetCookies(cookieURL, cookies) + } + + cl = &directClient{ + cookieURL: cookieURL, + httpc: &http.Client{ + Jar: jar, + Timeout: defTimeout, + }, + } + + return cl, nil +} + +// +// CariDefinisi dari daftar kata. +// +func (cl *directClient) CariDefinisi(ins []string) ( + res DefinisiResponse, err error, +) { + res = make(DefinisiResponse, len(ins)) + + for _, in := range ins { + _, ok := res[in] + if ok { + continue + } + + kata := &Kata{} + res[in] = kata + + entriURL := baseURL + entriPath + in + httpRes, err := cl.httpc.Get(entriURL) + if err != nil { + kata.err = err + continue + } + + defer httpRes.Body.Close() + + body, err := ioutil.ReadAll(httpRes.Body) + if err != nil { + kata.err = err + continue + } + + if debug.Value >= 2 { + fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) + } + + err = kata.parseHTMLEntri(body) + if err != nil { + kata.err = err + } + } + + return res, nil +} + +// +// ListKataDasar get list of kata dasar +// +func (cl *directClient) ListKataDasar() (kataDasar daftarKata, err error) { + params := url.Values{ + paramNameMasukan: []string{paramValueDasar}, + paramNameMasukanLengkap: []string{paramValueDasar}, + } + + urlPage := baseURL + "/Cari/Jenis?" + + kataDasar = make(daftarKata) + + for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ { + params.Set(paramNamePage, strconv.Itoa(pageNumber)) + + req, err := http.NewRequest(http.MethodGet, urlPage+params.Encode(), nil) + if err != nil { + return kataDasar, err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + + got, err := cl.parseHTMLKataDasar(body) + if err != nil { + return kataDasar, fmt.Errorf("ListKataDasar: page %d: %w", + pageNumber, err) + } + if len(got) == 0 { + break + } + + kataDasar.merge(got) + + log.Printf("ListKataDasar: halaman %d, jumlah kata %d, total kata %d", + pageNumber, len(got), len(kataDasar)) + } + + return kataDasar, nil +} + +// +// Login authenticate the client using username and password. +// +func (cl *directClient) login(user, pass string) ( + cookies []*http.Cookie, err error, +) { + tokenLogin, err := cl.preLogin() + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + params := url.Values{ + paramNameRequestVerificationToken: []string{tokenLogin}, + paramNamePosel: []string{user}, + paramNameKataSandi: []string{pass}, + paramNameIngatSaya: []string{paramValueFalse}, + } + + reqBody := strings.NewReader(params.Encode()) + + req, err := http.NewRequest(http.MethodPost, loginURL, reqBody) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + req.Header.Set(headerNameContentType, headerValueContentType) + + res, err := cl.httpc.Do(req) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + defer res.Body.Close() + + resBody, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("Login: %w", err) + } + + if res.StatusCode >= http.StatusBadRequest { + return nil, fmt.Errorf("Login: %d %s", res.StatusCode, resBody) + } + + cookies = cl.httpc.Jar.Cookies(cl.cookieURL) + + return cookies, nil +} + +// +// setCookies for HTTP request that need an authentication. +// +func (cl *directClient) setCookies(cookies []*http.Cookie) { + if len(cookies) > 0 { + cl.httpc.Jar.SetCookies(cl.cookieURL, cookies) + } +} + +func (cl *directClient) parseHTMLKataDasar(htmlBody []byte) ( + kataDasar daftarKata, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + kataDasar = make(daftarKata) + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameAnchor { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameHref { + continue + } + if !strings.HasPrefix(attr.Val, entriPath) { + continue + } + k := strings.TrimSpace(node.FirstChild.Data) + kataDasar[k] = struct{}{} + } + } + + return kataDasar, nil +} + +// +// parseHTMLLogin get the token at the form login. +// +func (cl *directClient) parseHTMLLogin(htmlBody []byte) ( + token string, err error, +) { + node, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return "", err + } + + var prev *html.Node + + for { + switch { + case node.FirstChild != nil && node.FirstChild != prev && + node.LastChild != prev: + node = node.FirstChild + case node.NextSibling != nil: + node = node.NextSibling + default: + prev = node + node = node.Parent + } + if node == nil { + break + } + + if node.Type != html.ElementNode { + continue + } + if node.Data != tagNameInput { + continue + } + for _, attr := range node.Attr { + if attr.Key != attrNameName { + continue + } + + token = getAttrValue(node.Attr) + if len(token) > 0 { + return token, nil + } + } + } + + return "", fmt.Errorf("token login not found") +} + +// +// preLogin initialize the client to get the first cookie. +// +func (cl *directClient) preLogin() (token string, err error) { + req, err := http.NewRequest(http.MethodGet, loginURL, nil) + if err != nil { + return "", err + } + + res, err := cl.httpc.Do(req) + if err != nil { + return "", err + } + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + + token, err = cl.parseHTMLLogin(body) + if err != nil { + return "", err + } + + return token, nil +} diff --git a/client_test.go b/direct_client_test.go index 92f5fd4..894b876 100644 --- a/client_test.go +++ b/direct_client_test.go @@ -9,13 +9,13 @@ import ( "testing" ) -func TestClient_parseHTMLKataDasar(t *testing.T) { +func TestDirectClient_parseHTMLKataDasar(t *testing.T) { htmlBody, err := ioutil.ReadFile("testdata/kbbi_dasar.html") if err != nil { t.Fatal(err) } - cl, err := New(nil) + cl, err := newDirectClient(nil) if err != nil { t.Fatal(err) } |
