From 95c8ba5699efa80fc19f7e210bcf4d1e189244e2 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Sat, 11 Jan 2025 14:14:15 +0700 Subject: all: add options for Client Currently, the options contain a Debug field that prints the HTML response as text when set to 1. --- client.go | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 4 deletions(-) (limited to 'client.go') diff --git a/client.go b/client.go index 091ea87..fcbcc63 100644 --- a/client.go +++ b/client.go @@ -70,11 +70,12 @@ type Client struct { cookieURL *url.URL baseDir string cookies []*http.Cookie + opts ClientOptions } // NewClient create and initialize new client that connect directly to // KBBI official website. -func NewClient() (cl *Client, err error) { +func NewClient(opts ClientOptions) (cl *Client, err error) { cookieURL, err := url.Parse(kbbiUrlBase) if err != nil { return nil, fmt.Errorf("New: %w", err) @@ -97,6 +98,7 @@ func NewClient() (cl *Client, err error) { cl = &Client{ cookieURL: cookieURL, httpc: libhttp.NewClient(clientOpts), + opts: opts, } cl.httpc.Jar = jar @@ -127,8 +129,7 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) { continue } - kata := &Word{} - res[in] = kata + var kata = &Word{} var req = libhttp.ClientRequest{ Path: kbbiPathEntri + in, @@ -137,22 +138,74 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) { resp, err = cl.httpc.Get(req) if err != nil { kata.err = err + res[in] = kata continue } - err = kata.parseHTMLEntri(in, resp.Body) + kata, err = cl.parseHTMLEntri(in, resp.Body) if err != nil { kata.err = err + res[in] = kata + continue } if len(kata.Definition) == 0 && len(kata.Message) == 0 { kata.Message = "Entri tidak ditemukan" } + res[in] = kata } return res, nil } +// parseHTMLEntri parse HTML body from "/entri/" page to find the +// definition of the word. 
+func (cl *Client) parseHTMLEntri(in string, htmlBody []byte) (word *Word, err error) { + var logp = `parseHTMLEntri` + + if cl.opts.Debug == 1 { + var htmlText = html.Sanitize(htmlBody) + log.Printf("%s:\n%s", logp, htmlText) + } + + iter, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + word = &Word{} + + for node := iter.Next(); node != nil; node = iter.Next() { + if !node.IsElement() { + continue + } + + switch node.Data { + case tagNameHeader2: + word.parseRootWord(node) + + case tagNameOrderedList, tagNameUnorderedList: + li := node.GetFirstChild() + for li != nil { + defKata, err := parseWordDefinition(in, li) + if err != nil { + word.Message = err.Error() + err = nil + break + } + if defKata == nil { + break + } + word.Definition = append(word.Definition, defKata) + li = li.GetNextSibling() + } + next := node.GetNextSibling() + iter.SetNext(next) + } + } + return word, nil +} + // ListRootWords list all of the root words in dictionary. func (cl *Client) ListRootWords(pageStart, pageEnd int) (rootWords Words, err error) { if pageStart < 1 { @@ -300,6 +353,13 @@ func (cl *Client) parseHTMLRootWords(htmlBody []byte) ( func (cl *Client) parseHTMLLogin(htmlBody []byte) ( token string, err error, ) { + var logp = `parseHTMLLogin` + + if cl.opts.Debug == 1 { + var htmlText = html.Sanitize(htmlBody) + log.Printf("%s:\n%s", logp, htmlText) + } + iter, err := html.Parse(bytes.NewReader(htmlBody)) if err != nil { return "", err -- cgit v1.3