diff options
| author | Shulhan <ms@kilabit.info> | 2025-01-11 14:14:15 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-01-11 14:14:15 +0700 |
| commit | 95c8ba5699efa80fc19f7e210bcf4d1e189244e2 (patch) | |
| tree | cbb505bf508b0e0aaa20574742e611bca6f16c46 | |
| parent | 37632b2fc455187d90e058e319e35a9bff944df7 (diff) | |
| download | kbbi-95c8ba5699efa80fc19f7e210bcf4d1e189244e2.tar.xz | |
all: add options for Client
Currently, the options contain a Debug field that prints the HTML
response as text when set to 1.
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | client.go | 68 | ||||
| -rw-r--r-- | client_options.go | 11 | ||||
| -rw-r--r-- | client_test.go | 63 | ||||
| -rw-r--r-- | cmd/kbbi/main.go | 5 | ||||
| -rw-r--r-- | word.go | 42 | ||||
| -rw-r--r-- | word_test.go | 65 |
7 files changed, 142 insertions, 113 deletions
@@ -5,3 +5,4 @@ /README.html /cover.html /cover.txt +/kbbi @@ -70,11 +70,12 @@ type Client struct { cookieURL *url.URL baseDir string cookies []*http.Cookie + opts ClientOptions } // NewClient create and initialize new client that connect directly to // KBBI official website. -func NewClient() (cl *Client, err error) { +func NewClient(opts ClientOptions) (cl *Client, err error) { cookieURL, err := url.Parse(kbbiUrlBase) if err != nil { return nil, fmt.Errorf("New: %w", err) @@ -97,6 +98,7 @@ func NewClient() (cl *Client, err error) { cl = &Client{ cookieURL: cookieURL, httpc: libhttp.NewClient(clientOpts), + opts: opts, } cl.httpc.Jar = jar @@ -127,8 +129,7 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) { continue } - kata := &Word{} - res[in] = kata + var kata = &Word{} var req = libhttp.ClientRequest{ Path: kbbiPathEntri + in, @@ -137,22 +138,74 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) { resp, err = cl.httpc.Get(req) if err != nil { kata.err = err + res[in] = kata continue } - err = kata.parseHTMLEntri(in, resp.Body) + kata, err = cl.parseHTMLEntri(in, resp.Body) if err != nil { kata.err = err + res[in] = kata + continue } if len(kata.Definition) == 0 && len(kata.Message) == 0 { kata.Message = "Entri tidak ditemukan" } + res[in] = kata } return res, nil } +// parseHTMLEntri parse HTML body from "/entri/<word>" page to find the +// definition of the word. 
+func (cl *Client) parseHTMLEntri(in string, htmlBody []byte) (word *Word, err error) { + var logp = `parseHTMLEntri` + + if cl.opts.Debug == 1 { + var htmlText = html.Sanitize(htmlBody) + log.Printf("%s:\n%s", logp, htmlText) + } + + iter, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return nil, err + } + + word = &Word{} + + for node := iter.Next(); node != nil; node = iter.Next() { + if !node.IsElement() { + continue + } + + switch node.Data { + case tagNameHeader2: + word.parseRootWord(node) + + case tagNameOrderedList, tagNameUnorderedList: + li := node.GetFirstChild() + for li != nil { + defKata, err := parseWordDefinition(in, li) + if err != nil { + word.Message = err.Error() + err = nil + break + } + if defKata == nil { + break + } + word.Definition = append(word.Definition, defKata) + li = li.GetNextSibling() + } + next := node.GetNextSibling() + iter.SetNext(next) + } + } + return word, nil +} + // ListRootWords list all of the root words in dictionary. func (cl *Client) ListRootWords(pageStart, pageEnd int) (rootWords Words, err error) { if pageStart < 1 { @@ -300,6 +353,13 @@ func (cl *Client) parseHTMLRootWords(htmlBody []byte) ( func (cl *Client) parseHTMLLogin(htmlBody []byte) ( token string, err error, ) { + var logp = `parseHTMLLogin` + + if cl.opts.Debug == 1 { + var htmlText = html.Sanitize(htmlBody) + log.Printf("%s:\n%s", logp, htmlText) + } + iter, err := html.Parse(bytes.NewReader(htmlBody)) if err != nil { return "", err diff --git a/client_options.go b/client_options.go new file mode 100644 index 0000000..e489988 --- /dev/null +++ b/client_options.go @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: GPL-3.0-or-later + +package kbbi + +// ClientOptions define the client options. +type ClientOptions struct { + // Debug level for client connection. + // Level 1 print the received HTML page as plain text. 
+ Debug int +} diff --git a/client_test.go b/client_test.go index fa9e3c7..2f429e8 100644 --- a/client_test.go +++ b/client_test.go @@ -6,6 +6,8 @@ package kbbi import ( "os" "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/test" ) func TestClient_parseHTMLKataDasar(t *testing.T) { @@ -14,7 +16,9 @@ func TestClient_parseHTMLKataDasar(t *testing.T) { t.Fatal(err) } - cl, err := NewClient() + var opts ClientOptions + + cl, err := NewClient(opts) if err != nil { t.Fatal(err) } @@ -26,3 +30,60 @@ func TestClient_parseHTMLKataDasar(t *testing.T) { t.Logf("Root words: %v", got) } + +func TestClient_parseHTMLEntri(t *testing.T) { + cases := []struct { + exp *Word + infile string + cari string + }{{ + infile: "testdata/entri.html", + cari: "informasi", + exp: &Word{ + Definition: []*WordDefinition{{ + Value: "penerangan", + Classes: []string{"Nomina: kata benda"}, + }, { + Value: "pemberitahuan; kabar atau berita tentang sesuatu", + Classes: []string{"Nomina: kata benda"}, + }, { + Value: "keseluruhan makna yang menunjang amanat yang " + + "terlihat dalam bagian-bagian " + + "amanat itu", + Classes: []string{ + "Nomina: kata benda", + "Linguistik: -", + }, + }}, + }, + }, { + infile: "testdata/entri_analisa.html", + cari: "analisa", + exp: &Word{ + Message: `"analisa" adalah bentuk tidak baku dari "analisis"`, + }, + }} + + cl, err := NewClient(ClientOptions{}) + if err != nil { + t.Fatal(err) + } + + for _, c := range cases { + htmlBody, err := os.ReadFile(c.infile) + if err != nil { + t.Fatal(err) + } + + got, err := cl.parseHTMLEntri(c.cari, htmlBody) + if err != nil { + t.Fatal(err) + } + + for x, def := range c.exp.Definition { + test.Assert(t, `Definition`, def, got.Definition[x]) + } + + test.Assert(t, c.infile, c.exp, got) + } +} diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go index 044b2c7..d833d6e 100644 --- a/cmd/kbbi/main.go +++ b/cmd/kbbi/main.go @@ -25,6 +25,8 @@ const ( func main() { var ( + clientOpts = kbbi.ClientOptions{} + email string pass string 
pageStart int @@ -39,12 +41,13 @@ func main() { flag.StringVar(&pass, cmdNameSandi, "", "Sandi pengguna") flag.IntVar(&pageStart, optPageStart, 1, `Mulai ambil kata dasar dari halaman ini`) flag.IntVar(&pageEnd, optPageEnd, 0, `Berhenti ambil kata dasar pada halaman ini`) + flag.IntVar(&clientOpts.Debug, `debug`, 0, `Set tingkat debug`) flag.BoolVar(&isListRootWords, cmdNameDaftarKataDasar, false, "Ambil dan cetak semua kata dasar") flag.Parse() - cl, err := kbbi.NewClient() + cl, err := kbbi.NewClient(clientOpts) if err != nil { log.Fatal(err) } @@ -4,8 +4,6 @@ package kbbi import ( - "bytes" - "git.sr.ht/~shulhan/pakakeh.go/lib/html" ) @@ -28,46 +26,6 @@ func (word *Word) Err() error { return word.err } -// parseHTMLEntri parse HTML body from "/entri/<word>" page to find the -// definition of the word. -func (word *Word) parseHTMLEntri(in string, htmlBody []byte) (err error) { - iter, err := html.Parse(bytes.NewReader(htmlBody)) - if err != nil { - return err - } - - for node := iter.Next(); node != nil; node = iter.Next() { - if !node.IsElement() { - continue - } - - switch node.Data { - case tagNameHeader2: - word.parseRootWord(node) - - case tagNameOrderedList, tagNameUnorderedList: - li := node.GetFirstChild() - for li != nil { - defKata, err := parseWordDefinition(in, li) - if err != nil { - word.Message = err.Error() - err = nil - break - } - if defKata == nil { - break - } - word.Definition = append(word.Definition, defKata) - li = li.GetNextSibling() - } - next := node.GetNextSibling() - iter.SetNext(next) - } - } - - return nil -} - // parseRootWord given an HMTL element "h2" find a possible root word and // return true; otherwise it will return false. func (word *Word) parseRootWord(h2 *html.Node) bool { diff --git a/word_test.go b/word_test.go deleted file mode 100644 index b7a3f82..0000000 --- a/word_test.go +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-FileCopyrightText: 2020 M. 
Shulhan <ms@kilabit.info> -// SPDX-License-Identifier: GPL-3.0-or-later - -package kbbi - -import ( - "os" - "testing" - - "git.sr.ht/~shulhan/pakakeh.go/lib/test" -) - -func TestWord_parseHTMLEntri(t *testing.T) { - cases := []struct { - exp *Word - infile string - cari string - }{{ - infile: "testdata/entri.html", - cari: "informasi", - exp: &Word{ - Definition: []*WordDefinition{{ - Value: "penerangan", - Classes: []string{"Nomina: kata benda"}, - }, { - Value: "pemberitahuan; kabar atau berita tentang sesuatu", - Classes: []string{"Nomina: kata benda"}, - }, { - Value: "keseluruhan makna yang menunjang amanat yang " + - "terlihat dalam bagian-bagian " + - "amanat itu", - Classes: []string{ - "Nomina: kata benda", - "Linguistik: -", - }, - }}, - }, - }, { - infile: "testdata/entri_analisa.html", - cari: "analisa", - exp: &Word{ - Message: `"analisa" adalah bentuk tidak baku dari "analisis"`, - }, - }} - - for _, c := range cases { - htmlBody, err := os.ReadFile(c.infile) - if err != nil { - t.Fatal(err) - } - - got := new(Word) - - err = got.parseHTMLEntri(c.cari, htmlBody) - if err != nil { - t.Fatal(err) - } - - for x, def := range c.exp.Definition { - test.Assert(t, `Definition`, def, got.Definition[x]) - } - - test.Assert(t, c.infile, c.exp, got) - } -} |
