aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2025-01-11 14:14:15 +0700
committerShulhan <ms@kilabit.info>2025-01-11 14:14:15 +0700
commit95c8ba5699efa80fc19f7e210bcf4d1e189244e2 (patch)
treecbb505bf508b0e0aaa20574742e611bca6f16c46
parent37632b2fc455187d90e058e319e35a9bff944df7 (diff)
downloadkbbi-95c8ba5699efa80fc19f7e210bcf4d1e189244e2.tar.xz
all: add options for Client
Currently, the options contain a Debug field that prints the HTML response as text when set to 1.
-rw-r--r--.gitignore1
-rw-r--r--client.go68
-rw-r--r--client_options.go11
-rw-r--r--client_test.go63
-rw-r--r--cmd/kbbi/main.go5
-rw-r--r--word.go42
-rw-r--r--word_test.go65
7 files changed, 142 insertions, 113 deletions
diff --git a/.gitignore b/.gitignore
index d3c77fa..6cf9119 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
/README.html
/cover.html
/cover.txt
+/kbbi
diff --git a/client.go b/client.go
index 091ea87..fcbcc63 100644
--- a/client.go
+++ b/client.go
@@ -70,11 +70,12 @@ type Client struct {
cookieURL *url.URL
baseDir string
cookies []*http.Cookie
+ opts ClientOptions
}
// NewClient create and initialize new client that connect directly to
// KBBI official website.
-func NewClient() (cl *Client, err error) {
+func NewClient(opts ClientOptions) (cl *Client, err error) {
cookieURL, err := url.Parse(kbbiUrlBase)
if err != nil {
return nil, fmt.Errorf("New: %w", err)
@@ -97,6 +98,7 @@ func NewClient() (cl *Client, err error) {
cl = &Client{
cookieURL: cookieURL,
httpc: libhttp.NewClient(clientOpts),
+ opts: opts,
}
cl.httpc.Jar = jar
@@ -127,8 +129,7 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) {
continue
}
- kata := &Word{}
- res[in] = kata
+ var kata = &Word{}
var req = libhttp.ClientRequest{
Path: kbbiPathEntri + in,
@@ -137,22 +138,74 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) {
resp, err = cl.httpc.Get(req)
if err != nil {
kata.err = err
+ res[in] = kata
continue
}
- err = kata.parseHTMLEntri(in, resp.Body)
+ kata, err = cl.parseHTMLEntri(in, resp.Body)
if err != nil {
kata.err = err
+ res[in] = kata
+ continue
}
if len(kata.Definition) == 0 && len(kata.Message) == 0 {
kata.Message = "Entri tidak ditemukan"
}
+ res[in] = kata
}
return res, nil
}
+// parseHTMLEntri parse HTML body from "/entri/<word>" page to find the
+// definition of the word.
+func (cl *Client) parseHTMLEntri(in string, htmlBody []byte) (word *Word, err error) {
+ var logp = `parseHTMLEntri`
+
+ if cl.opts.Debug == 1 {
+ var htmlText = html.Sanitize(htmlBody)
+ log.Printf("%s:\n%s", logp, htmlText)
+ }
+
+ iter, err := html.Parse(bytes.NewReader(htmlBody))
+ if err != nil {
+ return nil, err
+ }
+
+ word = &Word{}
+
+ for node := iter.Next(); node != nil; node = iter.Next() {
+ if !node.IsElement() {
+ continue
+ }
+
+ switch node.Data {
+ case tagNameHeader2:
+ word.parseRootWord(node)
+
+ case tagNameOrderedList, tagNameUnorderedList:
+ li := node.GetFirstChild()
+ for li != nil {
+ defKata, err := parseWordDefinition(in, li)
+ if err != nil {
+ word.Message = err.Error()
+ err = nil
+ break
+ }
+ if defKata == nil {
+ break
+ }
+ word.Definition = append(word.Definition, defKata)
+ li = li.GetNextSibling()
+ }
+ next := node.GetNextSibling()
+ iter.SetNext(next)
+ }
+ }
+ return word, nil
+}
+
// ListRootWords list all of the root words in dictionary.
func (cl *Client) ListRootWords(pageStart, pageEnd int) (rootWords Words, err error) {
if pageStart < 1 {
@@ -300,6 +353,13 @@ func (cl *Client) parseHTMLRootWords(htmlBody []byte) (
func (cl *Client) parseHTMLLogin(htmlBody []byte) (
token string, err error,
) {
+ var logp = `parseHTMLLogin`
+
+ if cl.opts.Debug == 1 {
+ var htmlText = html.Sanitize(htmlBody)
+ log.Printf("%s:\n%s", logp, htmlText)
+ }
+
iter, err := html.Parse(bytes.NewReader(htmlBody))
if err != nil {
return "", err
diff --git a/client_options.go b/client_options.go
new file mode 100644
index 0000000..e489988
--- /dev/null
+++ b/client_options.go
@@ -0,0 +1,11 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package kbbi
+
+// ClientOptions defines the client options.
+type ClientOptions struct {
+ // Debug level for client connection.
+ // Level 1 print the received HTML page as plain text.
+ Debug int
+}
diff --git a/client_test.go b/client_test.go
index fa9e3c7..2f429e8 100644
--- a/client_test.go
+++ b/client_test.go
@@ -6,6 +6,8 @@ package kbbi
import (
"os"
"testing"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/test"
)
func TestClient_parseHTMLKataDasar(t *testing.T) {
@@ -14,7 +16,9 @@ func TestClient_parseHTMLKataDasar(t *testing.T) {
t.Fatal(err)
}
- cl, err := NewClient()
+ var opts ClientOptions
+
+ cl, err := NewClient(opts)
if err != nil {
t.Fatal(err)
}
@@ -26,3 +30,60 @@ func TestClient_parseHTMLKataDasar(t *testing.T) {
t.Logf("Root words: %v", got)
}
+
+func TestClient_parseHTMLEntri(t *testing.T) {
+ cases := []struct {
+ exp *Word
+ infile string
+ cari string
+ }{{
+ infile: "testdata/entri.html",
+ cari: "informasi",
+ exp: &Word{
+ Definition: []*WordDefinition{{
+ Value: "penerangan",
+ Classes: []string{"Nomina: kata benda"},
+ }, {
+ Value: "pemberitahuan; kabar atau berita tentang sesuatu",
+ Classes: []string{"Nomina: kata benda"},
+ }, {
+ Value: "keseluruhan makna yang menunjang amanat yang " +
+ "terlihat dalam bagian-bagian " +
+ "amanat itu",
+ Classes: []string{
+ "Nomina: kata benda",
+ "Linguistik: -",
+ },
+ }},
+ },
+ }, {
+ infile: "testdata/entri_analisa.html",
+ cari: "analisa",
+ exp: &Word{
+ Message: `"analisa" adalah bentuk tidak baku dari "analisis"`,
+ },
+ }}
+
+ cl, err := NewClient(ClientOptions{})
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for _, c := range cases {
+ htmlBody, err := os.ReadFile(c.infile)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ got, err := cl.parseHTMLEntri(c.cari, htmlBody)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for x, def := range c.exp.Definition {
+ test.Assert(t, `Definition`, def, got.Definition[x])
+ }
+
+ test.Assert(t, c.infile, c.exp, got)
+ }
+}
diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go
index 044b2c7..d833d6e 100644
--- a/cmd/kbbi/main.go
+++ b/cmd/kbbi/main.go
@@ -25,6 +25,8 @@ const (
func main() {
var (
+ clientOpts = kbbi.ClientOptions{}
+
email string
pass string
pageStart int
@@ -39,12 +41,13 @@ func main() {
flag.StringVar(&pass, cmdNameSandi, "", "Sandi pengguna")
flag.IntVar(&pageStart, optPageStart, 1, `Mulai ambil kata dasar dari halaman ini`)
flag.IntVar(&pageEnd, optPageEnd, 0, `Berhenti ambil kata dasar pada halaman ini`)
+ flag.IntVar(&clientOpts.Debug, `debug`, 0, `Set tingkat debug`)
flag.BoolVar(&isListRootWords, cmdNameDaftarKataDasar, false,
"Ambil dan cetak semua kata dasar")
flag.Parse()
- cl, err := kbbi.NewClient()
+ cl, err := kbbi.NewClient(clientOpts)
if err != nil {
log.Fatal(err)
}
diff --git a/word.go b/word.go
index ecbef63..1f913f9 100644
--- a/word.go
+++ b/word.go
@@ -4,8 +4,6 @@
package kbbi
import (
- "bytes"
-
"git.sr.ht/~shulhan/pakakeh.go/lib/html"
)
@@ -28,46 +26,6 @@ func (word *Word) Err() error {
return word.err
}
-// parseHTMLEntri parse HTML body from "/entri/<word>" page to find the
-// definition of the word.
-func (word *Word) parseHTMLEntri(in string, htmlBody []byte) (err error) {
- iter, err := html.Parse(bytes.NewReader(htmlBody))
- if err != nil {
- return err
- }
-
- for node := iter.Next(); node != nil; node = iter.Next() {
- if !node.IsElement() {
- continue
- }
-
- switch node.Data {
- case tagNameHeader2:
- word.parseRootWord(node)
-
- case tagNameOrderedList, tagNameUnorderedList:
- li := node.GetFirstChild()
- for li != nil {
- defKata, err := parseWordDefinition(in, li)
- if err != nil {
- word.Message = err.Error()
- err = nil
- break
- }
- if defKata == nil {
- break
- }
- word.Definition = append(word.Definition, defKata)
- li = li.GetNextSibling()
- }
- next := node.GetNextSibling()
- iter.SetNext(next)
- }
- }
-
- return nil
-}
-
// parseRootWord given an HTML element "h2" find a possible root word and
// return true; otherwise it will return false.
func (word *Word) parseRootWord(h2 *html.Node) bool {
diff --git a/word_test.go b/word_test.go
deleted file mode 100644
index b7a3f82..0000000
--- a/word_test.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-FileCopyrightText: 2020 M. Shulhan <ms@kilabit.info>
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-package kbbi
-
-import (
- "os"
- "testing"
-
- "git.sr.ht/~shulhan/pakakeh.go/lib/test"
-)
-
-func TestWord_parseHTMLEntri(t *testing.T) {
- cases := []struct {
- exp *Word
- infile string
- cari string
- }{{
- infile: "testdata/entri.html",
- cari: "informasi",
- exp: &Word{
- Definition: []*WordDefinition{{
- Value: "penerangan",
- Classes: []string{"Nomina: kata benda"},
- }, {
- Value: "pemberitahuan; kabar atau berita tentang sesuatu",
- Classes: []string{"Nomina: kata benda"},
- }, {
- Value: "keseluruhan makna yang menunjang amanat yang " +
- "terlihat dalam bagian-bagian " +
- "amanat itu",
- Classes: []string{
- "Nomina: kata benda",
- "Linguistik: -",
- },
- }},
- },
- }, {
- infile: "testdata/entri_analisa.html",
- cari: "analisa",
- exp: &Word{
- Message: `"analisa" adalah bentuk tidak baku dari "analisis"`,
- },
- }}
-
- for _, c := range cases {
- htmlBody, err := os.ReadFile(c.infile)
- if err != nil {
- t.Fatal(err)
- }
-
- got := new(Word)
-
- err = got.parseHTMLEntri(c.cari, htmlBody)
- if err != nil {
- t.Fatal(err)
- }
-
- for x, def := range c.exp.Definition {
- test.Assert(t, `Definition`, def, got.Definition[x])
- }
-
- test.Assert(t, c.infile, c.exp, got)
- }
-}