aboutsummaryrefslogtreecommitdiff
path: root/word.go
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2021-01-31 02:55:28 +0700
committerShulhan <ms@kilabit.info>2021-01-31 05:46:59 +0700
commit424583f727bf5d7da0552780ba369834a73c36d3 (patch)
tree529a148ea9e046300de8ff5bf7874e0f46a50575 /word.go
downloadkbbi-424583f727bf5d7da0552780ba369834a73c36d3.tar.xz
kamusku: the Go library for Kamus Besar Bahasa Indonesia (KBBI)
This module contains HTTP client and command line interface for official KBBI web.
Diffstat (limited to 'word.go')
-rw-r--r--word.go98
1 files changed, 98 insertions, 0 deletions
diff --git a/word.go b/word.go
new file mode 100644
index 0000000..8bc0da0
--- /dev/null
+++ b/word.go
@@ -0,0 +1,98 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+import (
+ "bytes"
+
+ "github.com/shuLhan/share/lib/net/html"
+)
+
+//
+// Word stores a single root word and its definitions.
+//
+type Word struct {
+ Root string `json:"dasar,omitempty"` // The root word; omitted from JSON when empty.
+ Definition []*WordDefinition `json:"definisi"` // The definitions of the word.
+
+ // Message contains information for the caller when the word is not found
+ // or the word is informal (kata tidak baku).
+ Message string `json:"pesan,omitempty"`
+
+ err error // Returned by the Err method.
+}
+
+//
+// Err returns the error recorded while retrieving the definition, or nil.
+//
+func (word *Word) Err() error {
+ return word.err
+}
+
+//
+// parseHTMLEntri parses the HTML body of an "/entri/<word>" page and fills
+// the word from it.
+//
+// Every element node is visited in iterator order:
+//
+//   - a tagNameHeader2 element is handed to parseRootWord, which may set
+//     Root;
+//   - the children of an ordered or unordered list are handed, one by
+//     one, to parseWordDefinition and each non-nil result is appended to
+//     Definition.
+//
+// The in argument is forwarded unchanged to parseWordDefinition
+// (presumably the word that was looked up; confirm against the caller).
+//
+// Reading of a list stops at the first item for which parseWordDefinition
+// returns an error or a nil definition.
+// Such an error is not returned to the caller: its text is stored in
+// Message and the outer loop carries on.
+// The only error this method returns is the one from html.Parse.
+//
+func (word *Word) parseHTMLEntri(in string, htmlBody []byte) (err error) {
+	iter, err := html.Parse(bytes.NewReader(htmlBody))
+	if err != nil {
+		return err
+	}
+
+	for node := iter.Next(); node != nil; node = iter.Next() {
+		if !node.IsElement() {
+			continue
+		}
+
+		switch node.Data {
+		case tagNameHeader2:
+			word.parseRootWord(node)
+
+		case tagNameOrderedList, tagNameUnorderedList:
+			for li := node.GetFirstChild(); li != nil; li = li.GetNextSibling() {
+				// Use a distinct name so the named result err is
+				// not shadowed inside the loop.
+				def, errDef := parseWordDefinition(in, li)
+				if errDef != nil {
+					// Report the problem through Message rather
+					// than failing the whole parse.
+					word.Message = errDef.Error()
+					break
+				}
+				if def == nil {
+					break
+				}
+				word.Definition = append(word.Definition, def)
+			}
+
+			// Hand the list's next sibling to the iterator,
+			// presumably so that the items handled above are not
+			// visited again.
+			iter.SetNext(node.GetNextSibling())
+		}
+	}
+
+	return nil
+}
+
+//
+// parseRootWord inspects an HTML "h2" element for the root word.
+//
+// It expects the first child of h2 to be a tagNameSpan element whose
+// attrNameClass attribute equals attrValueRootWord, the first child of that
+// span to be a tagNameAnchor element, and the anchor to have a first child.
+// When all of that holds, the Data of the anchor's first child is stored in
+// Root and true is returned.
+//
+// It returns false, leaving Root untouched, when h2 is nil or when any
+// level of that structure is missing or does not match.
+//
+func (word *Word) parseRootWord(h2 *html.Node) bool {
+	if h2 == nil {
+		return false
+	}
+
+	// GetFirstChild may return nil (an element without children), so
+	// every level is checked before it is dereferenced.
+	span := h2.GetFirstChild()
+	if span == nil || span.Data != tagNameSpan {
+		return false
+	}
+	if span.GetAttrValue(attrNameClass) != attrValueRootWord {
+		return false
+	}
+
+	anchor := span.GetFirstChild()
+	if anchor == nil || anchor.Data != tagNameAnchor {
+		return false
+	}
+
+	text := anchor.GetFirstChild()
+	if text == nil {
+		return false
+	}
+	word.Root = text.Data
+
+	return true
+}