summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <m.shulhan@gmail.com>2020-03-29 00:42:35 +0700
committerShulhan <m.shulhan@gmail.com>2020-03-29 00:42:35 +0700
commitcea97a6f997afccfb4eca3a7f1672ea5a8da8a8c (patch)
tree8fc96c40d09d9bc2fed9ab86afaee39b6ccf7d0c
parent60042be915d8aae0ace76aefa054fde9969a98d1 (diff)
downloadkamusku-cea97a6f997afccfb4eca3a7f1672ea5a8da8a8c.tar.xz
kbbi: urai kata dasar dari hasil pencarian definisi kata
Kata dasar dari sebuah kata dalam HTML berada dalam elemen "h2 > span[class="rootword"]". Jika field Dasar kosong berarti kata tersebut adalah kata dasar. Perubahan ini mengubah struktur balikan untuk dapat menyimpan kata dasar.
-rw-r--r--client.go44
-rw-r--r--cmd/kbbi/main.go23
-rw-r--r--definisi_kata.go9
-rw-r--r--definisi_response.go11
-rw-r--r--kata.go42
-rw-r--r--kbbi.go11
6 files changed, 103 insertions, 37 deletions
diff --git a/client.go b/client.go
index 9a89e03..3301ff1 100644
--- a/client.go
+++ b/client.go
@@ -70,30 +70,30 @@ func New(cookies []*http.Cookie) (cl *Client, err error) {
//
// CariDefinisi dari kata.
//
-func (cl Client) CariDefinisi(kata string) (defKata []*DefinisiKata, err error) {
- entriURL := baseURL + entriPath + kata
+func (cl Client) CariDefinisi(in string) (kata *Kata, err error) {
+ entriURL := baseURL + entriPath + in
res, err := cl.httpc.Get(entriURL)
if err != nil {
- return nil, fmt.Errorf("Cari %q: %w", kata, err)
+ return nil, fmt.Errorf("CariDefinisi %q: %w", in, err)
}
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
- return nil, fmt.Errorf("Cari %q: %w", kata, err)
+ return nil, fmt.Errorf("Cari %q: %w", in, err)
}
if debug.Value >= 2 {
fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body)
}
- defKata, err = parseHTMLEntri(body)
+ kata, err = parseHTMLEntri(body)
if err != nil {
- return nil, fmt.Errorf("Cari %q: %w", kata, err)
+ return nil, fmt.Errorf("CariDefinisi %q: %w", in, err)
}
- return defKata, nil
+ return kata, nil
}
//
@@ -208,12 +208,13 @@ func (cl *Client) SetCookies(cookies []*http.Cookie) {
// parseHTMLEntri parse HTML body from "/entri/<kata>" page to find the
// definition of the word.
//
-func parseHTMLEntri(htmlBody []byte) (daftarDefinisi []*DefinisiKata, err error) {
+func parseHTMLEntri(htmlBody []byte) (kata *Kata, err error) {
node, err := html.Parse(bytes.NewReader(htmlBody))
if err != nil {
return nil, err
}
+ kata = new(Kata)
var prev *html.Node
for {
@@ -236,24 +237,27 @@ func parseHTMLEntri(htmlBody []byte) (daftarDefinisi []*DefinisiKata, err error)
}
switch node.Data {
+ case tagNameHeader2:
+ kata.parseKataDasar(node)
+
case tagNameOrderedList, tagNameUnorderedList:
+ li := getFirstChild(node)
+ for li != nil {
+ defKata := parseDefinisiKata(li)
+ if defKata == nil {
+ break
+ }
+ kata.Definisi = append(kata.Definisi, defKata)
+ li = getNextSibling(li)
+ }
+ node = node.NextSibling
+
default:
continue
}
-
- li := getFirstChild(node)
- for li != nil {
- defKata := parseDefinisiKata(li)
- if defKata == nil {
- break
- }
- daftarDefinisi = append(daftarDefinisi, defKata)
- li = getNextSibling(li)
- }
- node = node.NextSibling
}
- return daftarDefinisi, nil
+ return kata, nil
}
func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err error) {
diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go
index f05e139..1a7f561 100644
--- a/cmd/kbbi/main.go
+++ b/cmd/kbbi/main.go
@@ -74,26 +74,29 @@ func main() {
}
var (
- pesan string
daftarKata []string = flag.Args()
)
- for _, kata := range daftarKata {
- daftarDefinisi, err := cl.CariDefinisi(kata)
+ for _, in := range daftarKata {
+ kata, err := cl.CariDefinisi(in)
if err != nil {
log.Println(err)
}
- if len(daftarDefinisi) == 0 {
- pesan = errKataNotFound
+ if kata == nil {
+ fmt.Printf("!!! %s: %s\n\n", in, errKataNotFound)
+ continue
}
- fmt.Println("===", kata, ":", pesan)
- for x, defKata := range daftarDefinisi {
- fmt.Printf(" Definisi %d: %s\n", x+1, defKata.Isi)
+ fmt.Println("===", in)
+ if len(kata.Dasar) > 0 {
+ fmt.Printf(" Kata dasar: %s\n", kata.Dasar)
+ }
+ for x, def := range kata.Definisi {
+ fmt.Printf(" Definisi #%d: %s\n", x+1, def.Isi)
- for y, nomina := range defKata.Kelas {
+ for y, nomina := range def.Kelas {
fmt.Printf(" Kelas #%d: %s\n", y+1, nomina)
}
- for z, contoh := range defKata.Contoh {
+ for z, contoh := range def.Contoh {
fmt.Printf(" Contoh #%d: %s\n", z+1, contoh)
}
fmt.Println()
diff --git a/definisi_kata.go b/definisi_kata.go
index f1ac008..faeda2e 100644
--- a/definisi_kata.go
+++ b/definisi_kata.go
@@ -10,9 +10,13 @@ import (
"golang.org/x/net/html"
)
+//
+// DefinisiKata contains the meaning of a word in the dictionary, and optional
+// attributes for word classifications and examples.
+//
type DefinisiKata struct {
- Kelas []string `json:"kelas"`
Isi string `json:"isi"`
+ Kelas []string `json:"kelas"`
Contoh []string `json:"contoh"`
}
@@ -34,8 +38,7 @@ func parseDefinisiKata(li *html.Node) (defKata *DefinisiKata) {
if attr.Key != attrNameTitle {
continue
}
- attrVal := strings.Trim(attr.Val, "[]")
- defKata.Kelas = append(defKata.Kelas, attrVal)
+ defKata.Kelas = append(defKata.Kelas, attr.Val)
}
elSpan = getNextSibling(elSpan)
}
diff --git a/definisi_response.go b/definisi_response.go
new file mode 100644
index 0000000..b7376aa
--- /dev/null
+++ b/definisi_response.go
@@ -0,0 +1,11 @@
+// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kbbi
+
+//
+// DefinisiResponse is a response from "/definisi" API.
+// It contains a mapping of words and their definitions.
+//
+type DefinisiResponse map[string]Kata
diff --git a/kata.go b/kata.go
new file mode 100644
index 0000000..1fb8eeb
--- /dev/null
+++ b/kata.go
@@ -0,0 +1,42 @@
+// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kbbi
+
+import "golang.org/x/net/html"
+
+//
+// Kata stores a single root word and its definitions.
+//
+type Kata struct {
+ Dasar string `json:"dasar"`
+ Definisi []*DefinisiKata `json:"definisi"`
+}
+
+//
+// parseKataDasar, given an HTML element "h2", finds a possible root word and
+// returns true; otherwise it returns false.
+//
+func (kata *Kata) parseKataDasar(h2 *html.Node) bool {
+ el := getFirstChild(h2)
+ if el.Data != tagNameSpan {
+ return false
+ }
+ for _, attr := range el.Attr {
+ if attr.Key != attrNameClass {
+ continue
+ }
+ if attr.Val != attrValueRootWord {
+ continue
+ }
+ el = getFirstChild(el)
+ if el.Data != tagNameAnchor {
+ return false
+ }
+ el = getFirstChild(el)
+ kata.Dasar = el.Data
+ return true
+ }
+ return false
+}
diff --git a/kbbi.go b/kbbi.go
index e5b6e0e..4cec3e5 100644
--- a/kbbi.go
+++ b/kbbi.go
@@ -20,17 +20,20 @@ const (
attrNameTitle = "title"
attrNameValue = "value"
+ attrValueRootWord = "rootword"
+
headerNameContentType = "Content-Type"
headerValueContentType = "application/x-www-form-urlencoded"
tagNameAnchor = "a"
- tagNameInput = "input"
- tagNameOrderedList = "ol"
- tagNameUnorderedList = "ul"
- tagNameListItem = "li"
tagNameFont = "font"
+ tagNameHeader2 = "h2"
+ tagNameInput = "input"
tagNameItalic = "i"
+ tagNameListItem = "li"
+ tagNameOrderedList = "ol"
tagNameSpan = "span"
+ tagNameUnorderedList = "ul"
paramNameIngatSaya = "IngatSaya"
paramNameKataSandi = "KataSandi"