diff options
| -rw-r--r-- | client.go | 44 | ||||
| -rw-r--r-- | cmd/kbbi/main.go | 23 | ||||
| -rw-r--r-- | definisi_kata.go | 9 | ||||
| -rw-r--r-- | definisi_response.go | 11 | ||||
| -rw-r--r-- | kata.go | 42 | ||||
| -rw-r--r-- | kbbi.go | 11 |
6 files changed, 103 insertions, 37 deletions
@@ -70,30 +70,30 @@ func New(cookies []*http.Cookie) (cl *Client, err error) { // // CariDefinisi dari kata. // -func (cl Client) CariDefinisi(kata string) (defKata []*DefinisiKata, err error) { - entriURL := baseURL + entriPath + kata +func (cl Client) CariDefinisi(in string) (kata *Kata, err error) { + entriURL := baseURL + entriPath + in res, err := cl.httpc.Get(entriURL) if err != nil { - return nil, fmt.Errorf("Cari %q: %w", kata, err) + return nil, fmt.Errorf("CariDefinisi %q: %w", in, err) } defer res.Body.Close() body, err := ioutil.ReadAll(res.Body) if err != nil { - return nil, fmt.Errorf("Cari %q: %w", kata, err) + return nil, fmt.Errorf("Cari %q: %w", in, err) } if debug.Value >= 2 { fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body) } - defKata, err = parseHTMLEntri(body) + kata, err = parseHTMLEntri(body) if err != nil { - return nil, fmt.Errorf("Cari %q: %w", kata, err) + return nil, fmt.Errorf("CariDefinisi %q: %w", in, err) } - return defKata, nil + return kata, nil } // @@ -208,12 +208,13 @@ func (cl *Client) SetCookies(cookies []*http.Cookie) { // parseHTMLEntri parse HTML body from "/entri/<kata>" page to find the // definition of the word. 
// -func parseHTMLEntri(htmlBody []byte) (daftarDefinisi []*DefinisiKata, err error) { +func parseHTMLEntri(htmlBody []byte) (kata *Kata, err error) { node, err := html.Parse(bytes.NewReader(htmlBody)) if err != nil { return nil, err } + kata = new(Kata) var prev *html.Node for { @@ -236,24 +237,27 @@ func parseHTMLEntri(htmlBody []byte) (daftarDefinisi []*DefinisiKata, err error) } switch node.Data { + case tagNameHeader2: + kata.parseKataDasar(node) + case tagNameOrderedList, tagNameUnorderedList: + li := getFirstChild(node) + for li != nil { + defKata := parseDefinisiKata(li) + if defKata == nil { + break + } + kata.Definisi = append(kata.Definisi, defKata) + li = getNextSibling(li) + } + node = node.NextSibling + default: continue } - - li := getFirstChild(node) - for li != nil { - defKata := parseDefinisiKata(li) - if defKata == nil { - break - } - daftarDefinisi = append(daftarDefinisi, defKata) - li = getNextSibling(li) - } - node = node.NextSibling } - return daftarDefinisi, nil + return kata, nil } func (cl Client) parseHTMLKataDasar(htmlBody []byte) (kataDasar daftarKata, err error) { diff --git a/cmd/kbbi/main.go b/cmd/kbbi/main.go index f05e139..1a7f561 100644 --- a/cmd/kbbi/main.go +++ b/cmd/kbbi/main.go @@ -74,26 +74,29 @@ func main() { } var ( - pesan string daftarKata []string = flag.Args() ) - for _, kata := range daftarKata { - daftarDefinisi, err := cl.CariDefinisi(kata) + for _, in := range daftarKata { + kata, err := cl.CariDefinisi(in) if err != nil { log.Println(err) } - if len(daftarDefinisi) == 0 { - pesan = errKataNotFound + if kata == nil { + fmt.Printf("!!! 
%s: %s\n\n", in, errKataNotFound) + continue } - fmt.Println("===", kata, ":", pesan) - for x, defKata := range daftarDefinisi { - fmt.Printf(" Definisi %d: %s\n", x+1, defKata.Isi) + fmt.Println("===", in) + if len(kata.Dasar) > 0 { + fmt.Printf(" Kata dasar: %s\n", kata.Dasar) + } + for x, def := range kata.Definisi { + fmt.Printf(" Definisi #%d: %s\n", x+1, def.Isi) - for y, nomina := range defKata.Kelas { + for y, nomina := range def.Kelas { fmt.Printf(" Kelas #%d: %s\n", y+1, nomina) } - for z, contoh := range defKata.Contoh { + for z, contoh := range def.Contoh { fmt.Printf(" Contoh #%d: %s\n", z+1, contoh) } fmt.Println() diff --git a/definisi_kata.go b/definisi_kata.go index f1ac008..faeda2e 100644 --- a/definisi_kata.go +++ b/definisi_kata.go @@ -10,9 +10,13 @@ import ( "golang.org/x/net/html" ) +// +// DefinisiKata contains the meaning of a word in the dictionary, and optional +// attributes for word classifications and examples. +// type DefinisiKata struct { - Kelas []string `json:"kelas"` Isi string `json:"isi"` + Kelas []string `json:"kelas"` Contoh []string `json:"contoh"` } @@ -34,8 +38,7 @@ func parseDefinisiKata(li *html.Node) (defKata *DefinisiKata) { if attr.Key != attrNameTitle { continue } - attrVal := strings.Trim(attr.Val, "[]") - defKata.Kelas = append(defKata.Kelas, attrVal) + defKata.Kelas = append(defKata.Kelas, attr.Val) } elSpan = getNextSibling(elSpan) } diff --git a/definisi_response.go b/definisi_response.go new file mode 100644 index 0000000..b7376aa --- /dev/null +++ b/definisi_response.go @@ -0,0 +1,11 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +// +// DefinisiResponse is a response from "/definisi" API. +// It contains a mapping of words and their definitions. +// +type DefinisiResponse map[string]Kata @@ -0,0 +1,42 @@ +// Copyright 2020, Shulhan <m.shulhan@gmail.com>. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kbbi + +import "golang.org/x/net/html" + +// +// Kata stores a single root word and its definitions. +// +type Kata struct { + Dasar string `json:"dasar"` + Definisi []*DefinisiKata `json:"definisi"` +} + +// +// parseKataDasar, given an HTML element "h2", finds a possible root word and +// returns true; otherwise it returns false. +// +func (kata *Kata) parseKataDasar(h2 *html.Node) bool { + el := getFirstChild(h2) + if el.Data != tagNameSpan { + return false + } + for _, attr := range el.Attr { + if attr.Key != attrNameClass { + continue + } + if attr.Val != attrValueRootWord { + continue + } + el = getFirstChild(el) + if el.Data != tagNameAnchor { + return false + } + el = getFirstChild(el) + kata.Dasar = el.Data + return true + } + return false +} @@ -20,17 +20,20 @@ const ( attrNameTitle = "title" attrNameValue = "value" + attrValueRootWord = "rootword" + headerNameContentType = "Content-Type" headerValueContentType = "application/x-www-form-urlencoded" tagNameAnchor = "a" - tagNameInput = "input" - tagNameOrderedList = "ol" - tagNameUnorderedList = "ul" - tagNameListItem = "li" tagNameFont = "font" + tagNameHeader2 = "h2" + tagNameInput = "input" tagNameItalic = "i" + tagNameListItem = "li" + tagNameOrderedList = "ol" tagNameSpan = "span" + tagNameUnorderedList = "ul" paramNameIngatSaya = "IngatSaya" paramNameKataSandi = "KataSandi" |
