diff options
| author | Shulhan <ms@kilabit.info> | 2021-01-31 02:55:28 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2021-01-31 05:46:59 +0700 |
| commit | 424583f727bf5d7da0552780ba369834a73c36d3 (patch) | |
| tree | 529a148ea9e046300de8ff5bf7874e0f46a50575 /word.go | |
| download | kbbi-424583f727bf5d7da0552780ba369834a73c36d3.tar.xz | |
kamusku: the Go library for Kamus Besar Bahasa Indonesia (KBBI)
This module contains an HTTP client and a command-line interface for the
official KBBI website.
Diffstat (limited to 'word.go')
| -rw-r--r-- | word.go | 98 |
1 files changed, 98 insertions, 0 deletions
@@ -0,0 +1,98 @@ +// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kamusku + +import ( + "bytes" + + "github.com/shuLhan/share/lib/net/html" +) + +// +// Word store the single root word and its definitions. +// +type Word struct { + Root string `json:"dasar,omitempty"` // The root word + Definition []*WordDefinition `json:"definisi"` // The word definition. + + // Message will contains the information when the word is not found or + // the word is informal (kata tidak baku). + Message string `json:"pesan,omitempty"` + + err error +} + +// +// Err return an error from retrieving definition. +// +func (word *Word) Err() error { + return word.err +} + +// +// parseHTMLEntri parse HTML body from "/entri/<word>" page to find the +// definition of the word. +// +func (word *Word) parseHTMLEntri(in string, htmlBody []byte) (err error) { + iter, err := html.Parse(bytes.NewReader(htmlBody)) + if err != nil { + return err + } + + for node := iter.Next(); node != nil; node = iter.Next() { + if !node.IsElement() { + continue + } + + switch node.Data { + case tagNameHeader2: + word.parseRootWord(node) + + case tagNameOrderedList, tagNameUnorderedList: + li := node.GetFirstChild() + for li != nil { + defKata, err := parseWordDefinition(in, li) + if err != nil { + word.Message = err.Error() + err = nil + break + } + if defKata == nil { + break + } + word.Definition = append(word.Definition, defKata) + li = li.GetNextSibling() + } + next := node.GetNextSibling() + iter.SetNext(next) + } + } + + return nil +} + +// +// parseRootWord given an HMTL element "h2" find a possible root word and +// return true; otherwise it will return false. 
+// +func (word *Word) parseRootWord(h2 *html.Node) bool { + el := h2.GetFirstChild() + if el.Data != tagNameSpan { + return false + } + v := el.GetAttrValue(attrNameClass) + if v != attrValueRootWord { + return false + } + + el = el.GetFirstChild() + if el.Data != tagNameAnchor { + return false + } + el = el.GetFirstChild() + word.Root = el.Data + + return true +} |
