diff options
Diffstat (limited to 'word_definition.go')
| -rw-r--r-- | word_definition.go | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/word_definition.go b/word_definition.go new file mode 100644 index 0000000..86b05bd --- /dev/null +++ b/word_definition.go @@ -0,0 +1,94 @@ +// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package kamusku + +import ( + "fmt" + "strings" + + "github.com/shuLhan/share/lib/net/html" + libstrings "github.com/shuLhan/share/lib/strings" +) + +// +// WordDefinition contains the meaning of word in dictionary, and optional +// attribute for word classifications and examples. +// +type WordDefinition struct { + Value string `json:"isi"` + Classes []string `json:"kelas,omitempty"` + Examples []string `json:"contoh,omitempty"` +} + +func parseWordDefinition(in string, li *html.Node) (defKata *WordDefinition, err error) { + elFont := li.GetFirstChild() + if elFont == nil || elFont.Data != tagNameFont { + return nil, nil + } + elItalic := elFont.GetFirstChild() + if elItalic == nil || elItalic.Data != tagNameItalic { + return nil, nil + } + + defKata = &WordDefinition{} + + elSpan := elItalic.GetFirstChild() + for elSpan != nil && elSpan.Data == tagNameSpan { + kelas := elSpan.GetAttrValue(attrNameTitle) + if len(kelas) > 0 { + defKata.Classes = append(defKata.Classes, kelas) + } + elSpan = elSpan.GetNextSibling() + } + + el := elFont.GetNextSibling() + if el == nil { + return defKata, nil + } + + defKata.Value = strings.TrimSpace(libstrings.SingleSpace(el.Data)) + + if defKata.Value == "→" { + defKata.Value = "" + el = el.GetNextSibling() + if el == nil || el.Data != tagNameAnchor { + return nil, nil + } + el = el.GetFirstChild() + return nil, fmt.Errorf(`%q adalah bentuk tidak baku dari %q`, + in, el.Data) + } + + if defKata.Value[len(defKata.Value)-1] != ':' { + return defKata, nil + } + + defKata.Value = defKata.Value[:len(defKata.Value)-1] + + // Parse the example of kata in the next sibling. + el = el.GetNextSibling() + for el != nil { + if el.Data != tagNameFont { + break + } + + elItalic = el.GetFirstChild() + if elItalic.Data != tagNameItalic { + break + } + + elText := elItalic.GetFirstChild() + if elText != nil { + contoh := strings.TrimSpace(elText.Data) + if len(contoh) > 0 && contoh != ";" { + defKata.Examples = append(defKata.Examples, elText.Data) + } + } + + el = el.GetNextSibling() + } + + return defKata, nil +} |
