aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--LICENSE39
-rw-r--r--Makefile16
-rw-r--r--README.md91
-rw-r--r--cmd/kamusku/main.go116
-rw-r--r--go.mod8
-rw-r--r--go.sum23
-rw-r--r--kamusku.go9
-rw-r--r--kbbi_client.go417
-rw-r--r--kbbi_client_test.go29
-rw-r--r--lookup_response.go10
-rw-r--r--testdata/entri.html408
-rw-r--r--testdata/entri_analisa.html342
-rw-r--r--testdata/kbbi_dasar.html707
-rw-r--r--word.go98
-rw-r--r--word_definition.go94
-rw-r--r--word_test.go66
-rw-r--r--words.go20
17 files changed, 2493 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7e49067
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,39 @@
+Copyright 2020, M. Shulhan (ms@kilabit.info).
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of copyright holder nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ --- --- --- --- --- --- ---
+
+ TT TT II BB AAAA LLLLLL II KKKKKKKK
+ TT TT II BB AA AA LL LL II KK
+ TTTT II BB AA AA LL LL II KK
+ TT TT II BB AAAAAAAA LLLLLL II KK
+ TT TT II BB AA AA LL LL II KK
+ TT TT II BBBBBBBB AA AA LLLLLL II KK
+
+Website: http://kilabit.info
+Contact: ms@kilabit.info
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d7ccb28
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,16 @@
+## Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+## Use of this source code is governed by a BSD-style
+## license that can be found in the LICENSE file.
+
+## all, test, check, and install are commands, not files; declare every one
+## of them as phony so a file or directory with the same name cannot mask
+## the target.
+.PHONY: all test check install
+
+all: test check install
+
+test:
+	go test -v ./...
+
+check:
+	golangci-lint run ./...
+
+install:
+	go install ./cmd/kamusku
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c5c41c3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,91 @@
+# kamusku
+
+kamusku adalah Go module yang berisi pustaka dan program untuk mencari
+definisi kata Bahasa Indonesia dari situs resmi KBBI.
+
+
+## Program kamusku
+
+Program kamusku yaitu antarmuka untuk mencari definisi dari kata lewat baris
+perintah.
+
+Program ini sangat sederhana, caranya yaitu dengan memberikan kata yang dicari
+setelah nama program, misalnya,
+
+```
+$ kamusku kamus,bahasa
+```
+
+Maka akan mencetak definisi dari kata "kamus" dan "bahasa" ke layar,
+
+```
+=== bahasa
+ Definisi #1: sistem lambang bunyi yang arbitrer, yang digunakan oleh
+ anggota suatu masyarakat untuk bekerja sama, berinteraksi, dan
+ mengidentifikasikan diri
+ Kelas #1: Nomina: kata benda
+ Kelas #2: Linguistik: -
+
+ Definisi #2: percakapan (perkataan) yang baik; tingkah laku yang baik; sopan santun
+ Kelas #1: Nomina: kata benda
+ Contoh #1: baik budi --nya
+
+ ...
+
+=== kamus
+ Definisi #1: karya rujukan atau acuan dalam bentuk cetak maupun digital yang
+ memuat kata dan ungkapan, dapat disusun menurut abjad atau tema, berisi
+ keterangan tentang makna, pemakaian, atau terjemahan
+ Kelas #1: Nomina: kata benda
+
+ Definisi #2: buku yang memuat kumpulan istilah atau nama yang disusun
+ menurut abjad beserta penjelasan tentang makna dan pemakaiannya
+ Kelas #1: Nomina: kata benda
+
+ ...
+```
+
+
+## Bot Telegram
+
+Bot untuk aplikasi Telegram: https://t.me/KamuskuBot
+
+Untuk saat ini, KamuskuBot hanya punya satu perintah yaitu "/definisi". Cara
+menggunakan perintah ini hampir sama dengan program kamusku yaitu dengan
+memberikan kata yang dicari, contohnya,
+
+```
+/definisi kamus,bahasa
+```
+
+## LISENSI
+
+```
+Copyright 2020, M. Shulhan (ms@kilabit.info).
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of copyright holder nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+```
diff --git a/cmd/kamusku/main.go b/cmd/kamusku/main.go
new file mode 100644
index 0000000..b48a791
--- /dev/null
+++ b/cmd/kamusku/main.go
@@ -0,0 +1,116 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Program kamusku is the command-line interface to Kamus Besar Bahasa
+// Indonesia (KBBI).
+//
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "sort"
+
+ "git.sr.ht/~shulhan/kamusku"
+)
+
+// Names of the command-line flags registered in main.
+const (
+	// cmdNameSurel is the flag that receives the user email for login.
+	cmdNameSurel = "surel"
+	// cmdNameSandi is the flag that receives the user password for login.
+	cmdNameSandi = "sandi"
+	// cmdNameDaftarKataDasar is the boolean flag that switches the
+	// program to printing all root words; main rejects it unless the
+	// client is authenticated.
+	cmdNameDaftarKataDasar = "daftar-kata-dasar"
+)
+
+//
+// main parses the command-line flags, logs in when both the email and the
+// password are given, and then either prints all root words or prints the
+// definitions of the words passed as arguments.
+//
+func main() {
+	var (
+		isListRootWords bool
+		email           string
+		pass            string
+	)
+
+	log.SetFlags(0)
+	log.SetPrefix("kamusku: ")
+
+	flag.StringVar(&email, cmdNameSurel, "", "Nama pengguna")
+	flag.StringVar(&pass, cmdNameSandi, "", "Sandi pengguna")
+	flag.BoolVar(&isListRootWords, cmdNameDaftarKataDasar, false,
+		"Ambil dan cetak semua kata dasar")
+
+	flag.Parse()
+
+	cl, err := kamusku.NewKbbiClient()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	if len(email) > 0 && len(pass) > 0 {
+		err = cl.Login(email, pass)
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	if isListRootWords {
+		if !cl.IsAuthenticated() {
+			log.Fatalf("opsi %s membutuhkan opsi %s dan %s",
+				cmdNameDaftarKataDasar, cmdNameSurel,
+				cmdNameSandi)
+		}
+		listRootWords(cl)
+		return
+	}
+
+	resDefinition, err := cl.Lookup(flag.Args())
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// The response is a map and Go randomizes map iteration order; sort
+	// the words first so the output is the same on every run.
+	words := make([]string, 0, len(resDefinition))
+	for word := range resDefinition {
+		words = append(words, word)
+	}
+	sort.Strings(words)
+
+	for _, word := range words {
+		wordDef := resDefinition[word]
+
+		err = wordDef.Err()
+		if err != nil {
+			fmt.Printf("!!! %s: %s\n", word, err)
+			continue
+		}
+
+		fmt.Println("===", word)
+		if len(wordDef.Message) != 0 {
+			// A message replaces the definitions, for example
+			// when the entry is not found.
+			fmt.Println(" " + wordDef.Message)
+			continue
+		}
+		if len(wordDef.Root) > 0 {
+			fmt.Printf(" Kata dasar: %s\n", wordDef.Root)
+		}
+		for x, def := range wordDef.Definition {
+			fmt.Printf(" Definisi #%d: %s\n", x+1, def.Value)
+
+			for y, nomina := range def.Classes {
+				fmt.Printf(" Kelas #%d: %s\n", y+1, nomina)
+			}
+			for z, contoh := range def.Examples {
+				fmt.Printf(" Contoh #%d: %s\n", z+1, contoh)
+			}
+			fmt.Println()
+		}
+	}
+}
+
+//
+// listRootWords fetch all root words through the client and print them in
+// ascending order, one word per line.
+// An error from the client is only logged; whatever words were returned
+// together with the error are still printed.
+//
+func listRootWords(cl *kamusku.KbbiClient) {
+	rootWords, err := cl.ListRootWords()
+	if err != nil {
+		log.Println(err)
+	}
+
+	sorted := make([]string, 0, len(rootWords))
+	for w := range rootWords {
+		sorted = append(sorted, w)
+	}
+	sort.Strings(sorted)
+
+	for x := 0; x < len(sorted); x++ {
+		fmt.Println(sorted[x])
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..b3a733a
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module git.sr.ht/~shulhan/kamusku
+
+go 1.15
+
+require (
+ github.com/shuLhan/share v0.22.0
+ golang.org/x/net v0.0.0-20210119194325-5f4716e94777
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..aec0a9d
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,23 @@
+git.sr.ht/~shulhan/asciidoctor-go v0.0.0-20201205130914-be765f32b57b/go.mod h1:ejaxKeBMNL5EpP2zjRP4B8zuOr+MM4ZyGwE3y7807WI=
+git.sr.ht/~shulhan/ciigo v0.3.0/go.mod h1:Y5FvSiJg88qshoR1ktj4fLzM5sk1pZcV0kJGU8GAuTo=
+github.com/shuLhan/share v0.20.2-0.20201122173411-e8b3bf5ee6e9/go.mod h1:oBv+CGHG6u4Sa71+nJJJji8mCgPAadywjsB3I3k/b0o=
+github.com/shuLhan/share v0.20.2-0.20201205202022-66069b9e49fe/go.mod h1:oBv+CGHG6u4Sa71+nJJJji8mCgPAadywjsB3I3k/b0o=
+github.com/shuLhan/share v0.22.0 h1:oTV1M0X3TqyhwSoT0BxVBmnUZLbhkvRwmhyV0KkTOR4=
+github.com/shuLhan/share v0.22.0/go.mod h1:u9caerexlcxmPVDttj7PnkxCBDY6yBRTZ+gGR+1tO98=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210119194325-5f4716e94777 h1:003p0dJM77cxMSyCPFphvZf/Y5/NXf5fzg6ufd1/Oew=
+golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201107080550-4d91cf3a1aaf/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/kamusku.go b/kamusku.go
new file mode 100644
index 0000000..8780723
--- /dev/null
+++ b/kamusku.go
@@ -0,0 +1,9 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package kamusku is the Go library to access the Bahasa Indonesia dictionary
+// from https://kbbi.kemdikbud.go.id.
+//
+package kamusku
diff --git a/kbbi_client.go b/kbbi_client.go
new file mode 100644
index 0000000..23767b4
--- /dev/null
+++ b/kbbi_client.go
@@ -0,0 +1,417 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+import (
+ "bytes"
+ "encoding/gob"
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "net/http/cookiejar"
+ "net/url"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/shuLhan/share/lib/debug"
+ libhttp "github.com/shuLhan/share/lib/http"
+ "github.com/shuLhan/share/lib/net/html"
+ "golang.org/x/net/publicsuffix"
+)
+
+const (
+	// Base URL of the KBBI website, the URL of its login page, and the
+	// path prefix of a word entry page.
+	kbbiUrlBase   = "https://kbbi.kemdikbud.go.id"
+	kbbiUrlLogin  = kbbiUrlBase + "/Account/Login"
+	kbbiPathEntri = "/entri/"
+
+	// Names of HTML attributes.
+	// In this file only href and value are read; the others are
+	// presumably used by the entry parser in another file -- confirm.
+	attrNameClass = "class"
+	attrNameHref  = "href"
+	attrNameTitle = "title"
+	attrNameValue = "value"
+
+	// NOTE(review): not referenced in this file; presumably the class
+	// value that marks the root word inside an entry page -- confirm.
+	attrValueRootWord = "rootword"
+
+	// Names of the query and form parameters sent to KBBI by
+	// ListRootWords and Login.
+	paramNameMasukan                  = "masukan"
+	paramNameMasukanLengkap           = "masukanLengkap"
+	paramNameIngatSaya                = "IngatSaya"
+	paramNameKataSandi                = "KataSandi"
+	paramNamePage                     = "page"
+	paramNamePosel                    = "Posel"
+	paramNameRequestVerificationToken = "__RequestVerificationToken" //nolint: gosec
+
+	// Values sent with the parameters above.
+	paramValueDasar = "dasar"
+	paramValueFalse = "false"
+
+	// Names of HTML tags.
+	// In this file only the anchor and input tags are matched; the
+	// others are presumably used by the entry parser -- confirm.
+	tagNameAnchor        = "a"
+	tagNameFont          = "font"
+	tagNameHeader2       = "h2"
+	tagNameInput         = "input"
+	tagNameItalic        = "i"
+	tagNameOrderedList   = "ol"
+	tagNameSpan          = "span"
+	tagNameUnorderedList = "ul"
+
+	// cookieFile is the name of the file, inside the configDir directory
+	// under the user configuration directory, that stores the cookies.
+	cookieFile = "cookie"
+	configDir  = "kamusku"
+	// defTimeout is the timeout set on the HTTP client.
+	defTimeout = 20 * time.Second
+	// maxPageNumber is the last page number that ListRootWords requests.
+	maxPageNumber = 501
+)
+
+//
+// KbbiClient client for official KBBI web using HTTP.
+//
+type KbbiClient struct {
+	// baseDir is the user configuration directory, set by loadCookies
+	// from os.UserConfigDir.
+	baseDir string
+	// cookieURL is the parsed kbbiUrlBase, used as the key to get and
+	// set cookies in the HTTP client's jar.
+	cookieURL *url.URL
+	// cookies are loaded from the cookie file by loadCookies or taken
+	// from the jar after Login; IsAuthenticated reports true when this
+	// is not empty.
+	cookies []*http.Cookie
+	// httpc is the HTTP client, created with a cookie jar and
+	// defTimeout, used for all requests.
+	httpc *http.Client
+}
+
+//
+// NewKbbiClient create a client for the official KBBI website.
+//
+// The client uses an HTTP client with a cookie jar and a timeout of
+// defTimeout.
+// Cookies stored by a previous login are loaded from the cookie file, if
+// any, and put into the jar.
+// It will return an error if the base URL cannot be parsed, the jar cannot
+// be created, or loading the cookies fails.
+//
+func NewKbbiClient() (cl *KbbiClient, err error) {
+	baseURL, err := url.Parse(kbbiUrlBase)
+	if err != nil {
+		return nil, fmt.Errorf("New: %w", err)
+	}
+
+	jar, err := cookiejar.New(&cookiejar.Options{
+		PublicSuffixList: publicsuffix.List,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("New: %w", err)
+	}
+
+	cl = &KbbiClient{}
+	cl.cookieURL = baseURL
+	cl.httpc = &http.Client{
+		Jar:     jar,
+		Timeout: defTimeout,
+	}
+
+	if err = cl.loadCookies(); err != nil {
+		return nil, fmt.Errorf("New: %w", err)
+	}
+
+	// Nothing stored from a previous login.
+	if cl.cookies == nil {
+		return cl, nil
+	}
+
+	jar.SetCookies(baseURL, cl.cookies)
+
+	return cl, nil
+}
+
+//
+// Lookup lookup definition of one or more words.
+//
+// Each unique word in ins is fetched from its entry page on KBBI and
+// parsed into a Word stored in the response under that word.
+// A failure on one word does not stop the lookup of the others: the error
+// is stored inside the Word of that word and the returned err is always nil.
+// A word that was parsed without any definition or message gets the message
+// "Entri tidak ditemukan".
+//
+func (cl *KbbiClient) Lookup(ins []string) (res LookupResponse, err error) {
+	res = make(LookupResponse, len(ins))
+
+	for _, in := range ins {
+		// Skip a word that has been processed.
+		if _, ok := res[in]; ok {
+			continue
+		}
+
+		kata := &Word{}
+		res[in] = kata
+
+		body, errFetch := cl.fetchEntriBody(in)
+		if errFetch != nil {
+			kata.err = errFetch
+			continue
+		}
+
+		errParse := kata.parseHTMLEntri(in, body)
+		if errParse != nil {
+			kata.err = errParse
+		}
+
+		if len(kata.Definition) == 0 && len(kata.Message) == 0 {
+			kata.Message = "Entri tidak ditemukan"
+		}
+	}
+
+	return res, nil
+}
+
+//
+// fetchEntriBody get the HTML body of the entry page of a single word.
+//
+// It is a separate method so the deferred Close run once per word, instead
+// of keeping every response body open until Lookup returns.
+//
+func (cl *KbbiClient) fetchEntriBody(in string) (body []byte, err error) {
+	// Escape the word so a space, "?", "#", or "%" in it stays part of
+	// the path instead of breaking the URL.
+	entriURL := kbbiUrlBase + kbbiPathEntri + url.PathEscape(in)
+
+	httpRes, err := cl.httpc.Get(entriURL)
+	if err != nil {
+		return nil, err
+	}
+
+	defer httpRes.Body.Close()
+
+	body, err = ioutil.ReadAll(httpRes.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	if debug.Value >= 3 {
+		fmt.Printf(">>> HTML body for %s:\n%s", entriURL, body)
+	}
+
+	return body, nil
+}
+
+//
+// ListRootWords list all of the root words in dictionary.
+//
+// The pages of the root word list are requested one by one, starting from
+// page 1, until a page without any word is found or maxPageNumber has been
+// requested.
+// On error, the words collected from the previous pages are returned
+// together with the error.
+//
+func (cl *KbbiClient) ListRootWords() (rootWords Words, err error) {
+	params := url.Values{
+		paramNameMasukan:        []string{paramValueDasar},
+		paramNameMasukanLengkap: []string{paramValueDasar},
+	}
+
+	urlPage := kbbiUrlBase + "/Cari/Jenis?"
+
+	rootWords = make(Words)
+
+	for pageNumber := 1; pageNumber <= maxPageNumber; pageNumber++ {
+		params.Set(paramNamePage, strconv.Itoa(pageNumber))
+
+		got, errPage := cl.fetchRootWordsPage(urlPage + params.Encode())
+		if errPage != nil {
+			return rootWords, fmt.Errorf("ListRootWords: page %d: %w",
+				pageNumber, errPage)
+		}
+		// A page without any word marks the end of the list.
+		if len(got) == 0 {
+			break
+		}
+
+		rootWords.merge(got)
+
+		log.Printf("ListRootWords: halaman %d, jumlah kata %d, total kata %d",
+			pageNumber, len(got), len(rootWords))
+	}
+
+	return rootWords, nil
+}
+
+//
+// fetchRootWordsPage get one page of the root word list and parse the words
+// in it.
+//
+// It is a separate method so the deferred Close run once per page, instead
+// of keeping every response body open until ListRootWords returns.
+//
+func (cl *KbbiClient) fetchRootWordsPage(pageURL string) (got Words, err error) {
+	req, err := http.NewRequest(http.MethodGet, pageURL, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	res, err := cl.httpc.Do(req)
+	if err != nil {
+		return nil, err
+	}
+
+	defer res.Body.Close()
+
+	body, err := ioutil.ReadAll(res.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	return cl.parseHTMLRootWords(body)
+}
+
+//
+// IsAuthenticated report whether the client holds any cookie, either loaded
+// from the cookie file or stored after a call to Login.
+// It does not contact the server.
+//
+func (cl *KbbiClient) IsAuthenticated() bool {
+	hasCookies := len(cl.cookies) != 0
+	return hasCookies
+}
+
+//
+// Login authenticate the client using user email and password.
+//
+// It fetches the verification token from the login page first, and then
+// submits the token, the email, and the password as a form.
+// A response with status 400 or above is returned as an error; otherwise
+// the cookies in the jar are kept in the client and saved to the cookie
+// file.
+//
+func (cl *KbbiClient) Login(email, pass string) (err error) {
+	token, err := cl.preLogin()
+	if err != nil {
+		return fmt.Errorf("Login: %w", err)
+	}
+
+	form := url.Values{}
+	form.Set(paramNameRequestVerificationToken, token)
+	form.Set(paramNamePosel, email)
+	form.Set(paramNameKataSandi, pass)
+	form.Set(paramNameIngatSaya, paramValueFalse)
+
+	req, err := http.NewRequest(http.MethodPost, kbbiUrlLogin,
+		strings.NewReader(form.Encode()))
+	if err != nil {
+		return fmt.Errorf("Login: %w", err)
+	}
+
+	req.Header.Set(libhttp.HeaderContentType, libhttp.ContentTypeForm)
+
+	httpRes, err := cl.httpc.Do(req)
+	if err != nil {
+		return fmt.Errorf("Login: %w", err)
+	}
+
+	defer httpRes.Body.Close()
+
+	payload, err := ioutil.ReadAll(httpRes.Body)
+	if err != nil {
+		return fmt.Errorf("Login: %w", err)
+	}
+
+	if httpRes.StatusCode >= http.StatusBadRequest {
+		return fmt.Errorf("login: %d %s", httpRes.StatusCode, payload)
+	}
+
+	// Keep the cookies set by the server, and store them for future use.
+	cl.cookies = cl.httpc.Jar.Cookies(cl.cookieURL)
+	cl.setCookies()
+	cl.saveCookies()
+
+	return nil
+}
+
+//
+// setCookies put the client cookies into the jar of the HTTP client, under
+// the KBBI base URL, so they are sent with the next requests.
+//
+func (cl *KbbiClient) setCookies() {
+	jar := cl.httpc.Jar
+	jar.SetCookies(cl.cookieURL, cl.cookies)
+}
+
+//
+// parseHTMLRootWords collect the root words from the HTML of a root word
+// list page.
+//
+// A root word is the text of the first child of an anchor whose href begins
+// with kbbiPathEntri, with the leading and trailing spaces removed.
+// An anchor without any child or with an empty text is skipped.
+// It will return an error only if the HTML cannot be parsed.
+//
+func (cl *KbbiClient) parseHTMLRootWords(htmlBody []byte) (
+	rootWords Words, err error,
+) {
+	iter, err := html.Parse(bytes.NewReader(htmlBody))
+	if err != nil {
+		return nil, err
+	}
+
+	rootWords = make(Words)
+
+	for node := iter.Next(); node != nil; node = iter.Next() {
+		if !node.IsElement() || node.Data != tagNameAnchor {
+			continue
+		}
+		hrefValue := node.GetAttrValue(attrNameHref)
+		if !strings.HasPrefix(hrefValue, kbbiPathEntri) {
+			continue
+		}
+		// An empty anchor has no child; without this guard the
+		// dereference below panics.
+		if node.FirstChild == nil {
+			continue
+		}
+		k := strings.TrimSpace(node.FirstChild.Data)
+		if len(k) == 0 {
+			// Do not store an empty string as a word.
+			continue
+		}
+		rootWords[k] = struct{}{}
+	}
+
+	return rootWords, nil
+}
+
+//
+// parseHTMLLogin get the token at the form login.
+//
+func (cl *KbbiClient) parseHTMLLogin(htmlBody []byte) (
+ token string, err error,
+) {
+ iter, err := html.Parse(bytes.NewReader(htmlBody))
+ if err != nil {
+ return "", err
+ }
+
+ for node := iter.Next(); node != nil; node = iter.Next() {
+ if !node.IsElement() {
+ continue
+ }
+ if node.Data != tagNameInput {
+ continue
+ }
+
+ token := node.GetAttrValue(attrNameValue)
+ if len(token) > 0 {
+ return token, nil
+ }
+ }
+
+ return "", fmt.Errorf("token login not found")
+}
+
+//
+// preLogin request the login page and return the token found in its form.
+// Any cookie set by the server in the response is kept by the jar of the
+// HTTP client.
+//
+func (cl *KbbiClient) preLogin() (token string, err error) {
+	httpRes, err := cl.httpc.Get(kbbiUrlLogin)
+	if err != nil {
+		return "", err
+	}
+
+	defer httpRes.Body.Close()
+
+	page, err := ioutil.ReadAll(httpRes.Body)
+	if err != nil {
+		return "", err
+	}
+
+	if token, err = cl.parseHTMLLogin(page); err != nil {
+		return "", err
+	}
+
+	return token, nil
+}
+
+//
+// loadCookies load the KBBI cookies from file.
+//
+// The file is cookieFile inside the configDir directory under the user
+// configuration directory, which is also stored in baseDir.
+// A file that does not exist is not an error; the cookies are left empty.
+// It will return an error if the user configuration directory is unknown,
+// or the file cannot be read or decoded.
+//
+func (cl *KbbiClient) loadCookies() (err error) {
+	cl.baseDir, err = os.UserConfigDir()
+	if err != nil {
+		return fmt.Errorf("loadCookies: %w", err)
+	}
+
+	f := filepath.Join(cl.baseDir, configDir, cookieFile)
+
+	// Read the file directly and inspect the error, instead of checking
+	// its existence first; the file could be removed between the check
+	// and the read.
+	body, err := ioutil.ReadFile(f)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return nil
+		}
+		return fmt.Errorf("loadCookies: %w", err)
+	}
+
+	dec := gob.NewDecoder(bytes.NewReader(body))
+
+	err = dec.Decode(&cl.cookies)
+	if err != nil {
+		return fmt.Errorf("loadCookies: %w", err)
+	}
+
+	return nil
+}
+
+//
+// saveCookies store the client cookies to the file for future use.
+//
+// Saving is best effort: a failure is logged and not returned.
+// The file is written only after the directory has been created and the
+// cookies have been encoded, so a failure never leaves an empty or partial
+// cookie file that loadCookies would later fail to decode.
+//
+func (cl *KbbiClient) saveCookies() {
+	dir := filepath.Join(cl.baseDir, configDir)
+
+	err := os.MkdirAll(dir, 0700)
+	if err != nil {
+		log.Println("saveCookies:", err)
+		return
+	}
+
+	var buf bytes.Buffer
+	enc := gob.NewEncoder(&buf)
+	err = enc.Encode(cl.cookies)
+	if err != nil {
+		log.Println("saveCookies: ", err)
+		return
+	}
+
+	f := filepath.Join(dir, cookieFile)
+
+	err = ioutil.WriteFile(f, buf.Bytes(), 0600)
+	if err != nil {
+		log.Println("saveCookies: ", err)
+	}
+}
diff --git a/kbbi_client_test.go b/kbbi_client_test.go
new file mode 100644
index 0000000..d83ab5c
--- /dev/null
+++ b/kbbi_client_test.go
@@ -0,0 +1,29 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+import (
+ "io/ioutil"
+ "testing"
+)
+
+// TestClient_parseHTMLKataDasar parses the root word list fixture and logs
+// the words found; it fails only if the fixture cannot be read or parsed.
+//
+// NOTE(review): the result is only logged; consider asserting on words
+// known to be in the fixture.
+func TestClient_parseHTMLKataDasar(t *testing.T) {
+	htmlBody, err := ioutil.ReadFile("testdata/kbbi_dasar.html")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// parseHTMLRootWords does not read any field of the client, so use
+	// the zero value instead of NewKbbiClient, which depends on the user
+	// configuration directory and the cookie file of whoever runs the
+	// test.
+	cl := &KbbiClient{}
+
+	got, err := cl.parseHTMLRootWords(htmlBody)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	t.Logf("Root words: %v", got)
+}
diff --git a/lookup_response.go b/lookup_response.go
new file mode 100644
index 0000000..aac9175
--- /dev/null
+++ b/lookup_response.go
@@ -0,0 +1,10 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+//
+// LookupResponse contains mapping of word and its definition.
+// The key is the word as it was passed to Lookup and the value is the
+// result for that word.
+//
+type LookupResponse map[string]*Word
diff --git a/testdata/entri.html b/testdata/entri.html
new file mode 100644
index 0000000..1899950
--- /dev/null
+++ b/testdata/entri.html
@@ -0,0 +1,408 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="utf-8" />
+ <meta
+ name="viewport"
+ content="width=device-width, initial-scale=1.0"
+ />
+ <meta
+ name="keywords"
+ content="kbbi, kbbi online, kbbi daring, kbbi dalam jaringan, kbbi 5, kbbi V, kbbi online terbaru, kbbi terbaru, kbbi resmi, Kamus Besar Bahasa Indonesia, Badan Bahasa, Pusat Bahasa, kamus bahasa Indonesia, kamus daring, kamus indonesia,"
+ />
+ <link rel="icon" href="/kbbi-daring-3.ico" />
+ <title>Hasil Pencarian - KBBI Daring</title>
+ <link
+ href="/Content/css?v=DsWRYqffn1l_yiM362JpjeKWGHv3Xp66PuBRKIpyVUU1"
+ rel="stylesheet"
+ />
+
+ <script src="/bundles/modernizr?v=inCVuEFe6J4Q07A0AcRsbJic_UE5MwpRMNGcOtk94TE1"></script>
+ </head>
+ <body style="font-family: Verdana, Geneva, Tahoma, sans-serif;">
+ <div
+ class="navbar navbar-inverse navbar-fixed-top"
+ style="background-color: #110063; border-color: gold;"
+ >
+ <div class="container">
+ <div class="navbar-header">
+ <img
+ src="/Content/Images/Logo-Tut-Wuri-Handayani-blue.png"
+ height="40px;"
+ width="40px;"
+ style="margin: 5px;"
+ />
+ <button
+ type="button"
+ class="navbar-toggle"
+ data-toggle="collapse"
+ data-target=".navbar-collapse"
+ style="background-color: #110063; border-color: gold;"
+ >
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </button>
+ <a
+ href="/Beranda"
+ style="
+ color: gold;
+ text-decoration: none;
+ margin-left: 5px;
+ margin-right: 5px;
+ font-size: larger;
+ "
+ >KBBI Daring</a
+ >
+ </div>
+ <div
+ class="navbar-collapse collapse"
+ style="
+ background-color: #110063;
+ color: gold;
+ border-top-color: gold;
+ "
+ >
+ <ul class="nav navbar-nav">
+ <li><a href="/" style="color: gold;">Cari</a></li>
+ <li>
+ <a
+ href="/Beranda/SeputarLaman"
+ style="color: gold;"
+ >Seputar Laman</a
+ >
+ </li>
+ </ul>
+ <form
+ action="/Account/LogOff"
+ class="navbar-right"
+ id="logoutForm"
+ method="post"
+ >
+ <input
+ name="__RequestVerificationToken"
+ type="hidden"
+ value="CF8U6nmOAJlAY2k60AvI39TUbFkvFxPojfIX5wstWyS2o46yCeF1TmqBO6HgMak8TWbCvm9K2hN-k0egbXfWAsX9x0OC0g2c3LC2C0-dcLzFEK2_ly2KN7J4rtx8LJ4wAl6ye-QpucHX-AN6HAqo3g2"
+ />
+ <ul class="nav navbar-nav navbar-right">
+ <li>
+ <a
+ href="/Manage"
+ style="color: gold;"
+ title="Lakukan Manajemen Akun"
+ >Halo Shulhan!</a
+ >
+ </li>
+ <li>
+ <a
+ href="javascript:document.getElementById('logoutForm').submit()"
+ style="color: gold;"
+ >Keluar</a
+ >
+ </li>
+ </ul>
+ </form>
+ </div>
+ </div>
+ </div>
+ <div class="container body-content">
+ <script src="/bundles/jquery?v=2u0aRenDpYxArEyILB59ETSCA2cfQkSMlxb6jbMBqf81"></script>
+ <script>
+ $(function () {
+ $(".entrisButton").hover(
+ function () {
+ $(this).fadeTo(1, 1)
+ },
+ function () {
+ $(this).fadeTo(1, 0.18)
+ },
+ )
+ })
+ </script>
+ <br />
+
+ <div>
+ <h4 class="text-center">
+ <span
+ class="glyphicon glyphicon-info-sign text-primary"
+ ></span>
+ <b>Halo Shulhan!</b>
+ Sudahkah Anda mengecek
+ <a href="/Manage">halaman manajemen akun Anda</a>? Anda
+ dapat melihat cara membukanya
+ <a href="/Beranda/Bantuan#pertanyaan-1">di sini</a>. Jika
+ Anda pernah mengajukan
+ <a href="/Manage/ProposalDibuat">usulan-usulan</a>,
+ mungkin usulan-usulan tersebut telah diproses oleh redaksi
+ kami.
+ </h4>
+ <br />
+ </div>
+
+ <form
+ action="/entri/nul"
+ class="form-horizontal"
+ id="searchForm"
+ method="post"
+ onsubmit="searchText(event)"
+ role="form"
+ >
+ <div class="form-group">
+ <div class="col-md-2"></div>
+ <div class="col-md-8">
+ <div class="input-group form-control-max">
+ <input
+ id="textBoxSearch"
+ name="frasa"
+ value="informasi"
+ type="text"
+ class="form-control form-control-max"
+ style="margin-top: 1px;"
+ placeholder="Pencarian..."
+ />
+ <span class="input-group-btn">
+ <span
+ class="btn btn-primary glyphicon glyphicon-search"
+ onclick="searchText(event)"
+ ></span>
+ </span>
+ </div>
+ </div>
+ </div>
+ <h3 id="errorMessageDiv"></h3>
+ <script>
+ String.prototype.contains = function (it) {
+ return this.indexOf(it) != -1
+ }
+ function searchText(ev) {
+ var val = $("#textBoxSearch").val()
+ ev.preventDefault()
+ if (!val) {
+ $("#errorMessageDiv").replaceWith(
+ '<h3 id="errorMessageDiv"><font color="red"><p class="text-center add-margin-top-5"><i>Kotak pencarian tidak boleh kosong</i></p></font></h3>',
+ )
+ } else {
+ if (
+ val.contains(".") ||
+ val.contains("?") ||
+ val.toLowerCase() == "nul" ||
+ val.toLowerCase() == "bin"
+ ) {
+ //for non-dependent respond
+ window.location.href =
+ "/" + "Cari/Hasil?frasa=" + val
+ } else {
+ window.location.href = "/" + "entri/" + val
+ }
+ }
+ }
+ </script>
+ </form>
+
+ <hr />
+ <h2 style="margin-bottom: 3px;">
+ in.for.ma.si
+ <small
+ ><span class="entrisButton"
+ ><a href="../DataDasarEntri/Edit?eid=31762"
+ ><span
+ title="Ubah"
+ class="glyphicon glyphicon-edit text-success"
+ ></span></a
+ ><a href="../DataDasarEntri/Copy?eid=31762"
+ ><span
+ title="Usulkan entri baru berdasarkan entri ini"
+ class="glyphicon glyphicon-duplicate"
+ style="color: darkcyan;"
+ ></span></a
+ ><a href="../DataDasarEntri/Details?eid=31762"
+ ><span
+ title="Detail"
+ class="glyphicon glyphicon-list-alt text-info"
+ ></span></a
+ ><span
+ title="Sejarah redaksi - entri ini tidak memiliki sejarah redaksi"
+ class="glyphicon glyphicon-book"
+ style="color: lightgrey;"
+ ></span
+ ><a href="https://www.google.com/#q=informasi"
+ ><span
+ title="Cari di Google"
+ class="glyphicon glyphicon-search text-primary"
+ ></span></a></span
+ ></small>
+ </h2>
+ <p>
+ <a
+ href="http://tesaurus.kemdikbud.go.id/tematis/lema/informasi"
+ >&#x21E2; Tesaurus</a
+ >
+ </p>
+ <ol class="last-list-child">
+ <li>
+ <font color="red"
+ ><i
+ ><span title="Nomina: kata benda">n</span>
+ </i></font
+ >penerangan
+ </li>
+ <li>
+ <font color="red"
+ ><i
+ ><span title="Nomina: kata benda">n</span>
+ </i></font
+ >pemberitahuan; kabar atau berita tentang sesuatu
+ </li>
+ <li>
+ <font color="red"
+ ><i
+ ><span title="Nomina: kata benda">n</span>
+ <span title="Linguistik: -">Ling</span>
+ </i></font
+ >keseluruhan makna yang menunjang amanat yang terlihat
+ dalam bagian-bagian amanat itu
+ </li>
+ <li style="margin -left:-19px">
+ <a
+ href="../DataDasarMakna/Create?eid=31762&number=4"
+ class="entrisButton"
+ ><span
+ title="Usulkan makna baru"
+ class="glyphicon glyphicon-plus-sign text-success"
+ ></span
+ >Usulkan makna baru</a
+ >
+ </li>
+ </ol>
+ <h4 style="padding-top: 6px;">Kata Turunan</h4>
+ <ul style="list -style: none;" class="adjusted-par">
+ <li>
+ <a href="../../entri/menginformasikan"
+ >menginformasikan</a
+ >
+ </li>
+ </ul>
+ <h4 style="padding-top: 6px;">Gabungan Kata</h4>
+ <ul style="list -style: none;" class="adjusted-par">
+ <li>
+ <a href="../../entri/informasi%20elektronik"
+ >informasi elektronik</a
+ >;
+ <a href="../../entri/informasi%20gizi">informasi gizi</a>;
+ <a href="../../entri/informasi%20jabatan"
+ >informasi jabatan</a
+ >;
+ <a href="../../entri/informasi%20karier"
+ >informasi karier</a
+ >;
+ <a href="../../entri/informasi%20meteorologi"
+ >informasi meteorologi</a
+ >;
+ <a href="../../entri/informasi%20pekerjaan"
+ >informasi pekerjaan</a
+ >;
+ <a href="../../entri/informasi%20pendidikan"
+ >informasi pendidikan</a
+ >;
+ <a href="../../entri/informasi%20pornografi"
+ >informasi pornografi</a
+ >
+ </li>
+ </ul>
+ <br /><br />
+ <h4>
+ <a href="/DataDasarEntri/Create" class="entrisButton"
+ ><span
+ title="Usulkan entri baru"
+ class="glyphicon glyphicon-plus-sign text-success"
+ ></span
+ >Usulkan entri baru</a
+ >
+ </h4>
+ <hr />
+ <footer>
+ <p>
+ &copy; 2016
+ <a href="http://badanbahasa.kemdikbud.go.id/"
+ >Badan Pengembangan Bahasa dan Perbukuan</a
+ >, Kementerian Pendidikan dan Kebudayaan Republik
+ Indonesia
+ </p>
+ <p>
+ Versi luring:
+ <a
+ class="btn btn-primary"
+ href="https://play.google.com/store/apps/details?id=yuku.kbbi5&hl=in"
+ >Android</a
+ >
+ |
+ <a
+ class="btn btn-primary"
+ href="https://itunes.apple.com/app/kamus-besar-bahasa-indonesia/id1173573777"
+ >iOS</a
+ >
+ ||
+ <span title="by: Ian K"
+ >Versi daring: 2.0.2.0-20191127214052</span
+ >
+ </p>
+ </footer>
+ </div>
+ <script src="/bundles/jquery?v=2u0aRenDpYxArEyILB59ETSCA2cfQkSMlxb6jbMBqf81"></script>
+
+ <script src="/bundles/bootstrap?v=7k-mK_Lw6GRA4MkvIrgrWipUHc3KUDohIwN2DDpspCI1"></script>
+
+ <!-- Global site tag (gtag.js) - Google Analytics -->
+ <script
+ async
+ src="https://www.googletagmanager.com/gtag/js?id=UA-128199158-1"
+ ></script>
+ <script>
+ window.dataLayer = window.dataLayer || []
+ function gtag() {
+ dataLayer.push(arguments)
+ }
+ gtag("js", new Date())
+
+ gtag("config", "UA-128199158-1")
+ </script>
+
+ <script>
+ function setSelectionRange(input, selectionStart, selectionEnd) {
+ if (input.setSelectionRange) {
+ input.focus()
+ input.setSelectionRange(selectionStart, selectionEnd)
+ } else if (input.createTextRange) {
+ var range = input.createTextRange()
+ range.collapse(true)
+ range.moveEnd("character", selectionEnd)
+ range.moveStart("character", selectionStart)
+ range.select()
+ }
+ }
+
+ function setCaretToPos(input, pos) {
+ setSelectionRange(input, pos, pos)
+ }
+
+ $(document).ready(function () {
+ // Catch all events related to changes http://stackoverflow.com/questions/21215049/disable-text-entry-in-input-type-number
+ $(".number-input").on("change keyup", function () {
+ var sanitized = $(this)
+ .val()
+ .replace(/[^0-9]/g, "") // Remove invalid characters
+ $(this).val(sanitized) // Update value
+ })
+
+ $(function () {
+ var tb = document.getElementById("textBoxSearch")
+ if (tb) {
+ var val = $("#textBoxSearch").val()
+ var caretPos = val.length
+ setCaretToPos(tb, caretPos)
+ }
+ })
+ })
+ </script>
+ </body>
+</html>
diff --git a/testdata/entri_analisa.html b/testdata/entri_analisa.html
new file mode 100644
index 0000000..9ba807d
--- /dev/null
+++ b/testdata/entri_analisa.html
@@ -0,0 +1,342 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="utf-8" />
+ <meta
+ name="viewport"
+ content="width=device-width, initial-scale=1.0"
+ />
+ <meta
+ name="keywords"
+ content="kbbi, kbbi online, kbbi daring, kbbi dalam jaringan, kbbi 5, kbbi V, kbbi online terbaru, kbbi terbaru, kbbi resmi, Kamus Besar Bahasa Indonesia, Badan Bahasa, Pusat Bahasa, kamus bahasa Indonesia, kamus daring, kamus indonesia,"
+ />
+ <link rel="icon" href="/kbbi-daring-3.ico" />
+ <title>Hasil Pencarian - KBBI Daring</title>
+ <link
+ href="/Content/css?v=oq5T2FgFNthYPMx1RHccxOAHAzzHSva0HzZ7iXO7RRY1"
+ rel="stylesheet"
+ />
+
+ <script src="/bundles/modernizr?v=inCVuEFe6J4Q07A0AcRsbJic_UE5MwpRMNGcOtk94TE1"></script>
+ </head>
+ <body style="font-family: Verdana, Geneva, Tahoma, sans-serif;">
+ <div
+ class="navbar navbar-inverse navbar-fixed-top"
+ style="background-color: #110063; border-color: gold;"
+ >
+ <div class="container">
+ <div class="navbar-header">
+ <img
+ src="/Content/Images/Logo-Tut-Wuri-Handayani-blue.png"
+ height="40px;"
+ width="40px;"
+ style="margin: 5px;"
+ />
+ <button
+ type="button"
+ class="navbar-toggle"
+ data-toggle="collapse"
+ data-target=".navbar-collapse"
+ style="background-color: #110063; border-color: gold;"
+ >
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </button>
+ <a
+ href="/Beranda"
+ style="
+ color: gold;
+ text-decoration: none;
+ margin-left: 5px;
+ margin-right: 5px;
+ font-size: larger;
+ "
+ >KBBI Daring</a
+ >
+ </div>
+ <div
+ class="navbar-collapse collapse"
+ style="
+ background-color: #110063;
+ color: gold;
+ border-top-color: gold;
+ "
+ >
+ <ul class="nav navbar-nav">
+ <li><a href="/" style="color: gold;">Cari</a></li>
+ <li>
+ <a
+ href="/Beranda/SeputarLaman"
+ style="color: gold;"
+ >Seputar Laman</a
+ >
+ </li>
+ </ul>
+ <form
+ action="/Account/LogOff"
+ class="navbar-right"
+ id="logoutForm"
+ method="post"
+ >
+ <input
+ name="__RequestVerificationToken"
+ type="hidden"
+ value="S1pOnzWTQn0qbprhG9bMrC2fYckZS9BO8NZO4CE0mS8LRyW7GKyg303gEDWLZpzf0RURhBcZNfNnjlL05N_xcLvnVBWcVeTYn6pgoTQpnmCxicWd01JnnJs-RKDi9e1P6VZPEajoFg2DjZlS91a4Bw2"
+ />
+ <ul class="nav navbar-nav navbar-right">
+ <li>
+ <a
+ href="/Manage"
+ style="color: gold;"
+ title="Lakukan Manajemen Akun"
+ >Halo Shulhan!</a
+ >
+ </li>
+ <li>
+ <a
+ href="javascript:document.getElementById('logoutForm').submit()"
+ style="color: gold;"
+ >Keluar</a
+ >
+ </li>
+ </ul>
+ </form>
+ </div>
+ </div>
+ </div>
+ <div class="container body-content">
+ <script src="/bundles/jquery?v=2u0aRenDpYxArEyILB59ETSCA2cfQkSMlxb6jbMBqf81"></script>
+ <script>
+ $(function () {
+ $(".entrisButton").hover(
+ function () {
+ $(this).fadeTo(1, 1)
+ },
+ function () {
+ $(this).fadeTo(1, 0.18)
+ },
+ )
+ })
+ </script>
+ <br />
+
+ <div>
+ <h4 class="text-center">
+ <span
+ class="glyphicon glyphicon-info-sign text-primary"
+ ></span>
+ <b>Halo Shulhan!</b>
+ Sudahkah Anda mengecek
+ <a href="/Manage">halaman manajemen akun Anda</a>? Anda
+ dapat melihat cara membukanya
+ <a href="/Beranda/Bantuan#pertanyaan-1">di sini</a>. Jika
+ Anda pernah mengajukan
+ <a href="/Manage/ProposalDibuat">usulan-usulan</a>,
+ mungkin usulan-usulan tersebut telah diproses oleh redaksi
+ kami.
+ </h4>
+ <br />
+ </div>
+
+ <form
+ action="/entri/nul"
+ class="form-horizontal"
+ id="searchForm"
+ method="post"
+ onsubmit="searchText(event)"
+ role="form"
+ >
+ <div class="form-group">
+ <div class="col-md-2"></div>
+ <div class="col-md-8">
+ <div class="input-group form-control-max">
+ <input
+ id="textBoxSearch"
+ name="frasa"
+ value="analisa"
+ type="text"
+ class="form-control form-control-max"
+ style="margin-top: 1px;"
+ placeholder="Pencarian..."
+ />
+ <span class="input-group-btn">
+ <span
+ class="btn btn-primary glyphicon glyphicon-search"
+ onclick="searchText(event)"
+ ></span>
+ </span>
+ </div>
+ </div>
+ </div>
+ <h3 id="errorMessageDiv"></h3>
+ <script>
+ String.prototype.contains = function (it) {
+ return this.indexOf(it) != -1
+ }
+ function searchText(ev) {
+ var val = $("#textBoxSearch").val()
+ ev.preventDefault()
+ if (!val) {
+ $("#errorMessageDiv").replaceWith(
+ '<h3 id="errorMessageDiv"><font color="red"><p class="text-center add-margin-top-5"><i>Kotak pencarian tidak boleh kosong</i></p></font></h3>',
+ )
+ } else {
+ if (
+ val.contains(".") ||
+ val.contains("?") ||
+ val.toLowerCase() == "nul" ||
+ val.toLowerCase() == "bin"
+ ) {
+ //for non-dependent respond
+ window.location.href =
+ "/" + "Cari/Hasil?frasa=" + val
+ } else {
+ window.location.href = "/" + "entri/" + val
+ }
+ }
+ }
+ </script>
+ </form>
+
+ <hr />
+ <h2 style="margin-bottom: 3px;">
+ ana.li.sa
+ <small
+ ><span class="entrisButton"
+ ><a href="../DataDasarEntri/Edit?eid=3476"
+ ><span
+ title="Ubah"
+ class="glyphicon glyphicon-edit text-success"
+ ></span></a
+ ><a href="../DataDasarEntri/Copy?eid=3476"
+ ><span
+ title="Usulkan entri baru berdasarkan entri ini"
+ class="glyphicon glyphicon-duplicate"
+ style="color: darkcyan;"
+ ></span></a
+ ><a href="../DataDasarEntri/Details?eid=3476"
+ ><span
+ title="Detail"
+ class="glyphicon glyphicon-list-alt text-info"
+ ></span></a
+ ><span
+ title="Sejarah redaksi - entri ini tidak memiliki sejarah redaksi"
+ class="glyphicon glyphicon-book"
+ style="color: lightgrey;"
+ ></span
+ ><a href="https://www.google.com/#q=analisa"
+ ><span
+ title="Cari di Google"
+ class="glyphicon glyphicon-search text-primary"
+ ></span></a></span
+ ></small>
+ </h2>
+ <p>
+ <a href="http://tesaurus.kemdikbud.go.id/tematis/lema/analisa"
+ >&#x21E2; Tesaurus</a
+ >
+ </p>
+ <ul style="list-style: none;" class="adjusted-par">
+ <li>
+ <font color="red"><i> </i></font>&rarr;
+ <a href="../../entri/analisis">analisis</a>
+ </li>
+ </ul>
+ <br /><br />
+ <h4>
+ <a href="/DataDasarEntri/Create" class="entrisButton"
+ ><span
+ title="Usulkan entri baru"
+ class="glyphicon glyphicon-plus-sign text-success"
+ ></span
+ >Usulkan entri baru</a
+ >
+ </h4>
+ <hr />
+ <footer>
+ <p>
+ &copy; 2016
+ <a href="http://badanbahasa.kemdikbud.go.id/"
+ >Badan Pengembangan dan Pembinaan Bahasa</a
+ >, Kementerian Pendidikan dan Kebudayaan Republik
+ Indonesia
+ </p>
+ <p>
+ Versi luring:
+ <a
+ class="btn btn-primary"
+ href="https://play.google.com/store/apps/details?id=yuku.kbbi5&hl=in"
+ >Android</a
+ >
+ |
+ <a
+ class="btn btn-primary"
+ href="https://itunes.apple.com/app/kamus-besar-bahasa-indonesia/id1173573777"
+ >iOS</a
+ >
+ ||
+ <span title="by: Ian K"
+ >Versi daring: 3.0.0.0-20200410085735</span
+ >
+ </p>
+ </footer>
+ </div>
+ <script src="/bundles/jquery?v=2u0aRenDpYxArEyILB59ETSCA2cfQkSMlxb6jbMBqf81"></script>
+
+ <script src="/bundles/bootstrap?v=7k-mK_Lw6GRA4MkvIrgrWipUHc3KUDohIwN2DDpspCI1"></script>
+
+ <!-- Global site tag (gtag.js) - Google Analytics -->
+ <script
+ async
+ src="https://www.googletagmanager.com/gtag/js?id=UA-128199158-1"
+ ></script>
+ <script>
+ window.dataLayer = window.dataLayer || []
+ function gtag() {
+ dataLayer.push(arguments)
+ }
+ gtag("js", new Date())
+
+ gtag("config", "UA-128199158-1")
+ </script>
+
+ <script>
+ function setSelectionRange(input, selectionStart, selectionEnd) {
+ if (input.setSelectionRange) {
+ input.focus()
+ input.setSelectionRange(selectionStart, selectionEnd)
+ } else if (input.createTextRange) {
+ var range = input.createTextRange()
+ range.collapse(true)
+ range.moveEnd("character", selectionEnd)
+ range.moveStart("character", selectionStart)
+ range.select()
+ }
+ }
+
+ function setCaretToPos(input, pos) {
+ setSelectionRange(input, pos, pos)
+ }
+
+ $(document).ready(function () {
+ // Catch all events related to changes http://stackoverflow.com/questions/21215049/disable-text-entry-in-input-type-number
+ $(".number-input").on("change keyup", function () {
+ var sanitized = $(this)
+ .val()
+ .replace(/[^0-9]/g, "") // Remove invalid characters
+ $(this).val(sanitized) // Update value
+ })
+
+ $(function () {
+ var tb = document.getElementById("textBoxSearch")
+ if (tb) {
+ var val = $("#textBoxSearch").val()
+ var caretPos = val.length
+ setCaretToPos(tb, caretPos)
+ }
+ })
+ })
+ </script>
+ </body>
+</html>
diff --git a/testdata/kbbi_dasar.html b/testdata/kbbi_dasar.html
new file mode 100644
index 0000000..4bd5170
--- /dev/null
+++ b/testdata/kbbi_dasar.html
@@ -0,0 +1,707 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <meta name="keywords" content="kbbi, kbbi online, kbbi daring, kbbi dalam jaringan, kbbi 5, kbbi V, kbbi online terbaru, kbbi terbaru, kbbi resmi, Kamus Besar Bahasa Indonesia, Badan Bahasa, Pusat Bahasa, kamus bahasa Indonesia, kamus daring, kamus indonesia," />
+ <link rel="icon" href="/kbbi-daring-3.ico" />
+ <title>Jenis - KBBI Daring</title>
+ <link href="/Content/css?v=DsWRYqffn1l_yiM362JpjeKWGHv3Xp66PuBRKIpyVUU1" rel="stylesheet"/>
+
+ <script src="/bundles/modernizr?v=inCVuEFe6J4Q07A0AcRsbJic_UE5MwpRMNGcOtk94TE1"></script>
+
+</head>
+<body style="font-family:Verdana, Geneva, Tahoma, sans-serif">
+ <div class="navbar navbar-inverse navbar-fixed-top"
+ style="background-color:#110063;border-color:gold">
+ <div class="container">
+ <div class="navbar-header">
+ <img src="/Content/Images/Logo-Tut-Wuri-Handayani-blue.png"
+ height="40px;" width="40px;" style="margin:5px;" />
+ <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse"
+ style="background-color:#110063;border-color:gold;">
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </button>
+ <a href="/Beranda" style="color:gold;text-decoration:none;margin-left:5px;margin-right:5px;font-size:larger">KBBI Daring</a>
+ </div>
+ <div class="navbar-collapse collapse"
+ style="background-color:#110063;color:gold;border-top-color:gold">
+ <ul class="nav navbar-nav">
+ <li><a href="/" style="color:gold">Cari</a></li>
+ <li><a href="/Beranda/SeputarLaman" style="color:gold">Seputar Laman</a></li>
+ </ul>
+ <form action="/Account/LogOff" class="navbar-right" id="logoutForm" method="post"><input name="__RequestVerificationToken" type="hidden" value="VxJ0nEPsxOv5dcs7onowFHpKj96ciIx_V-tsPLY3105li6cieBcOCPaG4SamV6etKq_FqvdYc74vQp9meDxVcBpm2IIhdQAA2z6Y7k5j0cA3ERD3nhT0nge31OlYBb_EUwk8S6pAH4rPPz2baY57ig2" /> <ul class="nav navbar-nav navbar-right">
+ <li>
+ <a href="/Manage" style="color:gold" title="Lakukan Manajemen Akun">Halo Shulhan!</a>
+ </li>
+ <li><a href="javascript:document.getElementById('logoutForm').submit()" style="color:gold">Keluar</a></li>
+ </ul>
+</form>
+ </div>
+ </div>
+ </div>
+ <div class="container body-content">
+
+
+
+<br />
+<h2>Daftar Entri Jenis Dasar</h2>
+<br />
+
+<div class="row">
+ <div class="col-lg-2">
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1"
+ class="btn btn-default btn-xs"
+ title="Ke halaman awal">
+ awal
+ </a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1"
+ class="btn btn-default btn-xs"
+ title="Ke 100 halaman sebelumnya">
+ &#x25c0&#x25c0&#x25c0
+ </a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1"
+ class="btn btn-default btn-xs"
+ title="Ke 10 halaman sebelumnya">
+ &#x25c0&#x25c0
+ </a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1"
+ class="btn btn-default btn-xs"
+ title="Ke halaman sebelumnya">
+ &#x25c0
+ </a>
+ </div>
+ <div class="col-lg-2">
+ <span id="currentPageId">
+ Halaman 1 / 501
+ </span>
+ </div>
+ <div class="col-lg-2">
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=2"
+ class="btn btn-default btn-xs"
+ title="Ke halaman berikutnya">
+ &#x25b6
+ </a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=11"
+ class="btn btn-default btn-xs"
+ title="Ke 10 halaman berikutnya">
+ &#x25b6&#x25b6
+ </a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=101"
+ class="btn btn-default btn-xs"
+ title="Ke 100 halaman berikutnya">
+ &#x25b6&#x25b6&#x25b6
+ </a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=501"
+ class="btn btn-default btn-xs"
+ title="Ke halaman terakhir">
+ akhir
+ </a>
+ </div>
+ <div class="col-lg-3">
+ <span><i>Hasil Pencarian: 1 - 100 dari 50001</i></span>
+ </div>
+</div>
+ <br />
+<div class="row">
+ <div class="col-md-3">
+ <a href="/entri/A">
+ A
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/&#224;">
+ à
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/a-">
+ a-
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/Cari/Hasil?frasa=A.Md.">
+ A.Md.
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/Cari/Hasil?frasa=a.n.">
+ a.n.
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/Cari/Hasil?frasa=A.P.">
+ A.P.
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/Cari/Hasil?frasa=A.Pkt.">
+ A.Pkt.
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/Cari/Hasil?frasa=a.s.">
+ a.s.
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aa">
+ aa
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AAJI">
+ AAJI
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AAL">
+ AAL
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AAU">
+ AAU
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AAUI">
+ AAUI
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AB">
+ AB<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/ab-">
+ ab-<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/ab">
+ ab<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AB">
+ AB<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/ab">
+ ab<sup>3</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AB">
+ AB<sup>3</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/AB">
+ AB<sup>4</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aba">
+ aba<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aba">
+ aba<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aba-aba">
+ aba-aba
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abaaka">
+ abaaka
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abad">
+ abad
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abadi">
+ abadi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abadiah">
+ abadiah
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abadiat">
+ abadiat
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abah">
+ abah<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abah">
+ abah<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abah-abah">
+ abah-abah<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abah-abah">
+ abah-abah<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abah-abah">
+ abah-abah<sup>3</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abahui">
+ abahui
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abai">
+ abai<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/Abai">
+ Abai<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abaimana">
+ abaimana
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abaka">
+ abaka
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abaksial">
+ abaksial
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abaktinal">
+ abaktinal
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abakus">
+ abakus<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abakus">
+ abakus<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abal">
+ abal
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abal-abal">
+ abal-abal<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abal-abal">
+ abal-abal<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abal-abal">
+ abal-abal<sup>3</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abalone">
+ abalone
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abampere">
+ abampere
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aban">
+ aban
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abandira">
+ abandira
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abandonemen">
+ abandonemen
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abang">
+ abang<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abang">
+ abang<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abang">
+ abang<sup>3</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abangan">
+ abangan<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abangan">
+ abangan<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abangda">
+ abangda
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abangga">
+ abangga
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abap">
+ abap
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abar">
+ abar
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abas">
+ abas
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abas-abas">
+ abas-abas
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abasia">
+ abasia
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abat">
+ abat
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abate">
+ abate
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abatis">
+ abatis
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abatisasi">
+ abatisasi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abatoar">
+ abatoar
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abau">
+ abau<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/Abau">
+ Abau<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abawi">
+ abawi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abaya">
+ abaya
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abbas">
+ abbas
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdas">
+ abdas
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdi">
+ abdi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdikasi">
+ abdikasi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdis">
+ abdis
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdomen">
+ abdomen
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdominal">
+ abdominal
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdu">
+ abdu
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdu">
+ abdu
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abduksi">
+ abduksi<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abduksi">
+ abduksi<sup>2</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abduktor">
+ abduktor
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdul">
+ abdul
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abdusen">
+ abdusen
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abe">
+ abe
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abece">
+ abece
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aben">
+ aben
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aber">
+ aber
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/aberasi">
+ aberasi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abet">
+ abet
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abet">
+ abet
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/ABG">
+ ABG
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abi">
+ abi
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abian">
+ abian
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abib">
+ abib
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abibliofobia">
+ abibliofobia
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abid">
+ abid<sup>1</sup>
+ </a>
+ </div>
+ <div class="col-md-3">
+ <a href="/entri/abid">
+ abid<sup>2</sup>
+ </a>
+ </div>
+</div>
+<br />
+ <div class="btn-group-sm">
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=A" class="btn btn-sm btn-default"><font color="#0060B6">A</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=B" class="btn btn-sm btn-default"><font color="#0060B6">B</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=C" class="btn btn-sm btn-default"><font color="#0060B6">C</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=D" class="btn btn-sm btn-default"><font color="#0060B6">D</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=E" class="btn btn-sm btn-default"><font color="#0060B6">E</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=F" class="btn btn-sm btn-default"><font color="#0060B6">F</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=G" class="btn btn-sm btn-default"><font color="#0060B6">G</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=H" class="btn btn-sm btn-default"><font color="#0060B6">H</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=I" class="btn btn-sm btn-default"><font color="#0060B6">I</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=J" class="btn btn-sm btn-default"><font color="#0060B6">J</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=K" class="btn btn-sm btn-default"><font color="#0060B6">K</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=L" class="btn btn-sm btn-default"><font color="#0060B6">L</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=M" class="btn btn-sm btn-default"><font color="#0060B6">M</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=N" class="btn btn-sm btn-default"><font color="#0060B6">N</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=O" class="btn btn-sm btn-default"><font color="#0060B6">O</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=P" class="btn btn-sm btn-default"><font color="#0060B6">P</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=Q" class="btn btn-sm btn-default"><font color="#0060B6">Q</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=R" class="btn btn-sm btn-default"><font color="#0060B6">R</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=S" class="btn btn-sm btn-default"><font color="#0060B6">S</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=T" class="btn btn-sm btn-default"><font color="#0060B6">T</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=U" class="btn btn-sm btn-default"><font color="#0060B6">U</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=V" class="btn btn-sm btn-default"><font color="#0060B6">V</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=W" class="btn btn-sm btn-default"><font color="#0060B6">W</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=X" class="btn btn-sm btn-default"><font color="#0060B6">X</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=Y" class="btn btn-sm btn-default"><font color="#0060B6">Y</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1&filter=Z" class="btn btn-sm btn-default"><font color="#0060B6">Z</font></a>
+ <a href="/Cari/Jenis?masukan=dasar&amp;masukanLengkap=Dasar&amp;page=1" class="btn btn-sm btn-default">Semua</a>
+ </div>
+
+ <br />
+
+
+
+
+ <hr />
+ <footer>
+ <p>&copy; 2016 <a href="http://badanbahasa.kemdikbud.go.id/">Badan Pengembangan Bahasa dan Perbukuan</a>, Kementerian Pendidikan dan Kebudayaan Republik Indonesia</p>
+ <p>Versi luring: <a class="btn btn-primary" href="https://play.google.com/store/apps/details?id=yuku.kbbi5&hl=in">Android</a> | <a class="btn btn-primary" href="https://itunes.apple.com/app/kamus-besar-bahasa-indonesia/id1173573777">iOS</a>
+ || <span title="by: Ian K">Versi daring: 2.0.2.0-20191127214052</span></p>
+</footer>
+
+ </div>
+ <script src="/bundles/jquery?v=2u0aRenDpYxArEyILB59ETSCA2cfQkSMlxb6jbMBqf81"></script>
+
+ <script src="/bundles/bootstrap?v=7k-mK_Lw6GRA4MkvIrgrWipUHc3KUDohIwN2DDpspCI1"></script>
+
+ <!-- Global site tag (gtag.js) - Google Analytics -->
+ <script async src="https://www.googletagmanager.com/gtag/js?id=UA-128199158-1"></script>
+ <script>
+ window.dataLayer = window.dataLayer || [];
+ function gtag() { dataLayer.push(arguments); }
+ gtag('js', new Date());
+
+ gtag('config', 'UA-128199158-1');
+ </script>
+
+ <script>
+ function setSelectionRange(input, selectionStart, selectionEnd) {
+ if (input.setSelectionRange) {
+ input.focus();
+ input.setSelectionRange(selectionStart, selectionEnd);
+ }
+ else if (input.createTextRange) {
+ var range = input.createTextRange();
+ range.collapse(true);
+ range.moveEnd('character', selectionEnd);
+ range.moveStart('character', selectionStart);
+ range.select();
+ }
+ }
+
+ function setCaretToPos(input, pos) {
+ setSelectionRange(input, pos, pos);
+ }
+
+ $(document).ready(function () {
+ // Catch all events related to changes http://stackoverflow.com/questions/21215049/disable-text-entry-in-input-type-number
+ $('.number-input').on('change keyup', function () {
+ var sanitized = $(this).val().replace(/[^0-9]/g, ''); // Remove invalid characters
+ $(this).val(sanitized); // Update value
+ });
+
+ $(function () {
+ var tb = document.getElementById('textBoxSearch');
+ if (tb) {
+ var val = $("#textBoxSearch").val();
+ var caretPos = val.length;
+ setCaretToPos(tb, caretPos);
+ }
+ });
+ });
+ </script>
+
+
+</body>
+</html>
diff --git a/word.go b/word.go
new file mode 100644
index 0000000..8bc0da0
--- /dev/null
+++ b/word.go
@@ -0,0 +1,98 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+import (
+ "bytes"
+
+ "github.com/shuLhan/share/lib/net/html"
+)
+
+//
+// Word stores a single root word and its definitions.
+//
+type Word struct {
+ Root string `json:"dasar,omitempty"` // The root word.
+ Definition []*WordDefinition `json:"definisi"` // The definitions of the word.
+
+ // Message contains information for the user when the word is not found
+ // or when the word is informal (kata tidak baku).
+ Message string `json:"pesan,omitempty"`
+
+ err error // Unexported; exposed read-only through Err.
+}
+
+//
+// Err returns the error stored in the word while retrieving its definition.
+//
+func (word *Word) Err() error {
+ return word.err
+}
+
+//
+// parseHTMLEntri parses the HTML body of an "/entri/<word>" page to find
+// the root word and the definitions of the word.
+//
+// The in parameter is the word being looked up; it is passed through to
+// parseWordDefinition.
+//
+// Only a failure of html.Parse is returned as an error.
+// An error returned by parseWordDefinition is not propagated: its text is
+// stored in word.Message and processing of the current list stops.
+//
+func (word *Word) parseHTMLEntri(in string, htmlBody []byte) (err error) {
+	iter, err := html.Parse(bytes.NewReader(htmlBody))
+	if err != nil {
+		return err
+	}
+
+	for node := iter.Next(); node != nil; node = iter.Next() {
+		if !node.IsElement() {
+			continue
+		}
+
+		switch node.Data {
+		case tagNameHeader2:
+			word.parseRootWord(node)
+
+		case tagNameOrderedList, tagNameUnorderedList:
+			for li := node.GetFirstChild(); li != nil; li = li.GetNextSibling() {
+				// Use a distinct name so the named result err
+				// is not shadowed.
+				def, errDef := parseWordDefinition(in, li)
+				if errDef != nil {
+					word.Message = errDef.Error()
+					break
+				}
+				if def == nil {
+					// Not a definition item; stop reading
+					// this list.
+					break
+				}
+				word.Definition = append(word.Definition, def)
+			}
+
+			// Resume the iteration at the sibling following the
+			// list (presumably so the list items handled above
+			// are not visited again -- confirm against the html
+			// iterator).
+			iter.SetNext(node.GetNextSibling())
+		}
+	}
+
+	return nil
+}
+
+//
+// parseRootWord given an HTML element "h2" finds a possible root word,
+// stores it in word.Root, and returns true; otherwise it returns false and
+// leaves word.Root untouched.
+//
+// The root word is only recognized when the first child of h2 is a
+// tagNameSpan element whose attrNameClass attribute equals
+// attrValueRootWord, and whose first child is a tagNameAnchor element that
+// has a child node.
+//
+func (word *Word) parseRootWord(h2 *html.Node) bool {
+	span := h2.GetFirstChild()
+	if span == nil || span.Data != tagNameSpan {
+		return false
+	}
+	if span.GetAttrValue(attrNameClass) != attrValueRootWord {
+		return false
+	}
+
+	anchor := span.GetFirstChild()
+	if anchor == nil || anchor.Data != tagNameAnchor {
+		return false
+	}
+
+	// An empty anchor has no child to read the root word from.
+	text := anchor.GetFirstChild()
+	if text == nil {
+		return false
+	}
+	word.Root = text.Data
+
+	return true
+}
diff --git a/word_definition.go b/word_definition.go
new file mode 100644
index 0000000..86b05bd
--- /dev/null
+++ b/word_definition.go
@@ -0,0 +1,94 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/shuLhan/share/lib/net/html"
+ libstrings "github.com/shuLhan/share/lib/strings"
+)
+
+//
+// WordDefinition contains one meaning of a word in the dictionary, with
+// optional word classifications and usage examples.
+//
+type WordDefinition struct {
+ Value string `json:"isi"` // The text of the definition.
+ Classes []string `json:"kelas,omitempty"` // Word classifications, for example "Nomina: kata benda".
+ Examples []string `json:"contoh,omitempty"` // Examples of the word in use.
+}
+
+//
+// parseWordDefinition parses a single list item li into a WordDefinition.
+//
+// The in parameter is the word being looked up; it is only used to compose
+// the error message for a non-standard word.
+//
+// It returns,
+//
+//   - (nil, nil) if li does not start with a tagNameFont element that
+//     contains a tagNameItalic element, or if the "→" marker is not followed
+//     by a non-empty anchor;
+//   - (nil, error) if the item is a "→" reference to another word, which
+//     means in is a non-standard form of that word;
+//   - (definition, nil) otherwise.
+//
+func parseWordDefinition(in string, li *html.Node) (defKata *WordDefinition, err error) {
+	elFont := li.GetFirstChild()
+	if elFont == nil || elFont.Data != tagNameFont {
+		return nil, nil
+	}
+	elItalic := elFont.GetFirstChild()
+	if elItalic == nil || elItalic.Data != tagNameItalic {
+		return nil, nil
+	}
+
+	defKata = &WordDefinition{}
+
+	// Each leading span with a non-empty attrNameTitle attribute is one
+	// word class.
+	for elSpan := elItalic.GetFirstChild(); elSpan != nil && elSpan.Data == tagNameSpan; elSpan = elSpan.GetNextSibling() {
+		kelas := elSpan.GetAttrValue(attrNameTitle)
+		if len(kelas) > 0 {
+			defKata.Classes = append(defKata.Classes, kelas)
+		}
+	}
+
+	el := elFont.GetNextSibling()
+	if el == nil {
+		return defKata, nil
+	}
+
+	defKata.Value = strings.TrimSpace(libstrings.SingleSpace(el.Data))
+
+	if defKata.Value == "→" {
+		// The entry only points to another word.
+		el = el.GetNextSibling()
+		if el == nil || el.Data != tagNameAnchor {
+			return nil, nil
+		}
+		el = el.GetFirstChild()
+		if el == nil {
+			// Anchor without text: there is no target word to report.
+			return nil, nil
+		}
+		return nil, fmt.Errorf(`%q adalah bentuk tidak baku dari %q`,
+			in, el.Data)
+	}
+
+	// Only a value ending with ':' is followed by examples.
+	// HasSuffix is also safe on an empty value, where indexing the last
+	// byte would panic.
+	if !strings.HasSuffix(defKata.Value, ":") {
+		return defKata, nil
+	}
+	defKata.Value = strings.TrimSuffix(defKata.Value, ":")
+
+	// Parse the examples in the following siblings; each one is expected
+	// to be a tagNameFont element wrapping a tagNameItalic element.
+	for el = el.GetNextSibling(); el != nil; el = el.GetNextSibling() {
+		if el.Data != tagNameFont {
+			break
+		}
+
+		elItalic = el.GetFirstChild()
+		if elItalic == nil || elItalic.Data != tagNameItalic {
+			break
+		}
+
+		elText := elItalic.GetFirstChild()
+		if elText != nil {
+			contoh := strings.TrimSpace(elText.Data)
+			if len(contoh) > 0 && contoh != ";" {
+				// NOTE(review): the untrimmed text is stored,
+				// as before; confirm whether contoh was
+				// intended here.
+				defKata.Examples = append(defKata.Examples, elText.Data)
+			}
+		}
+	}
+
+	return defKata, nil
+}
diff --git a/word_test.go b/word_test.go
new file mode 100644
index 0000000..5dcb468
--- /dev/null
+++ b/word_test.go
@@ -0,0 +1,66 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+import (
+ "io/ioutil"
+ "testing"
+
+ "github.com/shuLhan/share/lib/test"
+)
+
+// TestWord_parseHTMLEntri parses each HTML fixture in testdata with
+// Word.parseHTMLEntri and compares the result with the expected Word.
+func TestWord_parseHTMLEntri(t *testing.T) {
+	cases := []struct {
+		infile string
+		cari   string
+		exp    *Word
+	}{{
+		infile: "testdata/entri.html",
+		cari:   "informasi",
+		exp: &Word{
+			Definition: []*WordDefinition{{
+				Value:   "penerangan",
+				Classes: []string{"Nomina: kata benda"},
+			}, {
+				Value:   "pemberitahuan; kabar atau berita tentang sesuatu",
+				Classes: []string{"Nomina: kata benda"},
+			}, {
+				Value: "keseluruhan makna yang menunjang amanat yang " +
+					"terlihat dalam bagian-bagian " +
+					"amanat itu",
+				Classes: []string{
+					"Nomina: kata benda",
+					"Linguistik: -",
+				},
+			}},
+		},
+	}, {
+		infile: "testdata/entri_analisa.html",
+		cari:   "analisa",
+		exp: &Word{
+			Message: `"analisa" adalah bentuk tidak baku dari "analisis"`,
+		},
+	}}
+
+	for _, c := range cases {
+		htmlBody, err := ioutil.ReadFile(c.infile)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		got := new(Word)
+
+		err = got.parseHTMLEntri(c.cari, htmlBody)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// Fail with a readable message instead of an index-out-of-range
+		// panic when the parser returns a different number of
+		// definitions.
+		if len(got.Definition) != len(c.exp.Definition) {
+			t.Fatalf("%s: expecting %d definitions, got %d",
+				c.infile, len(c.exp.Definition), len(got.Definition))
+		}
+
+		// Compare per definition first to get a narrower failure message.
+		for x, def := range c.exp.Definition {
+			test.Assert(t, "Definition", def, got.Definition[x], true)
+		}
+
+		test.Assert(t, c.infile, c.exp, got, true)
+	}
+}
diff --git a/words.go b/words.go
new file mode 100644
index 0000000..6cdba0c
--- /dev/null
+++ b/words.go
@@ -0,0 +1,20 @@
+// Copyright 2020, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kamusku
+
+//
+// Words contains a set of unique words, stored as the keys of the map.
+//
+type Words map[string]struct{}
+
+//
+// merge adds every word of in into words and returns the resulting set.
+//
+// A non-nil receiver is modified in place and returned.
+// If the receiver is nil and in is not empty, a new map is allocated, since
+// writing to a nil map panics; in that case the caller must use the
+// returned value.
+//
+func (words Words) merge(in Words) Words {
+	if words == nil && len(in) > 0 {
+		words = make(Words, len(in))
+	}
+	for k := range in {
+		words[k] = struct{}{}
+	}
+	return words
+}