diff options
Diffstat (limited to 'internal/cmd/mergedic')
| -rw-r--r-- | internal/cmd/mergedic/main.go | 83 |
1 files changed, 81 insertions, 2 deletions
diff --git a/internal/cmd/mergedic/main.go b/internal/cmd/mergedic/main.go index 7f6301b..d564dee 100644 --- a/internal/cmd/mergedic/main.go +++ b/internal/cmd/mergedic/main.go @@ -5,9 +5,15 @@ package main import ( + "fmt" "log" + "os" + "sort" + "strings" - "git.sr.ht/~shulhan/pakakeh.go/lib/hunspell" + "git.sr.ht/~shulhan/pakakeh.go/lib/ascii" + libos "git.sr.ht/~shulhan/pakakeh.go/lib/os" + libstrings "git.sr.ht/~shulhan/pakakeh.go/lib/strings" ) func main() { @@ -15,8 +21,81 @@ func main() { in := "id_ID.dic" daftarKata := "daftar_kata_dasar" - _, err := hunspell.MergeDictionaries(out, in, daftarKata) + _, err := mergeDictionaries(out, in, daftarKata) if err != nil { log.Fatal(err) } } + +// mergeDictionaries merge two or more dictionaries into single file. +// The outFile define the output of merged dictionaries. +// If the outFile already exist it will be truncated, otherwise it will be +// created. +// The inFiles contains list of input dictionary files. +// +// On success it will return number of words merged into output file. +func mergeDictionaries(outFile string, inFiles ...string) (n int, err error) { + if len(inFiles) == 0 { + return 0, nil + } + + if len(inFiles) == 1 { + err = libos.Copy(outFile, inFiles[0]) + return 0, err + } + + var ( + dict = make(map[string]string, 1024) + + lines []string + ) + + for x := range len(inFiles) { + lines, err = libstrings.LinesOfFile(inFiles[x]) + if err != nil { + return 0, err + } + + // Skip the first line that may contains number of words. + y := 0 + if ascii.IsDigit(lines[y][0]) { + y = 1 + } + + for ; y < len(lines); y++ { + ss := strings.Split(lines[y], "/") + key := ss[0] + attr := dict[key] + strings.Join(ss[1:], "") + dict[key] = attr + } + } + + words := make([]string, 0, len(dict)) + + for word, attr := range dict { + if len(attr) == 0 { + words = append(words, word) + } else { + words = append(words, word+"/"+attr) + } + } + + sort.Strings(words) + + fout, err := os.OpenFile(outFile, os.O_WRONLY|os.O_CREATE, 0600) + if err != nil { + return 0, err + } + + fmt.Fprintf(fout, "%d\n", len(words)) + for x := range len(words) { + fmt.Fprintf(fout, "%s\n", words[x]) + } + + err = fout.Close() + if err != nil { + return 0, err + } + + return len(words), nil +} |
