Merge package "github.com/shuLhan/tabula"

author: Shulhan <ms@kilabit.info> 2018-09-17 01:21:27 +0700
committer: Shulhan <ms@kilabit.info> 2018-09-18 01:50:21 +0700
commit: 44b26edf7f390db383fe025454be0c4e30cfbd9b (patch)
tree: 84d02953bc9095312182534936c1b60667957f07 /lib
parent: 4a820ec157501c957d2e30f1670656cceec5c044 (diff)
download: pakakeh.go-44b26edf7f390db383fe025454be0c4e30cfbd9b.tar.xz
28 files changed, 4041 insertions, 0 deletions
diff --git a/lib/tabula/.gitignore b/lib/tabula/.gitignore
new file mode 100644
index 00000000..f5ddbe1c
--- /dev/null
+++ b/lib/tabula/.gitignore
@@ -0,0 +1,5 @@
+cover.html
+cover.out
+*.bench
+*.prof
+*.test
diff --git a/lib/tabula/LICENSE b/lib/tabula/LICENSE
new file mode 100644
index 00000000..d3ff23a6
--- /dev/null
+++ b/lib/tabula/LICENSE
@@ -0,0 +1,39 @@
+Copyright 2017, Shulhan (ms@kilabit.info).
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of copyright holder nor the names of its contributors may be
+   used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+        ---      --- ---       ---       ---      --- ---
+
+	TT  TT   II  BB          AAAA     LLLLLL  II  KKKKKKKK
+	TT TT    II  BB         AA  AA   LL   LL  II     KK
+	TTTT     II  BB        AA    AA   LL  LL  II     KK
+	TT TT    II  BB        AAAAAAAA   LLLLLL  II     KK
+	TT  TT   II  BB        AA    AA  LL   LL  II     KK
+	TT   TT  II  BBBBBBBB  AA    AA   LLLLLL  II     KK
+
+Website: http://kilabit.info
+Contact: ms@kilabit.info
diff --git a/lib/tabula/Makefile b/lib/tabula/Makefile
new file mode 100644
index 00000000..d77283bd
--- /dev/null
+++ b/lib/tabula/Makefile
@@ -0,0 +1,31 @@
+#!/bin/make
+
+## Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+## Use of this source code is governed by a BSD-style license that can be found
+## in the LICENSE file.
+
+SRC_FILES	:=$(shell go list -f '{{ join .GoFiles " " }}')
+TEST_FILES	:=$(shell go list -f '{{ join .TestGoFiles " " }}')
+XTEST_FILES	:=$(shell go list -f '{{ join .XTestGoFiles " " }}')
+COVER_OUT	:=cover.out
+COVER_HTML	:=cover.html
+TARGET		:=$(shell go list -f '{{ .Target }}')
+
+.PHONY: all clean coverbrowse
+
+all: ${TARGET}
+
+${TARGET}: ${COVER_HTML}
+	go install -a .
+
+${COVER_HTML}: ${COVER_OUT}
+	go tool cover -html=$< -o $@
+
+${COVER_OUT}: ${SRC_FILES} ${TEST_FILES} ${XTEST_FILES}
+	go test -v -coverprofile $@
+
+coverbrowse: ${COVER_HTML}
+	xdg-open $<
+
+clean:
+	rm -f ${COVER_HTML} ${COVER_OUT} *.bench *.prof *.test
diff --git a/lib/tabula/README.md b/lib/tabula/README.md
new file mode 100644
index 00000000..8fbd2a40
--- /dev/null
+++ b/lib/tabula/README.md
@@ -0,0 +1,165 @@
+[![GoDoc](https://godoc.org/github.com/shuLhan/share/lib/tabula?status.svg)](https://godoc.org/github.com/shuLhan/share/lib/tabula)
+[![Go Report Card](https://goreportcard.com/badge/github.com/shuLhan/share/lib/tabula)](https://goreportcard.com/report/github.com/shuLhan/share/lib/tabula)
+![cover.run go](https://cover.run/go/github.com/shuLhan/share/lib/tabula.svg)
+
+Package tabula is a Go library for working with rows, columns, or matrices
+(tables); in other words, for working with data sets.
+
+# Overview
+
+Go's slice gives a flexible way to manage a sequence of data of one type, but
+what if you want to manage a sequence of values with different types of data?
+Or manage a bunch of values like a table?
+
+You can use this library to manage a sequence of values with different types
+and to manage data in two-dimensional tuples.
+
+## Terminology
+
+Here are some terms that we used in developing this library, which may
+help the reader understand the internals and the API.
+
+Record is a single cell in row or column, or the smallest building block of
+dataset.
+
+Row is a horizontal representation of records in dataset.
+
+Column is a vertical representation of records in dataset.
+Each column has a unique name, and all of its records have the same data type.
+
+Dataset is a collection of rows and columns.
+
+Given those definitions we can draw the representation of rows, columns, or
+matrix:
+
+            COL-0  COL-1 ...  COL-x
+    ROW-0: record record ... record
+    ROW-1: record record ... record
+    ...
+    ROW-y: record record ... record
+
+## What makes this package different from other dataset packages?
+
+### Record Type
+
+There are only three valid types in a record: int64, float64, and string.
+
+Each record is a pointer to an interface value, which means:
+
+- Switching from rows to columns mode, or vice versa, is only a matter of
+  pointer switching, with no memory relocation.
+- When using matrix mode, additional memory is used only to allocate slices;
+  the records in rows and columns are shared.
+
+### Dataset Mode
+
+Tabula has three modes for a dataset: rows, columns, or matrix.
+
+For example, given a table of data,
+
+    col1,col2,col3
+    a,b,c
+    1,2,3
+
+- When in "rows" mode, each line is saved in its own slice, resulting in Rows:
+
+  ```
+  Rows[0]: [a b c]
+  Rows[1]: [1 2 3]
+  ```
+
+  Columns is used only to save record metadata: column name, type, flag and
+  value space.
+
+- When in "columns" mode, each line is saved in columns, resulting in Columns:
+
+  ```
+  Columns[0]: {col1 0 0 [] [a 1]}
+  Columns[1]: {col2 0 0 [] [b 2]}
+  Columns[2]: {col3 0 0 [] [c 3]}
+  ```
+
+  Each column will contain metadata including column name, type, flag, and
+  value space (all possible values that _may_ be contained in the column).
+
+  Rows in "columns" mode is empty.
+
+- When in "matrix" mode, each record is saved both in row and column using
+  shared pointer to record.
+
+  Matrix mode consumes more memory by allocating two slices, in rows and
+  columns, but gives a flexible way to manage records.
+
+## Features
+
+- **Switching between rows and columns mode**.
+
+- [**Random pick rows with or without replacement**](https://godoc.org/github.com/shuLhan/share/lib/tabula#RandomPickRows).
+
+- [**Random pick columns with or without replacement**](https://godoc.org/github.com/shuLhan/share/lib/tabula#RandomPickColumns).
+
+- [**Select column from dataset by index**](https://godoc.org/github.com/shuLhan/share/lib/tabula#SelectColumnsByIdx).
+
+- [**Sort columns by index**](https://godoc.org/github.com/shuLhan/share/lib/tabula#SortColumnsByIndex),
+  or indirect sort.
+
+- [**Split rows value by numeric**](https://godoc.org/github.com/shuLhan/share/lib/tabula#SplitRowsByNumeric).
+  For example, given two numeric rows,
+
+  ```
+  A: {1,2,3,4}
+  B: {5,6,7,8}
+  ```
+
+  if we split the rows by value 7, the data will be split into the left set
+
+  ```
+  A': {1,2}
+  B': {5,6}
+  ```
+
+  and the right set would be
+
+  ```
+  A'': {3,4}
+  B'': {7,8}
+  ```
+
+- [**Split rows by string**](https://godoc.org/github.com/shuLhan/share/lib/tabula#SplitRowsByCategorical).
+  For example, given two rows,
+
+  ```
+  X: [A,B,A,B,C,D,C,D]
+  Y: [1,2,3,4,5,6,7,8]
+  ```
+
+  if we split the rows with value set `[A,C]`, the data will be split into the
+  left set, which contains all rows that have A or C,
+
+  ```
+  		X': [A,A,C,C]
+  		Y': [1,3,5,7]
+  ```
+
+  and the right set, the excluded set, will contain all rows that are not A or C,
+
+  ```
+  		X'': [B,B,D,D]
+  		Y'': [2,4,6,8]
+  ```
+
+- [**Select row where**](https://godoc.org/github.com/shuLhan/share/lib/tabula#SelectRowsWhere).
+  Select rows at column index x where their value is equal to y (an analogy to
+  _select where_ in SQL).
+  For example, given the rows of a dataset,
+  ```
+  ROW-1: {1,A}
+  ROW-2: {2,B}
+  ROW-3: {3,A}
+  ROW-4: {4,C}
+  ```
+  we can select the rows where the second column contains 'A', which results in,
+  ```
+  ROW-1: {1,A}
+  ROW-3: {3,A}
+  ```
diff --git a/lib/tabula/claset.go b/lib/tabula/claset.go
new file mode 100644
index 00000000..5d7eea7e
--- /dev/null
+++ b/lib/tabula/claset.go
@@ -0,0 +1,303 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"strconv"
+
+	libnumbers "github.com/shuLhan/share/lib/numbers"
+	libstrings "github.com/shuLhan/share/lib/strings"
+)
+
+//
+// Claset define a dataset with class attribute.
+//
+type Claset struct {
+	// Dataset embedded, for implementing the dataset interface.
+	Dataset
+	// ClassIndex contain index for target classification in columns.
+	ClassIndex int `json:"ClassIndex"`
+
+	// vs contain a copy of value space.
+	vs []string
+	// counts contain the number of each class of value space in current set.
+	counts []int
+
+	// major contain the name of majority class in dataset.
+	major string
+	// minor contain the name of minority class in dataset.
+	minor string
+}
+
+//
+// NewClaset create and return new Claset object.
+//
+func NewClaset(mode int, types []int, names []string) (claset *Claset) {
+	claset = &Claset{
+		ClassIndex: -1,
+	}
+
+	claset.Init(mode, types, names)
+
+	return
+}
+
+//
+// Clone return a copy of current claset object.
+//
+func (claset *Claset) Clone() interface{} {
+	clone := Claset{
+		ClassIndex: claset.GetClassIndex(),
+		major:      claset.MajorityClass(),
+		minor:      claset.MinorityClass(),
+	}
+	clone.SetDataset(claset.GetDataset().Clone().(DatasetInterface))
+	return &clone
+}
+
+//
+// GetDataset return the dataset.
+//
+func (claset *Claset) GetDataset() DatasetInterface {
+	return &claset.Dataset
+}
+
+//
+// GetClassType return type of class in dataset.
+//
+func (claset *Claset) GetClassType() int {
+	if claset.Columns.Len() <= 0 {
+		return TString
+	}
+	return claset.Columns[claset.ClassIndex].Type
+}
+
+//
+// GetClassValueSpace return the class value space.
+//
+func (claset *Claset) GetClassValueSpace() []string {
+	if claset.Columns.Len() <= 0 {
+		return nil
+	}
+	return claset.Columns[claset.ClassIndex].ValueSpace
+}
+
+//
+// GetClassColumn return dataset class values in column.
+//
+func (claset *Claset) GetClassColumn() *Column {
+	if claset.Mode == DatasetModeRows {
+		claset.TransposeToColumns()
+	}
+	if claset.Columns.Len() <= 0 {
+		return nil
+	}
+	return &claset.Columns[claset.ClassIndex]
+}
+
+//
+// GetClassRecords return class values as records.
+//
+func (claset *Claset) GetClassRecords() *Records {
+	if claset.Mode == DatasetModeRows {
+		claset.TransposeToColumns()
+	}
+	if claset.Columns.Len() <= 0 {
+		return nil
+	}
+	return &claset.Columns[claset.ClassIndex].Records
+}
+
+//
+// GetClassAsStrings return all class values as slice of string.
+//
+func (claset *Claset) GetClassAsStrings() []string {
+	if claset.Mode == DatasetModeRows {
+		claset.TransposeToColumns()
+	}
+	if claset.Columns.Len() <= 0 {
+		return nil
+	}
+	return claset.Columns[claset.ClassIndex].ToStringSlice()
+}
+
+//
+// GetClassAsReals return class record value as slice of float64.
+//
+func (claset *Claset) GetClassAsReals() []float64 {
+	if claset.Mode == DatasetModeRows {
+		claset.TransposeToColumns()
+	}
+	if claset.Columns.Len() <= 0 {
+		return nil
+	}
+	return claset.Columns[claset.ClassIndex].ToFloatSlice()
+}
+
+//
+// GetClassAsInteger return class record value as slice of int64.
+//
+func (claset *Claset) GetClassAsInteger() []int64 {
+	if claset.Mode == DatasetModeRows {
+		claset.TransposeToColumns()
+	}
+	if claset.Columns.Len() <= 0 {
+		return nil
+	}
+	return claset.Columns[claset.ClassIndex].ToIntegers()
+}
+
+//
+// GetClassIndex return index of class attribute in dataset.
+//
+func (claset *Claset) GetClassIndex() int {
+	return claset.ClassIndex
+}
+
+//
+// MajorityClass return the majority class of data.
+//
+func (claset *Claset) MajorityClass() string {
+	return claset.major
+}
+
+//
+// MinorityClass return the minority class in dataset.
+//
+func (claset *Claset) MinorityClass() string {
+	return claset.minor
+}
+
+//
+// Counts return the number of each class in value-space.
+//
+func (claset *Claset) Counts() []int {
+	if len(claset.counts) <= 0 {
+		claset.CountValueSpaces()
+	}
+	return claset.counts
+}
+
+//
+// SetDataset in class set.
+//
+func (claset *Claset) SetDataset(dataset DatasetInterface) {
+	claset.Dataset = *(dataset.(*Dataset))
+}
+
+//
+// SetClassIndex will set the class index to `v`.
+//
+func (claset *Claset) SetClassIndex(v int) {
+	claset.ClassIndex = v
+}
+
+//
+// SetMajorityClass will set the majority class to `v`.
+//
+func (claset *Claset) SetMajorityClass(v string) {
+	claset.major = v
+}
+
+//
+// SetMinorityClass will set the minority class to `v`.
+//
+func (claset *Claset) SetMinorityClass(v string) {
+	claset.minor = v
+}
+
+//
+// CountValueSpaces will count number of value space in current dataset.
+//
+func (claset *Claset) CountValueSpaces() {
+	classv := claset.GetClassAsStrings()
+	claset.vs = claset.GetClassValueSpace()
+
+	claset.counts = libstrings.CountTokens(classv, claset.vs, false)
+}
+
+//
+// RecountMajorMinor recount major and minor class in claset.
+//
+func (claset *Claset) RecountMajorMinor() {
+	claset.CountValueSpaces()
+
+	_, maxIdx, maxok := libnumbers.IntsFindMax(claset.counts)
+	_, minIdx, minok := libnumbers.IntsFindMin(claset.counts)
+
+	if maxok {
+		claset.major = claset.vs[maxIdx]
+	}
+	if minok {
+		claset.minor = claset.vs[minIdx]
+	}
+}
+
+//
+// IsInSingleClass check whether all rows have the same target class value.
+// Return true and name of target if all rows are in the same class,
+// false and empty string otherwise.
+//
+func (claset *Claset) IsInSingleClass() (single bool, class string) {
+	classv := claset.GetClassAsStrings()
+
+	for i, t := range classv {
+		if i == 0 {
+			single = true
+			class = t
+			continue
+		}
+		if t != class {
+			return false, ""
+		}
+	}
+	return
+}
+
+//
+// GetMinorityRows return rows where their class is minority in dataset, or nil
+// if dataset is empty.
+//
+func (claset *Claset) GetMinorityRows() *Rows {
+	if claset.Len() == 0 {
+		return nil
+	}
+	if claset.vs == nil {
+		claset.RecountMajorMinor()
+	}
+
+	minRows := claset.GetRows().SelectWhere(claset.ClassIndex,
+		claset.minor)
+
+	return &minRows
+}
+
+//
+// String return the claset meta-data formatted as a JSON-like string.
+//
+func (claset *Claset) String() (s string) {
+	if claset.vs == nil {
+		claset.RecountMajorMinor()
+	}
+
+	s = fmt.Sprintf("'claset':{'rows': %d, 'columns': %d, ", claset.Len(),
+		claset.GetNColumn())
+
+	s += "'vs':{"
+	for x, v := range claset.vs {
+		if x > 0 {
+			s += ", "
+		}
+		s += "'" + v + "':" + strconv.Itoa(claset.counts[x])
+	}
+	s += "}"
+
+	s += ", 'major': '" + claset.major + "'"
+	s += ", 'minor': '" + claset.minor + "'"
+	s += "}"
+
+	return
+}
diff --git a/lib/tabula/clasetinterface.go b/lib/tabula/clasetinterface.go
new file mode 100644
index 00000000..ae8cdfcd
--- /dev/null
+++ b/lib/tabula/clasetinterface.go
@@ -0,0 +1,38 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+//
+// ClasetInterface is the interface for working with dataset containing class
+// or target attribute. It embed dataset interface.
+//
+// Yes, the name is Claset with single `s` not Classset with triple `s` to
+// minimize typo.
+//
+type ClasetInterface interface {
+	DatasetInterface
+
+	GetClassType() int
+	GetClassValueSpace() []string
+	GetClassColumn() *Column
+	GetClassRecords() *Records
+	GetClassAsStrings() []string
+	GetClassAsReals() []float64
+	GetClassIndex() int
+	MajorityClass() string
+	MinorityClass() string
+	Counts() []int
+
+	SetDataset(DatasetInterface)
+	SetClassIndex(int)
+	SetMajorityClass(string)
+	SetMinorityClass(string)
+
+	CountValueSpaces()
+	RecountMajorMinor()
+	IsInSingleClass() (bool, string)
+
+	GetMinorityRows() *Rows
+}
diff --git a/lib/tabula/column.go b/lib/tabula/column.go
new file mode 100644
index 00000000..f631fb30
--- /dev/null
+++ b/lib/tabula/column.go
@@ -0,0 +1,309 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"strconv"
+)
+
+//
+// Column represent slice of record. A vertical representation of data.
+//
+type Column struct {
+	// Name of column. String identifier for the column.
+	Name string
+	// Type of column. All record in column have the same type.
+	Type int
+	// Flag additional attribute that can be set to mark some value on this
+	// column
+	Flag int
+	// ValueSpace contain the possible value in records
+	ValueSpace []string
+	// Records contain column data.
+	Records Records
+}
+
+//
+// NewColumn return new column with type and name.
+//
+func NewColumn(colType int, colName string) (col *Column) {
+	col = &Column{
+		Type: colType,
+		Name: colName,
+		Flag: 0,
+	}
+
+	col.Records = make([]*Record, 0)
+
+	return
+}
+
+//
+// NewColumnString initialize column with type and data as string.
+//
+func NewColumnString(data []string, colType int, colName string) (
+	col *Column,
+	e error,
+) {
+	col = NewColumn(colType, colName)
+
+	datalen := len(data)
+
+	if datalen <= 0 {
+		return
+	}
+
+	col.Records = make([]*Record, datalen)
+
+	for x := 0; x < datalen; x++ {
+		col.Records[x] = NewRecordString(data[x])
+	}
+
+	return col, nil
+}
+
+//
+// NewColumnInt create new column with record type as integer, and fill it
+// with `data`.
+//
+func NewColumnInt(data []int64, colName string) (col *Column) {
+	col = NewColumn(TInteger, colName)
+
+	datalen := len(data)
+	if datalen <= 0 {
+		return
+	}
+
+	col.Records = make([]*Record, datalen)
+
+	for x, v := range data {
+		col.Records[x] = NewRecordInt(v)
+	}
+	return
+}
+
+//
+// NewColumnReal create new column with record type is real.
+//
+func NewColumnReal(data []float64, colName string) (col *Column) {
+	col = NewColumn(TReal, colName)
+
+	datalen := len(data)
+
+	if datalen <= 0 {
+		return
+	}
+
+	col.Records = make([]*Record, datalen)
+
+	for x := 0; x < datalen; x++ {
+		rec := NewRecordReal(data[x])
+		col.Records[x] = rec
+	}
+
+	return
+}
+
+//
+// SetType will set the type of column to `tipe`.
+//
+func (col *Column) SetType(tipe int) {
+	col.Type = tipe
+}
+
+//
+// SetName will set the name of column to `name`.
+//
+func (col *Column) SetName(name string) {
+	col.Name = name
+}
+
+//
+// GetType return the type of column.
+//
+func (col *Column) GetType() int {
+	return col.Type
+}
+
+//
+// GetName return the column name.
+//
+func (col *Column) GetName() string {
+	return col.Name
+}
+
+//
+// SetRecords will set records in column to `recs`.
+//
+func (col *Column) SetRecords(recs *Records) {
+	col.Records = *recs
+}
+
+//
+// Interface return the column object as an interface.
+//
+func (col *Column) Interface() interface{} {
+	return col
+}
+
+//
+// Reset column data and flag.
+//
+func (col *Column) Reset() {
+	col.Flag = 0
+	col.Records = make([]*Record, 0)
+}
+
+//
+// Len return number of record.
+//
+func (col *Column) Len() int {
+	return len(col.Records)
+}
+
+//
+// PushBack push record to the end of column.
+//
+func (col *Column) PushBack(r *Record) {
+	col.Records = append(col.Records, r)
+}
+
+//
+// PushRecords append slice of record to the end of column's records.
+//
+func (col *Column) PushRecords(rs []*Record) {
+	col.Records = append(col.Records, rs...)
+}
+
+//
+// ToIntegers convert slice of record to slice of int64.
+//
+func (col *Column) ToIntegers() []int64 {
+	newcol := make([]int64, col.Len())
+
+	for x := range col.Records {
+		newcol[x] = col.Records[x].Integer()
+	}
+
+	return newcol
+}
+
+//
+// ToFloatSlice convert slice of record to slice of float64.
+//
+func (col *Column) ToFloatSlice() (newcol []float64) {
+	newcol = make([]float64, col.Len())
+
+	for i := range col.Records {
+		newcol[i] = col.Records[i].Float()
+	}
+
+	return
+}
+
+//
+// ToStringSlice convert slice of record to slice of string.
+//
+func (col *Column) ToStringSlice() (newcol []string) {
+	newcol = make([]string, col.Len())
+
+	for i := range col.Records {
+		newcol[i] = col.Records[i].String()
+	}
+
+	return
+}
+
+//
+// ClearValues set all value in column to empty string or zero if column type is
+// numeric.
+//
+func (col *Column) ClearValues() {
+	for _, r := range col.Records {
+		r.Reset()
+	}
+}
+
+//
+// SetValueAt will set column value at cell `idx` with `v`, unless the index
+// is out of range.
+//
+func (col *Column) SetValueAt(idx int, v string) {
+	if idx < 0 {
+		return
+	}
+	if col.Records.Len() <= idx {
+		return
+	}
+	_ = col.Records[idx].SetValue(v, col.Type)
+}
+
+//
+// SetValueByNumericAt will set column value at cell `idx` with numeric value
+// `v`, unless the index is out of range.
+//
+func (col *Column) SetValueByNumericAt(idx int, v float64) {
+	if idx < 0 {
+		return
+	}
+	if col.Records.Len() <= idx {
+		return
+	}
+	switch col.Type {
+	case TString:
+		col.Records[idx].SetString(strconv.FormatFloat(v, 'f', -1, 64))
+	case TInteger:
+		col.Records[idx].SetInteger(int64(v))
+	case TReal:
+		col.Records[idx].SetFloat(v)
+	}
+}
+
+//
+// SetValues of all column record.
+//
+func (col *Column) SetValues(values []string) {
+	vallen := len(values)
+	reclen := col.Len()
+
+	// initialize column record if its empty.
+	if reclen <= 0 {
+		col.Records = make([]*Record, vallen)
+		reclen = vallen
+	}
+
+	// pick the least length
+	minlen := reclen
+	if vallen < reclen {
+		minlen = vallen
+	}
+
+	for x := 0; x < minlen; x++ {
+		_ = col.Records[x].SetValue(values[x], col.Type)
+	}
+}
+
+//
+// DeleteRecordAt will delete record at index `i` and return it.
+//
+func (col *Column) DeleteRecordAt(i int) *Record {
+	if i < 0 {
+		return nil
+	}
+
+	clen := col.Len()
+	if i >= clen {
+		return nil
+	}
+
+	r := col.Records[i]
+
+	last := clen - 1
+	copy(col.Records[i:], col.Records[i+1:])
+	col.Records[last] = nil
+	col.Records = col.Records[0:last]
+
+	return r
+}
diff --git a/lib/tabula/column_test.go b/lib/tabula/column_test.go
new file mode 100644
index 00000000..bf2434fc
--- /dev/null
+++ b/lib/tabula/column_test.go
@@ -0,0 +1,67 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+var data = []string{"9.987654321", "8.8", "7.7", "6.6", "5.5", "4.4", "3.3"}
+var expFloat = []float64{9.987654321, 8.8, 7.7, 6.6, 5.5, 4.4, 3.3}
+
+func initColReal(t *testing.T) (col *Column) {
+	col = NewColumn(TReal, "TREAL")
+
+	for x := range data {
+		rec, e := NewRecordBy(data[x], TReal)
+		if e != nil {
+			t.Fatal(e)
+		}
+
+		col.PushBack(rec)
+	}
+
+	return col
+}
+
+func TestToFloatSlice(t *testing.T) {
+	col := initColReal(t)
+	got := col.ToFloatSlice()
+
+	test.Assert(t, "", expFloat, got, true)
+}
+
+func TestToStringSlice(t *testing.T) {
+	var col Column
+
+	for x := range data {
+		rec, e := NewRecordBy(data[x], TString)
+		if e != nil {
+			t.Fatal(e)
+		}
+
+		col.PushBack(rec)
+	}
+
+	got := col.ToStringSlice()
+
+	test.Assert(t, "", data, got, true)
+}
+
+func TestDeleteRecordAt(t *testing.T) {
+	var exp []float64
+	del := 2
+
+	exp = append(exp, expFloat[:del]...)
+	exp = append(exp, expFloat[del+1:]...)
+
+	col := initColReal(t)
+	col.DeleteRecordAt(del)
+	got := col.ToFloatSlice()
+
+	test.Assert(t, "", exp, got, true)
+}
diff --git a/lib/tabula/columninterface.go b/lib/tabula/columninterface.go
new file mode 100644
index 00000000..8a961b8b
--- /dev/null
+++ b/lib/tabula/columninterface.go
@@ -0,0 +1,20 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+//
+// ColumnInterface define an interface for working with Column.
+//
+type ColumnInterface interface {
+	SetType(tipe int)
+	SetName(name string)
+
+	GetType() int
+	GetName() string
+
+	SetRecords(recs *Records)
+
+	Interface() interface{}
+}
diff --git a/lib/tabula/columns.go b/lib/tabula/columns.go
new file mode 100644
index 00000000..a5cd05d5
--- /dev/null
+++ b/lib/tabula/columns.go
@@ -0,0 +1,147 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	libbytes "github.com/shuLhan/share/lib/bytes"
+	libnumbers "github.com/shuLhan/share/lib/numbers"
+)
+
+//
+// Columns represent slice of Column.
+//
+type Columns []Column
+
+//
+// Len return length of columns.
+//
+func (cols *Columns) Len() int {
+	return len(*cols)
+}
+
+//
+// Reset each data and attribute in all columns.
+//
+func (cols *Columns) Reset() {
+	for x := range *cols {
+		(*cols)[x].Reset()
+	}
+}
+
+//
+// SetTypes of each column. The length of types should be equal to the number
+// of columns, otherwise it will use the minimum length of types and columns.
+//
+func (cols *Columns) SetTypes(types []int) {
+	typeslen := len(types)
+	colslen := len(*cols)
+	minlen := typeslen
+
+	if colslen < minlen {
+		minlen = colslen
+	}
+
+	for x := 0; x < minlen; x++ {
+		(*cols)[x].Type = types[x]
+	}
+}
+
+//
+// RandomPick pick n columns at random and return them as if they had been
+// shuffled.  If dup is true, a column that has been picked can be picked
+// again, otherwise each column is picked at most once.
+//
+// This function return picked and unpicked column and index of them.
+//
+func (cols *Columns) RandomPick(n int, dup bool, excludeIdx []int) (
+	picked Columns,
+	unpicked Columns,
+	pickedIdx []int,
+	unpickedIdx []int,
+) {
+	excLen := len(excludeIdx)
+	colsLen := len(*cols)
+	allowedLen := colsLen - excLen
+
+	// if duplication is not allowed, limit the number of selected
+	// column.
+	if n > allowedLen && !dup {
+		n = allowedLen
+	}
+
+	for ; n >= 1; n-- {
+		idx := libnumbers.IntPickRandPositive(colsLen, dup, pickedIdx,
+			excludeIdx)
+
+		pickedIdx = append(pickedIdx, idx)
+		picked = append(picked, (*cols)[idx])
+	}
+
+	// select unpicked columns using picked index.
+	for cid := range *cols {
+		// check if column index has been picked up
+		isPicked := false
+		for _, idx := range pickedIdx {
+			if cid == idx {
+				isPicked = true
+				break
+			}
+		}
+		if !isPicked {
+			unpicked = append(unpicked, (*cols)[cid])
+			unpickedIdx = append(unpickedIdx, cid)
+		}
+	}
+
+	return
+}
+
+//
+// GetMinMaxLength given a slice of column, find the minimum and maximum column
+// length among them.
+//
+func (cols *Columns) GetMinMaxLength() (min, max int) {
+	for _, col := range *cols {
+		collen := col.Len()
+		if collen < min {
+			min = collen
+		} else if collen > max {
+			max = collen
+		}
+	}
+	return
+}
+
+//
+// Join all column records value at index `row` using separator `sep` and make
+// sure if there is a separator in value it will be escaped with `esc`.
+//
+// Given slice of columns, where row is 1 and sep is `,` and escape is `\`
+//
+// 	  0 1 2
+// 	0 A B C
+// 	1 D , F <- row
+// 	2 G H I
+//
+// this function will return "D,\,,F" in bytes.
+//
+//
+func (cols *Columns) Join(row int, sep, esc []byte) (v []byte) {
+	for y, col := range *cols {
+		if y > 0 {
+			v = append(v, sep...)
+		}
+
+		rec := col.Records[row]
+		recV := rec.Bytes()
+
+		if rec.Type() == TString {
+			recV, _ = libbytes.EncloseToken(recV, sep, esc, nil)
+		}
+
+		v = append(v, recV...)
+	}
+	return
+}
diff --git a/lib/tabula/columns_test.go b/lib/tabula/columns_test.go
new file mode 100644
index 00000000..43b30028
--- /dev/null
+++ b/lib/tabula/columns_test.go
@@ -0,0 +1,56 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+func TestRandomPickColumns(t *testing.T) {
+	var dataset Dataset
+	var e error
+
+	dataset.Init(DatasetModeRows, testColTypes, testColNames)
+
+	dataset.Rows, e = initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	dataset.TransposeToColumns()
+
+	// random pick with duplicate
+	ncols := 6
+	dup := true
+	excludeIdx := []int{3}
+
+	for i := 0; i < 5; i++ {
+		picked, unpicked, _, _ :=
+			dataset.Columns.RandomPick(ncols, dup, excludeIdx)
+
+		// check if unpicked item exist in picked items.
+		for _, un := range unpicked {
+			for _, pick := range picked {
+				test.Assert(t, "", un, pick, false)
+			}
+		}
+	}
+
+	// random pick without duplicate
+	dup = false
+	for i := 0; i < 5; i++ {
+		picked, unpicked, _, _ :=
+			dataset.Columns.RandomPick(ncols, dup, excludeIdx)
+
+		// check if unpicked item exist in picked items.
+		for _, un := range unpicked {
+			for _, pick := range picked {
+				test.Assert(t, "", un, pick, false)
+			}
+		}
+	}
+}
diff --git a/lib/tabula/dataset.go b/lib/tabula/dataset.go
new file mode 100644
index 00000000..703aca35
--- /dev/null
+++ b/lib/tabula/dataset.go
@@ -0,0 +1,747 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"errors"
+	"math"
+)
+
+const (
+	// DatasetNoMode is the zero value; SetMode handles it like matrix mode.
+	DatasetNoMode = 0
+	// DatasetModeRows store the pushed records in Dataset.Rows.
+	DatasetModeRows = 1
+	// DatasetModeColumns store the pushed records in Dataset.Columns.
+	DatasetModeColumns = 2
+	// DatasetModeMatrix store the pushed records in both rows and columns.
+	DatasetModeMatrix = 4
+)
+
+var (
+	// ErrColIdxOutOfRange is returned when a column index is not valid.
+	ErrColIdxOutOfRange = errors.New("tabula: Column index out of range")
+	// ErrInvalidColType is returned when a column type does not fit the
+	// requested operation.
+	ErrInvalidColType = errors.New("tabula: Invalid column type")
+	// ErrMisColLength is returned when the number of columns does not
+	// match the length of the given parameter.
+	ErrMisColLength = errors.New("tabula: mismatch on column length")
+)
+
+//
+// Dataset contain the records, either as list of rows, list of columns, or
+// both, and the mode that define which of them is in use.
+//
+type Dataset struct {
+	// Mode is one of the DatasetMode* values; it select the storage.
+	Mode int
+	// Columns hold the column attributes and records, column by column.
+	Columns Columns
+	// Rows hold the records, row by row.
+	Rows Rows
+}
+
+//
+// NewDataset allocate a dataset and initialize it with the given mode,
+// column types, and column names.
+//
+func NewDataset(mode int, types []int, names []string) (
+	dataset *Dataset,
+) {
+	dataset = new(Dataset)
+	dataset.Init(mode, types, names)
+
+	return dataset
+}
+
+//
+// Init create the columns from `types`, set their name from `names`, and
+// then set the dataset mode.
+//
+func (dataset *Dataset) Init(mode int, types []int, names []string) {
+	// The length of nil types is zero, which create empty columns.
+	dataset.Columns = make(Columns, len(types))
+	if types != nil {
+		dataset.Columns.SetTypes(types)
+	}
+
+	dataset.SetColumnsName(names)
+	dataset.SetMode(mode)
+}
+
+//
+// Clone return a new dataset with the same mode and the same columns
+// attributes (type, name, and value space).  The records are not copied.
+//
+func (dataset *Dataset) Clone() interface{} {
+	clone := NewDataset(dataset.GetMode(), nil, nil)
+
+	for x := range dataset.Columns {
+		src := &dataset.Columns[x]
+
+		clone.PushColumn(Column{
+			Type:       src.Type,
+			Name:       src.Name,
+			ValueSpace: src.ValueSpace,
+		})
+	}
+
+	return clone
+}
+
+//
+// Reset the columns and replace the rows with an empty one.  It always
+// return nil.
+//
+func (dataset *Dataset) Reset() error {
+	dataset.Columns.Reset()
+	dataset.Rows = Rows{}
+
+	return nil
+}
+
+//
+// GetMode return the current mode of dataset (one of DatasetMode* values).
+//
+func (dataset *Dataset) GetMode() int {
+	return dataset.Mode
+}
+
+//
+// SetMode change the dataset mode to `mode` and reset the storage used by
+// that mode.  Any value other than rows or columns is handled as matrix.
+//
+func (dataset *Dataset) SetMode(mode int) {
+	if mode == DatasetModeRows {
+		dataset.Mode = DatasetModeRows
+		dataset.Rows = make(Rows, 0)
+		return
+	}
+
+	if mode == DatasetModeColumns {
+		dataset.Mode = DatasetModeColumns
+		dataset.Columns.Reset()
+		return
+	}
+
+	// Matrix, no-mode, or unknown value.
+	dataset.Mode = DatasetModeMatrix
+	dataset.Rows = make(Rows, 0)
+	dataset.Columns.Reset()
+}
+
+//
+// GetNColumn return the number of columns in dataset.  If no column is
+// defined and the mode is rows, the length of the first row is used.
+//
+func (dataset *Dataset) GetNColumn() (ncol int) {
+	if n := len(dataset.Columns); n > 0 {
+		return n
+	}
+
+	// No column defined, fall back to the first row on rows mode.
+	if dataset.Mode == DatasetModeRows && len(dataset.Rows) > 0 {
+		return dataset.Rows[0].Len()
+	}
+
+	return 0
+}
+
+//
+// GetNRow return the number of rows in dataset.  On columns mode it is the
+// number of records in the first column, otherwise the length of Rows.
+//
+func (dataset *Dataset) GetNRow() (nrow int) {
+	switch dataset.Mode {
+	case DatasetModeColumns:
+		if len(dataset.Columns) > 0 {
+			// use the length of records in the first column.
+			return dataset.Columns[0].Len()
+		}
+		return 0
+
+	case DatasetModeRows, DatasetModeMatrix, DatasetNoMode:
+		// matrix mode could have empty either in rows or column.
+		return len(dataset.Rows)
+	}
+
+	// unknown mode.
+	return 0
+}
+
+//
+// Len return the number of rows in dataset; it is a shortcut of GetNRow.
+//
+func (dataset *Dataset) Len() int {
+	return dataset.GetNRow()
+}
+
+//
+// GetColumnsType return the type of each column, in column order.  It
+// return nil if dataset has no columns.
+//
+func (dataset *Dataset) GetColumnsType() (types []int) {
+	for _, col := range dataset.Columns {
+		types = append(types, col.Type)
+	}
+
+	return types
+}
+
+//
+// SetColumnsType replace all columns in dataset with new columns, one for
+// each item in `types`.
+//
+func (dataset *Dataset) SetColumnsType(types []int) {
+	cols := make(Columns, len(types))
+	cols.SetTypes(types)
+
+	dataset.Columns = cols
+}
+
+//
+// GetColumnTypeAt return the type of column at index `idx` in dataset.
+//
+// It return TUndefined and ErrColIdxOutOfRange if `idx` is negative or if no
+// column is defined at that index.
+//
+func (dataset *Dataset) GetColumnTypeAt(idx int) (int, error) {
+	// Check against the length of Columns, not GetNColumn: on rows mode
+	// GetNColumn may report the row length while Columns is still empty.
+	if idx < 0 || idx >= len(dataset.Columns) {
+		return TUndefined, ErrColIdxOutOfRange
+	}
+
+	return dataset.Columns[idx].Type, nil
+}
+
+//
+// SetColumnTypeAt will set the type of column at index `idx` to `tipe`.
+//
+// It return ErrColIdxOutOfRange if `idx` is negative or if no column is
+// defined at that index.
+//
+func (dataset *Dataset) SetColumnTypeAt(idx, tipe int) error {
+	// Check against the length of Columns, not GetNColumn: on rows mode
+	// GetNColumn may report the row length while Columns is still empty.
+	if idx < 0 || idx >= len(dataset.Columns) {
+		return ErrColIdxOutOfRange
+	}
+
+	dataset.Columns[idx].Type = tipe
+
+	return nil
+}
+
+//
+// GetColumnsName return the name of each column, in column order.  It
+// return nil if dataset has no columns.
+//
+func (dataset *Dataset) GetColumnsName() (names []string) {
+	for _, col := range dataset.Columns {
+		names = append(names, col.Name)
+	}
+
+	return names
+}
+
+//
+// SetColumnsName set the name of columns using `names`, in order.
+//
+// If dataset has no columns, new columns are created, one for each name.
+// If the number of names and columns differ, only the first
+// min(len(names), len(columns)) columns are renamed.
+//
+func (dataset *Dataset) SetColumnsName(names []string) {
+	nameslen := len(names)
+
+	if nameslen <= 0 {
+		// empty names, return immediately.
+		return
+	}
+
+	// Use the real length of Columns.  GetNColumn may report the row
+	// length on rows mode while Columns is empty, which would skip the
+	// allocation below and index out of range.
+	collen := len(dataset.Columns)
+
+	if collen <= 0 {
+		dataset.Columns = make(Columns, nameslen)
+		collen = nameslen
+	}
+
+	// find minimum length
+	minlen := collen
+	if nameslen < collen {
+		minlen = nameslen
+	}
+
+	for x := 0; x < minlen; x++ {
+		dataset.Columns[x].Name = names[x]
+	}
+}
+
+//
+// AddColumn create a column without records, with the given type, name,
+// and value space, and push it into dataset.
+//
+func (dataset *Dataset) AddColumn(tipe int, name string, vs []string) {
+	dataset.PushColumn(Column{
+		Type:       tipe,
+		Name:       name,
+		ValueSpace: vs,
+	})
+}
+
+//
+// GetColumn return pointer to column object at index `idx`.  If `idx` is out
+// of range (negative, or equal or greater than the number of columns) it
+// return nil.
+//
+// If dataset mode is rows, the dataset is transposed to columns first.
+//
+func (dataset *Dataset) GetColumn(idx int) (col *Column) {
+	if idx < 0 || idx >= dataset.GetNColumn() {
+		return nil
+	}
+
+	if dataset.Mode == DatasetModeRows {
+		dataset.TransposeToColumns()
+	}
+
+	// GetNColumn may have reported the row length; make sure the column
+	// really exist after transposing.
+	if idx >= len(dataset.Columns) {
+		return nil
+	}
+
+	return &dataset.Columns[idx]
+}
+
+//
+// GetColumnByName return pointer to the first column which name is equal
+// to `name`, or nil if no column match.  If dataset mode is rows, the
+// dataset is transposed to columns first.
+//
+func (dataset *Dataset) GetColumnByName(name string) (col *Column) {
+	if dataset.Mode == DatasetModeRows {
+		dataset.TransposeToColumns()
+	}
+
+	for x := range dataset.Columns {
+		if dataset.Columns[x].Name == name {
+			return &dataset.Columns[x]
+		}
+	}
+
+	return nil
+}
+
+//
+// GetColumns return pointer to dataset columns as is, without transposing.
+//
+func (dataset *Dataset) GetColumns() *Columns {
+	return &dataset.Columns
+}
+
+//
+// SetColumns replace the dataset columns with value pointed by `cols`.
+//
+func (dataset *Dataset) SetColumns(cols *Columns) {
+	dataset.Columns = *cols
+}
+
+//
+// GetRow return pointer to row at index `idx`, or nil if `idx` is negative
+// or not less than the number of rows.
+//
+func (dataset *Dataset) GetRow(idx int) *Row {
+	if idx < 0 || idx >= dataset.Rows.Len() {
+		return nil
+	}
+
+	return dataset.Rows[idx]
+}
+
+//
+// GetRows return pointer to dataset rows as is, without transposing.
+//
+func (dataset *Dataset) GetRows() *Rows {
+	return &dataset.Rows
+}
+
+//
+// SetRows replace the dataset rows with value pointed by `rows`.
+//
+func (dataset *Dataset) SetRows(rows *Rows) {
+	dataset.Rows = *rows
+}
+
+//
+// GetData return the data based on mode: pointer to Rows on rows mode,
+// pointer to Columns on columns mode, pointer to new Matrix that refer to
+// both on matrix or no-mode, and nil on unknown mode.
+//
+func (dataset *Dataset) GetData() interface{} {
+	mode := dataset.Mode
+
+	if mode == DatasetModeRows {
+		return &dataset.Rows
+	}
+	if mode == DatasetModeColumns {
+		return &dataset.Columns
+	}
+	if mode == DatasetModeMatrix || mode == DatasetNoMode {
+		matrix := Matrix{
+			Columns: &dataset.Columns,
+			Rows:    &dataset.Rows,
+		}
+		return &matrix
+	}
+
+	return nil
+}
+
+//
+// GetDataAsRows return the rows, transposing first if mode is columns.
+//
+func (dataset *Dataset) GetDataAsRows() *Rows {
+	if dataset.Mode == DatasetModeColumns {
+		dataset.TransposeToRows()
+	}
+	return &dataset.Rows
+}
+
+//
+// GetDataAsColumns return the columns, transposing first if mode is rows.
+//
+func (dataset *Dataset) GetDataAsColumns() (columns *Columns) {
+	if dataset.Mode == DatasetModeRows {
+		dataset.TransposeToColumns()
+	}
+	return &dataset.Columns
+}
+
+//
+// TransposeToColumns copy all records in Rows into Columns.  A dataset in
+// rows mode is switched to columns mode and its Rows is set to nil.
+//
+func (dataset *Dataset) TransposeToColumns() {
+	if dataset.GetNRow() <= 0 {
+		// no rows, nothing to transpose.
+		return
+	}
+
+	ncol := dataset.GetNColumn()
+	if ncol <= 0 {
+		// no columns defined yet, create them using the record types
+		// of the first row.
+		types := dataset.GetRow(0).Types()
+		dataset.SetColumnsType(types)
+		ncol = len(types)
+	}
+
+	orgmode := dataset.GetMode()
+
+	switch orgmode {
+	case DatasetModeRows:
+		// always transpose on rows mode.
+	case DatasetModeColumns, DatasetModeMatrix, DatasetNoMode:
+		// check whether the first column already contain records.
+		nrow := dataset.Columns[0].Len()
+		if nrow > 0 {
+			// the columns are not empty, which mean the data is
+			// already transposed.
+			return
+		}
+	}
+
+	// only copy as many fields as both the first row and columns have.
+	minlen := len(*dataset.GetRow(0))
+
+	if minlen > ncol {
+		minlen = ncol
+	}
+
+	switch orgmode {
+	case DatasetModeRows, DatasetNoMode:
+		dataset.SetMode(DatasetModeColumns)
+	}
+
+	for _, row := range dataset.Rows {
+		for y := 0; y < minlen; y++ {
+			dataset.Columns[y].PushBack((*row)[y])
+		}
+	}
+
+	// drop the rows only if the original mode is rows; on matrix mode
+	// the rows must be kept next to the columns.
+	switch orgmode {
+	case DatasetModeRows:
+		dataset.Rows = nil
+	}
+}
+
+//
+// TransposeToRows rebuild Rows from the records in Columns.  A dataset in
+// columns mode is switched to rows mode and its columns are reset.
+//
+func (dataset *Dataset) TransposeToRows() {
+	orgmode := dataset.GetMode()
+
+	if orgmode == DatasetModeRows {
+		// the data is already in rows.
+		return
+	}
+
+	if orgmode == DatasetModeColumns {
+		// only change the mode when transposing from columns mode.
+		dataset.SetMode(DatasetModeRows)
+	}
+
+	// Find the length of the longest column; it become number of rows.
+	rowlen := math.MinInt32
+	flen := len(dataset.Columns)
+
+	for f := 0; f < flen; f++ {
+		l := dataset.Columns[f].Len()
+
+		if l > rowlen {
+			rowlen = l
+		}
+	}
+
+	dataset.Rows = make(Rows, 0)
+
+	// Build each row; a column shorter than `r` is padded with NewRecord.
+	for r := 0; r < rowlen; r++ {
+		row := make(Row, flen)
+
+		for f := 0; f < flen; f++ {
+			if dataset.Columns[f].Len() > r {
+				row[f] = dataset.Columns[f].Records[r]
+			} else {
+				row[f] = NewRecord()
+			}
+		}
+
+		dataset.Rows = append(dataset.Rows, &row)
+	}
+
+	// Reset the columns only if the original mode is columns; on matrix
+	// mode the columns must be kept next to the rows.
+	if orgmode == DatasetModeColumns {
+		dataset.Columns.Reset()
+	}
+}
+
+//
+// PushRow add `row` into dataset: appended to Rows on rows mode, spread to
+// Columns on columns mode, and both on matrix or no-mode.
+//
+func (dataset *Dataset) PushRow(row *Row) {
+	mode := dataset.GetMode()
+	both := mode == DatasetModeMatrix || mode == DatasetNoMode
+
+	if both || mode == DatasetModeRows {
+		dataset.Rows = append(dataset.Rows, row)
+	}
+	if both || mode == DatasetModeColumns {
+		dataset.PushRowToColumns(row)
+	}
+}
+
+//
+// PushRowToColumns push each record in `row` to the column at the same
+// index.  If dataset has no columns, they are created using the row
+// length.  Records without matching column are ignored.
+//
+func (dataset *Dataset) PushRowToColumns(row *Row) {
+	rowlen := row.Len()
+	if rowlen <= 0 {
+		// empty row, nothing to push.
+		return
+	}
+
+	// create the columns if its not initialized yet.
+	if len(dataset.Columns) <= 0 {
+		dataset.Columns = make(Columns, rowlen)
+	}
+
+	// push no more than the row and the columns both can hold.
+	n := len(dataset.Columns)
+	if rowlen < n {
+		n = rowlen
+	}
+
+	for x := 0; x < n; x++ {
+		dataset.Columns[x].PushBack((*row)[x])
+	}
+}
+
+//
+// FillRowsWithColumn given a column, fill the dataset with row where the record
+// only set at index `colIdx`.  It only work if dataset mode is rows.
+//
+// Example, content of dataset was,
+//
+// index:	0 1 2
+// 	A B C
+// 	X     (step 1) nrow = 2
+//
+// If we filled column at index 2 with [Y Z], the dataset will become:
+//
+// index:	0 1 2
+// 	A B C
+// 	X   Y (step 2) fill the empty row
+// 	    Z (step 3) create dummy row which contain the rest of column data.
+//
+// If the column has fewer records than the number of rows that need to be
+// filled, the remaining rows are left as is.
+//
+func (dataset *Dataset) FillRowsWithColumn(colIdx int, col Column) {
+	if dataset.GetMode() != DatasetModeRows {
+		// Only work if dataset mode is ROWS
+		return
+	}
+
+	nrow := dataset.GetNRow()
+	emptyAt := nrow
+
+	// (step 1) Find the first row with empty record at `colIdx`.
+	for x, row := range dataset.Rows {
+		if row.IsNilAt(colIdx) {
+			emptyAt = x
+			break
+		}
+	}
+
+	// (step 2) Fill the empty rows using column records.  Stop when
+	// the column run out of records, otherwise indexing col.Records
+	// will panic.
+	y := 0
+	for x := emptyAt; x < nrow && y < len(col.Records); x++ {
+		dataset.Rows[x].SetValueAt(colIdx, col.Records[y])
+		y++
+	}
+
+	// (step 3) Continue filling the column but using dummy row which
+	// contain only record at index `colIdx`.
+	ncol := dataset.GetNColumn()
+	nrow = col.Len()
+	for ; y < nrow; y++ {
+		row := make(Row, ncol)
+
+		for z := 0; z < ncol; z++ {
+			if z == colIdx {
+				row[colIdx] = col.Records[y]
+			} else {
+				row[z] = NewRecord()
+			}
+		}
+
+		dataset.PushRow(&row)
+	}
+}
+
+//
+// PushColumn add `col` into dataset.  If no column with the same name
+// exist, it is appended as new column; otherwise its records are merged
+// into the existing one.
+//
+func (dataset *Dataset) PushColumn(col Column) {
+	// Search for the first column with the same name.
+	colIdx := -1
+	for x := range dataset.Columns {
+		if dataset.Columns[x].Name == col.Name {
+			colIdx = x
+			break
+		}
+	}
+	exist := colIdx >= 0
+
+	mode := dataset.GetMode()
+
+	switch mode {
+	case DatasetModeRows:
+		if exist {
+			dataset.FillRowsWithColumn(colIdx, col)
+			return
+		}
+
+		// New column: keep its attributes in Columns, move its
+		// records to the rows, and then empty the column records.
+		dataset.Columns = append(dataset.Columns, col)
+		dataset.PushColumnToRows(col)
+		dataset.Columns[dataset.GetNColumn()-1].Reset()
+
+	case DatasetModeColumns, DatasetModeMatrix, DatasetNoMode:
+		if exist {
+			dataset.Columns[colIdx].PushRecords(col.Records)
+			return
+		}
+
+		dataset.Columns = append(dataset.Columns, col)
+
+		// Matrix and no-mode also keep the records in rows.
+		if mode != DatasetModeColumns {
+			dataset.PushColumnToRows(col)
+		}
+	}
+}
+
+//
+// PushColumnToRows append each record in `col` to the row at the same
+// index, from top to bottom.  If dataset has no rows, empty rows are
+// created first, one for each record.
+//
+func (dataset *Dataset) PushColumnToRows(col Column) {
+	colsize := col.Len()
+	if colsize <= 0 {
+		// empty column, nothing to push.
+		return
+	}
+
+	nrow := dataset.GetNRow()
+	if nrow <= 0 {
+		// No rows yet, create one empty row for each record.
+		dataset.Rows = make(Rows, colsize)
+
+		for x := range dataset.Rows {
+			row := make(Row, 0)
+			dataset.Rows[x] = &row
+		}
+
+		nrow = colsize
+	}
+
+	// Push no more than the number of rows or the number of records,
+	// whichever is smaller.
+	minrow := nrow
+	if colsize < nrow {
+		minrow = colsize
+	}
+
+	for x := 0; x < minrow; x++ {
+		dataset.Rows[x].PushBack(col.Records[x])
+	}
+}
+
+//
+// MergeColumns push every column of `other` dataset into current dataset.
+// If `other` is in rows mode, it is transposed to columns for the merge
+// and transposed back to rows afterward.
+//
+func (dataset *Dataset) MergeColumns(other DatasetInterface) {
+	otherIsRows := other.GetMode() == DatasetModeRows
+
+	if otherIsRows {
+		other.TransposeToColumns()
+	}
+
+	for _, col := range *other.GetDataAsColumns() {
+		dataset.PushColumn(col)
+	}
+
+	if otherIsRows {
+		other.TransposeToRows()
+	}
+}
+
+//
+// MergeRows push every row of `other` dataset into current dataset.
+//
+func (dataset *Dataset) MergeRows(other DatasetInterface) {
+	for _, row := range *other.GetDataAsRows() {
+		dataset.PushRow(row)
+	}
+}
+
+//
+// DeleteRow will detach row at index `i` from dataset and return it.  It
+// return nil if `i` is out of range.
+//
+// On columns mode the dataset is transposed to rows, the row is deleted, and
+// then the dataset is transposed back to columns.  On matrix mode the record
+// at index `i` is also deleted from each column.
+//
+func (dataset *Dataset) DeleteRow(i int) (row *Row) {
+	// Use GetNRow instead of the length of Rows: on columns mode the
+	// records live in Columns, so checking Rows would reject any index.
+	if i < 0 || i >= dataset.GetNRow() {
+		return nil
+	}
+
+	switch dataset.GetMode() {
+	case DatasetModeRows:
+		row = dataset.Rows.Del(i)
+
+	case DatasetModeColumns:
+		// The columns are rebuilt from the remaining rows, so the
+		// record must not be deleted from the columns again.
+		dataset.TransposeToRows()
+		row = dataset.Rows.Del(i)
+		dataset.TransposeToColumns()
+
+		if dataset.GetMode() != DatasetModeColumns {
+			// No rows left, TransposeToColumns returned early;
+			// restore the original mode manually.
+			dataset.SetMode(DatasetModeColumns)
+			dataset.Rows = nil
+		}
+
+	default:
+		row = dataset.Rows.Del(i)
+
+		// Delete record in each columns as the same index as deleted
+		// row.
+		for x := range dataset.Columns {
+			dataset.Columns[x].DeleteRecordAt(i)
+		}
+	}
+
+	return row
+}
diff --git a/lib/tabula/dataset_bench_test.go b/lib/tabula/dataset_bench_test.go
new file mode 100644
index 00000000..86e36cc9
--- /dev/null
+++ b/lib/tabula/dataset_bench_test.go
@@ -0,0 +1,20 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"testing"
+)
+
+// BenchmarkPushRow measure pushing the test rows into a dataset in rows
+// mode; the same dataset is reused in all iterations.
+func BenchmarkPushRow(b *testing.B) {
+	dataset := NewDataset(DatasetModeRows, nil, nil)
+
+	for i := 0; i < b.N; i++ {
+		if e := populateWithRows(dataset); e != nil {
+			b.Fatal(e)
+		}
+	}
+}
diff --git a/lib/tabula/dataset_test.go b/lib/tabula/dataset_test.go
new file mode 100644
index 00000000..0b43f71c
--- /dev/null
+++ b/lib/tabula/dataset_test.go
@@ -0,0 +1,365 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+// datasetRows is the test data in row order; each row contain an integer,
+// a real, and a string field.
+var datasetRows = [][]string{
+	{"0", "1", "A"},
+	{"1", "1.1", "B"},
+	{"2", "1.2", "A"},
+	{"3", "1.3", "B"},
+	{"4", "1.4", "C"},
+	{"5", "1.5", "D"},
+	{"6", "1.6", "C"},
+	{"7", "1.7", "D"},
+	{"8", "1.8", "E"},
+	{"9", "1.9", "F"},
+}
+
+// datasetCols is the same data as datasetRows, in column order.
+var datasetCols = [][]string{
+	{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"},
+	{"1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9"},
+	{"A", "B", "A", "B", "C", "D", "C", "D", "E", "F"},
+}
+
+// datasetTypes is the type of each column in the test data.
+var datasetTypes = []int{
+	TInteger,
+	TReal,
+	TString,
+}
+
+// datasetNames is the name of each column in the test data.
+var datasetNames = []string{"int", "real", "string"}
+
+// populateWithRows convert each item in datasetRows into a Row, using
+// datasetTypes as record types, and push it into dataset.  It stop and
+// return the error on the first record that can not be created.
+func populateWithRows(dataset *Dataset) error {
+	for _, rowin := range datasetRows {
+		row := make(Row, 0, len(rowin))
+
+		for x, recin := range rowin {
+			rec, e := NewRecordBy(recin, datasetTypes[x])
+			if e != nil {
+				return e
+			}
+
+			row = append(row, rec)
+		}
+
+		dataset.PushRow(&row)
+	}
+
+	return nil
+}
+
+// populateWithColumns create a column from each item in datasetCols and
+// push it into dataset.  The test is stopped on the first error.
+func populateWithColumns(t *testing.T, dataset *Dataset) {
+	for x, values := range datasetCols {
+		col, e := NewColumnString(values, datasetTypes[x],
+			datasetNames[x])
+		if e != nil {
+			t.Fatal(e)
+		}
+
+		dataset.PushColumn(*col)
+	}
+}
+
+// createDataset return a dataset in rows mode, with the test column types
+// and names, filled with the test rows.
+func createDataset(t *testing.T) (dataset *Dataset) {
+	dataset = NewDataset(DatasetModeRows, datasetTypes, datasetNames)
+
+	if e := populateWithRows(dataset); e != nil {
+		t.Fatal(e)
+	}
+
+	return dataset
+}
+
+// DatasetStringJoinByIndex return the concatenation of "&" followed by
+// the printed form of dataset[idx], for each idx in `indis`, in order.
+func DatasetStringJoinByIndex(t *testing.T, dataset [][]string, indis []int) (res string) {
+	for _, idx := range indis {
+		res += fmt.Sprint("&", dataset[idx])
+	}
+
+	return res
+}
+
+// DatasetRowsJoin return the concatenation of "&" followed by the printed
+// form of each row in datasetRows.
+func DatasetRowsJoin(t *testing.T) (s string) {
+	for _, row := range datasetRows {
+		s += fmt.Sprint("&", row)
+	}
+
+	return s
+}
+
+// DatasetColumnsJoin return the concatenation of the printed form of each
+// column in datasetCols.
+func DatasetColumnsJoin(t *testing.T) (s string) {
+	for _, col := range datasetCols {
+		s += fmt.Sprint(col)
+	}
+
+	return s
+}
+
+func TestSplitRowsByNumeric(t *testing.T) {
+	dataset := createDataset(t)
+
+	cases := []struct {
+		colIdx   int
+		splitVal float64
+		expLeft  []int
+		expRight []int
+	}{{
+		// Split integer by float
+		colIdx:   0,
+		splitVal: 4.5,
+		expLeft:  []int{0, 1, 2, 3, 4},
+		expRight: []int{5, 6, 7, 8, 9},
+	}, {
+		// Split by float
+		colIdx:   1,
+		splitVal: 1.8,
+		expLeft:  []int{0, 1, 2, 3, 4, 5, 6, 7},
+		expRight: []int{8, 9},
+	}}
+
+	for _, c := range cases {
+		splitL, splitR, e := SplitRowsByNumeric(dataset, c.colIdx,
+			c.splitVal)
+		if e != nil {
+			t.Fatal(e)
+		}
+
+		exp := DatasetStringJoinByIndex(t, datasetRows, c.expLeft)
+		got := fmt.Sprint(splitL.GetDataAsRows())
+
+		test.Assert(t, "", exp, got, true)
+
+		exp = DatasetStringJoinByIndex(t, datasetRows, c.expRight)
+		got = fmt.Sprint(splitR.GetDataAsRows())
+
+		test.Assert(t, "", exp, got, true)
+	}
+}
+
+func TestSplitRowsByCategorical(t *testing.T) {
+	dataset := createDataset(t)
+
+	splitL, splitR, e := SplitRowsByCategorical(dataset, 2,
+		[]string{"A", "D"})
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	// The left set contain the rows which third field is "A" or "D".
+	exp := DatasetStringJoinByIndex(t, datasetRows, []int{0, 2, 5, 7})
+	got := fmt.Sprint(splitL.GetDataAsRows())
+
+	test.Assert(t, "", exp, got, true)
+
+	// The right set contain the rest of the rows.
+	exp = DatasetStringJoinByIndex(t, datasetRows,
+		[]int{1, 3, 4, 6, 8, 9})
+	got = fmt.Sprint(splitR.GetDataAsRows())
+
+	test.Assert(t, "", exp, got, true)
+}
+
+func TestModeColumnsPushColumn(t *testing.T) {
+	var exp, got string
+
+	dataset := NewDataset(DatasetModeColumns, nil, nil)
+
+	for x, values := range datasetCols {
+		col, e := NewColumnString(values, datasetTypes[x],
+			datasetNames[x])
+		if e != nil {
+			t.Fatal(e)
+		}
+
+		dataset.PushColumn(*col)
+
+		exp += fmt.Sprint(values)
+		got += fmt.Sprint(dataset.Columns[x].Records)
+	}
+
+	test.Assert(t, "", exp, got, true)
+
+	// The rows are expected to be printed as empty string.
+	exp = ""
+	got = fmt.Sprint(dataset.Rows)
+
+	test.Assert(t, "", exp, got, true)
+}
+
+func TestModeRowsPushColumn(t *testing.T) {
+	dataset := NewDataset(DatasetModeRows, nil, nil)
+
+	populateWithColumns(t, dataset)
+
+	// The rows must contain all the pushed records.
+	test.Assert(t, "", DatasetRowsJoin(t), fmt.Sprint(dataset.Rows), true)
+
+	// The columns must keep their attributes only, without records.
+	exp := "[{int 1 0 [] []} {real 2 0 [] []} {string 0 0 [] []}]"
+	got := fmt.Sprint(dataset.Columns)
+
+	test.Assert(t, "", exp, got, true)
+}
+
+func TestModeMatrixPushColumn(t *testing.T) {
+	var exp, got string
+
+	dataset := NewDataset(DatasetModeMatrix, nil, nil)
+
+	for x, values := range datasetCols {
+		col, e := NewColumnString(values, datasetTypes[x],
+			datasetNames[x])
+		if e != nil {
+			t.Fatal(e)
+		}
+
+		dataset.PushColumn(*col)
+
+		exp += fmt.Sprint(values)
+		got += fmt.Sprint(dataset.Columns[x].Records)
+	}
+
+	test.Assert(t, "", exp, got, true)
+
+	// On matrix mode the rows must be filled too.
+	exp = DatasetRowsJoin(t)
+	got = fmt.Sprint(dataset.Rows)
+
+	test.Assert(t, "", exp, got, true)
+}
+
+func TestModeRowsPushRows(t *testing.T) {
+	dataset := NewDataset(DatasetModeRows, nil, nil)
+
+	if e := populateWithRows(dataset); e != nil {
+		t.Fatal(e)
+	}
+
+	test.Assert(t, "", DatasetRowsJoin(t), fmt.Sprint(dataset.Rows), true)
+}
+
+func TestModeColumnsPushRows(t *testing.T) {
+	dataset := NewDataset(DatasetModeColumns, nil, nil)
+
+	if e := populateWithRows(dataset); e != nil {
+		t.Fatal(e)
+	}
+
+	// The rows are expected to be printed as empty string.
+	test.Assert(t, "", "", fmt.Sprint(dataset.Rows), true)
+
+	// The columns must contain all the pushed records.
+	got := ""
+	for _, col := range dataset.Columns {
+		got += fmt.Sprint(col.Records)
+	}
+
+	test.Assert(t, "", DatasetColumnsJoin(t), got, true)
+}
+
+func TestModeMatrixPushRows(t *testing.T) {
+	dataset := NewDataset(DatasetModeMatrix, nil, nil)
+
+	if e := populateWithRows(dataset); e != nil {
+		t.Fatal(e)
+	}
+
+	// The rows must contain all the pushed records.
+	test.Assert(t, "", DatasetRowsJoin(t), fmt.Sprint(dataset.Rows), true)
+
+	// On matrix mode the columns must be filled too.
+	got := ""
+	for _, col := range dataset.Columns {
+		got += fmt.Sprint(col.Records)
+	}
+
+	test.Assert(t, "", DatasetColumnsJoin(t), got, true)
+}
+
+func TestSelectRowsWhere(t *testing.T) {
+	dataset := NewDataset(DatasetModeMatrix, nil, nil)
+
+	if e := populateWithRows(dataset); e != nil {
+		t.Fatal(e)
+	}
+
+	// Select all rows where the value of the first column is "9"; the
+	// only match is the last row of the test data.
+	selected := SelectRowsWhere(dataset, 0, "9")
+
+	test.Assert(t, "", dataset.GetRow(9), selected.GetRow(0), true)
+}
+
+func TestDeleteRow(t *testing.T) {
+	dataset := NewDataset(DatasetModeMatrix, nil, nil)
+
+	if e := populateWithRows(dataset); e != nil {
+		t.Fatal(e)
+	}
+
+	delIdx := 2
+
+	// The number of rows must decrease by one.
+	expLen := dataset.Len() - 1
+	dataset.DeleteRow(delIdx)
+
+	test.Assert(t, "", expLen, dataset.Len(), true)
+
+	// Each column must have the same number of records as the rows.
+	for _, col := range dataset.Columns {
+		test.Assert(t, "", expLen, col.Len(), true)
+	}
+
+	// The remaining rows must keep their order.
+	ridx := 0
+	for x, row := range datasetRows {
+		if x != delIdx {
+			exp := fmt.Sprint("&", row)
+			got := fmt.Sprint(dataset.GetRow(ridx))
+			ridx++
+
+			test.Assert(t, "", exp, got, true)
+		}
+	}
+
+	// Each column must lost the record at the deleted index only.
+	for x := range dataset.Columns {
+		col := datasetCols[x]
+
+		coldel := append([]string{}, col[:delIdx]...)
+		coldel = append(coldel, col[delIdx+1:]...)
+
+		exp := fmt.Sprint(coldel)
+		got := fmt.Sprint(dataset.Columns[x].Records)
+
+		test.Assert(t, "", exp, got, true)
+	}
+}
diff --git a/lib/tabula/datasetinterface.go b/lib/tabula/datasetinterface.go
new file mode 100644
index 00000000..b68b5b12
--- /dev/null
+++ b/lib/tabula/datasetinterface.go
@@ -0,0 +1,442 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+
+	"github.com/shuLhan/share/lib/debug"
+)
+
+//
+// DatasetInterface is the interface for working with DSV data.  The
+// functions in this file accept it instead of the concrete Dataset.
+//
+type DatasetInterface interface {
+	// Initialization, cloning, and reset.
+	Init(mode int, types []int, names []string)
+	Clone() interface{}
+	Reset() error
+
+	// Mode of the stored data (rows, columns, or matrix).
+	GetMode() int
+	SetMode(mode int)
+
+	// Size of dataset.
+	GetNColumn() int
+	GetNRow() int
+	Len() int
+
+	// Type of all columns.
+	GetColumnsType() []int
+	SetColumnsType(types []int)
+
+	// Type of a single column.
+	GetColumnTypeAt(idx int) (int, error)
+	SetColumnTypeAt(idx, tipe int) error
+
+	// Name of all columns.
+	GetColumnsName() []string
+	SetColumnsName(names []string)
+
+	// Access to columns.
+	AddColumn(tipe int, name string, vs []string)
+	GetColumn(idx int) *Column
+	GetColumnByName(name string) *Column
+	GetColumns() *Columns
+	SetColumns(*Columns)
+
+	// Access to rows.
+	GetRow(idx int) *Row
+	GetRows() *Rows
+	SetRows(*Rows)
+	DeleteRow(idx int) *Row
+
+	// Access to the data in a specific form.
+	GetData() interface{}
+	GetDataAsRows() *Rows
+	GetDataAsColumns() *Columns
+
+	// Conversion between rows and columns.
+	TransposeToColumns()
+	TransposeToRows()
+
+	// Adding rows and columns.
+	PushRow(r *Row)
+	PushRowToColumns(r *Row)
+	FillRowsWithColumn(colidx int, col Column)
+	PushColumn(col Column)
+	PushColumnToRows(col Column)
+
+	// Merging with other dataset.
+	MergeColumns(DatasetInterface)
+	MergeRows(DatasetInterface)
+}
+
+//
+// ReadDatasetConfig read the file `fcfg` and decode its content, as JSON,
+// into `ds`.  It return the error from reading or decoding, if any.
+//
+func ReadDatasetConfig(ds interface{}, fcfg string) (e error) {
+	var cfg []byte
+
+	if cfg, e = ioutil.ReadFile(fcfg); e != nil {
+		return e
+	}
+
+	e = json.Unmarshal(cfg, ds)
+
+	return e
+}
+
+//
+// SortColumnsByIndex replace the records of each column with the result
+// of sorting them by `sortedIdx`.  If dataset mode is rows, it is
+// transposed to columns first.
+//
+func SortColumnsByIndex(di DatasetInterface, sortedIdx []int) {
+	if di.GetMode() == DatasetModeRows {
+		di.TransposeToColumns()
+	}
+
+	cols := di.GetColumns()
+
+	for x := range *cols {
+		records := (*cols)[x].Records
+		sorted := records.SortByIndex(sortedIdx)
+
+		(*cols)[x].SetRecords(sorted)
+	}
+}
+
+//
+// SplitRowsByNumeric split the rows into two datasets based on numeric
+// value of column `colidx`: rows with value less than `splitVal` go to the
+// first dataset, the rest go to the second one.
+//
+// For example, given two continuous attribute,
+//
+// 	A: {1,2,3,4}
+// 	B: {5,6,7,8}
+//
+// if colidx is (1) B and splitVal is 7, the data will splitted into left set
+//
+// 	A': {1,2}
+// 	B': {5,6}
+//
+// and right set
+//
+// 	A'': {3,4}
+// 	B'': {7,8}
+//
+// It return ErrInvalidColType if the column type is not integer or real.
+//
+func SplitRowsByNumeric(di DatasetInterface, colidx int, splitVal float64) (
+	splitLess DatasetInterface,
+	splitGreater DatasetInterface,
+	e error,
+) {
+	// Only numeric column can be split by value.
+	coltype, e := di.GetColumnTypeAt(colidx)
+	if e != nil {
+		return nil, nil, e
+	}
+
+	if coltype != TInteger && coltype != TReal {
+		return nil, nil, ErrInvalidColType
+	}
+
+	// Work on rows; the original mode is restored before returning.
+	orgmode := di.GetMode()
+	fromColumns := orgmode == DatasetModeColumns
+
+	if fromColumns {
+		di.TransposeToRows()
+	}
+
+	if debug.Value >= 2 {
+		fmt.Println("[tabula] dataset:", di)
+	}
+
+	splitLess = di.Clone().(DatasetInterface)
+	splitGreater = di.Clone().(DatasetInterface)
+
+	for _, row := range *di.GetRows() {
+		target := splitGreater
+		if (*row)[colidx].Float() < splitVal {
+			target = splitLess
+		}
+		target.PushRow(row)
+	}
+
+	if debug.Value >= 2 {
+		fmt.Println("[tabula] split less:", splitLess)
+		fmt.Println("[tabula] split greater:", splitGreater)
+	}
+
+	// On matrix mode nothing need to be done, since the columns are
+	// already filled when pushing the rows.
+	if fromColumns {
+		di.TransposeToColumns()
+		splitLess.TransposeToColumns()
+		splitGreater.TransposeToColumns()
+	}
+
+	return splitLess, splitGreater, nil
+}
+
+//
+// SplitRowsByCategorical split the rows into two datasets based on string
+// value of column `colidx` and a set of split values.
+//
+// For example, given two attributes,
+//
+// 	X: [A,B,A,B,C,D,C,D]
+// 	Y: [1,2,3,4,5,6,7,8]
+//
+// if colidx is (0) or X and split value is a set `[A,C]`, the data will
+// splitted into left set which contain all rows that have A or C,
+//
+// 	X': [A,A,C,C]
+// 	Y': [1,3,5,7]
+//
+// and the right set, excluded set, will contain all rows which is not A or C,
+//
+// 	X'': [B,B,D,D]
+// 	Y'': [2,4,6,8]
+//
+// It return ErrInvalidColType if the column type is not string.
+//
+func SplitRowsByCategorical(di DatasetInterface, colidx int,
+	splitVal []string) (
+	splitIn DatasetInterface,
+	splitEx DatasetInterface,
+	e error,
+) {
+	// Only string column can be split by a set of values.
+	coltype, e := di.GetColumnTypeAt(colidx)
+	if e != nil {
+		return nil, nil, e
+	}
+
+	if coltype != TString {
+		return nil, nil, ErrInvalidColType
+	}
+
+	// Work on rows; the original mode is restored before returning.
+	orgmode := di.GetMode()
+
+	if orgmode == DatasetModeColumns {
+		di.TransposeToRows()
+	}
+
+	splitIn = di.Clone().(DatasetInterface)
+	splitEx = di.Clone().(DatasetInterface)
+
+	for _, row := range *di.GetRows() {
+		// A row is excluded unless its value match one of splitVal.
+		target := splitEx
+
+		for _, val := range splitVal {
+			if (*row)[colidx].String() == val {
+				target = splitIn
+				break
+			}
+		}
+
+		target.PushRow(row)
+	}
+
+	// Convert all dataset based on original mode.
+	if orgmode == DatasetModeColumns {
+		di.TransposeToColumns()
+	}
+
+	switch orgmode {
+	case DatasetModeColumns, DatasetModeMatrix, DatasetNoMode:
+		splitIn.TransposeToColumns()
+		splitEx.TransposeToColumns()
+	}
+
+	return splitIn, splitEx, nil
+}
+
+//
+// SplitRowsByValue generic function to split data by value. This function will
+// split data using value in column `colidx`.
+//
+// If the column type is string, `value` must be a []string (or a single
+// string) and the rows are split using SplitRowsByCategorical.
+//
+// Otherwise `value` must be an int, int32, int64, float32, or float64; it
+// will return any rows that have column value less than `value` in `splitL`,
+// and any column value greater or equal to `value` in `splitR`.
+//
+// On error, including when `value` has an unsupported type, both datasets
+// are nil.
+//
+func SplitRowsByValue(di DatasetInterface, colidx int, value interface{}) (
+	splitL DatasetInterface,
+	splitR DatasetInterface,
+	e error,
+) {
+	coltype, e := di.GetColumnTypeAt(colidx)
+	if e != nil {
+		return nil, nil, e
+	}
+
+	if coltype == TString {
+		var splitval []string
+
+		// Check the type instead of asserting it blindly, which
+		// would panic on any value other than []string.
+		switch v := value.(type) {
+		case []string:
+			splitval = v
+		case string:
+			splitval = []string{v}
+		default:
+			return nil, nil, fmt.Errorf(
+				"tabula: SplitRowsByValue: invalid split value type %T for string column",
+				value)
+		}
+
+		splitL, splitR, e = SplitRowsByCategorical(di, colidx,
+			splitval)
+	} else {
+		var splitval float64
+
+		// An unknown type is rejected; falling through would
+		// silently split the rows at zero.
+		switch v := value.(type) {
+		case int:
+			splitval = float64(v)
+		case int32:
+			splitval = float64(v)
+		case int64:
+			splitval = float64(v)
+		case float32:
+			splitval = float64(v)
+		case float64:
+			splitval = v
+		default:
+			return nil, nil, fmt.Errorf(
+				"tabula: SplitRowsByValue: invalid split value type %T for numeric column",
+				value)
+		}
+
+		splitL, splitR, e = SplitRowsByNumeric(di, colidx,
+			splitval)
+	}
+
+	if e != nil {
+		return nil, nil, e
+	}
+
+	return splitL, splitR, nil
+}
+
+//
+// SelectRowsWhere return a new dataset which contain all rows which value
+// of column `colidx` is equal to `colval`.  The input dataset is restored
+// to its original mode before returning.
+//
+func SelectRowsWhere(dataset DatasetInterface, colidx int, colval string) DatasetInterface {
+	orgmode := dataset.GetMode()
+	fromColumns := orgmode == DatasetModeColumns
+
+	if fromColumns {
+		dataset.TransposeToRows()
+	}
+
+	// The new dataset use the mode of input after transposing.
+	selected := NewDataset(dataset.GetMode(), nil, nil)
+
+	selected.Rows = dataset.GetRows().SelectWhere(colidx, colval)
+
+	if fromColumns {
+		dataset.TransposeToColumns()
+	}
+
+	switch orgmode {
+	case DatasetModeColumns, DatasetModeMatrix, DatasetNoMode:
+		selected.TransposeToColumns()
+	}
+
+	return selected
+}
+
+//
+// RandomPickRows select `n` rows randomly from dataset and return two new
+// datasets, one with the picked rows and one with the unpicked rows,
+// along with the index of rows in `pickedIdx` and `unpickedIdx`.
+//
+// If duplicate is true, the row that has been picked can be picked up again,
+// otherwise it only allow one pick. This is also called as random selection
+// with or without replacement in machine learning domain.
+//
+// If the dataset mode is columns, it is transposed to rows for picking and
+// transposed back to columns afterward.
+//
+func RandomPickRows(dataset DatasetInterface, n int, duplicate bool) (
+	picked DatasetInterface,
+	unpicked DatasetInterface,
+	pickedIdx []int,
+	unpickedIdx []int,
+) {
+	orgmode := dataset.GetMode()
+	fromColumns := orgmode == DatasetModeColumns
+
+	if fromColumns {
+		dataset.TransposeToRows()
+	}
+
+	picked = dataset.Clone().(DatasetInterface)
+	unpicked = dataset.Clone().(DatasetInterface)
+
+	pickedRows, unpickedRows, pickedIdx, unpickedIdx :=
+		dataset.GetRows().RandomPick(n, duplicate)
+
+	picked.SetRows(&pickedRows)
+	unpicked.SetRows(&unpickedRows)
+
+	// Restore the input to its original mode.
+	if fromColumns {
+		dataset.TransposeToColumns()
+	}
+
+	// Fill the columns of picked and unpicked set, unless the original
+	// mode is rows.
+	switch orgmode {
+	case DatasetModeColumns, DatasetModeMatrix, DatasetNoMode:
+		picked.TransposeToColumns()
+		unpicked.TransposeToColumns()
+	}
+
+	return picked, unpicked, pickedIdx, unpickedIdx
+}
+
+//
+// RandomPickColumns select `n` columns randomly from dataset and return
+// two new datasets, one with the picked columns and one with the unpicked
+// columns, along with their column index.
+//
+// If duplicate is true, column that has been pick up can be pick up again.
+//
+// If the dataset mode is rows, it is transposed to columns for picking and
+// transposed back to rows afterward.
+//
+func RandomPickColumns(dataset DatasetInterface, n int, dup bool,
+	excludeIdx []int) (
+	picked DatasetInterface,
+	unpicked DatasetInterface,
+	pickedIdx []int,
+	unpickedIdx []int,
+) {
+	orgmode := dataset.GetMode()
+	fromRows := orgmode == DatasetModeRows
+
+	if fromRows {
+		dataset.TransposeToColumns()
+	}
+
+	picked = dataset.Clone().(DatasetInterface)
+	unpicked = dataset.Clone().(DatasetInterface)
+
+	pickedColumns, unpickedColumns, pickedIdx, unpickedIdx :=
+		dataset.GetColumns().RandomPick(n, dup, excludeIdx)
+
+	picked.SetColumns(&pickedColumns)
+	unpicked.SetColumns(&unpickedColumns)
+
+	// Restore the input to its original mode.
+	if fromRows {
+		dataset.TransposeToRows()
+	}
+
+	// Fill the rows of picked and unpicked set, unless the original
+	// mode is columns.
+	switch orgmode {
+	case DatasetModeRows, DatasetModeMatrix, DatasetNoMode:
+		picked.TransposeToRows()
+		unpicked.TransposeToRows()
+	}
+
+	return picked, unpicked, pickedIdx, unpickedIdx
+}
+
+//
+// SelectColumnsByIdx return a clone of dataset where the columns listed
+// in `colsIdx` have been pushed into.  Index without column is skipped.
+//
+// If the dataset mode is rows, it is transposed to columns for selecting
+// and, together with the new dataset, transposed back to rows afterward.
+//
+func SelectColumnsByIdx(dataset DatasetInterface, colsIdx []int) (
+	newset DatasetInterface,
+) {
+	fromRows := dataset.GetMode() == DatasetModeRows
+
+	if fromRows {
+		dataset.TransposeToColumns()
+	}
+
+	newset = dataset.Clone().(DatasetInterface)
+
+	for _, idx := range colsIdx {
+		if col := dataset.GetColumn(idx); col != nil {
+			newset.PushColumn(*col)
+		}
+	}
+
+	// Revert the mode back; nothing to do on columns or matrix mode.
+	if fromRows {
+		dataset.TransposeToRows()
+		newset.TransposeToRows()
+	}
+
+	return newset
+}
diff --git a/lib/tabula/maprows.go b/lib/tabula/maprows.go
new file mode 100644
index 00000000..a93f0308
--- /dev/null
+++ b/lib/tabula/maprows.go
@@ -0,0 +1,65 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"math"
+)
+
+//
+// MapRowsElement represents a single mapping of a string key to rows.
+//
+type MapRowsElement struct {
+	// Key is the string that identifies this group of rows.
+	Key   string
+	// Value holds the rows stored under Key.
+	Value Rows
+}
+
+//
+// MapRows represents a list of mappings between string key and rows.
+// It is a slice, so elements keep the order in which their keys were first
+// added.
+//
+type MapRows []MapRowsElement
+
+//
+// insertRow appends a new element with key `k` to the map.  The element value
+// is a fresh, non-nil Rows onto which `v` has been pushed.
+//
+func (mapRows *MapRows) insertRow(k string, v *Row) {
+	rows := make(Rows, 0)
+	rows.PushBack(v)
+
+	*mapRows = append(*mapRows, MapRowsElement{Key: k, Value: rows})
+}
+
+//
+// AddRow appends row `v` to the value of the first element whose key equals
+// `k`.  When no element has that key, a new element is inserted at the end of
+// the map.
+//
+func (mapRows *MapRows) AddRow(k string, v *Row) {
+	elements := *mapRows
+
+	for x := 0; x < len(elements); x++ {
+		// Take the address so PushBack updates the stored element.
+		el := &elements[x]
+		if el.Key != k {
+			continue
+		}
+		el.Value.PushBack(v)
+		return
+	}
+
+	// No element with key `k` exists yet.
+	mapRows.insertRow(k, v)
+}
+
+//
+// GetMinority returns the key and value of the element that contains the
+// fewest rows.  On a tie the element that comes first in the map wins.
+//
+// For an empty map it returns an empty key and nil rows.  An element only
+// qualifies when it has fewer than math.MaxInt32 rows.
+//
+func (mapRows *MapRows) GetMinority() (keyMin string, valMin Rows) {
+	smallest := math.MaxInt32
+
+	for _, el := range *mapRows {
+		if n := len(el.Value); n < smallest {
+			keyMin, valMin, smallest = el.Key, el.Value, n
+		}
+	}
+
+	return keyMin, valMin
+}
diff --git a/lib/tabula/maprows_test.go b/lib/tabula/maprows_test.go
new file mode 100644
index 00000000..19cd5ac8
--- /dev/null
+++ b/lib/tabula/maprows_test.go
@@ -0,0 +1,54 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+// TestAddRow groups the fixture rows by their class column using AddRow and
+// compares the printed map against groupByExpect.
+func TestAddRow(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	mapRows := make(MapRows, 0)
+
+	for x := range rows {
+		class := (*rows[x])[testClassIdx].Interface()
+		mapRows.AddRow(fmt.Sprint(class), rows[x])
+	}
+
+	test.Assert(t, "", groupByExpect, fmt.Sprint(mapRows), true)
+}
+
+// TestGetMinority groups the fixture rows by class, shrinks the first group
+// by one row, and expects GetMinority to return what is left of that group.
+func TestGetMinority(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	mapRows := make(MapRows, 0)
+
+	for x := range rows {
+		class := (*rows[x])[testClassIdx].Interface()
+		mapRows.AddRow(fmt.Sprint(class), rows[x])
+	}
+
+	// Drop the first row of the first key so that it becomes the minority.
+	mapRows[0].Value.PopFront()
+
+	_, minRows := mapRows.GetMinority()
+
+	test.Assert(t, "", rowsExpect[3], fmt.Sprint(minRows), true)
+}
diff --git a/lib/tabula/matrix.go b/lib/tabula/matrix.go
new file mode 100644
index 00000000..62ab68ac
--- /dev/null
+++ b/lib/tabula/matrix.go
@@ -0,0 +1,13 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+//
+// Matrix is a combination of columns and rows.
+//
+type Matrix struct {
+	// Columns points to the column form of the data.
+	Columns *Columns
+	// Rows points to the row form of the data.
+	Rows    *Rows
+}
diff --git a/lib/tabula/record.go b/lib/tabula/record.go
new file mode 100644
index 00000000..527ab430
--- /dev/null
+++ b/lib/tabula/record.go
@@ -0,0 +1,292 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"math"
+	"reflect"
+	"strconv"
+)
+
+const (
+	// TUndefined marks an undefined record type.
+	TUndefined = -1
+	// TString is the string record type.
+	TString = 0
+	// TInteger is the integer record type; values are stored as int64.
+	TInteger = 1
+	// TReal is the floating point record type; values are stored as
+	// float64.
+	TReal = 2
+)
+
+//
+// Record represents the smallest building block of a data set.
+//
+type Record struct {
+	// v is the record value: nil when unset, otherwise the string, int64,
+	// or float64 stored by the constructors and setters.
+	v interface{}
+}
+
+//
+// NewRecord creates and returns a record whose value is nil.
+//
+func NewRecord() *Record {
+	return &Record{}
+}
+
+//
+// NewRecordBy creates a new record by converting string `v` to type `t`.
+//
+// The record is returned even when the conversion fails; in that case its
+// value is still nil and `e` holds the conversion error.
+//
+func NewRecordBy(v string, t int) (r *Record, e error) {
+	r = &Record{}
+	e = r.SetValue(v, t)
+
+	return r, e
+}
+
+//
+// NewRecordString creates a new record that holds string `v`.
+//
+func NewRecordString(v string) (r *Record) {
+	r = new(Record)
+	r.v = v
+
+	return r
+}
+
+//
+// NewRecordInt creates a new record that holds the 64 bit integer `v`.
+//
+func NewRecordInt(v int64) (r *Record) {
+	r = new(Record)
+	r.v = v
+
+	return r
+}
+
+//
+// NewRecordReal creates a new record that holds the 64 bit float `v`.
+//
+func NewRecordReal(v float64) (r *Record) {
+	r = new(Record)
+	r.v = v
+
+	return r
+}
+
+//
+// Clone creates and returns a new record that holds the same value as `r`.
+//
+func (r *Record) Clone() *Record {
+	clone := new(Record)
+	clone.v = r.v
+
+	return clone
+}
+
+//
+// IsNil returns true if the record value is nil, which is the case for a
+// record that has not been set with any value.
+//
+func (r *Record) IsNil() bool {
+	return r.v == nil
+}
+
+//
+// Type returns the type of the record value: TInteger for int64, TReal for
+// float64, and TString for everything else, including a nil value.
+//
+func (r *Record) Type() int {
+	if _, ok := r.v.(int64); ok {
+		return TInteger
+	}
+	if _, ok := r.v.(float64); ok {
+		return TReal
+	}
+
+	return TString
+}
+
+//
+// SetValue sets the record value by converting string `v` to type `t`.
+//
+// For TInteger and TReal the string is parsed as a base 10 int64 or as a
+// float64; if parsing fails the error is returned and the record is left
+// unchanged.  Any `t` other than TString, TInteger, or TReal leaves the
+// record unchanged and returns nil.
+//
+func (r *Record) SetValue(v string, t int) error {
+	if t == TString {
+		r.v = v
+		return nil
+	}
+
+	if t == TInteger {
+		parsed, err := strconv.ParseInt(v, 10, 64)
+		if err != nil {
+			return err
+		}
+
+		r.v = parsed
+		return nil
+	}
+
+	if t == TReal {
+		parsed, err := strconv.ParseFloat(v, 64)
+		if err != nil {
+			return err
+		}
+
+		r.v = parsed
+	}
+
+	return nil
+}
+
+//
+// SetString replaces the record value with string `v`, whatever the type of
+// the previous value was.
+//
+func (r *Record) SetString(v string) {
+	r.v = v
+}
+
+//
+// SetFloat replaces the record value with the 64 bit float `v`, whatever the
+// type of the previous value was.
+//
+func (r *Record) SetFloat(v float64) {
+	r.v = v
+}
+
+//
+// SetInteger replaces the record value with the 64 bit integer `v`, whatever
+// the type of the previous value was.
+//
+func (r *Record) SetInteger(v int64) {
+	r.v = v
+}
+
+//
+// IsMissingValue checks whether the value is a missing attribute.
+//
+// A string is missing when it is exactly "?".
+//
+// An integer is missing when it is the minimum 64 bit integer,
+// math.MinInt64.
+//
+// A real is missing when it is -Inf.
+//
+// Any other value, including nil, is reported as not missing.
+//
+func (r *Record) IsMissingValue() bool {
+	switch val := r.v.(type) {
+	case string:
+		return val == "?"
+	case int64:
+		return val == math.MinInt64
+	case float64:
+		return math.IsInf(val, -1)
+	}
+
+	return false
+}
+
+//
+// Interface returns the record value as it is stored, without conversion.
+// The result is nil for a record that holds no value.
+//
+func (r *Record) Interface() interface{} {
+	return r.v
+}
+
+//
+// Bytes returns the string representation of the record value, as produced
+// by String, converted to a slice of bytes.
+//
+func (r *Record) Bytes() []byte {
+	s := r.String()
+
+	return []byte(s)
+}
+
+//
+// String converts the record value to string.
+//
+// A string is returned as is, an integer is formatted in base 10, and a real
+// is formatted without exponent using the smallest number of digits that
+// represents it exactly.  Any other value, including nil, yields an empty
+// string.
+//
+func (r Record) String() (s string) {
+	switch val := r.v.(type) {
+	case string:
+		return val
+	case int64:
+		return strconv.FormatInt(val, 10)
+	case float64:
+		return strconv.FormatFloat(val, 'f', -1, 64)
+	}
+
+	return ""
+}
+
+//
+// Float converts the record value to a 64 bit float.
+//
+// A string is parsed as float; if parsing fails the result is -Inf.
+// An integer is converted to float.  Any other value, including nil, yields
+// zero.
+//
+func (r *Record) Float() (f64 float64) {
+	switch val := r.v.(type) {
+	case string:
+		parsed, err := strconv.ParseFloat(val, 64)
+		if err != nil {
+			return math.Inf(-1)
+		}
+		return parsed
+
+	case int64:
+		return float64(val)
+
+	case float64:
+		return val
+	}
+
+	return 0
+}
+
+//
+// Integer converts the record value to a 64 bit integer.
+//
+// A string is parsed as a base 10 integer; if parsing fails the result is
+// math.MinInt64.  A real is converted with a plain int64 conversion, which
+// drops the fraction.  Any other value, including nil, yields zero.
+//
+func (r *Record) Integer() (i64 int64) {
+	switch val := r.v.(type) {
+	case string:
+		parsed, err := strconv.ParseInt(val, 10, 64)
+		if err != nil {
+			return math.MinInt64
+		}
+		return parsed
+
+	case int64:
+		return val
+
+	case float64:
+		return int64(val)
+	}
+
+	return 0
+}
+
+//
+// IsEqual returns true if record `r` holds the same type and value as record
+// `o`, otherwise it returns false.
+//
+// Nil records are handled without panicking: two nil records are equal, and
+// a nil record never equals a non-nil one.
+//
+func (r *Record) IsEqual(o *Record) bool {
+	// Rows may contain nil records (see Row.IsNilAt), so guard against
+	// dereferencing them.
+	if r == nil || o == nil {
+		return r == o
+	}
+
+	return reflect.DeepEqual(r.v, o.v)
+}
+
+//
+// IsEqualToString returns true if the string representation of the record
+// value, as produced by String, is equal to string `v`.
+//
+func (r *Record) IsEqualToString(v string) bool {
+	return r.String() == v
+}
+
+//
+// IsEqualToInterface returns true if both the type and the value of `v` are
+// equal to the record type and value, as decided by reflect.DeepEqual.
+//
+func (r *Record) IsEqualToInterface(v interface{}) bool {
+	return reflect.DeepEqual(r.v, v)
+}
+
+//
+// Reset sets the record value to the zero value of its current type: an
+// empty string, integer zero, or real zero.  A record that holds any other
+// value, including nil, is left unchanged.
+//
+func (r *Record) Reset() {
+	if _, ok := r.v.(string); ok {
+		r.v = ""
+	} else if _, ok := r.v.(int64); ok {
+		r.v = int64(0)
+	} else if _, ok := r.v.(float64); ok {
+		r.v = float64(0)
+	}
+}
diff --git a/lib/tabula/record_test.go b/lib/tabula/record_test.go
new file mode 100644
index 00000000..223f9235
--- /dev/null
+++ b/lib/tabula/record_test.go
@@ -0,0 +1,35 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+//
+// TestRecord checks that a row of records prints the same as the slice of
+// strings the records were created from.
+//
+func TestRecord(t *testing.T) {
+	expec := []string{"test", "1", "2"}
+	expType := []int{TString, TInteger, TInteger}
+
+	var row Row
+
+	for i, v := range expec {
+		r, e := NewRecordBy(v, expType[i])
+		if e != nil {
+			t.Error(e)
+		}
+
+		row.PushBack(r)
+	}
+
+	test.Assert(t, "", fmt.Sprint(expec), fmt.Sprint(row), true)
+}
diff --git a/lib/tabula/records.go b/lib/tabula/records.go
new file mode 100644
index 00000000..e00c03b9
--- /dev/null
+++ b/lib/tabula/records.go
@@ -0,0 +1,54 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+//
+// Records defines a slice of pointers to Record.
+//
+type Records []*Record
+
+//
+// Len returns the number of records in the slice.
+//
+func (recs *Records) Len() int {
+	return len(*recs)
+}
+
+//
+// SortByIndex returns new records ordered by the slice of index `sortedIdx`:
+// position x of the result holds the record found at index sortedIdx[x] of
+// the receiver.  The receiver itself is not modified.
+//
+// The result always has the same length as the receiver; positions that
+// `sortedIdx` does not cover stay nil.
+//
+func (recs *Records) SortByIndex(sortedIdx []int) *Records {
+	src := *recs
+	sorted := make(Records, len(src))
+
+	for pos := 0; pos < len(sortedIdx); pos++ {
+		sorted[pos] = src[sortedIdx[pos]]
+	}
+
+	return &sorted
+}
+
+//
+// CountWhere returns the number of records whose type and value are equal to
+// `v`, as decided by Record.IsEqualToInterface.
+//
+func (recs *Records) CountWhere(v interface{}) (c int) {
+	all := *recs
+
+	for x := range all {
+		if !all[x].IsEqualToInterface(v) {
+			continue
+		}
+		c++
+	}
+
+	return c
+}
+
+//
+// CountsWhere returns, for each value in slice `vs`, the number of records
+// equal to that value.  The counts are in the same order as `vs`.
+//
+func (recs *Records) CountsWhere(vs []interface{}) (counts []int) {
+	for x := range vs {
+		counts = append(counts, recs.CountWhere(vs[x]))
+	}
+
+	return counts
+}
diff --git a/lib/tabula/records_test.go b/lib/tabula/records_test.go
new file mode 100644
index 00000000..2be6f7b1
--- /dev/null
+++ b/lib/tabula/records_test.go
@@ -0,0 +1,29 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+// TestSortByIndex reverses three integer records through SortByIndex and
+// compares the printed result with the expected order.
+func TestSortByIndex(t *testing.T) {
+	data := Records{
+		NewRecordInt(3),
+		NewRecordInt(2),
+		NewRecordInt(1),
+	}
+
+	sortedIdx := []int{2, 1, 0}
+	expect := []int{1, 2, 3}
+
+	sorted := data.SortByIndex(sortedIdx)
+
+	test.Assert(t, "", fmt.Sprint(&expect), fmt.Sprint(sorted), true)
+}
diff --git a/lib/tabula/row.go b/lib/tabula/row.go
new file mode 100644
index 00000000..105577c5
--- /dev/null
+++ b/lib/tabula/row.go
@@ -0,0 +1,123 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+//
+// Row represents a slice of pointers to Record.
+//
+type Row []*Record
+
+//
+// Len returns the number of records in the row.
+//
+func (row *Row) Len() int {
+	return len(*row)
+}
+
+//
+// PushBack adds record `r` to the end of the row.  The record is appended as
+// given, without any check.
+//
+func (row *Row) PushBack(r *Record) {
+	*row = append(*row, r)
+}
+
+//
+// Types returns the type of every record in the row, in row order, as
+// reported by Record.Type.  The result is nil for an empty row.
+//
+func (row *Row) Types() (types []int) {
+	recs := *row
+
+	for x := 0; x < len(recs); x++ {
+		types = append(types, recs[x].Type())
+	}
+
+	return types
+}
+
+//
+// Clone creates and returns a copy of the row in which every record is
+// itself a clone, so changing a record in the copy does not change the
+// original.
+//
+// A nil record in the row stays nil in the copy.
+//
+func (row *Row) Clone() *Row {
+	clone := make(Row, len(*row))
+
+	for x, rec := range *row {
+		// Rows may contain nil records (see IsNilAt); calling Clone
+		// on one would dereference a nil pointer.
+		if rec == nil {
+			continue
+		}
+		clone[x] = rec.Clone()
+	}
+	return &clone
+}
+
+//
+// IsNilAt returns true if there is no record value in the row at `idx`:
+// the index is out of range, the record pointer is nil, or the record holds
+// a nil value.  Otherwise it returns false.
+//
+func (row *Row) IsNilAt(idx int) bool {
+	recs := *row
+
+	if idx < 0 || idx >= len(recs) || recs[idx] == nil {
+		return true
+	}
+
+	return recs[idx].IsNil()
+}
+
+//
+// SetValueAt replaces the record at cell index `idx` of the row with record
+// `rec`.
+//
+// The index is not checked; an `idx` outside the row causes a run-time
+// panic.
+//
+func (row *Row) SetValueAt(idx int, rec *Record) {
+	(*row)[idx] = rec
+}
+
+//
+// GetRecord returns the pointer to the record at index `i`, or nil if the
+// index is negative or not less than the row length.
+//
+func (row *Row) GetRecord(i int) *Record {
+	if recs := *row; i >= 0 && i < len(recs) {
+		return recs[i]
+	}
+
+	return nil
+}
+
+//
+// GetValueAt returns the value of the row record at index `idx` and true.
+//
+// If the index is out of range, negative included, or the record at that
+// index is a nil pointer, it returns nil and false.
+//
+func (row *Row) GetValueAt(idx int) (interface{}, bool) {
+	// GetRecord performs the bounds check on both ends, so a negative
+	// index no longer panics here.
+	rec := row.GetRecord(idx)
+	if rec == nil {
+		return nil, false
+	}
+	return rec.Interface(), true
+}
+
+//
+// GetIntAt returns the integer value of the row record at index `idx`, as
+// converted by Record.Integer, and true.
+//
+// If the index is out of range, negative included, or the record at that
+// index is a nil pointer, it returns 0 and false.
+//
+func (row *Row) GetIntAt(idx int) (int64, bool) {
+	// GetRecord performs the bounds check on both ends, so a negative
+	// index no longer panics here.
+	rec := row.GetRecord(idx)
+	if rec == nil {
+		return 0, false
+	}
+
+	return rec.Integer(), true
+}
+
+//
+// IsEqual returns true if the row has the same number of records as `other`
+// and every record is equal, by Record.IsEqual, to the record at the same
+// position in `other`.  Otherwise it returns false.
+//
+func (row *Row) IsEqual(other *Row) bool {
+	a, b := *row, *other
+
+	if len(a) != len(b) {
+		return false
+	}
+
+	for x := 0; x < len(a); x++ {
+		if a[x].IsEqual(b[x]) {
+			continue
+		}
+		return false
+	}
+
+	return true
+}
diff --git a/lib/tabula/row_test.go b/lib/tabula/row_test.go
new file mode 100644
index 00000000..5fa45775
--- /dev/null
+++ b/lib/tabula/row_test.go
@@ -0,0 +1,33 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+// dataFloat64 is the list of values used to build the test row.
+var dataFloat64 = []float64{0.1, 0.2, 0.3, 0.4, 0.5}
+
+// createRow returns a row with one real record per value in dataFloat64.
+func createRow() (row Row) {
+	for x := range dataFloat64 {
+		row = append(row, NewRecordReal(dataFloat64[x]))
+	}
+
+	return row
+}
+
+// TestClone checks that a cloned row equals its source and that changing a
+// record in one clone affects neither the source nor another clone.
+func TestClone(t *testing.T) {
+	row := createRow()
+	orig := &row
+
+	rowClone, rowClone2 := row.Clone(), row.Clone()
+
+	test.Assert(t, "", orig, rowClone, true)
+
+	// Mutating one clone must leave the original and the other clone
+	// alone.
+	(*rowClone2)[0].SetFloat(0)
+
+	test.Assert(t, "", orig, rowClone, true)
+	test.Assert(t, "", orig, rowClone2, false)
+}
diff --git a/lib/tabula/rows.go b/lib/tabula/rows.go
new file mode 100644
index 00000000..fcaed021
--- /dev/null
+++ b/lib/tabula/rows.go
@@ -0,0 +1,251 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"math/rand"
+	"time"
+)
+
+//
+// Rows represents a slice of pointers to Row.
+//
+type Rows []*Row
+
+//
+// Len returns the number of rows in the slice.
+//
+func (rows *Rows) Len() int {
+	return len(*rows)
+}
+
+//
+// PushBack appends row `r` to the end of rows.  A nil row is ignored.
+//
+func (rows *Rows) PushBack(r *Row) {
+	if r == nil {
+		return
+	}
+
+	*rows = append(*rows, r)
+}
+
+//
+// PopFront removes the first row from rows and returns it.  It returns nil
+// when rows is empty.
+//
+func (rows *Rows) PopFront() (row *Row) {
+	if len(*rows) == 0 {
+		return nil
+	}
+
+	row, *rows = (*rows)[0], (*rows)[1:]
+
+	return row
+}
+
+//
+// PopFrontAsRows removes the first row from rows and returns it wrapped in
+// new rows.  The result is empty when there was nothing to remove.
+//
+func (rows *Rows) PopFrontAsRows() (newRows Rows) {
+	if head := rows.PopFront(); head != nil {
+		newRows = append(newRows, head)
+	}
+
+	return newRows
+}
+
+//
+// Del detaches the row at index `i` from the slice and returns it.  The rows
+// after it move one position forward.  It returns nil, and changes nothing,
+// when `i` is out of range.
+//
+func (rows *Rows) Del(i int) (row *Row) {
+	all := *rows
+
+	if i < 0 || i >= len(all) {
+		return nil
+	}
+
+	row = all[i]
+	last := len(all) - 1
+
+	// Shift the tail over the removed slot, then clear the now duplicated
+	// last slot so the slice does not keep an extra pointer to that row.
+	copy(all[i:], all[i+1:])
+	all[last] = nil
+
+	*rows = all[:last]
+
+	return row
+}
+
+//
+// GroupByValue groups each row by the string form of its record at index
+// `GroupIdx` and returns the groups as a list of key -> rows mappings, in
+// order of first appearance of each key.
+//
+// WARNING: rows are popped from the receiver one by one while grouping, so
+// the receiver is left empty afterwards.
+//
+// For example, given rows with the target group in column index 1,
+//
+//	[1 +]
+//	[2 -]
+//	[3 -]
+//	[4 +]
+//
+// this function creates a map where the key is the string of the target and
+// the value is the sub-rows,
+//
+//	+ -> [1 +]
+//	     [4 +]
+//	- -> [2 -]
+//	     [3 -]
+//
+func (rows *Rows) GroupByValue(GroupIdx int) (mapRows MapRows) {
+	for row := rows.PopFront(); row != nil; row = rows.PopFront() {
+		mapRows.AddRow(fmt.Sprint((*row)[GroupIdx]), row)
+	}
+
+	return mapRows
+}
+
+//
+// RandomPick picks `n` rows at random and returns them in the order they
+// were picked, as if the rows had been shuffled.
+//
+// If `duplicate` is true, a row that has been picked can be picked again.
+// Otherwise each row is picked at most once and `n` is capped at the number
+// of rows.
+//
+// It returns the picked rows, the rows that were never picked (in their
+// original order), and the row indices of both.  When rows is empty, or `n`
+// is less than one, nothing is picked.
+//
+// The global math/rand source is reseeded with the current time on every
+// call that has rows to pick from.
+//
+func (rows *Rows) RandomPick(n int, duplicate bool) (
+	picked Rows,
+	unpicked Rows,
+	pickedIdx []int,
+	unpickedIdx []int,
+) {
+	rowsLen := len(*rows)
+
+	// Nothing can be picked from empty rows, and rand.Intn panics when its
+	// argument is zero.
+	if rowsLen == 0 {
+		return nil, nil, nil, nil
+	}
+
+	// If duplication is not allowed, we can only select as many rows as
+	// we have.
+	if n > rowsLen && !duplicate {
+		n = rowsLen
+	}
+
+	// NOTE(review): rand.Seed is deprecated since Go 1.20; kept so the
+	// code still builds with older toolchains.
+	rand.Seed(time.Now().UnixNano())
+
+	// isPicked[i] is true once row index i has been picked at least once.
+	isPicked := make([]bool, rowsLen)
+
+	for ; n >= 1; n-- {
+		idx := rand.Intn(rowsLen)
+
+		// Without duplication, draw again until an unpicked index comes
+		// up.  One always exists because n was capped at rowsLen.
+		for !duplicate && isPicked[idx] {
+			idx = rand.Intn(rowsLen)
+		}
+
+		isPicked[idx] = true
+		pickedIdx = append(pickedIdx, idx)
+		picked.PushBack((*rows)[idx])
+	}
+
+	// Everything that was never picked goes to unpicked, in row order.
+	for rid, row := range *rows {
+		if isPicked[rid] {
+			continue
+		}
+		unpicked.PushBack(row)
+		unpickedIdx = append(unpickedIdx, rid)
+	}
+
+	return picked, unpicked, pickedIdx, unpickedIdx
+}
+
+//
+// Contain returns true and the index of the first row whose content is equal
+// to `xrow`, as decided by Row.IsEqual.  If there is no such row it returns
+// false and -1.
+//
+func (rows *Rows) Contain(xrow *Row) (bool, int) {
+	all := *rows
+
+	for x := 0; x < len(all); x++ {
+		if xrow.IsEqual(all[x]) {
+			return true, x
+		}
+	}
+
+	return false, -1
+}
+
+//
+// Contains returns true and the index of each match if every row in `xrows`
+// has an equal row in rows, as decided by Contain.  The indices are in the
+// order of `xrows`.
+//
+// It returns false and nil indices if `xrows` is empty or if at least one of
+// its rows is not found.
+//
+func (rows *Rows) Contains(xrows Rows) (isin bool, indices []int) {
+	// No data to compare.
+	if len(xrows) == 0 {
+		return false, nil
+	}
+
+	indices = make([]int, 0, len(xrows))
+
+	for _, xrow := range xrows {
+		found, idx := rows.Contain(xrow)
+
+		// A single miss already decides the result; stop searching.
+		if !found {
+			return false, nil
+		}
+
+		indices = append(indices, idx)
+	}
+
+	return true, indices
+}
+
+//
+// SelectWhere returns all rows whose record at column index `colidx` has a
+// string representation equal to `colval`.  The selected rows keep their
+// original order and are shared with, not copied from, the receiver.
+//
+func (rows *Rows) SelectWhere(colidx int, colval string) (selected Rows) {
+	all := *rows
+
+	for x := range all {
+		if (*all[x])[colidx].IsEqualToString(colval) {
+			selected = append(selected, all[x])
+		}
+	}
+
+	return selected
+}
+
+//
+// String returns the printed form of every row, concatenated in order
+// without any separator.
+//
+func (rows Rows) String() (s string) {
+	for _, row := range rows {
+		s += fmt.Sprint(row)
+	}
+
+	return s
+}
diff --git a/lib/tabula/rows_test.go b/lib/tabula/rows_test.go
new file mode 100644
index 00000000..174dd10f
--- /dev/null
+++ b/lib/tabula/rows_test.go
@@ -0,0 +1,181 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/shuLhan/share/lib/test"
+)
+
+// TestPushBack checks that the fixture rows, which initRows builds with
+// PushBack, print as the concatenation of rowsExpect.
+func TestPushBack(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	test.Assert(t, "", strings.Join(rowsExpect, ""), fmt.Sprint(rows), true)
+}
+
+// TestPopFront pops every fixture row in turn, checking the popped row and
+// the rows left behind, and then checks that popping empty rows yields nil.
+func TestPopFront(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	total := len(rows)
+
+	for i := 0; i < total; i++ {
+		row := rows.PopFront()
+
+		test.Assert(t, "", rowsExpect[i], fmt.Sprint(row), true)
+
+		// After the last pop nothing is left to print.
+		remain := ""
+		if i < total-1 {
+			remain = strings.Join(rowsExpect[i+1:], "")
+		}
+
+		test.Assert(t, "", remain, fmt.Sprint(rows), true)
+	}
+
+	// empty rows
+	row := rows.PopFront()
+
+	test.Assert(t, "", "<nil>", fmt.Sprint(row), true)
+}
+
+// TestPopFrontRow pops every fixture row in turn with PopFrontAsRows,
+// checking the returned rows and the rows left behind, and then checks that
+// popping empty rows yields empty rows.
+func TestPopFrontRow(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	total := len(rows)
+
+	for i := 0; i < total; i++ {
+		newRows := rows.PopFrontAsRows()
+
+		test.Assert(t, "", rowsExpect[i], fmt.Sprint(newRows), true)
+
+		// After the last pop nothing is left to print.
+		remain := ""
+		if i < total-1 {
+			remain = strings.Join(rowsExpect[i+1:], "")
+		}
+
+		test.Assert(t, "", remain, fmt.Sprint(rows), true)
+	}
+
+	// empty rows
+	row := rows.PopFrontAsRows()
+
+	test.Assert(t, "", "", fmt.Sprint(row), true)
+}
+
+// TestGroupByValue groups the fixture rows by the class column and compares
+// the printed groups against groupByExpect.
+func TestGroupByValue(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	mapRows := rows.GroupByValue(testClassIdx)
+
+	test.Assert(t, "", groupByExpect, fmt.Sprint(mapRows), true)
+}
+
+// TestRandomPick runs RandomPick five times with and five times without
+// duplication, and fails when the picked rows contain all of the unpicked
+// rows.  Without duplication it also asserts that the first two picked rows
+// differ.
+func TestRandomPick(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	// failIfOverlap dumps the pick and aborts the test when the unpicked
+	// rows are all found among the picked rows.
+	failIfOverlap := func(title string, picked, unpicked Rows,
+		pickedIdx, unpickedIdx []int) {
+		isin, _ := picked.Contains(unpicked)
+		if !isin {
+			return
+		}
+
+		fmt.Println(title)
+		fmt.Println("==> picked rows   :", picked)
+		fmt.Println("==> picked idx    :", pickedIdx)
+		fmt.Println("==> unpicked rows :", unpicked)
+		fmt.Println("==> unpicked idx  :", unpickedIdx)
+		t.Fatal("random pick: unpicked is false")
+	}
+
+	// random pick with duplicate
+	for i := 0; i < 5; i++ {
+		picked, unpicked, pickedIdx, unpickedIdx := rows.RandomPick(6,
+			true)
+
+		failIfOverlap("Random pick with duplicate rows",
+			picked, unpicked, pickedIdx, unpickedIdx)
+	}
+
+	// random pick without duplication
+	for i := 0; i < 5; i++ {
+		picked, unpicked, pickedIdx, unpickedIdx := rows.RandomPick(3,
+			false)
+
+		// check if picked rows is duplicate
+		test.Assert(t, "", picked[0], picked[1], false)
+
+		failIfOverlap("Random pick with no duplicate rows",
+			picked, unpicked, pickedIdx, unpickedIdx)
+	}
+}
+
+// TestRowsDel checks that Del returns nil for out-of-range indices, and that
+// deleting index 0 returns the first fixture row and leaves the rest.
+func TestRowsDel(t *testing.T) {
+	rows, e := initRows()
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	// Test deleting row index out of range.
+	for _, idx := range []int{-1, rows.Len()} {
+		if row := rows.Del(idx); row != nil {
+			t.Fatal("row should be nil!")
+		}
+	}
+
+	// Test deleting index that is actually exist.
+	row := rows.Del(0)
+
+	test.Assert(t, "", strings.Join(rowsExpect[1:], ""), fmt.Sprint(rows),
+		true)
+
+	test.Assert(t, "", rowsExpect[0], fmt.Sprint(row), true)
+}
diff --git a/lib/tabula/tabula.go b/lib/tabula/tabula.go
new file mode 100644
index 00000000..3d7f57df
--- /dev/null
+++ b/lib/tabula/tabula.go
@@ -0,0 +1,76 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+//
+// Package tabula is a Go library for working with rows, columns, or matrix
+// (table), or in other terms, working with data set.
+//
+// Introduction
+//
+// Go's slice gives a flexible way to manage a sequence of data of one type,
+// but what if you want to manage a sequence of values with different types of
+// data?  Or manage a bunch of values like a table?
+//
+// You can use this library to manage sequence of value with different type
+// and manage data in two dimensional tuple.
+//
+// Terminology
+//
+// Here are some terminologies that we used in developing this library, which
+// may help reader understand the internal and API.
+//
+// Record is a single cell in row or column, or the smallest building block of
+// dataset.
+//
+// Row is a horizontal representation of records in dataset.
+//
+// Column is a vertical representation of records in dataset.
+// Each column has a unique name and has the same type data.
+//
+// Dataset is a collection of rows and columns.
+//
+// Given those definitions we can draw the representation of rows, columns, or
+// matrix:
+//
+// 	        COL-0  COL-1 ...  COL-x
+// 	ROW-0: record record ... record
+// 	ROW-1: record record ... record
+// 	...
+// 	ROW-y: record record ... record
+//
+// Record Type
+//
+// There are only three valid types in record: int64, float64, and string.
+//
+// Dataset Mode
+//
+// Tabula has three modes for dataset: rows, columns, or matrix.
+//
+// For example, given a table of data,
+//
+//     col1,col2,col3
+//     a,b,c
+//     1,2,3
+//
+// "rows" mode is where each line saved in its own slice, resulting in Rows:
+//
+//     Rows[0]: [a b c]
+//     Rows[1]: [1 2 3]
+//
+// "columns" mode is where each line saved by columns, resulting in Columns:
+//
+//     Columns[0]: {col1 0 0 [] [a 1]}
+//     Columns[1]: {col2 0 0 [] [b 2]}
+//     Columns[2]: {col3 0 0 [] [c 3]}
+//
+// Unlike rows mode, each column contains metadata including column name,
+// type, flag, and value space (all possible values that the column _may_
+// contain).
+//
+// "matrix" mode is where each record is saved both in row and column.
+//
+// Matrix mode consumes more memory but gives a flexible way to manage records.
+//
+//
+package tabula
diff --git a/lib/tabula/tabula_test.go b/lib/tabula/tabula_test.go
new file mode 100644
index 00000000..6b13d60c
--- /dev/null
+++ b/lib/tabula/tabula_test.go
@@ -0,0 +1,81 @@
+// Copyright 2017, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package tabula
+
+import (
+	"os"
+)
+
+var (
+	// traces is the buffer that printStackTrace reads a stack dump from.
+	// NOTE(review): nothing visible here fills it; presumably a caller
+	// writes the stack into it first -- confirm.
+	traces = make([]byte, 1024)
+)
+
+// printStackTrace writes to standard error the part of `traces` that runs
+// from the third newline up to and including the fifth newline.
+//
+// Nothing is written when `traces` holds fewer than five newlines.  With
+// only three or four newlines the start offset would be set while the end
+// offset is still zero, and slicing with start > end panics.
+func printStackTrace() {
+	var lines, start, end int
+
+	for x, b := range traces {
+		if b != '\n' {
+			continue
+		}
+		lines++
+		if lines == 3 {
+			start = x
+		} else if lines == 5 {
+			end = x + 1
+			break
+		}
+	}
+
+	// No complete range was found.
+	if end <= start {
+		return
+	}
+
+	// Best-effort debug output: there is no caller to report a failed
+	// write to, so the error is deliberately discarded.
+	_, _ = os.Stderr.Write(traces[start:end])
+}
+
+// testColTypes lists the record type of each column in rowsData, by column
+// index.
+var testColTypes = []int{
+	TInteger,
+	TInteger,
+	TInteger,
+	TString,
+}
+
+// testColNames lists one name per column in rowsData.
+var testColNames = []string{"int01", "int02", "int03", "class"}
+
+// Testing data and function for Rows and MapRows
+var rowsData = [][]string{
+	{"1", "5", "9", "+"},
+	{"2", "6", "0", "-"},
+	{"3", "7", "1", "-"},
+	{"4", "8", "2", "+"},
+}
+
+// testClassIdx is the index of the class column in rowsData.
+var testClassIdx = 3
+
+// rowsExpect holds the expected printed form of each row in rowsData, in the
+// same order.
+var rowsExpect = []string{
+	"&[1 5 9 +]",
+	"&[2 6 0 -]",
+	"&[3 7 1 -]",
+	"&[4 8 2 +]",
+}
+
+// groupByExpect is the expected printed form of rowsData after grouping the
+// rows by the class column.
+var groupByExpect = "[{+ &[1 5 9 +]&[4 8 2 +]} {- &[2 6 0 -]&[3 7 1 -]}]"
+
+// initRows builds the test rows from rowsData, converting each field to the
+// record type that testColTypes lists for its column.  It returns nil rows
+// and the error as soon as one field fails to convert.
+func initRows() (rows Rows, e error) {
+	for _, fields := range rowsData {
+		row := make(Row, 0)
+
+		for j, field := range fields {
+			rec, err := NewRecordBy(field, testColTypes[j])
+			if err != nil {
+				return nil, err
+			}
+
+			row = append(row, rec)
+		}
+
+		rows.PushBack(&row)
+	}
+
+	return rows, nil
+}
author	Shulhan <ms@kilabit.info>	2018-09-17 01:21:27 +0700
committer	Shulhan <ms@kilabit.info>	2018-09-18 01:50:21 +0700
commit	44b26edf7f390db383fe025454be0c4e30cfbd9b (patch)
tree	84d02953bc9095312182534936c1b60667957f07 /lib
parent	4a820ec157501c957d2e30f1670656cceec5c044 (diff)
download	pakakeh.go-44b26edf7f390db383fe025454be0c4e30cfbd9b.tar.xz