aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2018-09-17 05:04:26 +0700
committerShulhan <ms@kilabit.info>2018-09-18 01:50:21 +0700
commit1cae4ca316afa5d177fdbf7a98a0ec7fffe76a3e (patch)
tree5fa83fc0faa31e09cae82ac4d467cf8ba5f87fc2
parent446fef94cd712861221c0098dcdd9ae52aaed0eb (diff)
downloadpakakeh.go-1cae4ca316afa5d177fdbf7a98a0ec7fffe76a3e.tar.xz
Merge package "github.com/shuLhan/go-mining"
-rw-r--r--lib/mining/.gitignore26
-rw-r--r--lib/mining/LICENSE38
-rw-r--r--lib/mining/README.md26
-rw-r--r--lib/mining/classifier/cart/cart.go480
-rw-r--r--lib/mining/classifier/cart/cart_test.go62
-rw-r--r--lib/mining/classifier/cart/node.go44
-rw-r--r--lib/mining/classifier/cm.go442
-rw-r--r--lib/mining/classifier/cm_test.go69
-rw-r--r--lib/mining/classifier/crf/crf.go470
-rw-r--r--lib/mining/classifier/crf/crf_test.go92
-rw-r--r--lib/mining/classifier/rf/rf.go363
-rw-r--r--lib/mining/classifier/rf/rf_bench_test.go22
-rw-r--r--lib/mining/classifier/rf/rf_test.go190
-rw-r--r--lib/mining/classifier/runtime.go443
-rw-r--r--lib/mining/classifier/stat.go176
-rw-r--r--lib/mining/classifier/stats.go143
-rw-r--r--lib/mining/classifier/stats_interface.go68
-rw-r--r--lib/mining/cmd/cart/iris.dsv35
-rw-r--r--lib/mining/cmd/cart/main.go107
-rw-r--r--lib/mining/cmd/crf/main.go205
-rw-r--r--lib/mining/cmd/crf/phoneme.dsv45
-rw-r--r--lib/mining/cmd/lnsmote/main.go188
-rw-r--r--lib/mining/cmd/lnsmote/phoneme.dsv45
-rw-r--r--lib/mining/cmd/rf/iris.dsv41
-rw-r--r--lib/mining/cmd/rf/main.go188
-rw-r--r--lib/mining/cmd/rf/phoneme.dsv45
-rw-r--r--lib/mining/cmd/smote/main.go193
-rw-r--r--lib/mining/cmd/smote/phoneme.dsv40
-rw-r--r--lib/mining/gain/gini/gini.go461
-rw-r--r--lib/mining/gain/gini/gini_test.go59
-rw-r--r--lib/mining/gain/gini/ginifloat.go168
-rw-r--r--lib/mining/knn/knn.go109
-rw-r--r--lib/mining/knn/knn_test.go62
-rw-r--r--lib/mining/knn/neighbor.go150
-rw-r--r--lib/mining/knn/neighbor_test.go84
-rw-r--r--lib/mining/math/math.go68
-rw-r--r--lib/mining/math/math_test.go48
-rw-r--r--lib/mining/resampling/interface.go52
-rw-r--r--lib/mining/resampling/lnsmote/lnsmote.go287
-rw-r--r--lib/mining/resampling/lnsmote/lnsmote_test.go74
-rw-r--r--lib/mining/resampling/lnsmote/phoneme_lnsmote.dsv38
-rw-r--r--lib/mining/resampling/smote/phoneme_smote.dsv38
-rw-r--r--lib/mining/resampling/smote/smote.go183
-rw-r--r--lib/mining/resampling/smote/smote_test.go49
-rw-r--r--lib/mining/testdata/forensic_glass/fgl.data214
-rw-r--r--lib/mining/testdata/forensic_glass/fgl.dsv97
-rw-r--r--lib/mining/testdata/forensic_glass/glass.data214
-rw-r--r--lib/mining/testdata/forensic_glass/glass.dsv98
-rw-r--r--lib/mining/testdata/forensic_glass/glass.names90
-rw-r--r--lib/mining/testdata/forensic_glass/glass.tag27
-rw-r--r--lib/mining/testdata/iris/iris.dsv35
-rw-r--r--lib/mining/testdata/phoneme/phoneme.dsv59
-rw-r--r--lib/mining/testdata/wvc2010lnsmote/wvc2010_features.lnsmote.dsv129
-rw-r--r--lib/mining/tree/binary/binary.go64
-rw-r--r--lib/mining/tree/binary/binary_test.go47
-rw-r--r--lib/mining/tree/binary/btnode.go77
56 files changed, 7367 insertions, 0 deletions
diff --git a/lib/mining/.gitignore b/lib/mining/.gitignore
new file mode 100644
index 00000000..4b45a114
--- /dev/null
+++ b/lib/mining/.gitignore
@@ -0,0 +1,26 @@
+cmd/cart/cart
+cmd/crf/crf
+cmd/lnsmote/lnsmote
+cmd/rf/rf
+cmd/smote/smote
+resampling/lnsmote/lnsmote.outliers
+resampling/lnsmote/phoneme_lnsmote.csv
+resampling/lnsmote/synthetic.csv
+resampling/smote/phoneme_smote.csv
+resampling/smote/synthetic.csv
+resampling/smote/testdata/phoneme.csv
+testdata/phoneme/phoneme.csv
+*.bak
+*.bench
+*.cprof
+*.csv
+*.dat
+*.mprof
+*.oob
+*.perf
+*.prof
+*.rej
+*.sh
+*.stat
+*.stats
+*.test
diff --git a/lib/mining/LICENSE b/lib/mining/LICENSE
new file mode 100644
index 00000000..ec6ea686
--- /dev/null
+++ b/lib/mining/LICENSE
@@ -0,0 +1,38 @@
+Copyright (C) 2015, Mhd Sulhan (ms@kilabit.info). All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of Kilabit nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY M.SHULHAN "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ --- --- --- --- --- --- ---
+
+ TT TT II BB AAAA LLLLLL II KKKKKKKK
+ TT TT II BB AA AA LL LL II KK
+ TTTT II BB AA AA LL LL II KK
+ TT TT II BB AAAAAAAA LLLLLL II KK
+ TT TT II BB AA AA LL LL II KK
+ TT TT II BBBBBBBB AA AA LLLLLL II KK
+
+Website: http://kilabit.info
+Contact: ms@kilabit.info
diff --git a/lib/mining/README.md b/lib/mining/README.md
new file mode 100644
index 00000000..821a6aa3
--- /dev/null
+++ b/lib/mining/README.md
@@ -0,0 +1,26 @@
+[![GoDoc](https://godoc.org/github.com/shuLhan/share/lib/mining?status.svg)](https://godoc.org/github.com/shuLhan/share/lib/mining)
+[![Go Report Card](https://goreportcard.com/badge/github.com/shuLhan/share/lib/mining)](https://goreportcard.com/report/github.com/shuLhan/share/lib/mining)
+
+# go-mining
+
+Go-mining is a small library for data mining.
+
+The library is written in [Go language](https://github.com/golang/go).
+
+## Features
+
+### Classifiers
+
+- CART
+- Random Forest
+- Cascaded Random Forest
+- K-Nearest Neighbourhood
+
+### Resampling
+
+- SMOTE
+- LN-SMOTE (Local Neighbourhood SMOTE)
+
+### Miscellaneous
+
+- Gini index
diff --git a/lib/mining/classifier/cart/cart.go b/lib/mining/classifier/cart/cart.go
new file mode 100644
index 00000000..449781d9
--- /dev/null
+++ b/lib/mining/classifier/cart/cart.go
@@ -0,0 +1,480 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package cart implements the Classification and Regression Tree by Breiman, et al.
+// CART is binary decision tree.
+//
+// Breiman, Leo, et al. Classification and regression trees. CRC press,
+// 1984.
+//
+// The implementation is based on Data Mining book,
+//
+// Han, Jiawei, Micheline Kamber, and Jian Pei. Data mining: concepts and
+// techniques: concepts and techniques. Elsevier, 2011.
+//
+package cart
+
+import (
+ "fmt"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/mining/gain/gini"
+ "github.com/shuLhan/share/lib/mining/tree/binary"
+ "github.com/shuLhan/share/lib/numbers"
+ libstrings "github.com/shuLhan/share/lib/strings"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+const (
+ // SplitMethodGini if defined in Runtime, the dataset will be splitted
+ // using Gini gain for each possible value or partition.
+ //
+ // This option is used in Runtime.SplitMethod.
+ SplitMethodGini = "gini"
+)
+
+const (
+ // ColFlagParent denote that the column is parent/split node.
+ ColFlagParent = 1
+ // ColFlagSkip denote that the column would be skipped.
+ ColFlagSkip = 2
+)
+
+//
+// Runtime data for building CART.
+//
+type Runtime struct {
+ // SplitMethod define the criteria to used for splitting.
+ SplitMethod string `json:"SplitMethod"`
+ // NRandomFeature if less or equal to zero compute gain on all feature,
+ // otherwise select n random feature and compute gain only on selected
+ // features.
+ NRandomFeature int `json:"NRandomFeature"`
+ // OOBErrVal is the last out-of-bag error value in the tree.
+ OOBErrVal float64
+ // Tree in classification.
+ Tree binary.Tree
+}
+
+//
+// New create new Runtime object.
+//
+func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) (
+ *Runtime, error,
+) {
+ runtime := &Runtime{
+ SplitMethod: splitMethod,
+ NRandomFeature: nRandomFeature,
+ Tree: binary.Tree{},
+ }
+
+ e := runtime.Build(D)
+ if e != nil {
+ return nil, e
+ }
+
+ return runtime, nil
+}
+
+//
+// Build will create a tree using CART algorithm.
+//
+func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) {
+ // Re-check input configuration.
+ switch runtime.SplitMethod {
+ case SplitMethodGini:
+ // Do nothing.
+ default:
+ // Set default split method to Gini index.
+ runtime.SplitMethod = SplitMethodGini
+ }
+
+ runtime.Tree.Root, e = runtime.splitTreeByGain(D)
+
+ return
+}
+
+//
+// splitTreeByGain calculate the gain in all dataset, and split into two node:
+// left and right.
+//
+// Return node with the split information.
+//
+func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
+ node *binary.BTNode,
+ e error,
+) {
+ node = &binary.BTNode{}
+
+ D.RecountMajorMinor()
+
+ // if dataset is empty return node labeled with majority classes in
+ // dataset.
+ nrow := D.GetNRow()
+
+ if nrow <= 0 {
+ if debug.Value >= 2 {
+ fmt.Printf("[cart] empty dataset (%s) : %v\n",
+ D.MajorityClass(), D)
+ }
+
+ node.Value = NodeValue{
+ IsLeaf: true,
+ Class: D.MajorityClass(),
+ Size: 0,
+ }
+ return node, nil
+ }
+
+ // if all dataset is in the same class, return node as leaf with class
+ // is set to that class.
+ single, name := D.IsInSingleClass()
+ if single {
+ if debug.Value >= 2 {
+ fmt.Printf("[cart] in single class (%s): %v\n", name,
+ D.GetColumns())
+ }
+
+ node.Value = NodeValue{
+ IsLeaf: true,
+ Class: name,
+ Size: nrow,
+ }
+ return node, nil
+ }
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] D:", D)
+ }
+
+ // calculate the Gini gain for each attribute.
+ gains := runtime.computeGain(D)
+
+ // get attribute with maximum Gini gain.
+ MaxGainIdx := gini.FindMaxGain(&gains)
+ MaxGain := gains[MaxGainIdx]
+
+ // if maxgain value is 0, use majority class as node and terminate
+ // the process
+ if MaxGain.GetMaxGainValue() == 0 {
+ if debug.Value >= 2 {
+ fmt.Println("[cart] max gain 0 with target",
+ D.GetClassAsStrings(),
+ " and majority class is ", D.MajorityClass())
+ }
+
+ node.Value = NodeValue{
+ IsLeaf: true,
+ Class: D.MajorityClass(),
+ Size: 0,
+ }
+ return node, nil
+ }
+
+ // using the sorted index in MaxGain, sort all field in dataset
+ tabula.SortColumnsByIndex(D, MaxGain.SortedIndex)
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] maxgain:", MaxGain)
+ }
+
+ // Now that we have attribute with max gain in MaxGainIdx, and their
+	// gain and partition value in Gains[MaxGainIdx] and
+ // GetMaxPartValue(), we split the dataset based on type of max-gain
+ // attribute.
+ // If its continuous, split the attribute using numeric value.
+ // If its discrete, split the attribute using subset (partition) of
+ // nominal values.
+ var splitV interface{}
+
+ if MaxGain.IsContinu {
+ splitV = MaxGain.GetMaxPartGainValue()
+ } else {
+ attrPartV := MaxGain.GetMaxPartGainValue()
+ attrSubV := attrPartV.(libstrings.Row)
+ splitV = attrSubV[0]
+ }
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] maxgainindex:", MaxGainIdx)
+ fmt.Println("[cart] split v:", splitV)
+ }
+
+ node.Value = NodeValue{
+ SplitAttrName: D.GetColumn(MaxGainIdx).GetName(),
+ IsLeaf: false,
+ IsContinu: MaxGain.IsContinu,
+ Size: nrow,
+ SplitAttrIdx: MaxGainIdx,
+ SplitV: splitV,
+ }
+
+ dsL, dsR, e := tabula.SplitRowsByValue(D, MaxGainIdx, splitV)
+
+ if e != nil {
+ return node, e
+ }
+
+ splitL := dsL.(tabula.ClasetInterface)
+ splitR := dsR.(tabula.ClasetInterface)
+
+ // Set the flag to parent in attribute referenced by
+ // MaxGainIdx, so it will not computed again in the next round.
+ cols := splitL.GetColumns()
+ for x := range *cols {
+ if x == MaxGainIdx {
+ (*cols)[x].Flag = ColFlagParent
+ } else {
+ (*cols)[x].Flag = 0
+ }
+ }
+
+ cols = splitR.GetColumns()
+ for x := range *cols {
+ if x == MaxGainIdx {
+ (*cols)[x].Flag = ColFlagParent
+ } else {
+ (*cols)[x].Flag = 0
+ }
+ }
+
+ nodeLeft, e := runtime.splitTreeByGain(splitL)
+ if e != nil {
+ return node, e
+ }
+
+ nodeRight, e := runtime.splitTreeByGain(splitR)
+ if e != nil {
+ return node, e
+ }
+
+ node.SetLeft(nodeLeft)
+ node.SetRight(nodeRight)
+
+ return node, nil
+}
+
+// SelectRandomFeature if NRandomFeature is greater than zero, select and
+// compute gain in n random features instead of in all features
+func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) {
+ if runtime.NRandomFeature <= 0 {
+ // all features selected
+ return
+ }
+
+ ncols := D.GetNColumn()
+
+ // count all features minus class
+ nfeature := ncols - 1
+ if runtime.NRandomFeature >= nfeature {
+ // Do nothing if number of random feature equal or greater than
+ // number of feature in dataset.
+ return
+ }
+
+ // exclude class index and parent node index
+ excludeIdx := []int{D.GetClassIndex()}
+ cols := D.GetColumns()
+ for x, col := range *cols {
+ if (col.Flag & ColFlagParent) == ColFlagParent {
+ excludeIdx = append(excludeIdx, x)
+ } else {
+ (*cols)[x].Flag |= ColFlagSkip
+ }
+ }
+
+ // Select random features excluding feature in `excludeIdx`.
+ var pickedIdx []int
+ for x := 0; x < runtime.NRandomFeature; x++ {
+ idx := numbers.IntPickRandPositive(ncols, false, pickedIdx,
+ excludeIdx)
+ pickedIdx = append(pickedIdx, idx)
+
+ // Remove skip flag on selected column
+ col := D.GetColumn(idx)
+ col.Flag = col.Flag &^ ColFlagSkip
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[cart] selected random features:", pickedIdx)
+ fmt.Println("[cart] selected columns :", D.GetColumns())
+ }
+}
+
+//
+// computeGain calculate the gini index for each value in each attribute.
+//
+func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
+ gains []gini.Gini,
+) {
+ switch runtime.SplitMethod {
+ case SplitMethodGini:
+ // create gains value for all attribute minus target class.
+ gains = make([]gini.Gini, D.GetNColumn())
+ }
+
+ runtime.SelectRandomFeature(D)
+
+ classVS := D.GetClassValueSpace()
+ classIdx := D.GetClassIndex()
+ classType := D.GetClassType()
+
+ for x, col := range *D.GetColumns() {
+ // skip class attribute.
+ if x == classIdx {
+ continue
+ }
+
+ // skip column flagged with parent
+ if (col.Flag & ColFlagParent) == ColFlagParent {
+ gains[x].Skip = true
+ continue
+ }
+
+ // ignore column flagged with skip
+ if (col.Flag & ColFlagSkip) == ColFlagSkip {
+ gains[x].Skip = true
+ continue
+ }
+
+ // compute gain.
+ if col.GetType() == tabula.TReal {
+ attr := col.ToFloatSlice()
+
+ if classType == tabula.TString {
+ target := D.GetClassAsStrings()
+ gains[x].ComputeContinu(&attr, &target,
+ &classVS)
+ } else {
+ targetReal := D.GetClassAsReals()
+ classVSReal := libstrings.ToFloat64(classVS)
+
+ gains[x].ComputeContinuFloat(&attr,
+ &targetReal, &classVSReal)
+ }
+ } else {
+ attr := col.ToStringSlice()
+ attrV := col.ValueSpace
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] attr :", attr)
+ fmt.Println("[cart] attrV:", attrV)
+ }
+
+ target := D.GetClassAsStrings()
+ gains[x].ComputeDiscrete(&attr, &attrV, &target,
+ &classVS)
+ }
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] gain :", gains[x])
+ }
+ }
+ return
+}
+
+//
+// Classify return the prediction of one sample.
+//
+func (runtime *Runtime) Classify(data *tabula.Row) (class string) {
+ node := runtime.Tree.Root
+ nodev := node.Value.(NodeValue)
+
+ for !nodev.IsLeaf {
+ if nodev.IsContinu {
+ splitV := nodev.SplitV.(float64)
+ attrV := (*data)[nodev.SplitAttrIdx].Float()
+
+ if attrV < splitV {
+ node = node.Left
+ } else {
+ node = node.Right
+ }
+ } else {
+ splitV := nodev.SplitV.([]string)
+ attrV := (*data)[nodev.SplitAttrIdx].String()
+
+ if libstrings.IsContain(splitV, attrV) {
+ node = node.Left
+ } else {
+ node = node.Right
+ }
+ }
+ nodev = node.Value.(NodeValue)
+ }
+
+ return nodev.Class
+}
+
+//
+// ClassifySet set the class attribute based on tree classification.
+//
+func (runtime *Runtime) ClassifySet(data tabula.ClasetInterface) (e error) {
+ nrow := data.GetNRow()
+ targetAttr := data.GetClassColumn()
+
+ for i := 0; i < nrow; i++ {
+ class := runtime.Classify(data.GetRow(i))
+
+ _ = (*targetAttr).Records[i].SetValue(class, tabula.TString)
+ }
+
+ return
+}
+
+//
+// CountOOBError process out-of-bag data on tree and return error value.
+//
+func (runtime *Runtime) CountOOBError(oob tabula.Claset) (
+ errval float64,
+ e error,
+) {
+ // save the original target to be compared later.
+ origTarget := oob.GetClassAsStrings()
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] OOB:", oob.Columns)
+ fmt.Println("[cart] TREE:", &runtime.Tree)
+ }
+
+ // reset the target.
+ oobtarget := oob.GetClassColumn()
+ oobtarget.ClearValues()
+
+ e = runtime.ClassifySet(&oob)
+
+ if e != nil {
+ // set original target values back.
+ oobtarget.SetValues(origTarget)
+ return
+ }
+
+ target := oobtarget.ToStringSlice()
+
+ if debug.Value >= 2 {
+ fmt.Println("[cart] original target:", origTarget)
+ fmt.Println("[cart] classify target:", target)
+ }
+
+ // count how many target value is miss-classified.
+ runtime.OOBErrVal, _, _ = libstrings.CountMissRate(origTarget, target)
+
+ // set original target values back.
+ oobtarget.SetValues(origTarget)
+
+ return runtime.OOBErrVal, nil
+}
+
+//
+// String yes, it will print it JSON like format.
+//
+func (runtime *Runtime) String() (s string) {
+ s = fmt.Sprintf("NRandomFeature: %d\n"+
+ " SplitMethod : %s\n"+
+ " Tree :\n%v", runtime.NRandomFeature,
+ runtime.SplitMethod,
+ runtime.Tree.String())
+ return s
+}
diff --git a/lib/mining/classifier/cart/cart_test.go b/lib/mining/classifier/cart/cart_test.go
new file mode 100644
index 00000000..14e89b12
--- /dev/null
+++ b/lib/mining/classifier/cart/cart_test.go
@@ -0,0 +1,62 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cart
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/tabula"
+ "github.com/shuLhan/share/lib/test"
+)
+
+const (
+ NRows = 150
+)
+
+func TestCART(t *testing.T) {
+ fds := "../../testdata/iris/iris.dsv"
+
+ ds := tabula.Claset{}
+
+ _, e := dsv.SimpleRead(fds, &ds)
+ if nil != e {
+ t.Fatal(e)
+ }
+
+ fmt.Println("[cart_test] class index:", ds.GetClassIndex())
+
+ // copy target to be compared later.
+ targetv := ds.GetClassAsStrings()
+
+ test.Assert(t, "", NRows, ds.GetNRow(), true)
+
+ // Build CART tree.
+ CART, e := New(&ds, SplitMethodGini, 0)
+ if e != nil {
+ t.Fatal(e)
+ }
+
+ fmt.Println("[cart_test] CART Tree:\n", CART)
+
+ // Create test set
+ testset := tabula.Claset{}
+ _, e = dsv.SimpleRead(fds, &testset)
+
+ if nil != e {
+ t.Fatal(e)
+ }
+
+ testset.GetClassColumn().ClearValues()
+
+	// Classify test set
+ e = CART.ClassifySet(&testset)
+ if nil != e {
+ t.Fatal(e)
+ }
+
+ test.Assert(t, "", targetv, testset.GetClassAsStrings(), true)
+}
diff --git a/lib/mining/classifier/cart/node.go b/lib/mining/classifier/cart/node.go
new file mode 100644
index 00000000..b64dd13c
--- /dev/null
+++ b/lib/mining/classifier/cart/node.go
@@ -0,0 +1,44 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cart
+
+import (
+ "fmt"
+ "reflect"
+)
+
+//
+// NodeValue of tree in CART.
+//
+type NodeValue struct {
+ // Class of leaf node.
+ Class string
+ // SplitAttrName define the name of attribute which cause the split.
+ SplitAttrName string
+ // IsLeaf define whether node is a leaf or not.
+ IsLeaf bool
+ // IsContinu define whether the node split is continuous or discrete.
+ IsContinu bool
+ // Size define number of sample that this node hold before splitting.
+ Size int
+ // SplitAttrIdx define the attribute which cause the split.
+ SplitAttrIdx int
+ // SplitV define the split value.
+ SplitV interface{}
+}
+
+//
+// String will return the value of node for printable.
+//
+func (nodev *NodeValue) String() (s string) {
+ if nodev.IsLeaf {
+ s = fmt.Sprintf("Class: %s", nodev.Class)
+ } else {
+ s = fmt.Sprintf("(SplitValue: %v)",
+ reflect.ValueOf(nodev.SplitV))
+ }
+
+ return s
+}
diff --git a/lib/mining/classifier/cm.go b/lib/mining/classifier/cm.go
new file mode 100644
index 00000000..0dc2ee05
--- /dev/null
+++ b/lib/mining/classifier/cm.go
@@ -0,0 +1,442 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package classifier
+
+import (
+ "fmt"
+ "strconv"
+
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+//
+// CM represent the matrix of classification.
+//
+type CM struct {
+ tabula.Dataset
+ // rowNames contain name in each row.
+ rowNames []string
+ // nSamples contain number of class.
+ nSamples int64
+ // nTrue contain number of true positive and negative.
+ nTrue int64
+ // nFalse contain number of false positive and negative.
+ nFalse int64
+
+ // tpIds contain index of true-positive samples.
+ tpIds []int
+ // fpIds contain index of false-positive samples.
+ fpIds []int
+ // tnIds contain index of true-negative samples.
+ tnIds []int
+ // fnIds contain index of false-negative samples.
+ fnIds []int
+}
+
+//
+// initByNumeric will initialize confusion matrix using numeric value space.
+//
+func (cm *CM) initByNumeric(vs []int64) {
+ var colTypes []int
+ var colNames []string
+
+ for _, v := range vs {
+ vstr := strconv.FormatInt(v, 10)
+ colTypes = append(colTypes, tabula.TInteger)
+ colNames = append(colNames, vstr)
+ cm.rowNames = append(cm.rowNames, vstr)
+ }
+
+ // class error column
+ colTypes = append(colTypes, tabula.TReal)
+ colNames = append(colNames, "class_error")
+
+ cm.Dataset.Init(tabula.DatasetModeMatrix, colTypes, colNames)
+}
+
+//
+// ComputeStrings will calculate confusion matrix using targets and predictions
+// class values.
+//
+func (cm *CM) ComputeStrings(valueSpace, targets, predictions []string) {
+ cm.init(valueSpace)
+
+ for x, target := range valueSpace {
+ col := cm.GetColumn(x)
+
+ for _, predict := range valueSpace {
+ cnt := cm.countTargetPrediction(target, predict,
+ targets, predictions)
+
+ col.PushBack(tabula.NewRecordInt(cnt))
+ }
+
+ cm.PushColumnToRows(*col)
+ }
+
+ cm.computeClassError()
+}
+
+//
+// ComputeNumeric will calculate confusion matrix using targets and predictions
+// values.
+//
+func (cm *CM) ComputeNumeric(vs, actuals, predictions []int64) {
+ cm.initByNumeric(vs)
+
+ for x, act := range vs {
+ col := cm.GetColumn(x)
+
+ for _, pred := range vs {
+ cnt := cm.countNumeric(act, pred, actuals, predictions)
+
+ rec := tabula.NewRecordInt(cnt)
+ col.PushBack(rec)
+ }
+
+ cm.PushColumnToRows(*col)
+ }
+
+ cm.computeClassError()
+}
+
+//
+// create will initialize confusion matrix using value space.
+//
+func (cm *CM) init(valueSpace []string) {
+ var colTypes []int
+ var colNames []string
+
+ for _, v := range valueSpace {
+ colTypes = append(colTypes, tabula.TInteger)
+ colNames = append(colNames, v)
+ cm.rowNames = append(cm.rowNames, v)
+ }
+
+ // class error column
+ colTypes = append(colTypes, tabula.TReal)
+ colNames = append(colNames, "class_error")
+
+ cm.Dataset.Init(tabula.DatasetModeMatrix, colTypes, colNames)
+}
+
+//
+// countTargetPrediction will count and return number of true positive or false
+// positive in predictions using targets values.
+//
+func (cm *CM) countTargetPrediction(target, predict string,
+ targets, predictions []string,
+) (
+ cnt int64,
+) {
+ predictslen := len(predictions)
+
+ for x, v := range targets {
+ // In case out of range, where predictions length less than
+ // targets length.
+ if x >= predictslen {
+ break
+ }
+ if v != target {
+ continue
+ }
+ if predict == predictions[x] {
+ cnt++
+ }
+ }
+ return
+}
+
+//
+// countNumeric will count and return number of `pred` in predictions where
+// actuals value is `act`.
+//
+func (cm *CM) countNumeric(act, pred int64, actuals, predictions []int64) (
+ cnt int64,
+) {
+ // Find minimum length to mitigate out-of-range loop.
+ minlen := len(actuals)
+ if len(predictions) < minlen {
+ minlen = len(predictions)
+ }
+
+ for x := 0; x < minlen; x++ {
+ if actuals[x] != act {
+ continue
+ }
+ if predictions[x] != pred {
+ continue
+ }
+ cnt++
+ }
+ return cnt
+}
+
+//
+// computeClassError will compute the classification error in matrix.
+//
+func (cm *CM) computeClassError() {
+ var tp, fp int64
+
+ cm.nSamples = 0
+ cm.nFalse = 0
+
+ classcol := cm.GetNColumn() - 1
+ col := cm.GetColumnClassError()
+ rows := cm.GetDataAsRows()
+ for x, row := range *rows {
+ for y, cell := range *row {
+ if y == classcol {
+ break
+ }
+ if x == y {
+ tp = cell.Integer()
+ } else {
+ fp += cell.Integer()
+ }
+ }
+
+ nSamplePerRow := tp + fp
+ errv := float64(fp) / float64(nSamplePerRow)
+ col.PushBack(tabula.NewRecordReal(errv))
+
+ cm.nSamples += nSamplePerRow
+ cm.nTrue += tp
+ cm.nFalse += fp
+ }
+
+ cm.PushColumnToRows(*col)
+}
+
+//
+// GroupIndexPredictions given index of samples, group the samples by their
+// class of prediction. For example,
+//
+// sampleIds: [0, 1, 2, 3, 4, 5]
+// actuals: [1, 1, 0, 0, 1, 0]
+// predictions: [1, 0, 1, 0, 1, 1]
+//
+// This function will group the index by true-positive, false-positive,
+// true-negative, and false-negative, which result in,
+//
+// true-positive indices: [0, 4]
+// false-positive indices: [2, 5]
+// true-negative indices: [3]
+// false-negative indices: [1]
+//
+// This function assume that positive value as "1" and negative value as "0".
+//
+func (cm *CM) GroupIndexPredictions(sampleIds []int,
+ actuals, predictions []int64,
+) {
+ // Reset indices.
+ cm.tpIds = nil
+ cm.fpIds = nil
+ cm.tnIds = nil
+ cm.fnIds = nil
+
+ // Make sure we are not out-of-range when looping, always pick the
+ // minimum length between the three parameters.
+ min := len(sampleIds)
+ if len(actuals) < min {
+ min = len(actuals)
+ }
+ if len(predictions) < min {
+ min = len(predictions)
+ }
+
+ for x := 0; x < min; x++ {
+ if actuals[x] == 1 {
+ if predictions[x] == 1 {
+ cm.tpIds = append(cm.tpIds, sampleIds[x])
+ } else {
+ cm.fnIds = append(cm.fnIds, sampleIds[x])
+ }
+ } else {
+ if predictions[x] == 1 {
+ cm.fpIds = append(cm.fpIds, sampleIds[x])
+ } else {
+ cm.tnIds = append(cm.tnIds, sampleIds[x])
+ }
+ }
+ }
+}
+
+//
+// GroupIndexPredictionsStrings is an alternative to GroupIndexPredictions
+// which work with string class.
+//
+func (cm *CM) GroupIndexPredictionsStrings(sampleIds []int,
+ actuals, predictions []string,
+) {
+ if len(sampleIds) <= 0 {
+ return
+ }
+
+ // Reset indices.
+ cm.tpIds = nil
+ cm.fpIds = nil
+ cm.tnIds = nil
+ cm.fnIds = nil
+
+ // Make sure we are not out-of-range when looping, always pick the
+ // minimum length between the three parameters.
+ min := len(sampleIds)
+ if len(actuals) < min {
+ min = len(actuals)
+ }
+ if len(predictions) < min {
+ min = len(predictions)
+ }
+
+ for x := 0; x < min; x++ {
+ if actuals[x] == "1" {
+ if predictions[x] == "1" {
+ cm.tpIds = append(cm.tpIds, sampleIds[x])
+ } else {
+ cm.fnIds = append(cm.fnIds, sampleIds[x])
+ }
+ } else {
+ if predictions[x] == "1" {
+ cm.fpIds = append(cm.fpIds, sampleIds[x])
+ } else {
+ cm.tnIds = append(cm.tnIds, sampleIds[x])
+ }
+ }
+ }
+}
+
+//
+// GetColumnClassError return the last column which is the column that contain
+// the error of classification.
+//
+func (cm *CM) GetColumnClassError() *tabula.Column {
+ return cm.GetColumn(cm.GetNColumn() - 1)
+}
+
+//
+// GetTrueRate return true-positive rate in term of
+//
+// true-positive / (true-positive + false-positive)
+//
+func (cm *CM) GetTrueRate() float64 {
+ return float64(cm.nTrue) / float64(cm.nTrue+cm.nFalse)
+}
+
+//
+// GetFalseRate return false-positive rate in term of,
+//
+// false-positive / (false-positive + true negative)
+//
+func (cm *CM) GetFalseRate() float64 {
+ return float64(cm.nFalse) / float64(cm.nTrue+cm.nFalse)
+}
+
+//
+// TP return number of true-positive in confusion matrix.
+//
+func (cm *CM) TP() int {
+ row := cm.GetRow(0)
+ if row == nil {
+ return 0
+ }
+
+ v, _ := row.GetIntAt(0)
+ return int(v)
+}
+
+//
+// FP return number of false-positive in confusion matrix.
+//
+func (cm *CM) FP() int {
+ row := cm.GetRow(0)
+ if row == nil {
+ return 0
+ }
+
+ v, _ := row.GetIntAt(1)
+ return int(v)
+}
+
+//
+// FN return number of false-negative.
+//
+func (cm *CM) FN() int {
+ row := cm.GetRow(1)
+ if row == nil {
+ return 0
+ }
+ v, _ := row.GetIntAt(0)
+ return int(v)
+}
+
+//
+// TN return number of true-negative.
+//
+func (cm *CM) TN() int {
+ row := cm.GetRow(1)
+ if row == nil {
+ return 0
+ }
+ v, _ := row.GetIntAt(1)
+ return int(v)
+}
+
+//
+// TPIndices return indices of all true-positive samples.
+//
+func (cm *CM) TPIndices() []int {
+ return cm.tpIds
+}
+
+//
+// FNIndices return indices of all false-negative samples.
+//
+func (cm *CM) FNIndices() []int {
+ return cm.fnIds
+}
+
+//
+// FPIndices return indices of all false-positive samples.
+//
+func (cm *CM) FPIndices() []int {
+ return cm.fpIds
+}
+
+//
+// TNIndices return indices of all true-negative samples.
+//
+func (cm *CM) TNIndices() []int {
+ return cm.tnIds
+}
+
+//
+// String will return the output of confusion matrix in table like format.
+//
+func (cm *CM) String() (s string) {
+ s += "Confusion Matrix:\n"
+
+ // Row header: column names.
+ s += "\t"
+ for _, col := range cm.GetColumnsName() {
+ s += col + "\t"
+ }
+ s += "\n"
+
+ rows := cm.GetDataAsRows()
+ for x, row := range *rows {
+ s += cm.rowNames[x] + "\t"
+
+ for _, v := range *row {
+ s += v.String() + "\t"
+ }
+ s += "\n"
+ }
+
+ s += fmt.Sprintf("TP-FP indices %d %d\n", len(cm.tpIds), len(cm.fpIds))
+ s += fmt.Sprintf("FN-TN indices %d %d\n", len(cm.fnIds), len(cm.tnIds))
+
+ return
+}
diff --git a/lib/mining/classifier/cm_test.go b/lib/mining/classifier/cm_test.go
new file mode 100644
index 00000000..6fd47d93
--- /dev/null
+++ b/lib/mining/classifier/cm_test.go
@@ -0,0 +1,69 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package classifier
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/shuLhan/share/lib/test"
+)
+
+func TestComputeNumeric(t *testing.T) {
+ actuals := []int64{1, 1, 1, 0, 0, 0, 0}
+ predics := []int64{1, 1, 0, 0, 0, 0, 1}
+ vs := []int64{1, 0}
+ exp := []int{2, 1, 3, 1}
+
+ cm := &CM{}
+
+ cm.ComputeNumeric(vs, actuals, predics)
+
+ test.Assert(t, "", exp[0], cm.TP(), true)
+ test.Assert(t, "", exp[1], cm.FN(), true)
+ test.Assert(t, "", exp[2], cm.TN(), true)
+ test.Assert(t, "", exp[3], cm.FP(), true)
+
+ fmt.Println(cm)
+}
+
+func TestComputeStrings(t *testing.T) {
+ actuals := []string{"1", "1", "1", "0", "0", "0", "0"}
+ predics := []string{"1", "1", "0", "0", "0", "0", "1"}
+ vs := []string{"1", "0"}
+ exp := []int{2, 1, 3, 1}
+
+ cm := &CM{}
+
+ cm.ComputeStrings(vs, actuals, predics)
+
+ test.Assert(t, "", exp[0], cm.TP(), true)
+ test.Assert(t, "", exp[1], cm.FN(), true)
+ test.Assert(t, "", exp[2], cm.TN(), true)
+ test.Assert(t, "", exp[3], cm.FP(), true)
+
+ fmt.Println(cm)
+}
+
+func TestGroupIndexPredictions(t *testing.T) {
+ testIds := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
+ actuals := []int64{1, 1, 1, 1, 0, 0, 0, 0, 0, 0}
+ predics := []int64{1, 1, 0, 1, 0, 0, 0, 0, 1, 0}
+ exp := [][]int{
+ {0, 1, 3}, // tp
+ {2}, // fn
+ {8}, // fp
+ {4, 5, 6, 7, 9}, // tn
+ }
+
+ cm := &CM{}
+
+ cm.GroupIndexPredictions(testIds, actuals, predics)
+
+ test.Assert(t, "", exp[0], cm.TPIndices(), true)
+ test.Assert(t, "", exp[1], cm.FNIndices(), true)
+ test.Assert(t, "", exp[2], cm.FPIndices(), true)
+ test.Assert(t, "", exp[3], cm.TNIndices(), true)
+}
diff --git a/lib/mining/classifier/crf/crf.go b/lib/mining/classifier/crf/crf.go
new file mode 100644
index 00000000..3df7dd83
--- /dev/null
+++ b/lib/mining/classifier/crf/crf.go
@@ -0,0 +1,470 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package crf implement the cascaded random forest algorithm, proposed by
+// Baumann et.al in their paper:
+//
+// Baumann, Florian, et al. "Cascaded Random Forest for Fast Object
+// Detection." Image Analysis. Springer Berlin Heidelberg, 2013. 131-142.
+//
+//
+package crf
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "sort"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/mining/classifier"
+ "github.com/shuLhan/share/lib/mining/classifier/rf"
+ "github.com/shuLhan/share/lib/numbers"
+ libstrings "github.com/shuLhan/share/lib/strings"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+const (
+	tag = "[crf]"
+
+	// DefStage default number of stages.
+	DefStage = 200
+	// DefTPRate default threshold for true-positive rate.
+	DefTPRate = 0.9
+	// DefTNRate default threshold for true-negative rate.
+	DefTNRate = 0.7
+
+	// DefNumTree default number of tree.
+	DefNumTree = 1
+	// DefPercentBoot default percentage of sample that will be used for
+	// bootstrapping a tree.
+	DefPercentBoot = 66
+	// DefPerfFile default performance file output.
+	DefPerfFile = "crf.perf"
+	// DefStatFile default statistic file output.
+	DefStatFile = "crf.stat"
+)
+
+var (
+	// ErrNoInput is returned by Build when the input samples is nil.
+	// The message prefix is "crf", matching this package; it previously
+	// said "rf", a copy-paste from the random-forest package.
+	ErrNoInput = errors.New("crf: input samples is empty")
+)
+
+//
+// Runtime define the cascaded random forest runtime input and output.
+//
+type Runtime struct {
+	// Runtime embed common fields for classifier.
+	classifier.Runtime
+
+	// NStage number of stage.
+	NStage int `json:"NStage"`
+	// TPRate threshold for true positive rate per stage.
+	TPRate float64 `json:"TPRate"`
+	// TNRate threshold for true negative rate per stage.
+	TNRate float64 `json:"TNRate"`
+
+	// NTree number of tree in each stage.
+	NTree int `json:"NTree"`
+	// NRandomFeature number of features used to split the dataset.
+	NRandomFeature int `json:"NRandomFeature"`
+	// PercentBoot percentage of bootstrap.
+	PercentBoot int `json:"PercentBoot"`
+
+	// forests contain forest for each stage.
+	forests []*rf.Runtime
+	// weights contain weight for each stage, parallel with forests
+	// (weights[i] belongs to forests[i]).
+	weights []float64
+	// tnset contain sample of all true-negative in each iteration,
+	// filled by deleteTrueNegative and consumed by runTPSet.
+	tnset *tabula.Claset
+}
+
+//
+// New create and return new input for cascaded random-forest.
+//
+// NOTE(review): the `samples` parameter is accepted but not used here;
+// validation against the samples happens later in Initialize, which is
+// called from Build.
+//
+func New(nstage, ntree, percentboot, nfeature int,
+	tprate, tnrate float64,
+	samples tabula.ClasetInterface,
+) (
+	crf *Runtime,
+) {
+	crf = &Runtime{
+		NStage: nstage,
+		NTree: ntree,
+		PercentBoot: percentboot,
+		NRandomFeature: nfeature,
+		TPRate: tprate,
+		TNRate: tnrate,
+	}
+
+	return crf
+}
+
+//
+// AddForest will append new forest as the next stage of the cascade.
+//
+func (crf *Runtime) AddForest(forest *rf.Runtime) {
+	crf.forests = append(crf.forests, forest)
+}
+
+//
+// Initialize will check crf inputs and set it to default values if its
+// invalid.
+//
+func (crf *Runtime) Initialize(samples tabula.ClasetInterface) error {
+	if crf.NStage <= 0 {
+		crf.NStage = DefStage
+	}
+	// Rates are probabilities: force them into the open interval (0,1).
+	if crf.TPRate <= 0 || crf.TPRate >= 1 {
+		crf.TPRate = DefTPRate
+	}
+	if crf.TNRate <= 0 || crf.TNRate >= 1 {
+		crf.TNRate = DefTNRate
+	}
+	if crf.NTree <= 0 {
+		crf.NTree = DefNumTree
+	}
+	if crf.PercentBoot <= 0 {
+		crf.PercentBoot = DefPercentBoot
+	}
+	if crf.NRandomFeature <= 0 {
+		// Set default value to square-root of features.
+		ncol := samples.GetNColumn() - 1
+		crf.NRandomFeature = int(math.Sqrt(float64(ncol)))
+	}
+	if crf.PerfFile == "" {
+		crf.PerfFile = DefPerfFile
+	}
+	if crf.StatFile == "" {
+		crf.StatFile = DefStatFile
+	}
+	// NOTE(review): Clone presumably copies only the dataset structure
+	// (columns, class index) without rows, since TN rows are pushed into
+	// tnset later by deleteTrueNegative — verify against tabula.Claset.
+	crf.tnset = samples.Clone().(*tabula.Claset)
+
+	return crf.Runtime.Initialize()
+}
+
+//
+// Build given a sample dataset, build the stage with randomforest.
+// It returns ErrNoInput when samples is nil.
+//
+func (crf *Runtime) Build(samples tabula.ClasetInterface) (e error) {
+	if samples == nil {
+		return ErrNoInput
+	}
+
+	e = crf.Initialize(samples)
+	if e != nil {
+		return
+	}
+
+	fmt.Println(tag, "Training samples:", samples)
+	fmt.Println(tag, "Sample (one row):", samples.GetRow(0))
+	fmt.Println(tag, "Config:", crf)
+
+	// Build one random forest per stage; each stage mutates `samples`
+	// (true-negatives removed, false-positives refilled) before the next.
+	for x := 0; x < crf.NStage; x++ {
+		if debug.Value >= 1 {
+			fmt.Println(tag, "Stage #", x)
+		}
+
+		forest, e := crf.createForest(samples)
+		if e != nil {
+			return e
+		}
+
+		e = crf.finalizeStage(forest)
+		if e != nil {
+			return e
+		}
+	}
+
+	return crf.Finalize()
+}
+
+//
+// createForest will create and return a forest and run the training `samples`
+// on it.
+//
+// Algorithm,
+// (1) Initialize forest.
+// (2) For 0 to maximum number of tree in forest,
+// (2.1) grow one tree until success.
+// (2.2) If tree tp-rate and tn-rate greater than threshold, stop growing.
+// (3) Calculate weight.
+// (4) TODO: Move true-negative from samples. The collection of true-negative
+// will be used again to test the model and after test and the sample with FP
+// will be moved to training samples again.
+// (5) Refill samples with false-positive.
+//
+func (crf *Runtime) createForest(samples tabula.ClasetInterface) (
+	forest *rf.Runtime, e error,
+) {
+	var cm *classifier.CM
+	var stat *classifier.Stat
+
+	fmt.Println(tag, "Forest samples:", samples)
+
+	// (1)
+	forest = &rf.Runtime{
+		Runtime: classifier.Runtime{
+			RunOOB: true,
+		},
+		NTree: crf.NTree,
+		NRandomFeature: crf.NRandomFeature,
+	}
+
+	e = forest.Initialize(samples)
+	if e != nil {
+		return nil, e
+	}
+
+	// (2)
+	for t := 0; t < crf.NTree; t++ {
+		if debug.Value >= 2 {
+			fmt.Println(tag, "Tree #", t)
+		}
+
+		// (2.1)
+		// NOTE(review): this retries forever; if GrowTree fails
+		// persistently (e.g. samples too small) it will never
+		// terminate.  Consider a retry limit.
+		for {
+			cm, stat, e = forest.GrowTree(samples)
+			if e == nil {
+				break
+			}
+		}
+
+		// (2.2)
+		if stat.TPRate > crf.TPRate &&
+			stat.TNRate > crf.TNRate {
+			break
+		}
+	}
+
+	e = forest.Finalize()
+	if e != nil {
+		return nil, e
+	}
+
+	// (3)
+	crf.computeWeight(stat)
+
+	if debug.Value >= 1 {
+		// The actual stage weight is exp(FMeasure); only the raw
+		// F-measure is printed here.
+		fmt.Println(tag, "Weight:", stat.FMeasure)
+	}
+
+	// (4)
+	crf.deleteTrueNegative(samples, cm)
+
+	// (5)
+	crf.runTPSet(samples)
+
+	samples.RecountMajorMinor()
+
+	return forest, nil
+}
+
+//
+// finalizeStage save forest and write the forest statistic to file.
+// The stat ID is the zero-based index of the stage being finalized.
+//
+func (crf *Runtime) finalizeStage(forest *rf.Runtime) (e error) {
+	stat := forest.StatTotal()
+	stat.ID = int64(len(crf.forests))
+
+	e = crf.WriteOOBStat(stat)
+	if e != nil {
+		return e
+	}
+
+	crf.AddStat(stat)
+	crf.ComputeStatTotal(stat)
+
+	if debug.Value >= 1 {
+		crf.PrintStatTotal(nil)
+	}
+
+	// Keep the trained forest as the next stage of the cascade.
+	crf.AddForest(forest)
+
+	return nil
+}
+
+//
+// computeWeight will compute the weight of stage based on F-measure of the
+// last tree in forest.  The weight is exp(F-measure), so a stage with
+// F-measure 0 still gets weight 1.
+//
+func (crf *Runtime) computeWeight(stat *classifier.Stat) {
+	crf.weights = append(crf.weights, math.Exp(stat.FMeasure))
+}
+
+//
+// deleteTrueNegative will delete all samples data where their row index is in
+// true-negative values in confusion matrix and move it to TN-set.
+//
+// (1) Move true negative to tnset on the first iteration, on the next
+// iteration it will be full deleted.
+// (2) Delete TN from sample set one-by-one with offset, to make sure we
+// are not deleting with wrong index.
+//
+func (crf *Runtime) deleteTrueNegative(samples tabula.ClasetInterface,
+	cm *classifier.CM,
+) {
+	var row *tabula.Row
+
+	// Sorting ascending is required for the `i - x` offset trick below:
+	// after deleting x earlier rows, every later index shifts down by x.
+	tnids := cm.TNIndices()
+	sort.Ints(tnids)
+
+	// (1)
+	if len(crf.weights) <= 1 {
+		for _, i := range tnids {
+			crf.tnset.PushRow(samples.GetRow(i))
+		}
+	}
+
+	// (2)
+	c := 0
+	for x, i := range tnids {
+		row = samples.DeleteRow(i - x)
+		if row != nil {
+			c++
+		}
+	}
+
+	if debug.Value >= 1 {
+		fmt.Println(tag, "# TN", len(tnids), "# deleted", c)
+	}
+}
+
+//
+// refillWithFP will copy the false-positive rows in the TN-set `tnset`
+// and append them to training set `samples`, then remove them from `tnset`.
+//
+func (crf *Runtime) refillWithFP(samples, tnset tabula.ClasetInterface,
+	cm *classifier.CM,
+) {
+	// Get and sort FP.  Ascending order is required for the `i - x`
+	// offset deletion below.
+	fpids := cm.FPIndices()
+	sort.Ints(fpids)
+
+	// Move FP samples from TN-set to training set samples.
+	for _, i := range fpids {
+		samples.PushRow(tnset.GetRow(i))
+	}
+
+	// Delete FP from the TN-set (not from the training set).
+	var row *tabula.Row
+	c := 0
+	for x, i := range fpids {
+		row = tnset.DeleteRow(i - x)
+		if row != nil {
+			c++
+		}
+	}
+
+	if debug.Value >= 1 {
+		fmt.Println(tag, "# FP", len(fpids), "# refilled", c)
+	}
+}
+
+//
+// runTPSet will run true-positive set into trained stage, to get the
+// false-positive. The FP samples will be added to training set.
+//
+// NOTE(review): despite the name, this classifies crf.tnset (the
+// true-negative set); the FP found there are moved back into `samples`.
+//
+func (crf *Runtime) runTPSet(samples tabula.ClasetInterface) {
+	// Skip the first stage, because we just got tnset from them.
+	if len(crf.weights) <= 1 {
+		return
+	}
+
+	tnIds := numbers.IntCreateSeq(0, crf.tnset.Len()-1)
+	_, cm, _ := crf.ClassifySetByWeight(crf.tnset, tnIds)
+
+	crf.refillWithFP(samples, crf.tnset, cm)
+}
+
+//
+// ClassifySetByWeight will classify each instance in samples by weight
+// with respect to its single performance.
+//
+// Algorithm,
+// (1) For each instance in samples,
+// (1.1) for each stage,
+// (1.1.1) collect votes for instance in current stage.
+// (1.1.2) Compute probabilities of each classes in votes.
+//
+//	prob_class = count_of_class / total_votes
+//
+// (1.1.3) Compute total of probabilites times of stage weight.
+//
+//	stage_prob = prob_class * stage_weight
+//
+// (1.2) Divide each class stage probabilites with
+//
+//	stage_prob = stage_prob /
+//		(sum_of_all_weights * number_of_tree_in_forest)
+//
+// (1.3) Select class label with highest probabilites.
+// (1.4) Save stage probabilities for positive class.
+// (2) Compute confusion matrix.
+//
+func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
+	sampleIds []int,
+) (
+	predicts []string, cm *classifier.CM, probs []float64,
+) {
+	stat := classifier.Stat{}
+	stat.Start()
+
+	vs := samples.GetClassValueSpace()
+	stageProbs := make([]float64, len(vs))
+	stageSumProbs := make([]float64, len(vs))
+	sumWeights := numbers.Floats64Sum(crf.weights)
+
+	// (1)
+	rows := samples.GetDataAsRows()
+	for _, row := range *rows {
+		// Reset both accumulators for each sample.  Previously only
+		// stageSumProbs was zeroed, so stageProbs leaked weighted
+		// probabilities from earlier rows into later predictions.
+		for y := range vs {
+			stageSumProbs[y] = 0
+			stageProbs[y] = 0
+		}
+
+		// (1.1)
+		for y, forest := range crf.forests {
+			// (1.1.1)
+			votes := forest.Votes(row, -1)
+
+			// (1.1.2)
+			probs := libstrings.FrequencyOfTokens(votes, vs, false)
+
+			// (1.1.3)
+			for z := range probs {
+				stageSumProbs[z] += probs[z]
+				stageProbs[z] += probs[z] * crf.weights[y]
+			}
+		}
+
+		// (1.2)
+		stageWeight := sumWeights * float64(crf.NTree)
+
+		for x := range stageProbs {
+			stageProbs[x] = stageProbs[x] / stageWeight
+		}
+
+		// (1.3)
+		_, maxi, ok := numbers.Floats64FindMax(stageProbs)
+		if ok {
+			predicts = append(predicts, vs[maxi])
+		}
+
+		// (1.4) Average un-weighted probability of the first
+		// (positive) class across all stages.
+		probs = append(probs, stageSumProbs[0]/
+			float64(len(crf.forests)))
+	}
+
+	// (2)
+	actuals := samples.GetClassAsStrings()
+	cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)
+
+	crf.ComputeStatFromCM(&stat, cm)
+	stat.End()
+
+	_ = stat.Write(crf.StatFile)
+
+	return predicts, cm, probs
+}
diff --git a/lib/mining/classifier/crf/crf_test.go b/lib/mining/classifier/crf/crf_test.go
new file mode 100644
index 00000000..4e530591
--- /dev/null
+++ b/lib/mining/classifier/crf/crf_test.go
@@ -0,0 +1,92 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package crf
+
+import (
+ "fmt"
+ "os"
+ "testing"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/classifier"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+var (
+	// SampleFile is the DSV file containing the training samples.
+	SampleFile string
+	// PerfFile is where performance statistics are written.
+	PerfFile string
+	// StatFile is where classifier statistics are written.
+	StatFile string
+	// NStage and NTree configure the cascade; tests mutate them before
+	// calling runCRF, so test order matters.
+	NStage = 200
+	NTree = 1
+)
+
+// runCRF builds a cascaded random forest from SampleFile using the current
+// global options, classifies the held-out split, and writes performance
+// statistics to PerfFile.
+func runCRF(t *testing.T) {
+	// read trainingset.
+	samples := tabula.Claset{}
+	_, e := dsv.SimpleRead(SampleFile, &samples)
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	// Use ~63% of rows for training; the remainder becomes the test set.
+	nbag := (samples.Len() * 63) / 100
+	train, test, _, testIds := tabula.RandomPickRows(&samples, nbag, false)
+
+	trainset := train.(tabula.ClasetInterface)
+	testset := test.(tabula.ClasetInterface)
+
+	crfRuntime := Runtime{
+		Runtime: classifier.Runtime{
+			StatFile: StatFile,
+			PerfFile: PerfFile,
+		},
+		NStage: NStage,
+		NTree: NTree,
+	}
+
+	e = crfRuntime.Build(trainset)
+	if e != nil {
+		t.Fatal(e)
+	}
+
+	testset.RecountMajorMinor()
+	fmt.Println("Testset:", testset)
+
+	predicts, cm, probs := crfRuntime.ClassifySetByWeight(testset, testIds)
+
+	fmt.Println("Confusion matrix:", cm)
+
+	crfRuntime.Performance(testset, predicts, probs)
+	e = crfRuntime.WritePerformance()
+	if e != nil {
+		t.Fatal(e)
+	}
+}
+
+// TestPhoneme200_1 runs the cascade on the phoneme dataset with the default
+// 200 stages and 1 tree per stage.
+func TestPhoneme200_1(t *testing.T) {
+	SampleFile = "../../testdata/phoneme/phoneme.dsv"
+	PerfFile = "phoneme_200_1.perf"
+	StatFile = "phoneme_200_1.stat"
+
+	runCRF(t)
+}
+
+// TestPhoneme200_10 runs the cascade with 10 trees per stage.
+// Note: it mutates the global NTree, which stays 10 for any test that runs
+// after this one.
+func TestPhoneme200_10(t *testing.T) {
+	SampleFile = "../../testdata/phoneme/phoneme.dsv"
+	PerfFile = "phoneme_200_10.perf"
+	StatFile = "phoneme_200_10.stat"
+	NTree = 10
+
+	runCRF(t)
+}
+
+// TestMain skips the whole package's tests unless the TEST_CRF environment
+// variable is set, because the cascade tests are slow.
+func TestMain(m *testing.M) {
+	envTestCRF := os.Getenv("TEST_CRF")
+
+	if len(envTestCRF) == 0 {
+		os.Exit(0)
+	}
+
+	os.Exit(m.Run())
+}
diff --git a/lib/mining/classifier/rf/rf.go b/lib/mining/classifier/rf/rf.go
new file mode 100644
index 00000000..86595efd
--- /dev/null
+++ b/lib/mining/classifier/rf/rf.go
@@ -0,0 +1,363 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package rf implement ensemble of classifiers using random forest
+// algorithm by Breiman and Cutler.
+//
+// Breiman, Leo. "Random forests." Machine learning 45.1 (2001): 5-32.
+//
+// The implementation is based on various sources and using author experience.
+//
+package rf
+
+import (
+ "errors"
+ "fmt"
+ "math"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/mining/classifier"
+ "github.com/shuLhan/share/lib/mining/classifier/cart"
+ "github.com/shuLhan/share/lib/numbers"
+ libstrings "github.com/shuLhan/share/lib/strings"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+const (
+	tag = "[rf]"
+
+	// DefNumTree default number of tree.
+	DefNumTree = 100
+
+	// DefPercentBoot default percentage of sample that will be used for
+	// bootstrapping a tree.
+	DefPercentBoot = 66
+
+	// DefOOBStatsFile default statistic file output.
+	DefOOBStatsFile = "rf.oob.stat"
+
+	// DefPerfFile default performance file output.
+	DefPerfFile = "rf.perf"
+
+	// DefStatFile default statistic file.
+	DefStatFile = "rf.stat"
+)
+
+var (
+	// ErrNoInput is returned by Build when the input samples is nil.
+	ErrNoInput = errors.New("rf: input samples is empty")
+)
+
+//
+// Runtime contains input and output configuration when generating random forest.
+//
+type Runtime struct {
+	// Runtime embed common fields for classifier.
+	classifier.Runtime
+
+	// NTree number of tree in forest.
+	NTree int `json:"NTree"`
+	// NRandomFeature number of feature randomly selected for each tree.
+	NRandomFeature int `json:"NRandomFeature"`
+	// PercentBoot percentage of sample for bootstrapping.
+	PercentBoot int `json:"PercentBoot"`
+
+	// nSubsample number of samples used for bootstrapping, computed in
+	// Initialize from PercentBoot.
+	nSubsample int
+	// trees contain all tree in the forest.
+	trees []cart.Runtime
+	// bagIndices contain list of index of selected samples at bootstraping
+	// for book-keeping; bagIndices[i] belongs to trees[i].
+	bagIndices [][]int
+}
+
+//
+// Trees return all trees in the forest.
+//
+func (forest *Runtime) Trees() []cart.Runtime {
+	return forest.trees
+}
+
+//
+// AddCartTree add tree to forest.
+//
+func (forest *Runtime) AddCartTree(tree cart.Runtime) {
+	forest.trees = append(forest.trees, tree)
+}
+
+//
+// AddBagIndex add bagging index for book keeping.  It must be kept in sync
+// with AddCartTree so that bagIndices[i] matches trees[i].
+//
+func (forest *Runtime) AddBagIndex(bagIndex []int) {
+	forest.bagIndices = append(forest.bagIndices, bagIndex)
+}
+
+//
+// Initialize will check forest inputs and set it to default values if invalid.
+//
+// It will also calculate number of random samples for each tree using,
+//
+//	number-of-sample * percentage-of-bootstrap
+//
+//
+func (forest *Runtime) Initialize(samples tabula.ClasetInterface) error {
+	if forest.NTree <= 0 {
+		forest.NTree = DefNumTree
+	}
+	if forest.PercentBoot <= 0 {
+		forest.PercentBoot = DefPercentBoot
+	}
+	if forest.NRandomFeature <= 0 {
+		// Set default value to square-root of features.
+		ncol := samples.GetNColumn() - 1
+		forest.NRandomFeature = int(math.Sqrt(float64(ncol)))
+	}
+	if forest.OOBStatsFile == "" {
+		forest.OOBStatsFile = DefOOBStatsFile
+	}
+	if forest.PerfFile == "" {
+		forest.PerfFile = DefPerfFile
+	}
+	if forest.StatFile == "" {
+		forest.StatFile = DefStatFile
+	}
+
+	forest.nSubsample = int(float32(samples.GetNRow()) *
+		(float32(forest.PercentBoot) / 100.0))
+
+	return forest.Runtime.Initialize()
+}
+
+//
+// Build the forest using samples dataset.
+//
+// Algorithm,
+//
+// (0) Recheck input value: number of tree, percentage bootstrap, etc; and
+// Open statistic file output.
+// (1) For 0 to NTree,
+// (1.1) Create new tree, repeat until all trees has been build.
+// (2) Compute and write total statistic.
+//
+func (forest *Runtime) Build(samples tabula.ClasetInterface) (e error) {
+	// check input samples
+	if samples == nil {
+		return ErrNoInput
+	}
+
+	// (0)
+	e = forest.Initialize(samples)
+	if e != nil {
+		return
+	}
+
+	fmt.Println(tag, "Training set :", samples)
+	fmt.Println(tag, "Sample (one row):", samples.GetRow(0))
+	fmt.Println(tag, "Forest config :", forest)
+
+	// (1)
+	for t := 0; t < forest.NTree; t++ {
+		if debug.Value >= 1 {
+			fmt.Println(tag, "tree #", t)
+		}
+
+		// (1.1)
+		// NOTE(review): this retries forever; a persistent GrowTree
+		// error (e.g. degenerate samples) would loop endlessly while
+		// printing the error.
+		for {
+			_, _, e = forest.GrowTree(samples)
+			if e == nil {
+				break
+			}
+
+			fmt.Println(tag, "error:", e)
+		}
+	}
+
+	// (2)
+	return forest.Finalize()
+}
+
+//
+// GrowTree build a new tree in forest, return OOB error value or error if tree
+// can not grow.
+//
+// Algorithm,
+//
+// (1) Select random samples with replacement, also with OOB.
+// (2) Build tree using CART, without pruning.
+// (3) Add tree to forest.
+// (4) Save index of random samples for calculating error rate later.
+// (5) Run OOB on forest.
+// (6) Calculate OOB error rate and statistic values.
+//
+func (forest *Runtime) GrowTree(samples tabula.ClasetInterface) (
+	cm *classifier.CM, stat *classifier.Stat, e error,
+) {
+	stat = &classifier.Stat{}
+	stat.ID = int64(len(forest.trees))
+	stat.Start()
+
+	// (1)
+	bag, oob, bagIdx, oobIdx := tabula.RandomPickRows(
+		samples.(tabula.DatasetInterface),
+		forest.nSubsample, true)
+
+	bagset := bag.(tabula.ClasetInterface)
+
+	if debug.Value >= 2 {
+		bagset.RecountMajorMinor()
+		fmt.Println(tag, "Bagging:", bagset)
+	}
+
+	// (2)
+	// Named `tree` (not `cart`) so the local does not shadow the
+	// imported cart package.
+	tree, e := cart.New(bagset, cart.SplitMethodGini,
+		forest.NRandomFeature)
+	if e != nil {
+		return nil, nil, e
+	}
+
+	// (3)
+	forest.AddCartTree(*tree)
+
+	// (4)
+	forest.AddBagIndex(bagIdx)
+
+	// (5)
+	if forest.RunOOB {
+		oobset := oob.(tabula.ClasetInterface)
+		_, cm, _ = forest.ClassifySet(oobset, oobIdx)
+
+		forest.AddOOBCM(cm)
+	}
+
+	stat.End()
+
+	if debug.Value >= 3 && forest.RunOOB {
+		fmt.Println(tag, "Elapsed time (s):", stat.ElapsedTime)
+	}
+
+	forest.AddStat(stat)
+
+	// (6)
+	if forest.RunOOB {
+		forest.ComputeStatFromCM(stat, cm)
+
+		if debug.Value >= 2 {
+			fmt.Println(tag, "OOB stat:", stat)
+		}
+	}
+
+	forest.ComputeStatTotal(stat)
+	e = forest.WriteOOBStat(stat)
+
+	return cm, stat, e
+}
+
+//
+// ClassifySet given a samples predict their class by running each sample in
+// forest, and return their class prediction with confusion matrix.
+// `samples` is the sample that will be predicted, `sampleIds` is the index of
+// samples.
+// If `sampleIds` is not nil, then sample index will be checked in each tree,
+// if the sample is used for training, their vote is not counted.
+//
+// Algorithm,
+//
+// (0) Get value space (possible class values in dataset)
+// (1) For each row in test-set,
+// (1.1) collect votes in all trees,
+// (1.2) select majority class vote, and
+// (1.3) compute and save the actual class probabilities.
+// (2) Compute confusion matrix from predictions.
+// (3) Compute stat from confusion matrix.
+// (4) Write the stat to file only if sampleIds is empty, which mean its run
+// not from OOB set.
+//
+func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
+	sampleIds []int,
+) (
+	predicts []string, cm *classifier.CM, probs []float64,
+) {
+	stat := classifier.Stat{}
+	stat.Start()
+
+	if len(sampleIds) <= 0 {
+		fmt.Println(tag, "Classify set:", samples)
+		fmt.Println(tag, "Classify set sample (one row):",
+			samples.GetRow(0))
+	}
+
+	// (0)
+	vs := samples.GetClassValueSpace()
+	actuals := samples.GetClassAsStrings()
+	sampleIdx := -1
+
+	// (1)
+	rows := samples.GetRows()
+	for x, row := range *rows {
+		// (1.1)
+		if len(sampleIds) > 0 {
+			sampleIdx = sampleIds[x]
+		}
+		votes := forest.Votes(row, sampleIdx)
+
+		// (1.2)
+		classProbs := libstrings.FrequencyOfTokens(votes, vs, false)
+
+		_, idx, ok := numbers.Floats64FindMax(classProbs)
+
+		if ok {
+			predicts = append(predicts, vs[idx])
+		}
+
+		// (1.3) Probability of the first class in the value space.
+		probs = append(probs, classProbs[0])
+	}
+
+	// (2)
+	cm = forest.ComputeCM(sampleIds, vs, actuals, predicts)
+
+	// (3)
+	forest.ComputeStatFromCM(&stat, cm)
+	stat.End()
+
+	if len(sampleIds) <= 0 {
+		fmt.Println(tag, "CM:", cm)
+		fmt.Println(tag, "Classifying stat:", stat)
+		_ = stat.Write(forest.StatFile)
+	}
+
+	return predicts, cm, probs
+}
+
+//
+// Votes will return votes, or classes, in each tree based on sample.
+// If `sampleIdx` is non-negative, any tree whose bootstrap bag contains
+// that index is skipped, so a sample never votes on a tree it trained.
+//
+// (1) If row is used to build the tree then skip it,
+// (2) classify row in tree,
+// (3) save tree class value.
+//
+func (forest *Runtime) Votes(sample *tabula.Row, sampleIdx int) (
+	votes []string,
+) {
+	for x, tree := range forest.trees {
+		// (1)
+		if sampleIdx >= 0 {
+			exist := numbers.IntsIsExist(forest.bagIndices[x],
+				sampleIdx)
+			if exist {
+				continue
+			}
+		}
+
+		// (2)
+		class := tree.Classify(sample)
+
+		// (3)
+		votes = append(votes, class)
+	}
+	return votes
+}
diff --git a/lib/mining/classifier/rf/rf_bench_test.go b/lib/mining/classifier/rf/rf_bench_test.go
new file mode 100644
index 00000000..eddd721c
--- /dev/null
+++ b/lib/mining/classifier/rf/rf_bench_test.go
@@ -0,0 +1,22 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rf
+
+import (
+ "testing"
+)
+
+// BenchmarkPhoneme measures building a random forest on the phoneme dataset
+// with 3 random features, reusing the test harness in rf_test.go.
+func BenchmarkPhoneme(b *testing.B) {
+	SampleDsvFile = "../../testdata/phoneme/phoneme.dsv"
+	OOBStatsFile = "phoneme.oob"
+	PerfFile = "phoneme.perf"
+
+	MinFeature = 3
+	MaxFeature = 4
+
+	for x := 0; x < b.N; x++ {
+		runRandomForest()
+	}
+}
diff --git a/lib/mining/classifier/rf/rf_test.go b/lib/mining/classifier/rf/rf_test.go
new file mode 100644
index 00000000..ba96125b
--- /dev/null
+++ b/lib/mining/classifier/rf/rf_test.go
@@ -0,0 +1,190 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rf
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "testing"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/classifier"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+// Global options to run for each test.  Tests mutate these before calling
+// runRandomForest, so test order matters.
+var (
+	// SampleDsvFile is the file that contain samples config.
+	SampleDsvFile string
+	// DoTest if its true then the dataset will be split into training and
+	// test set with random selection without replacement.
+	DoTest = false
+	// NTree number of tree to generate.
+	NTree = 100
+	// NBootstrap percentage of sample used as subsample.
+	NBootstrap = 66
+	// MinFeature number of feature to begin with.
+	MinFeature = 1
+	// MaxFeature maximum number of feature to test (exclusive).
+	MaxFeature = -1
+	// RunOOB if its true then the OOB samples will be used to test the
+	// model in each iteration.
+	RunOOB = true
+	// OOBStatsFile is the file where OOB statistic will be saved.
+	OOBStatsFile string
+	// PerfFile is the file where performance statistic will be saved.
+	PerfFile string
+	// StatFile is the file where classifying statistic will be saved.
+	StatFile string
+)
+
+// getSamples loads the dataset from SampleDsvFile.  When DoTest is false the
+// whole dataset is returned as `train` and `test` is nil; otherwise the rows
+// are split NBootstrap% / rest into train and test without replacement.
+func getSamples() (train, test tabula.ClasetInterface) {
+	samples := tabula.Claset{}
+	_, e := dsv.SimpleRead(SampleDsvFile, &samples)
+	if nil != e {
+		log.Fatal(e)
+	}
+
+	if !DoTest {
+		return &samples, nil
+	}
+
+	ntrain := int(float32(samples.Len()) * (float32(NBootstrap) / 100.0))
+
+	bag, oob, _, _ := tabula.RandomPickRows(&samples, ntrain, false)
+
+	train = bag.(tabula.ClasetInterface)
+	test = oob.(tabula.ClasetInterface)
+
+	train.SetClassIndex(samples.GetClassIndex())
+	test.SetClassIndex(samples.GetClassIndex())
+
+	return train, test
+}
+
+// runRandomForest builds one forest per feature count in the half-open range
+// [MinFeature, MaxFeature), writing stats into per-feature-count files.
+// Note: it mutates the global MaxFeature when it is negative.
+func runRandomForest() {
+	oobStatsFile := OOBStatsFile
+	perfFile := PerfFile
+	statFile := StatFile
+
+	trainset, testset := getSamples()
+
+	if MaxFeature < 0 {
+		MaxFeature = trainset.GetNColumn()
+	}
+
+	for nfeature := MinFeature; nfeature < MaxFeature; nfeature++ {
+		// Add prefix to OOB stats file.
+		oobStatsFile = fmt.Sprintf("N%d.%s", nfeature, OOBStatsFile)
+
+		// Add prefix to performance file.
+		perfFile = fmt.Sprintf("N%d.%s", nfeature, PerfFile)
+
+		// Add prefix to stat file.
+		statFile = fmt.Sprintf("N%d.%s", nfeature, StatFile)
+
+		// Create and build random forest.
+		forest := Runtime{
+			Runtime: classifier.Runtime{
+				RunOOB: RunOOB,
+				OOBStatsFile: oobStatsFile,
+				PerfFile: perfFile,
+				StatFile: statFile,
+			},
+			NTree: NTree,
+			NRandomFeature: nfeature,
+			PercentBoot: NBootstrap,
+		}
+
+		e := forest.Build(trainset)
+		if e != nil {
+			log.Fatal(e)
+		}
+
+		if DoTest {
+			predicts, _, probs := forest.ClassifySet(testset, nil)
+
+			forest.Performance(testset, predicts, probs)
+			e = forest.WritePerformance()
+			if e != nil {
+				log.Fatal(e)
+			}
+		}
+	}
+}
+
+// TestEnsemblingGlass runs the forest on the forensic glass dataset with a
+// train/test split and without OOB testing.
+func TestEnsemblingGlass(t *testing.T) {
+	SampleDsvFile = "../../testdata/forensic_glass/fgl.dsv"
+	RunOOB = false
+	OOBStatsFile = "glass.oob"
+	StatFile = "glass.stat"
+	PerfFile = "glass.perf"
+	DoTest = true
+
+	runRandomForest()
+}
+
+// TestEnsemblingIris runs the forest on the iris dataset with whatever
+// globals the previous test left behind.
+func TestEnsemblingIris(t *testing.T) {
+	SampleDsvFile = "../../testdata/iris/iris.dsv"
+	OOBStatsFile = "iris.oob"
+
+	runRandomForest()
+}
+
+// TestEnsemblingPhoneme runs 200 trees on the phoneme dataset with a
+// train/test split, 3 random features, and OOB disabled.
+func TestEnsemblingPhoneme(t *testing.T) {
+	SampleDsvFile = "../../testdata/phoneme/phoneme.dsv"
+	OOBStatsFile = "phoneme.oob.stat"
+	StatFile = "phoneme.stat"
+	PerfFile = "phoneme.perf"
+
+	NTree = 200
+	MinFeature = 3
+	MaxFeature = 4
+	RunOOB = false
+	DoTest = true
+
+	runRandomForest()
+}
+
+// TestEnsemblingSmotePhoneme runs the forest on the SMOTE-resampled phoneme
+// dataset with 3 random features.
+func TestEnsemblingSmotePhoneme(t *testing.T) {
+	SampleDsvFile = "../../resampling/smote/phoneme_smote.dsv"
+	OOBStatsFile = "phonemesmote.oob"
+
+	MinFeature = 3
+	MaxFeature = 4
+
+	runRandomForest()
+}
+
+// TestEnsemblingLnsmotePhoneme runs the forest on the LN-SMOTE-resampled
+// phoneme dataset with 3 random features.
+func TestEnsemblingLnsmotePhoneme(t *testing.T) {
+	SampleDsvFile = "../../resampling/lnsmote/phoneme_lnsmote.dsv"
+	OOBStatsFile = "phonemelnsmote.oob"
+
+	MinFeature = 3
+	MaxFeature = 4
+
+	runRandomForest()
+}
+
+// TestWvc2010Lnsmote runs a single-tree forest on the wvc2010 LN-SMOTE
+// dataset with 5 random features.
+func TestWvc2010Lnsmote(t *testing.T) {
+	SampleDsvFile = "../../testdata/wvc2010lnsmote/wvc2010_features.lnsmote.dsv"
+	OOBStatsFile = "wvc2010lnsmote.oob"
+
+	NTree = 1
+	MinFeature = 5
+	MaxFeature = 6
+
+	runRandomForest()
+}
+
+// TestMain skips the whole package's tests unless the TEST_RF environment
+// variable is set, because the forest tests are slow.
+func TestMain(m *testing.M) {
+	envTestRF := os.Getenv("TEST_RF")
+	if len(envTestRF) == 0 {
+		os.Exit(0)
+	}
+
+	os.Exit(m.Run())
+}
diff --git a/lib/mining/classifier/runtime.go b/lib/mining/classifier/runtime.go
new file mode 100644
index 00000000..0f7a7755
--- /dev/null
+++ b/lib/mining/classifier/runtime.go
@@ -0,0 +1,443 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package classifier
+
+import (
+ "fmt"
+ "math"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/numbers"
+ libstrings "github.com/shuLhan/share/lib/strings"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+const (
+ tag = "[classifier.runtime]"
+)
+
+//
+// Runtime define a generic type which provide common fields that can be
+// embedded by the real classifier (e.g. RandomForest).
+//
+type Runtime struct {
+ // RunOOB if its true the OOB will be computed, default is false.
+ RunOOB bool `json:"RunOOB"`
+
+ // OOBStatsFile is the file where OOB statistic will be written.
+ OOBStatsFile string `json:"OOBStatsFile"`
+
+ // PerfFile is the file where statistic of performance will be written.
+ PerfFile string `json:"PerfFile"`
+
+ // StatFile is the file where statistic of classifying samples will be
+ // written.
+ StatFile string `json:"StatFile"`
+
+ // oobCms contain confusion matrix value for each OOB in iteration.
+ oobCms []CM
+
+ // oobStats contain statistic of classifier for each OOB in iteration.
+ oobStats Stats
+
+ // oobStatTotal contain total OOB statistic values.
+ oobStatTotal Stat
+
+ // oobWriter contain file writer for statistic.
+ oobWriter *dsv.Writer
+
+ // perfs contain performance statistic per sample, after classifying
+ // sample on classifier.
+ perfs Stats
+}
+
+//
+// Initialize will start the runtime for processing by saving start time and
+// opening stats file.
+//
+func (rt *Runtime) Initialize() error {
+ rt.oobStatTotal.Start()
+
+ return rt.OpenOOBStatsFile()
+}
+
+//
+// Finalize finish the runtime, compute total statistic, write it to file, and
+// close the file.
+//
+func (rt *Runtime) Finalize() (e error) {
+ st := &rt.oobStatTotal
+
+ st.End()
+ st.ID = int64(len(rt.oobStats))
+
+ e = rt.WriteOOBStat(st)
+ if e != nil {
+ return e
+ }
+
+ return rt.CloseOOBStatsFile()
+}
+
+//
+// OOBStats return all statistic objects.
+//
+func (rt *Runtime) OOBStats() *Stats {
+ return &rt.oobStats
+}
+
+//
+// StatTotal return total statistic.
+//
+func (rt *Runtime) StatTotal() *Stat {
+ return &rt.oobStatTotal
+}
+
+//
+// AddOOBCM will append new confusion matrix.
+//
+func (rt *Runtime) AddOOBCM(cm *CM) {
+ rt.oobCms = append(rt.oobCms, *cm)
+}
+
+//
+// AddStat will append new classifier statistic data.
+//
+func (rt *Runtime) AddStat(stat *Stat) {
+ rt.oobStats = append(rt.oobStats, stat)
+}
+
+//
+// ComputeCM will compute confusion matrix of sample using value space, actual
+// and prediction values.
+//
+func (rt *Runtime) ComputeCM(sampleIds []int,
+ vs, actuals, predicts []string,
+) (
+ cm *CM,
+) {
+ cm = &CM{}
+
+ cm.ComputeStrings(vs, actuals, predicts)
+ cm.GroupIndexPredictionsStrings(sampleIds, actuals, predicts)
+
+ if debug.Value >= 2 {
+ fmt.Println(tag, cm)
+ }
+
+ return cm
+}
+
+//
+// ComputeStatFromCM will compute statistic using confusion matrix.
+//
+func (rt *Runtime) ComputeStatFromCM(stat *Stat, cm *CM) {
+
+ stat.OobError = cm.GetFalseRate()
+
+ stat.OobErrorMean = rt.oobStatTotal.OobError /
+ float64(len(rt.oobStats)+1)
+
+ stat.TP = int64(cm.TP())
+ stat.FP = int64(cm.FP())
+ stat.TN = int64(cm.TN())
+ stat.FN = int64(cm.FN())
+
+ t := float64(stat.TP + stat.FN)
+ if t == 0 {
+ stat.TPRate = 0
+ } else {
+ stat.TPRate = float64(stat.TP) / t
+ }
+
+ t = float64(stat.FP + stat.TN)
+ if t == 0 {
+ stat.FPRate = 0
+ } else {
+ stat.FPRate = float64(stat.FP) / t
+ }
+
+ t = float64(stat.FP + stat.TN)
+ if t == 0 {
+ stat.TNRate = 0
+ } else {
+ stat.TNRate = float64(stat.TN) / t
+ }
+
+ t = float64(stat.TP + stat.FP)
+ if t == 0 {
+ stat.Precision = 0
+ } else {
+ stat.Precision = float64(stat.TP) / t
+ }
+
+ t = (1 / stat.Precision) + (1 / stat.TPRate)
+ if t == 0 {
+ stat.FMeasure = 0
+ } else {
+ stat.FMeasure = 2 / t
+ }
+
+ t = float64(stat.TP + stat.TN + stat.FP + stat.FN)
+ if t == 0 {
+ stat.Accuracy = 0
+ } else {
+ stat.Accuracy = float64(stat.TP+stat.TN) / t
+ }
+
+ if debug.Value >= 1 {
+ rt.PrintOobStat(stat, cm)
+ rt.PrintStat(stat)
+ }
+}
+
+//
+// ComputeStatTotal compute total statistic.
+//
+func (rt *Runtime) ComputeStatTotal(stat *Stat) {
+ if stat == nil {
+ return
+ }
+
+ nstat := len(rt.oobStats)
+ if nstat == 0 {
+ return
+ }
+
+ t := &rt.oobStatTotal
+
+ t.OobError += stat.OobError
+ t.OobErrorMean = t.OobError / float64(nstat)
+ t.TP += stat.TP
+ t.FP += stat.FP
+ t.TN += stat.TN
+ t.FN += stat.FN
+
+ total := float64(t.TP + t.FN)
+ if total == 0 {
+ t.TPRate = 0
+ } else {
+ t.TPRate = float64(t.TP) / total
+ }
+
+ total = float64(t.FP + t.TN)
+ if total == 0 {
+ t.FPRate = 0
+ } else {
+ t.FPRate = float64(t.FP) / total
+ }
+
+ total = float64(t.FP + t.TN)
+ if total == 0 {
+ t.TNRate = 0
+ } else {
+ t.TNRate = float64(t.TN) / total
+ }
+
+ total = float64(t.TP + t.FP)
+ if total == 0 {
+ t.Precision = 0
+ } else {
+ t.Precision = float64(t.TP) / total
+ }
+
+ total = (1 / t.Precision) + (1 / t.TPRate)
+ if total == 0 {
+ t.FMeasure = 0
+ } else {
+ t.FMeasure = 2 / total
+ }
+
+ total = float64(t.TP + t.TN + t.FP + t.FN)
+ if total == 0 {
+ t.Accuracy = 0
+ } else {
+ t.Accuracy = float64(t.TP+t.TN) / total
+ }
+}
+
+//
+// OpenOOBStatsFile will open statistic file for output.
+//
+func (rt *Runtime) OpenOOBStatsFile() error {
+ if rt.oobWriter != nil {
+ _ = rt.CloseOOBStatsFile()
+ }
+ rt.oobWriter = &dsv.Writer{}
+ return rt.oobWriter.OpenOutput(rt.OOBStatsFile)
+}
+
+//
+// WriteOOBStat will write statistic of process to file.
+//
+func (rt *Runtime) WriteOOBStat(stat *Stat) error {
+ if rt.oobWriter == nil {
+ return nil
+ }
+ if stat == nil {
+ return nil
+ }
+ return rt.oobWriter.WriteRawRow(stat.ToRow(), nil, nil)
+}
+
+//
+// CloseOOBStatsFile will close statistics file for writing.
+//
+func (rt *Runtime) CloseOOBStatsFile() (e error) {
+ if rt.oobWriter == nil {
+ return
+ }
+
+ e = rt.oobWriter.Close()
+ rt.oobWriter = nil
+
+ return
+}
+
+//
+// PrintOobStat will print the out-of-bag statistic to standard output.
+//
+func (rt *Runtime) PrintOobStat(stat *Stat, cm *CM) {
+ fmt.Printf("%s OOB error rate: %.4f,"+
+ " total: %.4f, mean %.4f, true rate: %.4f\n", tag,
+ stat.OobError, rt.oobStatTotal.OobError,
+ stat.OobErrorMean, cm.GetTrueRate())
+}
+
+//
+// PrintStat will print statistic value to standard output.
+//
+func (rt *Runtime) PrintStat(stat *Stat) {
+ if stat == nil {
+ statslen := len(rt.oobStats)
+ if statslen <= 0 {
+ return
+ }
+ stat = rt.oobStats[statslen-1]
+ }
+
+ fmt.Printf("%s TPRate: %.4f, FPRate: %.4f,"+
+ " TNRate: %.4f, precision: %.4f, f-measure: %.4f,"+
+ " accuracy: %.4f\n", tag, stat.TPRate, stat.FPRate,
+ stat.TNRate, stat.Precision, stat.FMeasure, stat.Accuracy)
+}
+
+//
+// PrintStatTotal will print total statistic to standard output.
+//
+func (rt *Runtime) PrintStatTotal(st *Stat) {
+ if st == nil {
+ st = &rt.oobStatTotal
+ }
+ rt.PrintStat(st)
+}
+
+//
+// Performance given an actuals class label and their probabilities, compute
+// the performance statistic of classifier.
+//
+// Algorithm,
+// (1) Sort the probabilities in descending order.
+// (2) Sort the actuals and predicts using sorted index from probs
+// (3) Compute tpr, fpr, precision
+// (4) Write performance to file.
+//
+func (rt *Runtime) Performance(samples tabula.ClasetInterface,
+ predicts []string, probs []float64,
+) (
+ perfs Stats,
+) {
+ // (1)
+ actuals := samples.GetClassAsStrings()
+ sortedIds := numbers.IntCreateSeq(0, len(probs)-1)
+ numbers.Floats64InplaceMergesort(probs, sortedIds, 0, len(probs),
+ false)
+
+ // (2)
+ libstrings.SortByIndex(&actuals, sortedIds)
+ libstrings.SortByIndex(&predicts, sortedIds)
+
+ // (3)
+ rt.computePerfByProbs(samples, actuals, probs)
+
+ return rt.perfs
+}
+
+func trapezoidArea(fp, fpprev, tp, tpprev int64) float64 {
+ base := math.Abs(float64(fp - fpprev))
+ heightAvg := float64(tp+tpprev) / float64(2.0)
+ return base * heightAvg
+}
+
+//
+// computePerfByProbs will compute classifier performance using probabilities
+// or score `probs`.
+//
+// This currently only work for two class problem.
+//
+func (rt *Runtime) computePerfByProbs(samples tabula.ClasetInterface,
+ actuals []string, probs []float64,
+) {
+ vs := samples.GetClassValueSpace()
+ nactuals := numbers.IntsTo64(samples.Counts())
+ nclass := libstrings.CountTokens(actuals, vs, false)
+
+ pprev := math.Inf(-1)
+ tp := int64(0)
+ fp := int64(0)
+ tpprev := int64(0)
+ fpprev := int64(0)
+
+ auc := float64(0)
+
+ for x, p := range probs {
+ if p != pprev {
+ stat := Stat{}
+ stat.SetTPRate(tp, nactuals[0])
+ stat.SetFPRate(fp, nactuals[1])
+ stat.SetPrecisionFromRate(nactuals[0], nactuals[1])
+
+ auc = auc + trapezoidArea(fp, fpprev, tp, tpprev)
+ stat.SetAUC(auc)
+
+ rt.perfs = append(rt.perfs, &stat)
+
+ pprev = p
+ tpprev = tp
+ fpprev = fp
+ }
+
+ if actuals[x] == vs[0] {
+ tp++
+ } else {
+ fp++
+ }
+ }
+
+ stat := Stat{}
+ stat.SetTPRate(tp, nactuals[0])
+ stat.SetFPRate(fp, nactuals[1])
+ stat.SetPrecisionFromRate(nactuals[0], nactuals[1])
+
+ auc = auc + trapezoidArea(fp, fpprev, tp, tpprev)
+ auc = auc / float64(nclass[0]*nclass[1])
+ stat.SetAUC(auc)
+
+ rt.perfs = append(rt.perfs, &stat)
+
+ if len(rt.perfs) >= 2 {
+ // Replace the first stat with second stat, because of NaN
+ // value on the first precision.
+ rt.perfs[0] = rt.perfs[1]
+ }
+}
+
+//
+// WritePerformance will write performance data to file.
+//
+func (rt *Runtime) WritePerformance() error {
+ return rt.perfs.Write(rt.PerfFile)
+}
diff --git a/lib/mining/classifier/stat.go b/lib/mining/classifier/stat.go
new file mode 100644
index 00000000..790aec10
--- /dev/null
+++ b/lib/mining/classifier/stat.go
@@ -0,0 +1,176 @@
+// Copyright 2015-2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package classifier
+
+import (
+ "time"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+//
+// Stat hold statistic value of classifier, including TP rate, FP rate, precision,
+// and recall.
+//
+type Stat struct {
+ // ID unique id for this statistic (e.g. number of tree).
+ ID int64
+ // StartTime contain the start time of classifier in unix timestamp.
+ StartTime int64
+ // EndTime contain the end time of classifier in unix timestamp.
+ EndTime int64
+ // ElapsedTime contain actual time, in seconds, between end and start
+ // time.
+ ElapsedTime int64
+ // TP contain true-positive value.
+ TP int64
+ // FP contain false-positive value.
+ FP int64
+ // TN contain true-negative value.
+ TN int64
+ // FN contain false-negative value.
+ FN int64
+ // OobError contain out-of-bag error.
+ OobError float64
+ // OobErrorMean contain mean of out-of-bag error.
+ OobErrorMean float64
+ // TPRate contain true-positive rate (recall): tp/(tp+fn)
+ TPRate float64
+ // FPRate contain false-positive rate: fp/(fp+tn)
+ FPRate float64
+ // TNRate contain true-negative rate: tn/(tn+fp)
+ TNRate float64
+ // Precision contain: tp/(tp+fp)
+ Precision float64
+ // FMeasure contain value of F-measure or the harmonic mean of
+ // precision and recall.
+ FMeasure float64
+ // Accuracy contain the degree of closeness of measurements of a
+ // quantity to that quantity's true value.
+ Accuracy float64
+ // AUC contain the area under curve.
+ AUC float64
+}
+
+// SetAUC will set the AUC value.
+func (stat *Stat) SetAUC(v float64) {
+ stat.AUC = v
+}
+
+//
+// SetTPRate will set TP and TPRate using number of positive `p`.
+//
+func (stat *Stat) SetTPRate(tp, p int64) {
+ stat.TP = tp
+ stat.TPRate = float64(tp) / float64(p)
+}
+
+//
+// SetFPRate will set FP and FPRate using number of negative `n`.
+//
+func (stat *Stat) SetFPRate(fp, n int64) {
+ stat.FP = fp
+ stat.FPRate = float64(fp) / float64(n)
+}
+
+//
+// SetPrecisionFromRate will set Precision value using tprate and fprate.
+// `p` and `n` is the number of positive and negative class in samples.
+//
+func (stat *Stat) SetPrecisionFromRate(p, n int64) {
+ stat.Precision = (stat.TPRate * float64(p)) /
+ ((stat.TPRate * float64(p)) + (stat.FPRate * float64(n)))
+}
+
+//
+// Recall return value of recall.
+//
+func (stat *Stat) Recall() float64 {
+ return stat.TPRate
+}
+
+//
+// Sum will add statistic from other stat object to current stat, not including
+// the start and end time.
+//
+func (stat *Stat) Sum(other *Stat) {
+ stat.OobError += other.OobError
+ stat.OobErrorMean += other.OobErrorMean
+ stat.TP += other.TP
+ stat.FP += other.FP
+ stat.TN += other.TN
+ stat.FN += other.FN
+ stat.TPRate += other.TPRate
+ stat.FPRate += other.FPRate
+ stat.TNRate += other.TNRate
+ stat.Precision += other.Precision
+ stat.FMeasure += other.FMeasure
+ stat.Accuracy += other.Accuracy
+}
+
+//
+// ToRow will convert the stat to a tabula.Row in the order of Stat fields.
+//
+func (stat *Stat) ToRow() (row *tabula.Row) {
+ row = &tabula.Row{}
+
+ row.PushBack(tabula.NewRecordInt(stat.ID))
+ row.PushBack(tabula.NewRecordInt(stat.StartTime))
+ row.PushBack(tabula.NewRecordInt(stat.EndTime))
+ row.PushBack(tabula.NewRecordInt(stat.ElapsedTime))
+ row.PushBack(tabula.NewRecordReal(stat.OobError))
+ row.PushBack(tabula.NewRecordReal(stat.OobErrorMean))
+ row.PushBack(tabula.NewRecordInt(stat.TP))
+ row.PushBack(tabula.NewRecordInt(stat.FP))
+ row.PushBack(tabula.NewRecordInt(stat.TN))
+ row.PushBack(tabula.NewRecordInt(stat.FN))
+ row.PushBack(tabula.NewRecordReal(stat.TPRate))
+ row.PushBack(tabula.NewRecordReal(stat.FPRate))
+ row.PushBack(tabula.NewRecordReal(stat.TNRate))
+ row.PushBack(tabula.NewRecordReal(stat.Precision))
+ row.PushBack(tabula.NewRecordReal(stat.FMeasure))
+ row.PushBack(tabula.NewRecordReal(stat.Accuracy))
+ row.PushBack(tabula.NewRecordReal(stat.AUC))
+
+ return
+}
+
+//
+// Start will start the timer.
+//
+func (stat *Stat) Start() {
+ stat.StartTime = time.Now().Unix()
+}
+
+//
+// End will stop the timer and compute the elapsed time.
+//
+func (stat *Stat) End() {
+ stat.EndTime = time.Now().Unix()
+ stat.ElapsedTime = stat.EndTime - stat.StartTime
+}
+
+//
+// Write will write the content of stat to `file`.
+//
+func (stat *Stat) Write(file string) (e error) {
+ if file == "" {
+ return
+ }
+
+ writer := &dsv.Writer{}
+ e = writer.OpenOutput(file)
+ if e != nil {
+ return e
+ }
+
+ e = writer.WriteRawRow(stat.ToRow(), nil, nil)
+ if e != nil {
+ return e
+ }
+
+ return writer.Close()
+}
diff --git a/lib/mining/classifier/stats.go b/lib/mining/classifier/stats.go
new file mode 100644
index 00000000..b29d3510
--- /dev/null
+++ b/lib/mining/classifier/stats.go
@@ -0,0 +1,143 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package classifier
+
+import (
+ "github.com/shuLhan/share/lib/dsv"
+)
+
+//
+// Stats define list of statistic values.
+//
+type Stats []*Stat
+
+//
+// Add will add other stat object to the slice.
+//
+func (stats *Stats) Add(stat *Stat) {
+ *stats = append(*stats, stat)
+}
+
+//
+// StartTimes return all start times in unix timestamp.
+//
+func (stats *Stats) StartTimes() (times []int64) {
+ for _, stat := range *stats {
+ times = append(times, stat.StartTime)
+ }
+ return
+}
+
+//
+// EndTimes return all end times in unix timestamp.
+//
+func (stats *Stats) EndTimes() (times []int64) {
+ for _, stat := range *stats {
+ times = append(times, stat.EndTime)
+ }
+ return
+}
+
+//
+// OobErrorMeans return all out-of-bag error mean values.
+//
+func (stats *Stats) OobErrorMeans() (oobmeans []float64) {
+ oobmeans = make([]float64, len(*stats))
+ for x, stat := range *stats {
+ oobmeans[x] = stat.OobErrorMean
+ }
+ return
+}
+
+//
+// TPRates return all true-positive rate values.
+//
+func (stats *Stats) TPRates() (tprates []float64) {
+ for _, stat := range *stats {
+ tprates = append(tprates, stat.TPRate)
+ }
+ return
+}
+
+//
+// FPRates return all false-positive rate values.
+//
+func (stats *Stats) FPRates() (fprates []float64) {
+ for _, stat := range *stats {
+ fprates = append(fprates, stat.FPRate)
+ }
+ return
+}
+
+//
+// TNRates will return all true-negative rate values.
+//
+func (stats *Stats) TNRates() (tnrates []float64) {
+ for _, stat := range *stats {
+ tnrates = append(tnrates, stat.TNRate)
+ }
+ return
+}
+
+//
+// Precisions return all precision values.
+//
+func (stats *Stats) Precisions() (precs []float64) {
+ for _, stat := range *stats {
+ precs = append(precs, stat.Precision)
+ }
+ return
+}
+
+//
+// Recalls return all recall values.
+//
+func (stats *Stats) Recalls() (recalls []float64) {
+ return stats.TPRates()
+}
+
+//
+// FMeasures return all F-measure values.
+//
+func (stats *Stats) FMeasures() (fmeasures []float64) {
+ for _, stat := range *stats {
+ fmeasures = append(fmeasures, stat.FMeasure)
+ }
+ return
+}
+
+//
+// Accuracies return all accuracy values.
+//
+func (stats *Stats) Accuracies() (accuracies []float64) {
+ for _, stat := range *stats {
+ accuracies = append(accuracies, stat.Accuracy)
+ }
+ return
+}
+
+//
+// Write will write all statistic data to `file`.
+//
+func (stats *Stats) Write(file string) (e error) {
+ if file == "" {
+ return
+ }
+
+ writer := &dsv.Writer{}
+ e = writer.OpenOutput(file)
+ if e != nil {
+ return e
+ }
+
+ for _, st := range *stats {
+ e = writer.WriteRawRow(st.ToRow(), nil, nil)
+ if e != nil {
+ return e
+ }
+ }
+
+ return writer.Close()
+}
diff --git a/lib/mining/classifier/stats_interface.go b/lib/mining/classifier/stats_interface.go
new file mode 100644
index 00000000..a9b60b9a
--- /dev/null
+++ b/lib/mining/classifier/stats_interface.go
@@ -0,0 +1,68 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package classifier
+
+//
+// ComputeFMeasures given array of precisions and recalls, compute F-measure
+// of each instance and return it.
+//
+func ComputeFMeasures(precisions, recalls []float64) (fmeasures []float64) {
+ // Get the minimum length of precision and recall.
+ // This is to make sure that we are not looping out of range.
+ minlen := len(precisions)
+ recallslen := len(recalls)
+ if recallslen < minlen {
+ minlen = recallslen
+ }
+
+ for x := 0; x < minlen; x++ {
+ f := 2 / ((1 / precisions[x]) + (1 / recalls[x]))
+ fmeasures = append(fmeasures, f)
+ }
+ return
+}
+
+//
+// ComputeAccuracies will compute and return accuracy from array of
+// true-positive, false-positive, true-negative, and false-negative; using
+// formula,
+//
+//	(tp + tn) / (tp + fp + tn + fn)
+//
+func ComputeAccuracies(tp, fp, tn, fn []int64) (accuracies []float64) {
+	// Get minimum length of input. NOTE(review): len(tn) is never
+	// considered below; the comparison should use minlen, not len(tn).
+ minlen := len(tp)
+ if len(fp) < len(tn) {
+ minlen = len(fp)
+ }
+ if len(fn) < minlen {
+ minlen = len(fn)
+ }
+
+ for x := 0; x < minlen; x++ {
+ acc := float64(tp[x]+tn[x]) /
+ float64(tp[x]+fp[x]+tn[x]+fn[x])
+ accuracies = append(accuracies, acc)
+ }
+ return
+}
+
+//
+// ComputeElapsedTimes will compute and return elapsed time between `start`
+// and `end` timestamps.
+//
+func ComputeElapsedTimes(start, end []int64) (elaps []int64) {
+ // Get minimum length.
+ minlen := len(start)
+ if len(end) < minlen {
+ minlen = len(end)
+ }
+
+ for x := 0; x < minlen; x++ {
+ elaps = append(elaps, end[x]-start[x])
+ }
+ return
+}
diff --git a/lib/mining/cmd/cart/iris.dsv b/lib/mining/cmd/cart/iris.dsv
new file mode 100644
index 00000000..82a52d04
--- /dev/null
+++ b/lib/mining/cmd/cart/iris.dsv
@@ -0,0 +1,35 @@
+{
+ "Input" :"../../testdata/iris/iris.dat"
+, "Rejected" :"iris.rej"
+, "MaxRows" :-1
+, "ClassIndex" :4
+, "DatasetMode" :"matrix"
+, "SplitMethod" :"gini"
+, "InputMetadata" :
+ [{
+ "Name" :"sepal-length"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"sepal-width"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"petal-length"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"petal-width"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "Iris-setosa"
+ , "Iris-versicolor"
+ , "Iris-virginica"
+ ]
+ }]
+}
diff --git a/lib/mining/cmd/cart/main.go b/lib/mining/cmd/cart/main.go
new file mode 100644
index 00000000..08510203
--- /dev/null
+++ b/lib/mining/cmd/cart/main.go
@@ -0,0 +1,107 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/classifier/cart"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+var (
+ nRandomFeature = 0
+)
+
+var usage = func() {
+ cmd := os.Args[0]
+ fmt.Fprintf(os.Stderr, "Usage of %s: [-n number] [config.dsv]\n", cmd)
+ flag.PrintDefaults()
+}
+
+func init() {
+ flagUsage := []string{
+ "Number of random feature (default 0)",
+ }
+
+ flag.IntVar(&nRandomFeature, "n", 0, flagUsage[0])
+}
+
+func trace(s string) (string, time.Time) {
+ fmt.Println("[START]", s)
+ return s, time.Now()
+}
+
+func un(s string, startTime time.Time) {
+ endTime := time.Now()
+ fmt.Println("[END]", s, "with elapsed time",
+ endTime.Sub(startTime))
+}
+
+func createCart(fcfg string) (*cart.Runtime, error) {
+ cartrt := &cart.Runtime{}
+
+ config, e := ioutil.ReadFile(fcfg)
+ if e != nil {
+ return nil, e
+ }
+
+ e = json.Unmarshal(config, cartrt)
+ if e != nil {
+ return nil, e
+ }
+
+ if nRandomFeature > 0 {
+ cartrt.NRandomFeature = nRandomFeature
+ }
+
+ return cartrt, nil
+}
+
+func main() {
+ defer un(trace("cart"))
+
+ flag.Parse()
+
+ if len(flag.Args()) <= 0 {
+ usage()
+ os.Exit(1)
+ }
+
+ fcfg := flag.Arg(0)
+
+ // Parsing config file and check command parameter values.
+ cartrt, e := createCart(fcfg)
+ if e != nil {
+ panic(e)
+ }
+
+ // Get dataset
+ dataset := tabula.Claset{}
+ _, e = dsv.SimpleRead(fcfg, &dataset)
+ if e != nil {
+ panic(e)
+ }
+
+ if debug.Value >= 1 {
+ fmt.Printf("[cart] Class index: %v\n", dataset.GetClassIndex())
+ }
+
+ e = cartrt.Build(&dataset)
+ if e != nil {
+ panic(e)
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[cart] CART tree:\n", cartrt)
+ }
+}
diff --git a/lib/mining/cmd/crf/main.go b/lib/mining/cmd/crf/main.go
new file mode 100644
index 00000000..2bdd698a
--- /dev/null
+++ b/lib/mining/cmd/crf/main.go
@@ -0,0 +1,205 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/classifier/crf"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+const (
+ tag = "[crf]"
+)
+
+var (
+ // nStage number of stage.
+ nStage = 0
+ // nTree number of tree.
+ nTree = 0
+ // nRandomFeature number of feature to compute.
+ nRandomFeature = 0
+ // percentBoot percentage of sample for bootstraping.
+ percentBoot = 0
+ // oobStatsFile where statistic will be written.
+ oobStatsFile = ""
+ // perfFile where performance of classifier will be written.
+ perfFile = ""
+ // trainCfg point to the configuration file for training or creating
+ // a model
+ trainCfg = ""
+ // testCfg point to the configuration file for testing
+ testCfg = ""
+
+ // crforest the main object.
+ crforest crf.Runtime
+)
+
+var usage = func() {
+ flag.PrintDefaults()
+}
+
+func init() {
+ flagUsage := []string{
+ "Number of stage (default 200)",
+ "Number of tree in each stage (default 1)",
+ "Number of feature to compute (default 0, all features)",
+ "Percentage of bootstrap (default 64%)",
+ "OOB statistic file, where OOB data will be written",
+ "Performance file, where statistic of classifying data set will be written",
+ "Training configuration",
+ "Test configuration",
+ }
+
+ flag.IntVar(&nStage, "nstage", -1, flagUsage[0])
+ flag.IntVar(&nTree, "ntree", -1, flagUsage[1])
+ flag.IntVar(&nRandomFeature, "nrandomfeature", -1, flagUsage[2])
+ flag.IntVar(&percentBoot, "percentboot", -1, flagUsage[3])
+
+ flag.StringVar(&oobStatsFile, "oobstatsfile", "", flagUsage[4])
+ flag.StringVar(&perfFile, "perffile", "", flagUsage[5])
+
+ flag.StringVar(&trainCfg, "train", "", flagUsage[6])
+ flag.StringVar(&testCfg, "test", "", flagUsage[7])
+}
+
+func trace() (start time.Time) {
+ start = time.Now()
+ fmt.Println(tag, "start", start)
+ return
+}
+
+func un(startTime time.Time) {
+ endTime := time.Now()
+ fmt.Println(tag, "elapsed time", endTime.Sub(startTime))
+}
+
+//
+// createCRF will create cascaded random forest for training, with the
+// following steps,
+// (1) load training configuration.
+// (2) Overwrite configuration parameter if its set from command line.
+//
+func createCRF() error {
+ // (1)
+ config, e := ioutil.ReadFile(trainCfg)
+ if e != nil {
+ return e
+ }
+
+ crforest = crf.Runtime{}
+
+ e = json.Unmarshal(config, &crforest)
+ if e != nil {
+ return e
+ }
+
+ // (2)
+ if nStage > 0 {
+ crforest.NStage = nStage
+ }
+ if nTree > 0 {
+ crforest.NTree = nTree
+ }
+ if nRandomFeature > 0 {
+ crforest.NRandomFeature = nRandomFeature
+ }
+ if percentBoot > 0 {
+ crforest.PercentBoot = percentBoot
+ }
+ if oobStatsFile != "" {
+ crforest.OOBStatsFile = oobStatsFile
+ }
+ if perfFile != "" {
+ crforest.PerfFile = perfFile
+ }
+
+ crforest.RunOOB = true
+
+ return nil
+}
+
+func train() {
+ e := createCRF()
+ if e != nil {
+ panic(e)
+ }
+
+ trainset := tabula.Claset{}
+
+ _, e = dsv.SimpleRead(trainCfg, &trainset)
+ if e != nil {
+ panic(e)
+ }
+
+ e = crforest.Build(&trainset)
+ if e != nil {
+ panic(e)
+ }
+}
+
+func test() {
+ testset := tabula.Claset{}
+ _, e := dsv.SimpleRead(testCfg, &testset)
+ if e != nil {
+ panic(e)
+ }
+
+ fmt.Println(tag, "Test set:", &testset)
+ fmt.Println(tag, "Sample test set:", testset.GetRow(0))
+
+ predicts, cm, probs := crforest.ClassifySetByWeight(&testset, nil)
+
+ fmt.Println("[crf] Test set CM:", cm)
+
+ crforest.Performance(&testset, predicts, probs)
+
+ e = crforest.WritePerformance()
+ if e != nil {
+ panic(e)
+ }
+}
+
+//
+// (0) Parse and check command line parameters.
+// (1) If trainCfg parameter is set,
+// (1.1) train the model,
+// (1.2) TODO: load saved model.
+// (2) If testCfg parameter is set,
+// (2.1) Test the model using data from testCfg.
+//
+func main() {
+ defer un(trace())
+
+ // (0)
+ flag.Parse()
+
+ fmt.Println(tag, "Training config:", trainCfg)
+ fmt.Println(tag, "Test config:", testCfg)
+
+ // (1)
+ if trainCfg != "" {
+ // (1.1)
+ train()
+ } else {
+ // (1.2)
+ if len(flag.Args()) <= 0 {
+ usage()
+ os.Exit(1)
+ }
+ }
+
+ // (2)
+ if testCfg != "" {
+ test()
+ }
+}
diff --git a/lib/mining/cmd/crf/phoneme.dsv b/lib/mining/cmd/crf/phoneme.dsv
new file mode 100644
index 00000000..1219989f
--- /dev/null
+++ b/lib/mining/cmd/crf/phoneme.dsv
@@ -0,0 +1,45 @@
+{
+ "Input" :"../../testdata/phoneme/phoneme.dat"
+, "Rejected" :"phoneme.rej"
+, "MaxRows" :-1
+, "DatasetMode" :"matrix"
+, "ClassIndex" :5
+
+, "NTree" :50
+, "NRandomFeature" :2
+, "RunOOB" :true
+, "OOBStatsFile" :"phoneme.oob.stat"
+, "PerfFile" :"phoneme.perf"
+, "StatFile" :"phoneme.stat"
+
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"class"
+ , "Type" :"real"
+ , "ValueSpace" :
+ [
+ "1"
+ , "0"
+ ]
+ }]
+}
diff --git a/lib/mining/cmd/lnsmote/main.go b/lib/mining/cmd/lnsmote/main.go
new file mode 100644
index 00000000..692e9cf4
--- /dev/null
+++ b/lib/mining/cmd/lnsmote/main.go
@@ -0,0 +1,188 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/resampling/lnsmote"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
+var (
+ // percentOver contain percentage of over sampling.
+ percentOver = 100
+ // knn contain number of nearest neighbours considered when
+ // oversampling.
+ knn = 5
+ // synFile flag for synthetic file output.
+ synFile = ""
+ // merge flag, if its true the original and synthetic will be merged
+ // into `synFile`.
+ merge = false
+)
+
+var usage = func() {
+ cmd := os.Args[0]
+ fmt.Fprintf(os.Stderr, "Usage of %s:\n"+
+ "[-percentover number] "+
+ "[-knn number] "+
+ "[-syntheticfile string] "+
+ "[-merge bool] "+
+ "[config.dsv]\n", cmd)
+ flag.PrintDefaults()
+}
+
+func init() {
+ flagUsage := []string{
+ "Percentage of oversampling (default 100)",
+ "Number of nearest neighbours (default 5)",
+ "File where synthetic samples will be written (default '')",
+ "If true then original and synthetic will be merged when" +
+ " written to file (default false)",
+ }
+
+ flag.IntVar(&percentOver, "percentover", -1, flagUsage[0])
+ flag.IntVar(&knn, "knn", -1, flagUsage[1])
+ flag.StringVar(&synFile, "syntheticfile", "", flagUsage[2])
+ flag.BoolVar(&merge, "merge", false, flagUsage[3])
+}
+
+func trace(s string) (string, time.Time) {
+ fmt.Println("[START]", s)
+ return s, time.Now()
+}
+
+func un(s string, startTime time.Time) {
+ endTime := time.Now()
+ fmt.Println("[END]", s, "with elapsed time",
+ endTime.Sub(startTime))
+}
+
+//
+// createLnsmote will create and initialize SMOTE object from config file and
+// from command parameter.
+//
+func createLnsmote(fcfg string) (lnsmoteRun *lnsmote.Runtime, e error) {
+ lnsmoteRun = &lnsmote.Runtime{}
+
+ config, e := ioutil.ReadFile(fcfg)
+ if e != nil {
+ return nil, e
+ }
+
+ e = json.Unmarshal(config, lnsmoteRun)
+ if e != nil {
+ return nil, e
+ }
+
+ // Use option value from command parameter.
+ if percentOver > 0 {
+ lnsmoteRun.PercentOver = percentOver
+ }
+ if knn > 0 {
+ lnsmoteRun.K = knn
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[lnsmote]", lnsmoteRun)
+ }
+
+ return
+}
+
+//
+// runLnsmote will select minority class from dataset and run oversampling.
+//
+func runLnsmote(lnsmoteRun *lnsmote.Runtime, dataset *tabula.Claset) (e error) {
+ e = lnsmoteRun.Resampling(dataset)
+ if e != nil {
+ return
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[lnsmote] # synthetics:",
+ lnsmoteRun.GetSynthetics().Len())
+ }
+
+ return
+}
+
+// runMerge will append original dataset to synthetic file.
+func runMerge(lnsmoteRun *lnsmote.Runtime, dataset *tabula.Claset) (e error) {
+ writer, e := dsv.NewWriter("")
+ if e != nil {
+ return
+ }
+
+ e = writer.ReopenOutput(lnsmoteRun.SyntheticFile)
+ if e != nil {
+ return
+ }
+
+ sep := dsv.DefSeparator
+ n, e := writer.WriteRawDataset(dataset, &sep)
+ if e != nil {
+ return
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[lnsmote] # appended:", n)
+ }
+
+ return writer.Close()
+}
+
+func main() {
+ defer un(trace("lnsmote"))
+
+ flag.Parse()
+
+ if len(flag.Args()) <= 0 {
+ usage()
+ os.Exit(1)
+ }
+
+ fcfg := flag.Arg(0)
+
+ // Parsing config file and parameter.
+ lnsmoteRun, e := createLnsmote(fcfg)
+ if e != nil {
+ panic(e)
+ }
+
+ // Get dataset.
+ dataset := tabula.Claset{}
+ _, e = dsv.SimpleRead(fcfg, &dataset)
+ if e != nil {
+ panic(e)
+ }
+
+ fmt.Println("[lnsmote] Dataset:", &dataset)
+
+ row := dataset.GetRow(0)
+ fmt.Println("[lnsmote] sample:", row)
+
+ e = runLnsmote(lnsmoteRun, &dataset)
+ if e != nil {
+ panic(e)
+ }
+
+ if !merge {
+ return
+ }
+
+ e = runMerge(lnsmoteRun, &dataset)
+ if e != nil {
+ panic(e)
+ }
+}
diff --git a/lib/mining/cmd/lnsmote/phoneme.dsv b/lib/mining/cmd/lnsmote/phoneme.dsv
new file mode 100644
index 00000000..6e179c50
--- /dev/null
+++ b/lib/mining/cmd/lnsmote/phoneme.dsv
@@ -0,0 +1,45 @@
+{
+ "Input" : "../../testdata/phoneme/phoneme.dat"
+, "Rejected" : "phoneme.rej"
+, "MaxRows" : -1
+, "ClassIndex" : 5
+, "DatasetMode" : "matrix"
+
+, "K" : 5
+
+, "PercentOver" : 100
+, "SyntheticFile" : "phoneme.lnsmote.dat"
+
+, "ClassMinor" : "1"
+
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "0"
+ , "1"
+ ]
+ }]
+}
diff --git a/lib/mining/cmd/rf/iris.dsv b/lib/mining/cmd/rf/iris.dsv
new file mode 100644
index 00000000..e86af6d1
--- /dev/null
+++ b/lib/mining/cmd/rf/iris.dsv
@@ -0,0 +1,41 @@
+{
+ "Input" :"../../testdata/iris/iris.dat"
+, "Rejected" :"iris.rej"
+, "MaxRows" :-1
+, "ClassIndex" :4
+, "DatasetMode" :"matrix"
+, "SplitMethod" :"gini"
+, "NTree" :50
+, "NRandomFeature" :2
+, "RunOOB" :true
+, "OOBStatsFile" :"iris.oob.stat"
+, "PerfFile" :"iris.perf"
+, "StatFile" :"iris.stat"
+, "InputMetadata" :
+ [{
+ "Name" :"sepal-length"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"sepal-width"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"petal-length"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"petal-width"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "Iris-setosa"
+ , "Iris-versicolor"
+ , "Iris-virginica"
+ ]
+ }]
+}
diff --git a/lib/mining/cmd/rf/main.go b/lib/mining/cmd/rf/main.go
new file mode 100644
index 00000000..2c029c37
--- /dev/null
+++ b/lib/mining/cmd/rf/main.go
@@ -0,0 +1,188 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/classifier/rf"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
const (
	// tag is the prefix prepended to every log line of this command.
	tag = "[rf]"
)
+
// Command line options. Numeric values <= 0 and empty strings mean "not
// set", in which case the value loaded from the training configuration
// file is kept (see createRandomForest).
var (
	// nTree number of tree.
	nTree = 0
	// nRandomFeature number of feature to compute.
	nRandomFeature = 0
	// percentBoot percentage of sample for bootstraping.
	percentBoot = 0
	// oobStatsFile where statistic will be written.
	oobStatsFile = ""
	// perfFile where performance of classifier will be written.
	perfFile = ""
	// trainCfg point to the configuration file for training or creating
	// a model
	trainCfg = ""
	// testCfg point to the configuration file for testing
	testCfg = ""

	// forest the main object.
	forest rf.Runtime
)
+
// usage prints the documentation of all registered command line flags.
var usage = func() {
	flag.PrintDefaults()
}
+
// init registers the command line flags. The numeric flags default to -1
// and the string flags to "", meaning "not set"; createRandomForest only
// overrides the configuration file when a flag holds a positive or
// non-empty value. The "(default ...)" text in the usage strings refers
// to the classifier's own internal defaults.
func init() {
	flagUsage := []string{
		"Number of tree in forest (default 100)",
		"Number of feature to compute (default 0)",
		"Percentage of bootstrap (default 64%)",
		"OOB statistic file, where OOB data will be written",
		"Performance file, where statistic of classifying data set will be written",
		"Training configuration",
		"Test configuration",
	}

	flag.IntVar(&nTree, "ntree", -1, flagUsage[0])
	flag.IntVar(&nRandomFeature, "nrandomfeature", -1, flagUsage[1])
	flag.IntVar(&percentBoot, "percentboot", -1, flagUsage[2])

	flag.StringVar(&oobStatsFile, "oobstatsfile", "", flagUsage[3])
	flag.StringVar(&perfFile, "perffile", "", flagUsage[4])

	flag.StringVar(&trainCfg, "train", "", flagUsage[5])
	flag.StringVar(&testCfg, "test", "", flagUsage[6])
}
+
+func trace() (start time.Time) {
+ start = time.Now()
+ fmt.Println(tag, "start", start)
+ return
+}
+
+func un(startTime time.Time) {
+ endTime := time.Now()
+ fmt.Println(tag, "elapsed time", endTime.Sub(startTime))
+}
+
//
// createRandomForest will create random forest for training, with the
// following steps,
// (1) load training configuration from the JSON content of trainCfg.
// (2) Overwrite configuration parameters that were set from the command
// line (flags default to -1/"" meaning "not set", so only positive or
// non-empty flag values win over the file).
//
func createRandomForest() error {
	// (1)
	config, e := ioutil.ReadFile(trainCfg)
	if e != nil {
		return e
	}

	forest = rf.Runtime{}

	e = json.Unmarshal(config, &forest)
	if e != nil {
		return e
	}

	// (2)
	if nTree > 0 {
		forest.NTree = nTree
	}
	if nRandomFeature > 0 {
		forest.NRandomFeature = nRandomFeature
	}
	if percentBoot > 0 {
		forest.PercentBoot = percentBoot
	}
	if oobStatsFile != "" {
		forest.OOBStatsFile = oobStatsFile
	}
	if perfFile != "" {
		forest.PerfFile = perfFile
	}

	return nil
}
+
// train creates the random forest runtime from the training configuration,
// reads the training set described by the same DSV file, and builds the
// forest. Any failure is fatal (panic) since no usable model results.
func train() {
	e := createRandomForest()
	if e != nil {
		panic(e)
	}

	trainset := tabula.Claset{}

	_, e = dsv.SimpleRead(trainCfg, &trainset)
	if e != nil {
		panic(e)
	}

	e = forest.Build(&trainset)
	if e != nil {
		panic(e)
	}
}
+
// test classifies the data set described by testCfg with the (already
// built) forest, computes its performance, and writes the performance
// statistics to the configured performance file.
func test() {
	testset := tabula.Claset{}
	_, e := dsv.SimpleRead(testCfg, &testset)
	if e != nil {
		panic(e)
	}

	predicts, _, probs := forest.ClassifySet(&testset, nil)

	forest.Performance(&testset, predicts, probs)

	e = forest.WritePerformance()
	if e != nil {
		panic(e)
	}
}
+
//
// main runs training and/or testing of the random forest classifier,
//
// (0) Parse and check command line parameters.
// (1) If trainCfg parameter is set,
// (1.1) train the model,
// (1.2) TODO: load saved model (currently it only prints usage and exits
// when no -train flag and no positional argument are given).
// (2) If testCfg parameter is set,
// (2.1) Test the model using data from testCfg.
//
func main() {
	defer un(trace())

	// (0)
	flag.Parse()

	// (1)
	if trainCfg != "" {
		// (1.1)
		train()
	} else {
		// (1.2)
		if len(flag.Args()) <= 0 {
			usage()
			os.Exit(1)
		}
	}

	// (2)
	if testCfg != "" {
		test()
	}
}
diff --git a/lib/mining/cmd/rf/phoneme.dsv b/lib/mining/cmd/rf/phoneme.dsv
new file mode 100644
index 00000000..1219989f
--- /dev/null
+++ b/lib/mining/cmd/rf/phoneme.dsv
@@ -0,0 +1,45 @@
+{
+ "Input" :"../../testdata/phoneme/phoneme.dat"
+, "Rejected" :"phoneme.rej"
+, "MaxRows" :-1
+, "DatasetMode" :"matrix"
+, "ClassIndex" :5
+
+, "NTree" :50
+, "NRandomFeature" :2
+, "RunOOB" :true
+, "OOBStatsFile" :"phoneme.oob.stat"
+, "PerfFile" :"phoneme.perf"
+, "StatFile" :"phoneme.stat"
+
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"class"
+ , "Type" :"real"
+ , "ValueSpace" :
+ [
+ "1"
+ , "0"
+ ]
+ }]
+}
diff --git a/lib/mining/cmd/smote/main.go b/lib/mining/cmd/smote/main.go
new file mode 100644
index 00000000..48a53f51
--- /dev/null
+++ b/lib/mining/cmd/smote/main.go
@@ -0,0 +1,193 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/resampling/smote"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
// Command line options. These defaults may be overridden by the DSV
// configuration file and then again by explicit command line flags (see
// createSmote).
var (
	// percentOver contain percentage of over sampling.
	percentOver = 100
	// knn contain number of nearest neighbours considered when
	// oversampling.
	knn = 5
	// synFile flag for synthetic file output.
	synFile = ""
	// merge flag, if its true the original and synthetic will be merged
	// into `synFile`.
	merge = false
)
+
// usage prints a one-line synopsis of the command followed by the
// documentation of all registered flags.
var usage = func() {
	cmd := os.Args[0]
	fmt.Fprintf(os.Stderr, "Usage of %s:\n"+
		"[-percentover number] "+
		"[-knn number] "+
		"[-syntheticfile string] "+
		"[-merge bool] "+
		"[config.dsv]\n", cmd)
	flag.PrintDefaults()
}
+
// init registers the command line flags. The numeric flags default to -1
// ("not set"); createSmote only overrides the configuration file when a
// flag holds a positive value.
func init() {
	flagUsage := []string{
		"Percentage of oversampling (default 100)",
		"Number of nearest neighbours (default 5)",
		"File where synthetic samples will be written (default '')",
		"If true then original and synthetic will be merged when" +
			" written to file (default false)",
	}

	flag.IntVar(&percentOver, "percentover", -1, flagUsage[0])
	flag.IntVar(&knn, "knn", -1, flagUsage[1])
	flag.StringVar(&synFile, "syntheticfile", "", flagUsage[2])
	flag.BoolVar(&merge, "merge", false, flagUsage[3])
}
+
// trace prints a start marker for the section named name and returns the
// name together with the current time, for use with un() via defer.
func trace(name string) (s string, startTime time.Time) {
	fmt.Println("[START]", name)
	return name, time.Now()
}
+
+func un(s string, startTime time.Time) {
+ endTime := time.Now()
+ fmt.Println("[END]", s, "with elapsed time",
+ endTime.Sub(startTime))
+}
+
//
// createSmote will create and initialize SMOTE object from config file and
// from command parameter.
//
// The JSON content of fcfg provides the base values; positive -percentover
// and -knn flag values then override PercentOver and K respectively.
//
func createSmote(fcfg string) (smoteRun *smote.Runtime, e error) {
	smoteRun = &smote.Runtime{}

	config, e := ioutil.ReadFile(fcfg)
	if e != nil {
		return nil, e
	}

	e = json.Unmarshal(config, smoteRun)
	if e != nil {
		return nil, e
	}

	// Use option value from command parameter.
	if percentOver > 0 {
		smoteRun.PercentOver = percentOver
	}
	if knn > 0 {
		smoteRun.K = knn
	}

	if debug.Value >= 1 {
		fmt.Println("[smote]", smoteRun)
	}

	return
}
+
+//
+// runSmote will select minority class from dataset and run oversampling.
+//
+func runSmote(smote *smote.Runtime, dataset *tabula.Claset) (e error) {
+ minorset := dataset.GetMinorityRows()
+
+ if debug.Value >= 1 {
+ fmt.Println("[smote] # minority samples:", minorset.Len())
+ }
+
+ e = smote.Resampling(*minorset)
+ if e != nil {
+ return
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[smote] # synthetics:", smote.Synthetics.Len())
+ }
+
+ return
+}
+
+// runMerge will append original dataset to synthetic file.
+func runMerge(smote *smote.Runtime, dataset *tabula.Claset) (e error) {
+ writer, e := dsv.NewWriter("")
+ if e != nil {
+ return
+ }
+
+ e = writer.ReopenOutput(smote.SyntheticFile)
+ if e != nil {
+ return
+ }
+
+ sep := dsv.DefSeparator
+ n, e := writer.WriteRawDataset(dataset, &sep)
+ if e != nil {
+ return
+ }
+
+ if debug.Value >= 1 {
+ fmt.Println("[smote] # appended:", n)
+ }
+
+ return writer.Close()
+}
+
+func main() {
+ defer un(trace("smote"))
+
+ flag.Parse()
+
+ if len(flag.Args()) <= 0 {
+ usage()
+ os.Exit(1)
+ }
+
+ fcfg := flag.Arg(0)
+
+ // Parsing config file and parameter.
+ smote, e := createSmote(fcfg)
+ if e != nil {
+ panic(e)
+ }
+
+ // Get dataset.
+ dataset := tabula.Claset{}
+ _, e = dsv.SimpleRead(fcfg, &dataset)
+ if e != nil {
+ panic(e)
+ }
+
+ fmt.Println("[smote] Dataset:", &dataset)
+
+ row := dataset.GetRow(0)
+ fmt.Println("[smote] sample:", row)
+
+ e = runSmote(smote, &dataset)
+ if e != nil {
+ panic(e)
+ }
+
+ if !merge {
+ return
+ }
+
+ e = runMerge(smote, &dataset)
+ if e != nil {
+ panic(e)
+ }
+}
diff --git a/lib/mining/cmd/smote/phoneme.dsv b/lib/mining/cmd/smote/phoneme.dsv
new file mode 100644
index 00000000..8cba121c
--- /dev/null
+++ b/lib/mining/cmd/smote/phoneme.dsv
@@ -0,0 +1,40 @@
+{
+ "Input" : "../../testdata/phoneme/phoneme.dat"
+, "Rejected" : "phoneme.rej"
+, "MaxRows" : -1
+, "ClassIndex" : 5
+, "DatasetMode" : "matrix"
+, "PercentOver" : 100
+, "K" : 5
+, "SyntheticFile" : "phoneme_smote.dat"
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "0"
+ , "1"
+ ]
+ }]
+}
diff --git a/lib/mining/gain/gini/gini.go b/lib/mining/gain/gini/gini.go
new file mode 100644
index 00000000..2fcc2f9d
--- /dev/null
+++ b/lib/mining/gain/gini/gini.go
@@ -0,0 +1,461 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package gini contain function to calculating Gini gain.
+//
+// Gini gain, which is an impurity-based criterion that measures the divergences
+// between the probability distribution of the target attributes' values.
+//
+package gini
+
+import (
+ "fmt"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/numbers"
+ libstrings "github.com/shuLhan/share/lib/strings"
+)
+
//
// Gini contain slice of sorted index, slice of partition values, slice of Gini
// index, Gini value for all samples.
//
type Gini struct {
	// Skip if its true, the gain value would not be searched on this
	// instance.
	Skip bool
	// IsContinu define whether the Gini index came from continuous
	// attribute or not.
	IsContinu bool
	// Value of Gini index for all value in attribute.
	Value float64
	// MaxPartGain contain the index of partition which have the maximum
	// gain.
	MaxPartGain int
	// MaxGainValue contain maximum gain of index.
	MaxGainValue float64
	// MinIndexPart contain the index of partition which have the minimum
	// Gini index.
	MinIndexPart int
	// MinIndexValue contain the minimum Gini index value.
	MinIndexValue float64
	// SortedIndex of attribute, sorted by values of attribute. This will
	// be used to reference the unsorted target attribute.
	SortedIndex []int
	// ContinuPart contain list of partition value for continuous attribute.
	ContinuPart []float64
	// DiscretePart contain the possible combination of discrete values.
	DiscretePart libstrings.Table
	// Index contain list of Gini Index for each partition.
	Index []float64
	// Gain contain information gain for each partition.
	Gain []float64
}
+
//
// ComputeDiscrete Given an attribute A with discrete values 'discval', and the
// target attribute T which contain N classes in C, compute the information
// gain of A.
//
// The result is saved as gain value in MaxGainValue for each partition.
//
func (gini *Gini) ComputeDiscrete(A *[]string, discval *[]string, T *[]string,
	C *[]string) {
	gini.IsContinu = false

	// create partition for possible combination of discrete values.
	gini.createDiscretePartition((*discval))

	if debug.Value >= 2 {
		fmt.Println("[gini] part :", gini.DiscretePart)
	}

	// One index/gain slot per candidate partition; MinIndexValue starts
	// above any reachable Gini index (index is in [0,1]).
	gini.Index = make([]float64, len(gini.DiscretePart))
	gini.Gain = make([]float64, len(gini.DiscretePart))
	gini.MinIndexValue = 1.0

	// compute gini index for all samples
	gini.Value = gini.compute(T, C)

	gini.computeDiscreteGain(A, T, C)
}
+
//
// computeDiscreteGain will compute Gini index and Gain for each partition
// in DiscretePart, updating Index, Gain, and the running minimum-index /
// maximum-gain trackers.
//
func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) {
	// number of samples
	nsample := float64(len(*A))

	if debug.Value >= 3 {
		fmt.Println("[gini] sample:", T)
		fmt.Printf("[gini] Gini(a=%s) = %f\n", (*A), gini.Value)
	}

	// compute gini index for each discrete values
	for i, subPart := range gini.DiscretePart {
		// check if sub partition has at least an element
		if len(subPart) <= 0 {
			continue
		}

		sumGI := 0.0
		for _, part := range subPart {
			ndisc := 0.0
			var subT []string

			for _, el := range part {
				for t, a := range *A {
					if a != el {
						continue
					}

					// count how many sample with this discrete value
					ndisc++
					// split the target by discrete value
					subT = append(subT, (*T)[t])
				}
			}

			// compute gini index for subtarget
			giniIndex := gini.compute(&subT, C)

			// compute probabilities of discrete value through all samples
			p := ndisc / nsample

			probIndex := p * giniIndex

			// sum all probabilities times gini index.
			sumGI += probIndex

			if debug.Value >= 3 {
				fmt.Printf("[gini] subsample: %v\n", subT)
				fmt.Printf("[gini] Gini(a=%s) = %f/%f * %f = %f\n",
					part, ndisc, nsample,
					giniIndex, probIndex)
			}
		}

		gini.Index[i] = sumGI
		gini.Gain[i] = gini.Value - sumGI

		if debug.Value >= 3 {
			fmt.Printf("[gini] sample: %v\n", subPart)
			fmt.Printf("[gini] Gain(a=%s) = %f - %f = %f\n",
				subPart, gini.Value, sumGI,
				gini.Gain[i])
		}

		// Zero index values are excluded from the minimum search.
		if gini.MinIndexValue > gini.Index[i] && gini.Index[i] != 0 {
			gini.MinIndexValue = gini.Index[i]
			gini.MinIndexPart = i
		}

		if gini.MaxGainValue < gini.Gain[i] {
			gini.MaxGainValue = gini.Gain[i]
			gini.MaxPartGain = i
		}
	}
}
+
+//
+// createDiscretePartition will create possible combination for discrete value
+// in DiscretePart.
+//
+func (gini *Gini) createDiscretePartition(discval []string) {
+ // no discrete values ?
+ if len(discval) <= 0 {
+ return
+ }
+
+ // use set partition function to group the discrete values into two
+ // subset.
+ gini.DiscretePart = libstrings.Partition(discval, 2)
+}
+
//
// ComputeContinu Given an attribute A and the target attribute T which contain
// N classes in C, compute the information gain of A.
//
// The result of Gini partitions value, Gini Index, and Gini Gain is saved in
// ContinuPart, Index, and Gain.
//
// A and T are copied first, so the caller's slices are left untouched.
//
func (gini *Gini) ComputeContinu(A *[]float64, T *[]string, C *[]string) {
	gini.IsContinu = true

	// make a copy of attribute and target.
	A2 := make([]float64, len(*A))
	copy(A2, *A)

	T2 := make([]string, len(*T))
	copy(T2, *T)

	gini.SortedIndex = numbers.Floats64IndirectSort(A2, true)

	if debug.Value >= 1 {
		fmt.Println("[gini] attr sorted :", A2)
	}

	// sort the target attribute using sorted index.
	libstrings.SortByIndex(&T2, gini.SortedIndex)

	// create partition
	gini.createContinuPartition(&A2)

	// create holder for gini index and gini gain
	gini.Index = make([]float64, len(gini.ContinuPart))
	gini.Gain = make([]float64, len(gini.ContinuPart))
	gini.MinIndexValue = 1.0

	// compute gini index for all samples
	gini.Value = gini.compute(&T2, C)

	gini.computeContinuGain(&A2, &T2, C)
}
+
+//
+// createContinuPartition for dividing class and computing Gini index.
+//
+// This is assuming that the data `A` has been sorted in ascending order.
+//
+func (gini *Gini) createContinuPartition(A *[]float64) {
+ l := len(*A)
+ gini.ContinuPart = make([]float64, 0)
+
+ // loop from first index until last index - 1
+ for i := 0; i < l-1; i++ {
+ sum := (*A)[i] + (*A)[i+1]
+ med := sum / 2.0
+
+ // If median is zero, its mean both left and right value is
+ // zero. We are not allowing this, because it will result the
+ // minimum Gini Index or maximum Gain value.
+ if med == 0 {
+ continue
+ }
+
+ // Reject if median is contained in attribute's value.
+ // We use equality because if both A[i] and A[i+1] value is
+ // equal, the median is equal to both of them.
+ exist := false
+ for j := 0; j <= i; j++ {
+ if (*A)[j] == med {
+ exist = true
+ break
+ }
+ }
+ if !exist {
+ gini.ContinuPart = append(gini.ContinuPart, med)
+ }
+ }
+}
+
//
// compute value for attribute T.
//
// Return Gini value in the form of,
//
//	1 - sum (probability of each classes in T)
//
// An empty target yields 0 (perfectly pure by convention).
//
func (gini *Gini) compute(T *[]string, C *[]string) float64 {
	n := float64(len(*T))
	if n == 0 {
		return 0
	}

	// classCount[x] is the number of samples in T with class (*C)[x].
	classCount := libstrings.CountTokens(*T, *C, true)

	var sump2 float64

	for x, v := range classCount {
		p := float64(v) / n
		sump2 += (p * p)

		if debug.Value >= 3 {
			fmt.Printf("[gini] compute (%s): (%d/%f)^2 = %f\n",
				(*C)[x], v, n, p*p)
		}

	}

	return 1 - sump2
}
+
//
// computeContinuGain for each partition.
//
// The Gini gain formula we used here is,
//
//	Gain(part,S) = Gini(S) - ((count(left)/S * Gini(left))
//			+ (count(right)/S * Gini(right)))
//
// where,
// - left is sub-sample from S that is less than part value.
// - right is sub-sample from S that is greater than part value.
//
// Assumes A (and the matching T) are already sorted ascending by A.
//
func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) {
	var gleft, gright float64
	var tleft, tright []string

	nsample := len(*A)

	if debug.Value >= 2 {
		fmt.Println("[gini] sorted data:", A)
		fmt.Println("[gini] Gini.Value:", gini.Value)
	}

	for p, contVal := range gini.ContinuPart {

		// find the split of samples between partition based on
		// partition value: partidx is the first index with a value
		// greater than the split point (nsample if none).
		partidx := nsample
		for x, attrVal := range *A {
			if attrVal > contVal {
				partidx = x
				break
			}
		}

		nleft := partidx
		nright := nsample - partidx
		pleft := float64(nleft) / float64(nsample)
		pright := float64(nright) / float64(nsample)

		if partidx > 0 {
			tleft = (*T)[0:partidx]
			tright = (*T)[partidx:]

			gleft = gini.compute(&tleft, C)
			gright = gini.compute(&tright, C)
		} else {
			// Empty left side: all samples fall to the right.
			tleft = nil
			tright = (*T)[0:]

			gleft = 0
			gright = gini.compute(&tright, C)
		}

		// count class in partition
		gini.Index[p] = ((pleft * gleft) + (pright * gright))
		gini.Gain[p] = gini.Value - gini.Index[p]

		if debug.Value >= 3 {
			fmt.Println("[gini] tleft:", tleft)
			fmt.Println("[gini] tright:", tright)

			fmt.Printf("[gini] GiniGain(%v) = %f - (%f * %f) + (%f * %f) = %f\n",
				contVal, gini.Value, pleft, gleft,
				pright, gright, gini.Gain[p])
		}

		// Zero index values are excluded from the minimum search.
		if gini.MinIndexValue > gini.Index[p] && gini.Index[p] != 0 {
			gini.MinIndexValue = gini.Index[p]
			gini.MinIndexPart = p
		}

		if gini.MaxGainValue < gini.Gain[p] {
			gini.MaxGainValue = gini.Gain[p]
			gini.MaxPartGain = p
		}
	}
}
+
+//
+// GetMaxPartGainValue return the partition that have the maximum Gini gain.
+//
+func (gini *Gini) GetMaxPartGainValue() interface{} {
+ if gini.IsContinu {
+ return gini.ContinuPart[gini.MaxPartGain]
+ }
+
+ return gini.DiscretePart[gini.MaxPartGain]
+}
+
//
// GetMaxGainValue return the value of partition which contain the maximum Gini
// gain (see MaxPartGain for the partition itself).
//
func (gini *Gini) GetMaxGainValue() float64 {
	return gini.MaxGainValue
}
+
+//
+// GetMinIndexPartValue return the partition that have the minimum Gini index.
+//
+func (gini *Gini) GetMinIndexPartValue() interface{} {
+ if gini.IsContinu {
+ return gini.ContinuPart[gini.MinIndexPart]
+ }
+
+ return gini.DiscretePart[gini.MinIndexPart]
+}
+
//
// GetMinIndexValue return the minimum Gini index value (see MinIndexPart
// for the partition that produced it).
//
func (gini *Gini) GetMinIndexValue() float64 {
	return gini.MinIndexValue
}
+
+//
+// FindMaxGain find the attribute and value that have the maximum gain.
+// The returned value is index of attribute.
+//
+func FindMaxGain(gains *[]Gini) (MaxGainIdx int) {
+ var gainValue = 0.0
+ var maxGainValue = 0.0
+
+ for i := range *gains {
+ if (*gains)[i].Skip {
+ continue
+ }
+ gainValue = (*gains)[i].GetMaxGainValue()
+ if gainValue > maxGainValue {
+ maxGainValue = gainValue
+ MaxGainIdx = i
+ }
+ }
+
+ return
+}
+
+//
+// FindMinGiniIndex return the index of attribute that have the minimum Gini index.
+//
+func FindMinGiniIndex(ginis *[]Gini) (MinIndexIdx int) {
+ var indexV = 0.0
+ var minIndexV = 1.0
+
+ for i := range *ginis {
+ indexV = (*ginis)[i].GetMinIndexValue()
+ if indexV > minIndexV {
+ minIndexV = indexV
+ MinIndexIdx = i
+ }
+ }
+
+ return
+}
+
//
// String yes, it will print the Gini fields in a JSON-like format, mainly
// for debugging output.
//
func (gini Gini) String() (s string) {
	s = fmt.Sprint("{\n",
		" Skip          :", gini.Skip, "\n",
		" IsContinu     :", gini.IsContinu, "\n",
		" Index         :", gini.Index, "\n",
		" Value         :", gini.Value, "\n",
		" Gain          :", gini.Gain, "\n",
		" MaxPartGain   :", gini.MaxPartGain, "\n",
		" MaxGainValue  :", gini.MaxGainValue, "\n",
		" MinIndexPart  :", gini.MinIndexPart, "\n",
		" MinIndexValue :", gini.MinIndexValue, "\n",
		" SortedIndex   :", gini.SortedIndex, "\n",
		" ContinuPart   :", gini.ContinuPart, "\n",
		" DiscretePart  :", gini.DiscretePart, "\n",
		"}")
	return
}
diff --git a/lib/mining/gain/gini/gini_test.go b/lib/mining/gain/gini/gini_test.go
new file mode 100644
index 00000000..682d5159
--- /dev/null
+++ b/lib/mining/gain/gini/gini_test.go
@@ -0,0 +1,59 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gini
+
+import (
+ "fmt"
+ "testing"
+)
+
// data holds two continuous attribute samples: the first with mixed
// values, the second all zeros (a degenerate case with no valid split).
var data = [][]float64{
	{1.0, 6.0, 5.0, 4.0, 7.0, 3.0, 8.0, 7.0, 5.0},
	{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
}

// targetValues is the class label of each sample; classes is the class
// value space.
var targetValues = []string{"P", "P", "N", "P", "N", "N", "N", "P", "N"}
var classes = []string{"P", "N"}
+
// TestComputeContinu exercises ComputeContinu on both attribute samples
// and prints the result. NOTE(review): it makes no assertions — it only
// checks that the computation runs; confirm whether expected values should
// be pinned.
func TestComputeContinu(t *testing.T) {
	target := make([]string, len(targetValues))

	copy(target, targetValues)

	fmt.Println("target:", target)

	fmt.Println("data:", data[0])
	GINI := Gini{}
	GINI.ComputeContinu(&data[0], &target, &classes)
	fmt.Println(">>> gini:", GINI)

	fmt.Println("data:", data[1])
	GINI = Gini{}
	GINI.ComputeContinu(&data[1], &target, &classes)
	fmt.Println(">>> gini:", GINI)
}
+
// discreteSamples holds three discrete attributes over the same nine
// samples as targetValues; discreteValues is their value space.
var discreteSamples = [][]string{
	{"T", "T", "T", "F", "F", "F", "F", "T", "F"},
	{"T", "T", "F", "F", "T", "T", "F", "F", "T"},
	{"T", "T", "F", "T", "F", "F", "F", "T", "F"},
}
var discreteValues = []string{"T", "F"}
+
// TestComputeDiscrete exercises ComputeDiscrete on each discrete sample
// and prints the result. NOTE(review): the same gini instance is reused
// across iterations, so Max/Min tracking fields carry state from previous
// samples — confirm whether a fresh Gini{} per sample was intended.
func TestComputeDiscrete(t *testing.T) {
	gini := Gini{}
	target := make([]string, len(targetValues))

	for _, sample := range discreteSamples {
		copy(target, targetValues)

		fmt.Println("target:", target)
		fmt.Println("data:", sample)

		gini.ComputeDiscrete(&sample, &discreteValues, &target,
			&classes)

		fmt.Println(gini)
	}
}
diff --git a/lib/mining/gain/gini/ginifloat.go b/lib/mining/gain/gini/ginifloat.go
new file mode 100644
index 00000000..b890b39d
--- /dev/null
+++ b/lib/mining/gain/gini/ginifloat.go
@@ -0,0 +1,168 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gini
+
+import (
+ "fmt"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/numbers"
+)
+
//
// ComputeContinuFloat Given an attribute A and the target attribute T which contain
// N classes in C, compute the information gain of A.
//
// The result of Gini partitions value, Gini Index, and Gini Gain is saved in
// ContinuPart, Index, and Gain.
//
// Unlike ComputeContinu, A and T are sorted IN PLACE (no copies are made),
// so the caller's slices are reordered.
//
// Algorithm,
// (0) Sort the attribute.
// (1) Sort the target attribute using sorted index.
// (2) Create continu partition.
// (3) Create temporary space for gini index and gini gain.
// (4) Compute gini index for all target.
//
func (gini *Gini) ComputeContinuFloat(A, T, C *[]float64) {
	gini.IsContinu = true

	gini.SortedIndex = numbers.Floats64IndirectSort(*A, true)

	if debug.Value >= 1 {
		fmt.Println("[gini] attr sorted :", A)
	}

	// (1)
	numbers.Floats64SortByIndex(T, gini.SortedIndex)

	// (2)
	gini.createContinuPartition(A)

	// (3)
	gini.Index = make([]float64, len(gini.ContinuPart))
	gini.Gain = make([]float64, len(gini.ContinuPart))
	gini.MinIndexValue = 1.0

	// (4)
	gini.Value = gini.computeFloat(T, C)

	gini.computeContinuGainFloat(A, T, C)
}
+
//
// computeFloat will compute Gini value for attribute T.
//
// Gini value is computed using formula,
//
//	1 - sum (probability of each classes in T)
//
// An empty target yields 0 (perfectly pure by convention).
//
func (gini *Gini) computeFloat(T, C *[]float64) float64 {
	n := float64(len(*T))
	if n == 0 {
		return 0
	}

	// classCount[x] is the number of samples in T with class (*C)[x].
	classCount := numbers.Floats64Counts(*T, *C)

	var sump2 float64

	for x, v := range classCount {
		p := float64(v) / n
		sump2 += (p * p)

		if debug.Value >= 3 {
			fmt.Printf("[gini] compute (%f): (%d/%f)^2 = %f\n",
				(*C)[x], v, n, p*p)
		}

	}

	return 1 - sump2
}
+
//
// computeContinuGainFloat will compute gain for each partition.
//
// The Gini gain formula we used here is,
//
//	Gain(part,S) = Gini(S) - ((count(left)/S * Gini(left))
//			+ (count(right)/S * Gini(right)))
//
// where,
// - left is sub-sample from S that is less than part value.
// - right is sub-sample from S that is greater than part value.
//
// Algorithm,
// (0) For each partition value,
// (0.1) Find the split of samples between partition based on partition value.
// (0.2) Count class in partition.
//
// Assumes A (and the matching T) are already sorted ascending by A.
//
func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) {
	var gainLeft, gainRight float64
	var tleft, tright []float64

	nsample := len(*A)

	if debug.Value >= 2 {
		fmt.Println("[gini] sorted data:", A)
		fmt.Println("[gini] Gini.Value:", gini.Value)
	}

	// (0)
	for p, contVal := range gini.ContinuPart {

		// (0.1) partidx is the first index whose value is greater
		// than the split point (nsample if none).
		partidx := nsample
		for x, attrVal := range *A {
			if attrVal > contVal {
				partidx = x
				break
			}
		}

		nleft := float64(partidx)
		nright := float64(nsample - partidx)
		probLeft := nleft / float64(nsample)
		probRight := nright / float64(nsample)

		if partidx > 0 {
			tleft = (*T)[0:partidx]
			tright = (*T)[partidx:]

			gainLeft = gini.computeFloat(&tleft, C)
			gainRight = gini.computeFloat(&tright, C)
		} else {
			// Empty left side: all samples fall to the right.
			tleft = nil
			tright = (*T)[0:]

			gainLeft = 0
			gainRight = gini.computeFloat(&tright, C)
		}

		// (0.2)
		gini.Index[p] = ((probLeft * gainLeft) +
			(probRight * gainRight))
		gini.Gain[p] = gini.Value - gini.Index[p]

		if debug.Value >= 3 {
			fmt.Println("[gini] tleft:", tleft)
			fmt.Println("[gini] tright:", tright)

			fmt.Printf("[gini] GiniGain(%v) = %f - (%f * %f) + (%f * %f) = %f\n",
				contVal, gini.Value, probLeft, gainLeft,
				probRight, gainRight, gini.Gain[p])
		}

		// Zero index values are excluded from the minimum search.
		if gini.MinIndexValue > gini.Index[p] && gini.Index[p] != 0 {
			gini.MinIndexValue = gini.Index[p]
			gini.MinIndexPart = p
		}

		if gini.MaxGainValue < gini.Gain[p] {
			gini.MaxGainValue = gini.Gain[p]
			gini.MaxPartGain = p
		}
	}
}
diff --git a/lib/mining/knn/knn.go b/lib/mining/knn/knn.go
new file mode 100644
index 00000000..0fa38f9c
--- /dev/null
+++ b/lib/mining/knn/knn.go
@@ -0,0 +1,109 @@
+// Copyright 2015-2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package knn implement the K Nearest Neighbor using Euclidian to compute the
+// distance between samples.
+//
+package knn
+
+import (
+ "fmt"
+ "math"
+ "sort"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
const (
	// TEuclidianDistance used in Runtime.DistanceMethod. It is the
	// only method handled by FindNeighbors.
	TEuclidianDistance = 0
)
+
//
// Runtime parameters for KNN processing.
//
type Runtime struct {
	// DistanceMethod define how the distance between sample will be
	// measured; see TEuclidianDistance.
	DistanceMethod int
	// ClassIndex define index of class in dataset; this column is
	// skipped when computing distances.
	ClassIndex int `json:"ClassIndex"`
	// K define number of nearest neighbors that will be searched.
	K int `json:"K"`

	// AllNeighbors contain all neighbours, sorted by distance after a
	// call to ComputeEuclidianDistance.
	AllNeighbors Neighbors
}
+
//
// ComputeEuclidianDistance compute the distance of instance with each sample in
// dataset `samples`, adding the non-zero distances to AllNeighbors and
// sorting them ascending.
//
// NOTE(review): the accumulator adds math.Abs(diff) rather than diff*diff,
// so the result is sqrt(sum|xi-yi|), not a true Euclidean distance —
// confirm whether this is intended (the expected values in knn_test.go are
// pinned to the current formula).
//
func (in *Runtime) ComputeEuclidianDistance(samples *tabula.Rows,
	instance *tabula.Row,
) {
	for x := range *samples {
		row := (*samples)[x]

		// compute euclidian distance
		d := 0.0
		for y, rec := range *row {
			if y == in.ClassIndex {
				// skip class attribute
				continue
			}

			ir := (*instance)[y]
			diff := 0.0

			diff = ir.Float() - rec.Float()

			d += math.Abs(diff)
		}

		// only add sample distance which is not zero (its probably
		// we calculating with the instance itself)
		if d != 0 {
			in.AllNeighbors.Add(row, math.Sqrt(d))
		}
	}

	sort.Sort(&in.AllNeighbors)
}
+
//
// FindNeighbors Given sample set and an instance, return the nearest neighbors as
// a slice of neighbors, containing at most K entries (fewer when the
// sample set is smaller than K).
//
func (in *Runtime) FindNeighbors(samples *tabula.Rows, instance *tabula.Row) (
	kneighbors Neighbors,
) {
	// Reset current input neighbours
	in.AllNeighbors = Neighbors{}

	switch in.DistanceMethod {
	case TEuclidianDistance:
		in.ComputeEuclidianDistance(samples, instance)
	}

	// Make sure number of neighbors is greater than request.
	minK := in.AllNeighbors.Len()
	if minK > in.K {
		minK = in.K
	}

	if debug.Value >= 2 {
		fmt.Println("[knn] all neighbors:", in.AllNeighbors.Len())
	}

	kneighbors = in.AllNeighbors.SelectRange(0, minK)

	if debug.Value >= 2 {
		fmt.Println("[knn] k neighbors:", kneighbors.Len())
	}

	return
}
diff --git a/lib/mining/knn/knn_test.go b/lib/mining/knn/knn_test.go
new file mode 100644
index 00000000..86ef75c0
--- /dev/null
+++ b/lib/mining/knn/knn_test.go
@@ -0,0 +1,62 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package knn
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/tabula"
+ "github.com/shuLhan/share/lib/test"
+)
+
+func TestComputeEuclidianDistance(t *testing.T) {
+ var exp = []string{
+ `[0.302891 0.608544 0.47413 1.42718 -0.811085 1]`,
+ `[0.243474 0.505146 0.472892 1.34802 -0.844252 1]` +
+ `[0.202343 0.485983 0.527533 1.47307 -0.809672 1]` +
+ `[0.215496 0.523418 0.51719 1.43548 -0.933981 1]` +
+ `[0.214331 0.546086 0.414773 1.38542 -0.702336 1]` +
+ `[0.301676 0.554505 0.594757 1.21258 -0.873084 1]`,
+ }
+ var expDistances = "[0.5257185558832786" +
+ " 0.5690474496911485" +
+ " 0.5888777462258191" +
+ " 0.6007362149895741" +
+ " 0.672666336306493]"
+
+ // Reading data
+ dataset := tabula.Dataset{}
+ _, e := dsv.SimpleRead("../testdata/phoneme/phoneme.dsv", &dataset)
+ if nil != e {
+ return
+ }
+
+ // Processing
+ knnIn := Runtime{
+ DistanceMethod: TEuclidianDistance,
+ ClassIndex: 5,
+ K: 5,
+ }
+
+ classes := dataset.GetRows().GroupByValue(knnIn.ClassIndex)
+
+ _, minoritySet := classes.GetMinority()
+
+ kneighbors := knnIn.FindNeighbors(&minoritySet, minoritySet[0])
+
+ var got string
+ rows := kneighbors.Rows()
+ for _, row := range *rows {
+ got += fmt.Sprint(*row)
+ }
+
+ test.Assert(t, "", exp[1], got, true)
+
+ distances := kneighbors.Distances()
+ got = fmt.Sprint(*distances)
+ test.Assert(t, "", expDistances, got, true)
+}
diff --git a/lib/mining/knn/neighbor.go b/lib/mining/knn/neighbor.go
new file mode 100644
index 00000000..e472d09f
--- /dev/null
+++ b/lib/mining/knn/neighbor.go
@@ -0,0 +1,150 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package knn
+
+import (
+ "github.com/shuLhan/share/lib/tabula"
+)
+
//
// Neighbors is a mapping between sample and their distance.
// This type implement the sort interface; sorting orders both backing
// slices by ascending distance.
//
type Neighbors struct {
	// rows contain pointer to rows.
	rows []*tabula.Row
	// distances contain the distance of each row, kept in the same
	// order as `rows`.
	distances []float64
}
+
+//
+// Rows return all rows.
+//
+func (neighbors *Neighbors) Rows() *[]*tabula.Row {
+ return &neighbors.rows
+}
+
+//
+// Row return pointer to row at index `idx`.
+//
+func (neighbors *Neighbors) Row(idx int) *tabula.Row {
+ return neighbors.rows[idx]
+}
+
+//
+// Distances return slice of distance of each neighbours.
+//
+func (neighbors *Neighbors) Distances() *[]float64 {
+ return &neighbors.distances
+}
+
+//
+// Distance return distance value at index `idx`.
+//
+func (neighbors *Neighbors) Distance(idx int) float64 {
+ return neighbors.distances[idx]
+}
+
+//
+// Len return the number of neighbors.
+// This is for sort interface.
+//
+func (neighbors *Neighbors) Len() int {
+ return len(neighbors.distances)
+}
+
+//
+// Less return true if i < j.
+// This is for sort interface.
+//
+func (neighbors *Neighbors) Less(i, j int) bool {
+ if neighbors.distances[i] < neighbors.distances[j] {
+ return true
+ }
+ return false
+}
+
+//
+// Swap content of object in index i with index j.
+// This is for sort interface.
+//
+func (neighbors *Neighbors) Swap(i, j int) {
+ row := neighbors.rows[i]
+ distance := neighbors.distances[i]
+
+ neighbors.rows[i] = neighbors.rows[j]
+ neighbors.distances[i] = neighbors.distances[j]
+
+ neighbors.rows[j] = row
+ neighbors.distances[j] = distance
+}
+
+//
+// Add new neighbor.
+//
+func (neighbors *Neighbors) Add(row *tabula.Row, distance float64) {
+ neighbors.rows = append(neighbors.rows, row)
+ neighbors.distances = append(neighbors.distances, distance)
+}
+
+//
+// SelectRange select all neighbors from index `start` to `end`.
+// Return an empty set if start or end is out of range.
+//
+func (neighbors *Neighbors) SelectRange(start, end int) (newn Neighbors) {
+ if start < 0 {
+ return
+ }
+
+ if end > neighbors.Len() {
+ return
+ }
+
+ for x := start; x < end; x++ {
+ row := neighbors.rows[x]
+ newn.Add(row, neighbors.distances[x])
+ }
+ return
+}
+
+//
+// SelectWhere return all neighbors where row value at index `idx` is equal
+// to string `val`.
+//
+func (neighbors *Neighbors) SelectWhere(idx int, val string) (newn Neighbors) {
+ for x, row := range neighbors.rows {
+ colval := (*row)[idx].String()
+
+ if colval == val {
+ newn.Add(row, neighbors.Distance(x))
+ }
+ }
+ return
+}
+
+//
+// Contain return true if `row` is in neighbors and their index, otherwise
+// return false and -1.
+//
+func (neighbors *Neighbors) Contain(row *tabula.Row) (bool, int) {
+ for x, xrow := range neighbors.rows {
+ if xrow.IsEqual(row) {
+ return true, x
+ }
+ }
+ return false, -1
+}
+
+//
+// Replace neighbor at index `idx` with new row and distance value.
+//
+func (neighbors *Neighbors) Replace(idx int, row *tabula.Row, distance float64) {
+ if idx > len(neighbors.rows) {
+ return
+ }
+
+ neighbors.rows[idx] = row
+ neighbors.distances[idx] = distance
+}
diff --git a/lib/mining/knn/neighbor_test.go b/lib/mining/knn/neighbor_test.go
new file mode 100644
index 00000000..047d9a32
--- /dev/null
+++ b/lib/mining/knn/neighbor_test.go
@@ -0,0 +1,84 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package knn
+
+import (
+ "math/rand"
+ "sort"
+ "testing"
+ "time"
+
+ "github.com/shuLhan/share/lib/tabula"
+ "github.com/shuLhan/share/lib/test"
+)
+
// dataFloat64 is the fixture sample matrix; the last column holds the class
// value.
var dataFloat64 = [][]float64{
	{0.243474, 0.505146, 0.472892, 1.34802, -0.844252, 1},
	{0.202343, 0.485983, 0.527533, 1.47307, -0.809672, 1},
	{0.215496, 0.523418, 0.517190, 1.43548, -0.933981, 1},
	{0.214331, 0.546086, 0.414773, 1.38542, -0.702336, 1},
	{0.301676, 0.554505, 0.594757, 1.21258, -0.873084, 1},
}

// distances assigns a descending distance to each sample above, so sorting
// by distance reverses the insertion order.
var distances = []int{4, 3, 2, 1, 0}
+
+func createNeigbours() (neighbors Neighbors) {
+ for x, d := range dataFloat64 {
+ row := tabula.Row{}
+
+ for _, v := range d {
+ rec := tabula.NewRecordReal(v)
+ row.PushBack(rec)
+ }
+
+ neighbors.Add(&row, float64(distances[x]))
+ }
+ return
+}
+
+func createNeigboursByIdx(indices []int) (neighbors Neighbors) {
+ for x, idx := range indices {
+ row := tabula.Row{}
+
+ for _, v := range dataFloat64[idx] {
+ rec := tabula.NewRecordReal(v)
+ row.PushBack(rec)
+ }
+
+ neighbors.Add(&row, float64(distances[x]))
+ }
+ return
+}
+
+func TestContain(t *testing.T) {
+ rand.Seed(time.Now().UnixNano())
+
+ neighbors := createNeigbours()
+
+ // pick random sample from neighbors
+ pickIdx := rand.Intn(neighbors.Len())
+ randSample := neighbors.Row(pickIdx).Clone()
+
+ isin, idx := neighbors.Contain(randSample)
+
+ test.Assert(t, "", true, isin, true)
+ test.Assert(t, "", pickIdx, idx, true)
+
+ // change one of record value to check for false.
+ (*randSample)[0].SetFloat(0)
+
+ isin, _ = neighbors.Contain(randSample)
+
+ test.Assert(t, "", false, isin, true)
+}
+
+func TestSort(t *testing.T) {
+ neighbors := createNeigbours()
+ exp := createNeigboursByIdx(distances)
+
+ sort.Sort(&neighbors)
+
+ test.Assert(t, "", exp.Rows(), neighbors.Rows(), true)
+}
diff --git a/lib/mining/math/math.go b/lib/mining/math/math.go
new file mode 100644
index 00000000..944890de
--- /dev/null
+++ b/lib/mining/math/math.go
@@ -0,0 +1,68 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package math provide generic functions working with math.
+//
+package math
+
+import (
+ "math"
+)
+
//
// Factorial compute the factorial of n.
// For negative input it returns the negated factorial of the absolute
// value, mirroring the original behaviour (e.g. Factorial(-3) == -6).
//
func Factorial(n int) int {
	sign := 1
	if n < 0 {
		sign = -1
		n = -n
	}

	result := 1
	for x := 2; x <= n; x++ {
		result *= x
	}

	return sign * result
}
+
+//
+// BinomialCoefficient or combination, compute number of picking k from
+// n possibilities.
+//
+// Result is n! / ((n - k)! * k!)
+//
+func BinomialCoefficient(n int, k int) int {
+ if k > n {
+ return 0
+ }
+ return Factorial(n) / (Factorial(n-k) * Factorial(k))
+}
+
+//
+// StirlingS2 The number of ways of partitioning a set of n elements into
+// k nonempty sets (i.e., k set blocks), also called a Stirling set number.
+//
+// For example, the set {1,2,3} can be partitioned into three subsets in one way:
+// {{1},{2},{3}}; into two subsets in three ways: {{1,2},{3}}, {{1,3},{2}},
+// and {{1},{2,3}}; and into one subset in one way: {{1,2,3}}.
+//
+// Ref: http://mathworld.wolfram.com/StirlingNumberoftheSecondKind.html
+//
+func StirlingS2(n int, k int) int {
+ if k == 1 || n == k {
+ return 1
+ }
+ var sum int
+
+ for i := 0; i <= k; i++ {
+ sum += int(math.Pow(-1, float64(i))) *
+ BinomialCoefficient(k, i) *
+ int(math.Pow(float64(k-i), float64(n)))
+ }
+
+ return sum / Factorial(k)
+}
diff --git a/lib/mining/math/math_test.go b/lib/mining/math/math_test.go
new file mode 100644
index 00000000..ae7bf563
--- /dev/null
+++ b/lib/mining/math/math_test.go
@@ -0,0 +1,48 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import (
+ "testing"
+)
+
+func TestFactorial(t *testing.T) {
+ in := []int{-3, -2, -1, 0, 1, 2, 3}
+ exp := []int{-6, -2, -1, 1, 1, 2, 6}
+
+ for i := range in {
+ res := Factorial(in[i])
+
+ if res != exp[i] {
+ t.Fatal("Expecting ", exp[i], ", got ", res)
+ }
+ }
+}
+
+func TestBinomialCoefficient(t *testing.T) {
+ in := [][]int{{1, 2}, {1, 1}, {3, 2}, {5, 3}}
+ exp := []int{0, 1, 3, 10}
+
+ for i := range in {
+ res := BinomialCoefficient(in[i][0], in[i][1])
+
+ if res != exp[i] {
+ t.Fatal("Expecting ", exp[i], ", got ", res)
+ }
+ }
+}
+
+func TestStirlingS2(t *testing.T) {
+ in := [][]int{{3, 1}, {3, 2}, {3, 3}, {5, 3}}
+ exp := []int{1, 3, 1, 25}
+
+ for i := range in {
+ res := StirlingS2(in[i][0], in[i][1])
+
+ if res != exp[i] {
+ t.Fatal("Expecting ", exp[i], ", got ", res)
+ }
+ }
+}
diff --git a/lib/mining/resampling/interface.go b/lib/mining/resampling/interface.go
new file mode 100644
index 00000000..58c333ce
--- /dev/null
+++ b/lib/mining/resampling/interface.go
@@ -0,0 +1,52 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package resampling provide common interface, constants, and methods for
+// resampling modules.
+//
+package resampling
+
+import (
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
const (
	// DefaultK default number of nearest neighbors used by resampling.
	DefaultK = 5
	// DefaultPercentOver default percentage of oversampling.
	DefaultPercentOver = 100
)

//
// Interface define common methods used by resampling module.
//
type Interface interface {
	// GetSynthetics return the generated synthetic samples.
	GetSynthetics() tabula.DatasetInterface
}
+
+//
+// WriteSynthetics will write synthetic samples in resampling module `ri` into
+// `file`.
+//
+func WriteSynthetics(ri Interface, file string) (e error) {
+ writer, e := dsv.NewWriter("")
+ if nil != e {
+ return
+ }
+
+ e = writer.OpenOutput(file)
+ if e != nil {
+ return
+ }
+
+ sep := dsv.DefSeparator
+ _, e = writer.WriteRawDataset(ri.GetSynthetics(), &sep)
+ if e != nil {
+ return
+ }
+
+ return writer.Close()
+}
diff --git a/lib/mining/resampling/lnsmote/lnsmote.go b/lib/mining/resampling/lnsmote/lnsmote.go
new file mode 100644
index 00000000..ce6f26e9
--- /dev/null
+++ b/lib/mining/resampling/lnsmote/lnsmote.go
@@ -0,0 +1,287 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package lnsmote implement the Local-Neighborhood algorithm from the paper,
+//
+// Maciejewski, Tomasz, and Jerzy Stefanowski. "Local neighbourhood
+// extension of SMOTE for mining imbalanced data." Computational
+// Intelligence and Data Mining (CIDM), 2011 IEEE Symposium on. IEEE,
+// 2011.
+//
+package lnsmote
+
+import (
+ "fmt"
+ "math/rand"
+
+ "github.com/shuLhan/share/lib/debug"
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/mining/knn"
+ "github.com/shuLhan/share/lib/mining/resampling/smote"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
//
// Runtime parameters for input and output.
//
type Runtime struct {
	// Runtime of SMOTE, since this module extend the SMOTE method.
	smote.Runtime

	// ClassMinor the minority sample in dataset that we want to
	// oversampling.
	ClassMinor string `json:"ClassMinor"`

	// minorset contain minor class in samples.
	minorset tabula.DatasetInterface

	// datasetRows contain all rows in dataset.
	datasetRows *tabula.Rows

	// outliers contain all sample that is detected as outliers.
	outliers tabula.Rows

	// OutliersFile if its not empty then outliers will be saved in file
	// specified by this option.
	OutliersFile string `json:"OutliersFile"`
}
+
+//
+// New create and return new LnSmote object.
+//
+func New(percentOver, k, classIndex int, classMinor, outliers string) (
+ lnsmoteRun *Runtime,
+) {
+ lnsmoteRun = &Runtime{
+ Runtime: smote.Runtime{
+ Runtime: knn.Runtime{
+ DistanceMethod: knn.TEuclidianDistance,
+ ClassIndex: classIndex,
+ K: k,
+ },
+ PercentOver: percentOver,
+ },
+ ClassMinor: classMinor,
+ OutliersFile: outliers,
+ }
+
+ return
+}
+
//
// Init will initialize LNSmote runtime by checking input values and set it to
// default if not set or invalid.
//
func (in *Runtime) Init(dataset tabula.ClasetInterface) {
	in.Runtime.Init()

	// NOTE(review): integer division -- a PercentOver below 100 yields
	// zero synthetics per sample here, and e.g. 150 truncates to 1.
	// Confirm whether fractional percentages are meant to be supported.
	in.NSynthetic = in.PercentOver / 100.0
	in.datasetRows = dataset.GetDataAsRows()

	// Collect the minority-class rows that will be oversampled.
	in.minorset = tabula.SelectRowsWhere(dataset, in.ClassIndex,
		in.ClassMinor)

	in.outliers = make(tabula.Rows, 0)

	if debug.Value >= 1 {
		fmt.Println("[lnsmote] n:", in.NSynthetic)
		fmt.Println("[lnsmote] n minority:", in.minorset.Len())
	}
}
+
+//
+// Resampling will run resampling process on dataset and return the synthetic
+// samples.
+//
+func (in *Runtime) Resampling(dataset tabula.ClasetInterface) (
+ e error,
+) {
+ in.Init(dataset)
+
+ minorRows := in.minorset.GetDataAsRows()
+
+ for x := range *minorRows {
+ p := (*minorRows)[x]
+
+ neighbors := in.FindNeighbors(in.datasetRows, p)
+
+ if debug.Value >= 3 {
+ fmt.Println("[lnsmote] neighbors:", neighbors.Rows())
+ }
+
+ for y := 0; y < in.NSynthetic; y++ {
+ syn := in.createSynthetic(p, neighbors)
+
+ if syn != nil {
+ in.Synthetics.PushRow(syn)
+ }
+ }
+
+ if debug.Value >= 1 {
+ fmt.Printf("[lnsmote] %-4d n synthetics: %v\n", x,
+ in.Synthetics.Len())
+ }
+ }
+
+ if in.SyntheticFile != "" {
+ e = in.Write(in.SyntheticFile)
+ }
+ if in.OutliersFile != "" && in.outliers.Len() > 0 {
+ e = in.writeOutliers()
+ }
+
+ return
+}
+
//
// createSynthetic will create synthetics row from original row `p` and their
// `neighbors`.
//
// It returns nil when no synthetic can be created, i.e. when the safe
// levels of both `p` and the randomly chosen neighbor are zero; in that
// case `p` is also recorded as an outlier when its own safe level is zero.
//
func (in *Runtime) createSynthetic(p *tabula.Row, neighbors knn.Neighbors) (
	synthetic *tabula.Row,
) {
	// choose one of the K nearest neighbors
	randIdx := rand.Intn(neighbors.Len())
	n := neighbors.Row(randIdx)

	// Check if synthetic sample can be created from p and n.
	canit, slp, sln := in.canCreate(p, n)
	if !canit {
		if debug.Value >= 2 {
			fmt.Println("[lnsmote] can not create synthetic")
		}

		if slp.Len() <= 0 {
			// p has no minority neighbors at all: treat it as an
			// outlier.
			in.outliers.PushBack(p)
		}

		// we can not create from p and synthetic.
		return nil
	}

	synthetic = p.Clone()

	for x, srec := range *synthetic {
		// Skip class attribute.
		if x == in.ClassIndex {
			continue
		}

		// Interpolate each attribute between p and n, weighted by
		// the safe-level gap `delta`.
		delta := in.randomGap(p, n, slp.Len(), sln.Len())
		pv := (*p)[x].Float()
		diff := (*n)[x].Float() - pv
		srec.SetFloat(pv + delta*diff)
	}

	return
}
+
+//
+// canCreate return true if synthetic can be created between two sample `p` and
+// `n`. Otherwise it will return false.
+//
+func (in *Runtime) canCreate(p, n *tabula.Row) (bool, knn.Neighbors,
+ knn.Neighbors,
+) {
+ slp := in.safeLevel(p)
+ sln := in.safeLevel2(p, n)
+
+ if debug.Value >= 2 {
+ fmt.Println("[lnsmote] slp : ", slp.Len())
+ fmt.Println("[lnsmote] sln : ", sln.Len())
+ }
+
+ return slp.Len() != 0 || sln.Len() != 0, slp, sln
+}
+
+//
+// safeLevel return the minority neighbors in sample `p`.
+//
+func (in *Runtime) safeLevel(p *tabula.Row) knn.Neighbors {
+ neighbors := in.FindNeighbors(in.datasetRows, p)
+ minorNeighbors := neighbors.SelectWhere(in.ClassIndex, in.ClassMinor)
+
+ return minorNeighbors
+}
+
//
// safeLevel2 return the minority neighbors between sample `p` and `n`.
//
func (in *Runtime) safeLevel2(p, n *tabula.Row) knn.Neighbors {
	neighbors := in.FindNeighbors(in.datasetRows, n)

	// check if n is in minority class.
	nIsMinor := (*n)[in.ClassIndex].IsEqualToString(in.ClassMinor)

	// check if p is in neighbors.
	pInNeighbors, pidx := neighbors.Contain(p)

	// if p in neighbors, replace it with neighbours in K+1
	if nIsMinor && pInNeighbors {
		if debug.Value >= 1 {
			fmt.Println("[lnsmote] Replacing ", pidx)
		}
		if debug.Value >= 2 {
			fmt.Println("[lnsmote] Replacing ", pidx, " in ", neighbors)
		}

		// NOTE(review): this indexes AllNeighbors at K+1 without
		// checking AllNeighbors.Len(); if fewer than K+2 neighbors
		// were found this panics -- confirm the dataset always
		// provides enough samples.
		row := in.AllNeighbors.Row(in.K + 1)
		dist := in.AllNeighbors.Distance(in.K + 1)
		neighbors.Replace(pidx, row, dist)

		if debug.Value >= 2 {
			fmt.Println("[lnsmote] Replacement ", neighbors)
		}
	}

	minorNeighbors := neighbors.SelectWhere(in.ClassIndex, in.ClassMinor)

	return minorNeighbors
}
+
//
// randomGap return the neighbors gap between sample `p` and `n` using safe
// level (number of minority neighbors) of p in `lenslp` and `n` in `lensln`.
//
// The parameters `p` and `n` are currently unused; presumably kept for
// symmetry with the paper's definition -- confirm before removing.
//
// NOTE(review): when both lenslp and lensln are zero, slratio is NaN and the
// returned delta is NaN too; callers must guarantee at least one non-zero
// safe level (canCreate does this).
//
func (in *Runtime) randomGap(p, n *tabula.Row, lenslp, lensln int) (
	delta float64,
) {
	// n has no minority neighbors but p does: keep the synthetic at p
	// (delta stays zero).
	if lensln == 0 && lenslp > 0 {
		return
	}

	slratio := float64(lenslp) / float64(lensln)
	if slratio == 1 {
		// Equal safe levels: uniform gap.
		delta = rand.Float64()
	} else if slratio > 1 {
		// p is safer than n: bias the synthetic toward p.
		delta = rand.Float64() * (1 / slratio)
	} else {
		// n is safer than p: bias the synthetic toward n.
		delta = 1 - rand.Float64()*slratio
	}

	return delta
}
+
+// writeOutliers will save the `outliers` to file specified by
+// `OutliersFile`.
+func (in *Runtime) writeOutliers() (e error) {
+ writer, e := dsv.NewWriter("")
+ if nil != e {
+ return
+ }
+
+ e = writer.OpenOutput(in.OutliersFile)
+ if e != nil {
+ return
+ }
+
+ sep := dsv.DefSeparator
+ _, e = writer.WriteRawRows(&in.outliers, &sep)
+ if e != nil {
+ return
+ }
+
+ return writer.Close()
+}
diff --git a/lib/mining/resampling/lnsmote/lnsmote_test.go b/lib/mining/resampling/lnsmote/lnsmote_test.go
new file mode 100644
index 00000000..1c8e45e4
--- /dev/null
+++ b/lib/mining/resampling/lnsmote/lnsmote_test.go
@@ -0,0 +1,74 @@
+// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lnsmote
+
+import (
+ "fmt"
+ "os"
+ "testing"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
const (
	// fcfg point to the DSV configuration of the phoneme test dataset.
	fcfg = "../../testdata/phoneme/phoneme.dsv"
)
+
+func TestLNSmote(t *testing.T) {
+ // Read sample dataset.
+ dataset := tabula.Claset{}
+ _, e := dsv.SimpleRead(fcfg, &dataset)
+ if nil != e {
+ t.Fatal(e)
+ }
+
+ fmt.Println("[lnsmote_test] Total samples:", dataset.GetNRow())
+
+ // Write original samples.
+ writer, e := dsv.NewWriter("")
+
+ if nil != e {
+ t.Fatal(e)
+ }
+
+ e = writer.OpenOutput("phoneme_lnsmote.csv")
+ if e != nil {
+ t.Fatal(e)
+ }
+
+ sep := dsv.DefSeparator
+ _, e = writer.WriteRawRows(dataset.GetRows(), &sep)
+ if e != nil {
+ t.Fatal(e)
+ }
+
+ // Initialize LN-SMOTE.
+ lnsmoteRun := New(100, 5, 5, "1", "lnsmote.outliers")
+
+ e = lnsmoteRun.Resampling(&dataset)
+
+ fmt.Println("[lnsmote_test] # synthetic:", lnsmoteRun.Synthetics.Len())
+
+ sep = dsv.DefSeparator
+ _, e = writer.WriteRawRows(lnsmoteRun.Synthetics.GetRows(), &sep)
+ if e != nil {
+ t.Fatal(e)
+ }
+
+ e = writer.Close()
+ if e != nil {
+ t.Fatal(e)
+ }
+}
+
func TestMain(m *testing.M) {
	// Run this package's tests only when TEST_LNSMOTE is set, because
	// they read the full phoneme dataset and write output files.
	if os.Getenv("TEST_LNSMOTE") == "" {
		os.Exit(0)
	}

	os.Exit(m.Run())
}
diff --git a/lib/mining/resampling/lnsmote/phoneme_lnsmote.dsv b/lib/mining/resampling/lnsmote/phoneme_lnsmote.dsv
new file mode 100644
index 00000000..cd1fef8b
--- /dev/null
+++ b/lib/mining/resampling/lnsmote/phoneme_lnsmote.dsv
@@ -0,0 +1,38 @@
+{
+ "Input" :"phoneme_lnsmote.csv"
+, "Rejected" :"phoneme.rej"
+, "MaxRows" :-1
+, "ClassMetadataIndex" :5
+, "ClassIndex" :5
+, "DatasetMode" :"matrix"
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "0"
+ , "1"
+ ]
+ }]
+}
diff --git a/lib/mining/resampling/smote/phoneme_smote.dsv b/lib/mining/resampling/smote/phoneme_smote.dsv
new file mode 100644
index 00000000..63180379
--- /dev/null
+++ b/lib/mining/resampling/smote/phoneme_smote.dsv
@@ -0,0 +1,38 @@
+{
+ "Input" :"phoneme_smote.csv"
+, "Rejected" :"phoneme.rej"
+, "MaxRows" :-1
+, "ClassMetadataIndex" :5
+, "ClassIndex" :5
+, "DatasetMode" :"matrix"
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :","
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "0"
+ , "1"
+ ]
+ }]
+}
diff --git a/lib/mining/resampling/smote/smote.go b/lib/mining/resampling/smote/smote.go
new file mode 100644
index 00000000..32f21b7e
--- /dev/null
+++ b/lib/mining/resampling/smote/smote.go
@@ -0,0 +1,183 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package smote resamples a dataset by applying the Synthetic Minority
+// Oversampling TEchnique (SMOTE). The original dataset must fit entirely in
+// memory. The amount of SMOTE and number of nearest neighbors may be specified.
+// For more information, see
+//
+// Nitesh V. Chawla et. al. (2002). Synthetic Minority Over-sampling
+// Technique. Journal of Artificial Intelligence Research. 16:321-357.
+//
+package smote
+
+import (
+ "fmt"
+ "math/rand"
+ "time"
+
+ "github.com/shuLhan/share/lib/mining/knn"
+ "github.com/shuLhan/share/lib/mining/resampling"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
//
// Runtime for input and output.
//
type Runtime struct {
	// Runtime the K-Nearest-Neighbourhood parameters.
	knn.Runtime
	// PercentOver input for oversampling percentage.
	PercentOver int `json:"PercentOver"`
	// SyntheticFile is a filename where synthetic samples will be written.
	SyntheticFile string `json:"SyntheticFile"`
	// NSynthetic input for number of new synthetic per sample; it is
	// derived from PercentOver inside Resampling.
	NSynthetic int
	// Synthetics contain output of resampling as synthetic samples.
	Synthetics tabula.Dataset
}
+
+//
+// New create and return new smote runtime.
+//
+func New(percentOver, k, classIndex int) (smoteRun *Runtime) {
+ smoteRun = &Runtime{
+ Runtime: knn.Runtime{
+ DistanceMethod: knn.TEuclidianDistance,
+ ClassIndex: classIndex,
+ K: k,
+ },
+ PercentOver: percentOver,
+ }
+ return
+}
+
+//
+// Init will recheck input and set to default value if its not valid.
+//
+func (smote *Runtime) Init() {
+ rand.Seed(time.Now().UnixNano())
+
+ if smote.K <= 0 {
+ smote.K = resampling.DefaultK
+ }
+ if smote.PercentOver <= 0 {
+ smote.PercentOver = resampling.DefaultPercentOver
+ }
+}
+
+//
+// GetSynthetics return synthetic samples.
+//
+func (smote *Runtime) GetSynthetics() tabula.DatasetInterface {
+ return &smote.Synthetics
+}
+
//
// populate will generate new synthetic sample using nearest neighbors.
//
// For each of the NSynthetic iterations it picks one random neighbor and
// interpolates every non-class attribute between `instance` and that
// neighbor using a random gap in [0,1). The resulting rows are appended to
// smote.Synthetics.
//
func (smote *Runtime) populate(instance *tabula.Row, neighbors knn.Neighbors) {
	lenAttr := len(*instance)

	for x := 0; x < smote.NSynthetic; x++ {
		// choose one of the K nearest neighbors
		n := rand.Intn(neighbors.Len())
		sample := neighbors.Row(n)

		newSynt := make(tabula.Row, lenAttr)

		// Compute new synthetic attributes.
		for attr, sr := range *sample {
			if attr == smote.ClassIndex {
				continue
			}

			ir := (*instance)[attr]

			iv := ir.Float()
			sv := sr.Float()

			// synthetic = instance + gap * (sample - instance)
			dif := sv - iv
			gap := rand.Float64()
			newAttr := iv + (gap * dif)

			record := &tabula.Record{}
			record.SetFloat(newAttr)
			newSynt[attr] = record
		}

		// NOTE(review): the class record is shared (not cloned) with
		// the original instance.
		newSynt[smote.ClassIndex] = (*instance)[smote.ClassIndex]

		smote.Synthetics.PushRow(&newSynt)
	}
}
+
+//
+// Resampling will run resampling algorithm using values that has been defined
+// in `Runtime` and return list of synthetic samples.
+//
+// The `dataset` must be samples of minority class not the whole dataset.
+//
+// Algorithms,
+//
+// (0) If oversampling percentage less than 100, then
+// (0.1) replace the input dataset by selecting n random sample from dataset
+// without replacement, where n is
+//
+// (percentage-oversampling / 100) * number-of-sample
+//
+// (1) For each `sample` in dataset,
+// (1.1) find k-nearest-neighbors of `sample`,
+// (1.2) generate synthetic sample in neighbors.
+// (2) Write synthetic samples to file, only if `SyntheticFile` is not empty.
+//
+func (smote *Runtime) Resampling(dataset tabula.Rows) (e error) {
+ smote.Init()
+
+ if smote.PercentOver < 100 {
+ // (0.1)
+ smote.NSynthetic = (smote.PercentOver / 100.0) * len(dataset)
+ dataset, _, _, _ = dataset.RandomPick(smote.NSynthetic, false)
+ } else {
+ smote.NSynthetic = smote.PercentOver / 100.0
+ }
+
+ // (1)
+ for x := range dataset {
+ sample := dataset[x]
+
+ // (1.1)
+ neighbors := smote.FindNeighbors(&dataset, sample)
+
+ // (1.2)
+ smote.populate(sample, neighbors)
+ }
+
+ // (2)
+ if smote.SyntheticFile != "" {
+ e = resampling.WriteSynthetics(smote, smote.SyntheticFile)
+ }
+
+ return
+}
+
+//
+// Write will write synthetic samples to file defined in `file`.
+//
+func (smote *Runtime) Write(file string) error {
+ return resampling.WriteSynthetics(smote, file)
+}
+
// String return the smote runtime parameters formatted as a JSON-like
// string, for debugging.
func (smote *Runtime) String() (s string) {
	s = fmt.Sprintf("'smote' : {\n"+
		"	'ClassIndex'	:%d\n"+
		"	, 'K'	:%d\n"+
		"	, 'PercentOver'	:%d\n"+
		"	, 'DistanceMethod'	:%d\n"+
		"}", smote.ClassIndex, smote.K, smote.PercentOver,
		smote.DistanceMethod)

	return
}
diff --git a/lib/mining/resampling/smote/smote_test.go b/lib/mining/resampling/smote/smote_test.go
new file mode 100644
index 00000000..c3a633cb
--- /dev/null
+++ b/lib/mining/resampling/smote/smote_test.go
@@ -0,0 +1,49 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package smote
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/shuLhan/share/lib/dsv"
+ "github.com/shuLhan/share/lib/tabula"
+)
+
var (
	// fcfg point to the DSV configuration of the phoneme test dataset.
	fcfg = "../../testdata/phoneme/phoneme.dsv"
	// PercentOver is the oversampling percentage used by the test.
	PercentOver = 100
	// K is the number of nearest neighbours used by the test.
	K = 5
)
+
+func TestSmote(t *testing.T) {
+ smot := New(PercentOver, K, 5)
+
+ // Read samples.
+ dataset := tabula.Claset{}
+
+ _, e := dsv.SimpleRead(fcfg, &dataset)
+ if nil != e {
+ t.Fatal(e)
+ }
+
+ fmt.Println("[smote_test] Total samples:", dataset.Len())
+
+ minorset := dataset.GetMinorityRows()
+
+ fmt.Println("[smote_test] # minority samples:", minorset.Len())
+
+ e = smot.Resampling(*minorset)
+ if e != nil {
+ t.Fatal(e)
+ }
+
+ fmt.Println("[smote_test] # synthetic:", smot.GetSynthetics().Len())
+
+ e = smot.Write("phoneme_smote.csv")
+ if e != nil {
+ t.Fatal(e)
+ }
+}
diff --git a/lib/mining/testdata/forensic_glass/fgl.data b/lib/mining/testdata/forensic_glass/fgl.data
new file mode 100644
index 00000000..8dc6fc0c
--- /dev/null
+++ b/lib/mining/testdata/forensic_glass/fgl.data
@@ -0,0 +1,214 @@
+1 3.01 13.64 4.49 1.10 71.78 0.06 8.75 0.00 0.00 WinF
+2 -0.39 13.89 3.60 1.36 72.73 0.48 7.83 0.00 0.00 WinF
+3 -1.82 13.53 3.55 1.54 72.99 0.39 7.78 0.00 0.00 WinF
+4 -0.34 13.21 3.69 1.29 72.61 0.57 8.22 0.00 0.00 WinF
+5 -0.58 13.27 3.62 1.24 73.08 0.55 8.07 0.00 0.00 WinF
+6 -2.04 12.79 3.61 1.62 72.97 0.64 8.07 0.00 0.26 WinF
+7 -0.57 13.30 3.60 1.14 73.09 0.58 8.17 0.00 0.00 WinF
+8 -0.44 13.15 3.61 1.05 73.24 0.57 8.24 0.00 0.00 WinF
+9 1.18 14.04 3.58 1.37 72.08 0.56 8.30 0.00 0.00 WinF
+10 -0.45 13.00 3.60 1.36 72.99 0.57 8.40 0.00 0.11 WinF
+11 -2.29 12.72 3.46 1.56 73.20 0.67 8.09 0.00 0.24 WinF
+12 -0.37 12.80 3.66 1.27 73.01 0.60 8.56 0.00 0.00 WinF
+13 -2.11 12.88 3.43 1.40 73.28 0.69 8.05 0.00 0.24 WinF
+14 -0.52 12.86 3.56 1.27 73.21 0.54 8.38 0.00 0.17 WinF
+15 -0.37 12.61 3.59 1.31 73.29 0.58 8.50 0.00 0.00 WinF
+16 -0.39 12.81 3.54 1.23 73.24 0.58 8.39 0.00 0.00 WinF
+17 -0.16 12.68 3.67 1.16 73.11 0.61 8.70 0.00 0.00 WinF
+18 3.96 14.36 3.85 0.89 71.36 0.15 9.15 0.00 0.00 WinF
+19 1.11 13.90 3.73 1.18 72.12 0.06 8.89 0.00 0.00 WinF
+20 -0.65 13.02 3.54 1.69 72.73 0.54 8.44 0.00 0.07 WinF
+21 -0.50 12.82 3.55 1.49 72.75 0.54 8.52 0.00 0.19 WinF
+22 1.66 14.77 3.75 0.29 72.02 0.03 9.00 0.00 0.00 WinF
+23 -0.64 12.78 3.62 1.29 72.79 0.59 8.70 0.00 0.00 WinF
+24 -0.49 12.81 3.57 1.35 73.02 0.62 8.59 0.00 0.00 WinF
+25 -0.80 13.38 3.50 1.15 72.85 0.50 8.43 0.00 0.00 WinF
+26 -0.36 12.98 3.54 1.21 73.00 0.65 8.53 0.00 0.00 WinF
+27 -0.07 13.21 3.48 1.41 72.64 0.59 8.43 0.00 0.00 WinF
+28 -0.79 12.87 3.48 1.33 73.04 0.56 8.43 0.00 0.00 WinF
+29 -0.32 12.56 3.52 1.43 73.15 0.57 8.54 0.00 0.00 WinF
+30 -0.16 13.08 3.49 1.28 72.86 0.60 8.49 0.00 0.00 WinF
+31 -0.32 12.65 3.56 1.30 73.08 0.61 8.69 0.00 0.14 WinF
+32 -0.53 12.84 3.50 1.14 73.27 0.56 8.55 0.00 0.00 WinF
+33 -0.25 12.85 3.48 1.23 72.97 0.61 8.56 0.09 0.22 WinF
+34 -0.47 12.57 3.47 1.38 73.39 0.60 8.55 0.00 0.06 WinF
+35 -0.17 12.69 3.54 1.34 72.95 0.57 8.75 0.00 0.00 WinF
+36 -2.33 13.29 3.45 1.21 72.74 0.56 8.57 0.00 0.00 WinF
+37 1.09 13.89 3.53 1.32 71.81 0.51 8.78 0.11 0.00 WinF
+38 -0.03 12.74 3.48 1.35 72.96 0.64 8.68 0.00 0.00 WinF
+39 4.13 14.21 3.82 0.47 71.77 0.11 9.57 0.00 0.00 WinF
+40 4.13 14.21 3.82 0.47 71.77 0.11 9.57 0.00 0.00 WinF
+41 -0.07 12.79 3.50 1.12 73.03 0.64 8.77 0.00 0.00 WinF
+42 -0.45 12.71 3.42 1.20 73.20 0.59 8.64 0.00 0.00 WinF
+43 -0.21 13.21 3.39 1.33 72.76 0.59 8.59 0.00 0.00 WinF
+44 4.10 13.73 3.84 0.72 71.76 0.17 9.74 0.00 0.00 WinF
+45 -0.14 12.73 3.43 1.19 72.95 0.62 8.76 0.00 0.30 WinF
+46 1.00 13.49 3.48 1.35 71.95 0.55 9.00 0.00 0.00 WinF
+47 0.69 13.19 3.37 1.18 72.72 0.57 8.83 0.00 0.16 WinF
+48 8.67 13.99 3.70 0.71 71.57 0.02 9.82 0.00 0.10 WinF
+49 4.23 13.21 3.77 0.79 71.99 0.13 10.02 0.00 0.00 WinF
+50 0.98 13.58 3.35 1.23 72.08 0.59 8.91 0.00 0.00 WinF
+51 5.20 13.72 3.72 0.51 71.75 0.09 10.06 0.00 0.16 WinF
+52 1.26 13.20 3.33 1.28 72.36 0.60 9.14 0.00 0.11 WinF
+53 0.08 13.43 2.87 1.19 72.84 0.55 9.03 0.00 0.00 WinF
+54 0.37 13.14 2.84 1.28 72.85 0.55 9.07 0.00 0.00 WinF
+55 -0.22 13.21 2.81 1.29 72.98 0.51 9.02 0.00 0.09 WinF
+56 -0.31 12.45 2.71 1.29 73.70 0.56 9.06 0.00 0.24 WinF
+57 -5.85 12.99 3.47 1.12 72.98 0.62 8.35 0.00 0.31 WinF
+58 0.24 12.87 3.48 1.29 72.95 0.60 8.43 0.00 0.00 WinF
+59 -0.46 13.48 3.74 1.17 72.99 0.59 8.03 0.00 0.00 WinF
+60 -0.46 13.39 3.66 1.19 72.79 0.57 8.27 0.00 0.11 WinF
+61 1.05 13.60 3.62 1.11 72.64 0.14 8.76 0.00 0.00 WinF
+62 1.77 13.81 3.58 1.32 71.72 0.12 8.67 0.69 0.00 WinF
+63 3.72 13.51 3.86 0.88 71.79 0.23 9.54 0.00 0.11 WinF
+64 4.27 14.17 3.81 0.78 71.35 0.00 9.69 0.00 0.00 WinF
+65 3.72 13.48 3.74 0.90 72.01 0.18 9.61 0.00 0.07 WinF
+66 2.99 13.69 3.59 1.12 71.96 0.09 9.40 0.00 0.00 WinF
+67 3.52 13.05 3.65 0.87 72.22 0.19 9.85 0.00 0.17 WinF
+68 3.52 13.05 3.65 0.87 72.32 0.19 9.85 0.00 0.17 WinF
+69 3.52 13.12 3.58 0.90 72.20 0.23 9.82 0.00 0.16 WinF
+70 5.00 13.31 3.58 0.82 71.99 0.12 10.17 0.00 0.03 WinF
+71 -2.26 14.86 3.67 1.74 71.87 0.16 7.36 0.00 0.12 WinNF
+72 0.48 13.64 3.87 1.27 71.96 0.54 8.32 0.00 0.32 WinNF
+73 -2.07 13.09 3.59 1.52 73.10 0.67 7.83 0.00 0.00 WinNF
+74 -1.69 13.34 3.57 1.57 72.87 0.61 7.89 0.00 0.00 WinNF
+75 -2.04 13.02 3.56 1.54 73.11 0.72 7.90 0.00 0.00 WinNF
+76 -2.10 13.02 3.58 1.51 73.12 0.69 7.96 0.00 0.00 WinNF
+77 -1.55 13.44 3.61 1.54 72.39 0.66 8.03 0.00 0.00 WinNF
+78 -1.73 13.00 3.58 1.54 72.83 0.61 8.04 0.00 0.00 WinNF
+79 -1.87 13.92 3.52 1.25 72.88 0.37 7.94 0.00 0.14 WinNF
+80 -2.10 12.82 3.52 1.90 72.86 0.69 7.97 0.00 0.00 WinNF
+81 -2.08 12.86 3.52 2.12 72.66 0.69 7.97 0.00 0.00 WinNF
+82 -2.07 13.25 3.45 1.43 73.17 0.61 7.86 0.00 0.00 WinNF
+83 -1.54 13.41 3.55 1.25 72.81 0.68 8.10 0.00 0.00 WinNF
+84 -2.06 13.09 3.52 1.55 72.87 0.68 8.05 0.00 0.09 WinNF
+85 -3.91 14.25 3.09 2.08 72.28 1.10 7.08 0.00 0.00 WinNF
+86 -1.75 13.36 3.58 1.49 72.72 0.45 8.21 0.00 0.00 WinNF
+87 -2.31 13.24 3.49 1.47 73.25 0.38 8.03 0.00 0.00 WinNF
+88 -1.55 13.40 3.49 1.52 72.65 0.67 8.08 0.00 0.10 WinNF
+89 -1.82 13.01 3.50 1.48 72.89 0.60 8.12 0.00 0.00 WinNF
+90 -1.60 12.55 3.48 1.87 73.23 0.63 8.08 0.00 0.09 WinNF
+91 0.41 12.93 3.74 1.11 72.28 0.64 8.96 0.00 0.22 WinNF
+92 -1.95 12.90 3.44 1.45 73.06 0.44 8.27 0.00 0.00 WinNF
+93 -2.12 13.12 3.41 1.58 73.26 0.07 8.39 0.00 0.19 WinNF
+94 -2.10 13.24 3.34 1.47 73.10 0.39 8.22 0.00 0.00 WinNF
+95 -1.71 12.71 3.33 1.49 73.28 0.67 8.24 0.00 0.00 WinNF
+96 0.60 13.36 3.43 1.43 72.26 0.51 8.60 0.00 0.00 WinNF
+97 0.41 13.02 3.62 1.06 72.34 0.64 9.13 0.00 0.15 WinNF
+98 -0.57 12.20 3.25 1.16 73.55 0.62 8.90 0.00 0.24 WinNF
+99 -1.11 12.67 2.88 1.71 73.21 0.73 8.54 0.00 0.00 WinNF
+100 0.11 12.96 2.96 1.43 72.92 0.60 8.79 0.14 0.00 WinNF
+101 -1.45 12.75 2.85 1.44 73.27 0.57 8.79 0.11 0.22 WinNF
+102 -0.70 12.35 2.72 1.63 72.87 0.70 9.23 0.00 0.00 WinNF
+103 0.20 12.62 2.76 0.83 73.81 0.35 9.42 0.00 0.20 WinNF
+104 9.25 13.80 3.15 0.66 70.57 0.08 11.64 0.00 0.00 WinNF
+105 6.10 13.83 2.90 1.17 71.15 0.08 10.79 0.00 0.00 WinNF
+106 6.75 11.45 0.00 1.88 72.19 0.81 13.24 0.00 0.34 WinNF
+107 13.25 10.73 0.00 2.10 69.81 0.58 13.30 3.15 0.28 WinNF
+108 15.93 12.30 0.00 1.00 70.16 0.12 16.19 0.00 0.24 WinNF
+109 4.22 14.43 0.00 1.00 72.67 0.10 11.52 0.00 0.08 WinNF
+110 0.18 13.72 0.00 0.56 74.45 0.00 10.99 0.00 0.00 WinNF
+111 8.64 11.23 0.00 0.77 73.21 0.00 14.68 0.00 0.00 WinNF
+112 9.39 11.02 0.00 0.75 73.08 0.00 14.96 0.00 0.00 WinNF
+113 9.77 12.64 0.00 0.67 72.02 0.06 14.40 0.00 0.00 WinNF
+114 0.92 13.46 3.83 1.26 72.55 0.57 8.21 0.00 0.14 WinNF
+115 0.47 13.10 3.97 1.19 72.44 0.60 8.43 0.00 0.00 WinNF
+116 0.46 13.41 3.89 1.33 72.38 0.51 8.28 0.00 0.00 WinNF
+117 0.29 13.24 3.90 1.41 72.33 0.55 8.31 0.00 0.10 WinNF
+118 -0.92 13.72 3.68 1.81 72.06 0.64 7.88 0.00 0.00 WinNF
+119 -1.27 13.30 3.64 1.53 72.53 0.65 8.03 0.00 0.29 WinNF
+120 -1.48 13.56 3.57 1.47 72.45 0.64 7.96 0.00 0.00 WinNF
+121 0.44 13.25 3.76 1.32 72.40 0.58 8.42 0.00 0.00 WinNF
+122 -1.37 12.93 3.54 1.62 72.96 0.64 8.03 0.00 0.21 WinNF
+123 -1.13 13.23 3.54 1.48 72.84 0.56 8.10 0.00 0.00 WinNF
+124 -0.93 13.48 3.48 1.71 72.52 0.62 7.99 0.00 0.00 WinNF
+125 3.77 13.20 3.68 1.15 72.75 0.54 8.52 0.00 0.00 WinNF
+126 0.72 12.93 3.66 1.56 72.51 0.58 8.55 0.00 0.12 WinNF
+127 -1.33 12.94 3.61 1.26 72.75 0.56 8.60 0.00 0.00 WinNF
+128 2.81 13.78 2.28 1.43 71.99 0.49 9.85 0.00 0.17 WinNF
+129 2.68 13.55 2.09 1.67 72.18 0.53 9.57 0.27 0.17 WinNF
+130 2.20 13.98 1.35 1.63 71.76 0.39 10.56 0.00 0.18 WinNF
+131 3.77 13.75 1.01 1.36 72.19 0.33 11.14 0.00 0.00 WinNF
+132 8.14 13.70 0.00 1.36 71.24 0.19 13.44 0.00 0.10 WinNF
+133 0.13 13.43 3.98 1.18 72.49 0.58 8.15 0.00 0.00 WinNF
+134 0.00 13.71 3.93 1.54 71.81 0.54 8.21 0.00 0.15 WinNF
+135 0.11 13.33 3.85 1.25 72.78 0.52 8.12 0.00 0.00 WinNF
+136 -0.11 13.19 3.90 1.30 72.33 0.55 8.44 0.00 0.28 WinNF
+137 0.06 13.00 3.80 1.08 73.07 0.56 8.38 0.00 0.12 WinNF
+138 -0.89 12.89 3.62 1.57 72.96 0.61 8.11 0.00 0.00 WinNF
+139 -1.26 12.79 3.52 1.54 73.36 0.66 7.90 0.00 0.00 WinNF
+140 -1.26 12.87 3.56 1.64 73.14 0.65 7.99 0.00 0.00 WinNF
+141 -1.10 13.33 3.54 1.61 72.54 0.68 8.11 0.00 0.00 WinNF
+142 0.51 13.20 3.63 1.07 72.83 0.57 8.41 0.09 0.17 WinNF
+143 -1.38 12.85 3.51 1.44 73.01 0.68 8.23 0.06 0.25 WinNF
+144 -0.91 13.00 3.47 1.79 72.72 0.66 8.18 0.00 0.00 WinNF
+145 -1.40 12.99 3.18 1.23 72.97 0.58 8.81 0.00 0.24 WinNF
+146 0.39 12.85 3.67 1.24 72.57 0.62 8.68 0.00 0.35 WinNF
+147 -0.31 13.65 3.66 1.11 72.77 0.11 8.60 0.00 0.00 Veh
+148 -1.90 13.33 3.53 1.34 72.67 0.56 8.33 0.00 0.00 Veh
+149 -1.30 13.24 3.57 1.38 72.70 0.56 8.44 0.00 0.10 Veh
+150 -1.57 12.16 3.52 1.35 72.89 0.57 8.53 0.00 0.00 Veh
+151 -1.35 13.14 3.45 1.76 72.48 0.60 8.38 0.00 0.17 Veh
+152 3.27 14.32 3.90 0.83 71.50 0.00 9.49 0.00 0.00 Veh
+153 -0.21 13.64 3.65 0.65 73.00 0.06 8.93 0.00 0.00 Veh
+154 -1.90 13.42 3.40 1.22 72.69 0.59 8.32 0.00 0.00 Veh
+155 -1.06 12.86 3.58 1.31 72.61 0.61 8.79 0.00 0.00 Veh
+156 -1.54 13.04 3.40 1.26 73.01 0.52 8.58 0.00 0.00 Veh
+157 -1.45 13.41 3.39 1.28 72.64 0.52 8.65 0.00 0.00 Veh
+158 3.21 14.03 3.76 0.58 71.79 0.11 9.65 0.00 0.00 Veh
+159 -0.24 13.53 3.41 1.52 72.04 0.58 8.79 0.00 0.00 Veh
+160 -0.04 13.50 3.36 1.63 71.94 0.57 8.81 0.00 0.09 Veh
+161 0.32 13.33 3.34 1.54 72.14 0.56 8.99 0.00 0.00 Veh
+162 1.34 13.64 3.54 0.75 72.65 0.16 8.89 0.15 0.24 Veh
+163 4.11 14.19 3.78 0.91 71.36 0.23 9.14 0.00 0.37 Veh
+164 -2.86 14.01 2.68 3.50 69.89 1.68 5.87 2.20 0.00 Con
+165 1.15 12.73 1.85 1.86 72.69 0.60 10.09 0.00 0.00 Con
+166 3.71 11.56 1.88 1.56 72.86 0.47 11.41 0.00 0.00 Con
+167 3.51 11.03 1.71 1.56 73.44 0.58 11.62 0.00 0.00 Con
+168 1.69 12.64 0.00 1.65 73.75 0.38 11.53 0.00 0.00 Con
+169 -1.34 12.86 0.00 1.83 73.88 0.97 10.17 0.00 0.00 Con
+170 1.94 13.27 0.00 1.76 73.03 0.47 11.32 0.00 0.00 Con
+171 5.69 13.44 0.00 1.58 72.22 0.32 12.24 0.00 0.00 Con
+172 -4.84 13.02 0.00 3.04 70.48 6.21 6.96 0.00 0.00 Con
+173 -4.79 13.00 0.00 3.02 70.70 6.21 6.93 0.00 0.00 Con
+174 2.43 13.38 0.00 1.40 72.25 0.33 12.50 0.00 0.00 Con
+175 2.58 12.85 1.61 2.17 72.18 0.76 9.70 0.24 0.51 Con
+176 3.19 12.97 0.33 1.51 73.39 0.13 11.27 0.00 0.28 Con
+177 1.05 14.00 2.39 1.56 72.37 0.00 9.57 0.00 0.00 Tabl
+178 1.37 13.79 2.41 1.19 72.76 0.00 9.77 0.00 0.00 Tabl
+179 0.29 14.46 2.24 1.62 72.38 0.00 9.26 0.00 0.00 Tabl
+180 0.52 14.09 2.19 1.66 72.67 0.00 9.32 0.00 0.00 Tabl
+181 -5.01 14.40 1.74 1.54 74.55 0.00 7.59 0.00 0.00 Tabl
+182 0.88 14.99 0.78 1.74 72.50 0.00 9.95 0.00 0.00 Tabl
+183 1.16 14.15 0.00 2.09 72.74 0.00 10.88 0.00 0.00 Tabl
+184 1.69 14.56 0.00 0.56 73.48 0.00 11.22 0.00 0.00 Tabl
+185 -6.85 17.38 0.00 0.34 75.41 0.00 6.65 0.00 0.00 Tabl
+186 -6.69 13.69 3.20 1.81 72.81 1.76 5.43 1.19 0.00 Head
+187 0.38 14.32 3.26 2.22 71.25 1.46 5.79 1.63 0.00 Head
+188 5.15 13.44 3.34 1.23 72.38 0.60 8.83 0.00 0.00 Head
+189 4.47 14.86 2.20 2.06 70.26 0.76 9.76 0.00 0.00 Head
+190 5.65 15.79 1.83 1.31 70.43 0.31 8.61 1.68 0.00 Head
+191 -1.87 13.88 1.78 1.79 73.10 0.00 8.67 0.76 0.00 Head
+192 -1.98 14.85 0.00 2.38 73.28 0.00 8.76 0.64 0.09 Head
+193 -1.77 14.20 0.00 2.79 73.46 0.04 9.04 0.40 0.09 Head
+194 -0.81 14.75 0.00 2.00 73.02 0.00 8.53 1.59 0.08 Head
+195 -1.17 14.56 0.00 1.98 73.29 0.00 8.52 1.57 0.07 Head
+196 -2.55 14.14 0.00 2.68 73.39 0.08 9.07 0.61 0.05 Head
+197 -2.44 13.87 0.00 2.54 73.23 0.14 9.41 0.81 0.01 Head
+198 -0.73 14.70 0.00 2.34 73.28 0.00 8.95 0.66 0.00 Head
+199 -2.69 14.38 0.00 2.66 73.10 0.04 9.08 0.64 0.00 Head
+200 -1.91 15.01 0.00 2.51 73.05 0.05 8.83 0.53 0.00 Head
+201 -2.92 15.15 0.00 2.25 73.50 0.00 8.34 0.63 0.00 Head
+202 -1.47 11.95 0.00 1.19 75.18 2.70 8.93 0.00 0.00 Head
+203 -2.86 14.85 0.00 2.42 73.72 0.00 8.39 0.56 0.00 Head
+204 -1.42 14.80 0.00 1.99 73.11 0.00 8.28 1.71 0.00 Head
+205 -1.83 14.95 0.00 2.27 73.30 0.00 8.71 0.67 0.00 Head
+206 -0.68 14.95 0.00 1.80 72.99 0.00 8.61 1.55 0.00 Head
+207 -1.55 14.94 0.00 1.87 73.11 0.00 8.67 1.38 0.00 Head
+208 0.31 14.39 0.00 1.82 72.86 1.41 6.47 2.88 0.00 Head
+209 -1.60 14.37 0.00 2.74 72.85 0.00 9.45 0.54 0.00 Head
+210 -1.77 14.14 0.00 2.88 72.61 0.08 9.18 1.06 0.00 Head
+211 -1.15 14.92 0.00 1.99 73.06 0.00 8.40 1.59 0.00 Head
+212 2.65 14.36 0.00 2.02 73.42 0.00 8.44 1.64 0.00 Head
+213 -1.49 14.38 0.00 1.94 73.61 0.00 8.48 1.57 0.00 Head
+214 -0.89 14.23 0.00 2.08 73.36 0.00 8.62 1.67 0.00 Head
diff --git a/lib/mining/testdata/forensic_glass/fgl.dsv b/lib/mining/testdata/forensic_glass/fgl.dsv
new file mode 100644
index 00000000..fd33e0d5
--- /dev/null
+++ b/lib/mining/testdata/forensic_glass/fgl.dsv
@@ -0,0 +1,97 @@
+{
+ "Input" :"fgl.data"
+, "Rejected" :"glass.rej"
+, "MaxRows" :-1
+, "ClassMetadataIndex" :10
+, "ClassIndex" :9
+, "DatasetMode" :"matrix"
+, "InputMetadata" :
+ [{
+ "Name" :"ID"
+ , "Separator" :" "
+ , "Type" :"integer"
+ , "Skip" :true
+ },{
+ "Name" :"RI"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"NA20"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"MGO"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"AL203"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"SI02"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"K20"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"CAO"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"BAO"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"FE203"
+ , "Separator" :" "
+ , "Type" :"real"
+ },{
+ "Name" :"TYPE"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "WinF"
+ , "WinNF"
+ , "Veh"
+ , "Con"
+ , "Tabl"
+ , "Head"
+ ]
+ }]
+
+, "Output" :"glass.out"
+, "OutputMetadata" :
+ [{
+ "Name" :"RI"
+ , "Separator" :","
+ },{
+ "Name" :"NA20"
+ , "Separator" :","
+ },{
+ "Name" :"MGO"
+ , "Separator" :","
+ },{
+ "Name" :"AL203"
+ , "Separator" :","
+ },{
+ "Name" :"SI02"
+ , "Separator" :","
+ },{
+ "Name" :"K20"
+ , "Separator" :","
+ },{
+ "Name" :"CAO"
+ , "Separator" :","
+ },{
+ "Name" :"BAO"
+ , "Separator" :","
+ },{
+ "Name" :"FE203"
+ , "Separator" :","
+ },{
+ "Name" :"TYPE"
+ , "Type" :"integer"
+ }]
+}
diff --git a/lib/mining/testdata/forensic_glass/glass.data b/lib/mining/testdata/forensic_glass/glass.data
new file mode 100644
index 00000000..ab21361c
--- /dev/null
+++ b/lib/mining/testdata/forensic_glass/glass.data
@@ -0,0 +1,214 @@
+1,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,1
+2,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,1
+3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,1
+4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,1
+5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,1
+6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,1
+7,1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,1
+8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,1
+9,1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,1
+10,1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0.00,0.11,1
+11,1.51571,12.72,3.46,1.56,73.20,0.67,8.09,0.00,0.24,1
+12,1.51763,12.80,3.66,1.27,73.01,0.60,8.56,0.00,0.00,1
+13,1.51589,12.88,3.43,1.40,73.28,0.69,8.05,0.00,0.24,1
+14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.00,0.17,1
+15,1.51763,12.61,3.59,1.31,73.29,0.58,8.50,0.00,0.00,1
+16,1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.00,0.00,1
+17,1.51784,12.68,3.67,1.16,73.11,0.61,8.70,0.00,0.00,1
+18,1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.00,0.00,1
+19,1.51911,13.90,3.73,1.18,72.12,0.06,8.89,0.00,0.00,1
+20,1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.00,0.07,1
+21,1.51750,12.82,3.55,1.49,72.75,0.54,8.52,0.00,0.19,1
+22,1.51966,14.77,3.75,0.29,72.02,0.03,9.00,0.00,0.00,1
+23,1.51736,12.78,3.62,1.29,72.79,0.59,8.70,0.00,0.00,1
+24,1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.00,0.00,1
+25,1.51720,13.38,3.50,1.15,72.85,0.50,8.43,0.00,0.00,1
+26,1.51764,12.98,3.54,1.21,73.00,0.65,8.53,0.00,0.00,1
+27,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,1
+28,1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.00,0.00,1
+29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.00,0.00,1
+30,1.51784,13.08,3.49,1.28,72.86,0.60,8.49,0.00,0.00,1
+31,1.51768,12.65,3.56,1.30,73.08,0.61,8.69,0.00,0.14,1
+32,1.51747,12.84,3.50,1.14,73.27,0.56,8.55,0.00,0.00,1
+33,1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,1
+34,1.51753,12.57,3.47,1.38,73.39,0.60,8.55,0.00,0.06,1
+35,1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.00,0.00,1
+36,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.00,0.00,1
+37,1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.00,1
+38,1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.00,0.00,1
+39,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,1
+40,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,1
+41,1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.00,0.00,1
+42,1.51755,12.71,3.42,1.20,73.20,0.59,8.64,0.00,0.00,1
+43,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.00,0.00,1
+44,1.52210,13.73,3.84,0.72,71.76,0.17,9.74,0.00,0.00,1
+45,1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.00,0.30,1
+46,1.51900,13.49,3.48,1.35,71.95,0.55,9.00,0.00,0.00,1
+47,1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.00,0.16,1
+48,1.52667,13.99,3.70,0.71,71.57,0.02,9.82,0.00,0.10,1
+49,1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.00,0.00,1
+50,1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.00,0.00,1
+51,1.52320,13.72,3.72,0.51,71.75,0.09,10.06,0.00,0.16,1
+52,1.51926,13.20,3.33,1.28,72.36,0.60,9.14,0.00,0.11,1
+53,1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.00,0.00,1
+54,1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.00,0.00,1
+55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.00,0.09,1
+56,1.51769,12.45,2.71,1.29,73.70,0.56,9.06,0.00,0.24,1
+57,1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.00,0.31,1
+58,1.51824,12.87,3.48,1.29,72.95,0.60,8.43,0.00,0.00,1
+59,1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.00,0.00,1
+60,1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.00,0.11,1
+61,1.51905,13.60,3.62,1.11,72.64,0.14,8.76,0.00,0.00,1
+62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.00,1
+63,1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.00,0.11,1
+64,1.52227,14.17,3.81,0.78,71.35,0.00,9.69,0.00,0.00,1
+65,1.52172,13.48,3.74,0.90,72.01,0.18,9.61,0.00,0.07,1
+66,1.52099,13.69,3.59,1.12,71.96,0.09,9.40,0.00,0.00,1
+67,1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.00,0.17,1
+68,1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.00,0.17,1
+69,1.52152,13.12,3.58,0.90,72.20,0.23,9.82,0.00,0.16,1
+70,1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,1
+71,1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.00,0.12,2
+72,1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.00,0.32,2
+73,1.51593,13.09,3.59,1.52,73.10,0.67,7.83,0.00,0.00,2
+74,1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.00,0.00,2
+75,1.51596,13.02,3.56,1.54,73.11,0.72,7.90,0.00,0.00,2
+76,1.51590,13.02,3.58,1.51,73.12,0.69,7.96,0.00,0.00,2
+77,1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.00,0.00,2
+78,1.51627,13.00,3.58,1.54,72.83,0.61,8.04,0.00,0.00,2
+79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.00,0.14,2
+80,1.51590,12.82,3.52,1.90,72.86,0.69,7.97,0.00,0.00,2
+81,1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.00,0.00,2
+82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.00,0.00,2
+83,1.51646,13.41,3.55,1.25,72.81,0.68,8.10,0.00,0.00,2
+84,1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.00,0.09,2
+85,1.51409,14.25,3.09,2.08,72.28,1.10,7.08,0.00,0.00,2
+86,1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.00,0.00,2
+87,1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.00,0.00,2
+88,1.51645,13.40,3.49,1.52,72.65,0.67,8.08,0.00,0.10,2
+89,1.51618,13.01,3.50,1.48,72.89,0.60,8.12,0.00,0.00,2
+90,1.51640,12.55,3.48,1.87,73.23,0.63,8.08,0.00,0.09,2
+91,1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.00,0.22,2
+92,1.51605,12.90,3.44,1.45,73.06,0.44,8.27,0.00,0.00,2
+93,1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.00,0.19,2
+94,1.51590,13.24,3.34,1.47,73.10,0.39,8.22,0.00,0.00,2
+95,1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.00,0.00,2
+96,1.51860,13.36,3.43,1.43,72.26,0.51,8.60,0.00,0.00,2
+97,1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.00,0.15,2
+98,1.51743,12.20,3.25,1.16,73.55,0.62,8.90,0.00,0.24,2
+99,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.00,0.00,2
+100,1.51811,12.96,2.96,1.43,72.92,0.60,8.79,0.14,0.00,2
+101,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2
+102,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,2
+103,1.51820,12.62,2.76,0.83,73.81,0.35,9.42,0.00,0.20,2
+104,1.52725,13.80,3.15,0.66,70.57,0.08,11.64,0.00,0.00,2
+105,1.52410,13.83,2.90,1.17,71.15,0.08,10.79,0.00,0.00,2
+106,1.52475,11.45,0.00,1.88,72.19,0.81,13.24,0.00,0.34,2
+107,1.53125,10.73,0.00,2.10,69.81,0.58,13.30,3.15,0.28,2
+108,1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.00,0.24,2
+109,1.52222,14.43,0.00,1.00,72.67,0.10,11.52,0.00,0.08,2
+110,1.51818,13.72,0.00,0.56,74.45,0.00,10.99,0.00,0.00,2
+111,1.52664,11.23,0.00,0.77,73.21,0.00,14.68,0.00,0.00,2
+112,1.52739,11.02,0.00,0.75,73.08,0.00,14.96,0.00,0.00,2
+113,1.52777,12.64,0.00,0.67,72.02,0.06,14.40,0.00,0.00,2
+114,1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.00,0.14,2
+115,1.51847,13.10,3.97,1.19,72.44,0.60,8.43,0.00,0.00,2
+116,1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.00,0.00,2
+117,1.51829,13.24,3.90,1.41,72.33,0.55,8.31,0.00,0.10,2
+118,1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.00,0.00,2
+119,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,2
+120,1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.00,0.00,2
+121,1.51844,13.25,3.76,1.32,72.40,0.58,8.42,0.00,0.00,2
+122,1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.00,0.21,2
+123,1.51687,13.23,3.54,1.48,72.84,0.56,8.10,0.00,0.00,2
+124,1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.00,0.00,2
+125,1.52177,13.20,3.68,1.15,72.75,0.54,8.52,0.00,0.00,2
+126,1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.00,0.12,2
+127,1.51667,12.94,3.61,1.26,72.75,0.56,8.60,0.00,0.00,2
+128,1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.00,0.17,2
+129,1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2
+130,1.52020,13.98,1.35,1.63,71.76,0.39,10.56,0.00,0.18,2
+131,1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.00,0.00,2
+132,1.52614,13.70,0.00,1.36,71.24,0.19,13.44,0.00,0.10,2
+133,1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.00,0.00,2
+134,1.51800,13.71,3.93,1.54,71.81,0.54,8.21,0.00,0.15,2
+135,1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.00,0.00,2
+136,1.51789,13.19,3.90,1.30,72.33,0.55,8.44,0.00,0.28,2
+137,1.51806,13.00,3.80,1.08,73.07,0.56,8.38,0.00,0.12,2
+138,1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.00,0.00,2
+139,1.51674,12.79,3.52,1.54,73.36,0.66,7.90,0.00,0.00,2
+140,1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.00,0.00,2
+141,1.51690,13.33,3.54,1.61,72.54,0.68,8.11,0.00,0.00,2
+142,1.51851,13.20,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2
+143,1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2
+144,1.51709,13.00,3.47,1.79,72.72,0.66,8.18,0.00,0.00,2
+145,1.51660,12.99,3.18,1.23,72.97,0.58,8.81,0.00,0.24,2
+146,1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.00,0.35,2
+147,1.51769,13.65,3.66,1.11,72.77,0.11,8.60,0.00,0.00,3
+148,1.51610,13.33,3.53,1.34,72.67,0.56,8.33,0.00,0.00,3
+149,1.51670,13.24,3.57,1.38,72.70,0.56,8.44,0.00,0.10,3
+150,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.00,0.00,3
+151,1.51665,13.14,3.45,1.76,72.48,0.60,8.38,0.00,0.17,3
+152,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,3
+153,1.51779,13.64,3.65,0.65,73.00,0.06,8.93,0.00,0.00,3
+154,1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.00,0.00,3
+155,1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.00,0.00,3
+156,1.51646,13.04,3.40,1.26,73.01,0.52,8.58,0.00,0.00,3
+157,1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.00,0.00,3
+158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.00,0.00,3
+159,1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.00,0.00,3
+160,1.51796,13.50,3.36,1.63,71.94,0.57,8.81,0.00,0.09,3
+161,1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.00,0.00,3
+162,1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3
+163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,3
+164,1.51514,14.01,2.68,3.50,69.89,1.68,5.87,2.20,0.00,5
+165,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,5
+166,1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.00,0.00,5
+167,1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.00,0.00,5
+168,1.51969,12.64,0.00,1.65,73.75,0.38,11.53,0.00,0.00,5
+169,1.51666,12.86,0.00,1.83,73.88,0.97,10.17,0.00,0.00,5
+170,1.51994,13.27,0.00,1.76,73.03,0.47,11.32,0.00,0.00,5
+171,1.52369,13.44,0.00,1.58,72.22,0.32,12.24,0.00,0.00,5
+172,1.51316,13.02,0.00,3.04,70.48,6.21,6.96,0.00,0.00,5
+173,1.51321,13.00,0.00,3.02,70.70,6.21,6.93,0.00,0.00,5
+174,1.52043,13.38,0.00,1.40,72.25,0.33,12.50,0.00,0.00,5
+175,1.52058,12.85,1.61,2.17,72.18,0.76,9.70,0.24,0.51,5
+176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.00,0.28,5
+177,1.51905,14.00,2.39,1.56,72.37,0.00,9.57,0.00,0.00,6
+178,1.51937,13.79,2.41,1.19,72.76,0.00,9.77,0.00,0.00,6
+179,1.51829,14.46,2.24,1.62,72.38,0.00,9.26,0.00,0.00,6
+180,1.51852,14.09,2.19,1.66,72.67,0.00,9.32,0.00,0.00,6
+181,1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.00,0.00,6
+182,1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,6
+183,1.51916,14.15,0.00,2.09,72.74,0.00,10.88,0.00,0.00,6
+184,1.51969,14.56,0.00,0.56,73.48,0.00,11.22,0.00,0.00,6
+185,1.51115,17.38,0.00,0.34,75.41,0.00,6.65,0.00,0.00,6
+186,1.51131,13.69,3.20,1.81,72.81,1.76,5.43,1.19,0.00,7
+187,1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.00,7
+188,1.52315,13.44,3.34,1.23,72.38,0.60,8.83,0.00,0.00,7
+189,1.52247,14.86,2.20,2.06,70.26,0.76,9.76,0.00,0.00,7
+190,1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.00,7
+191,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,7
+192,1.51602,14.85,0.00,2.38,73.28,0.00,8.76,0.64,0.09,7
+193,1.51623,14.20,0.00,2.79,73.46,0.04,9.04,0.40,0.09,7
+194,1.51719,14.75,0.00,2.00,73.02,0.00,8.53,1.59,0.08,7
+195,1.51683,14.56,0.00,1.98,73.29,0.00,8.52,1.57,0.07,7
+196,1.51545,14.14,0.00,2.68,73.39,0.08,9.07,0.61,0.05,7
+197,1.51556,13.87,0.00,2.54,73.23,0.14,9.41,0.81,0.01,7
+198,1.51727,14.70,0.00,2.34,73.28,0.00,8.95,0.66,0.00,7
+199,1.51531,14.38,0.00,2.66,73.10,0.04,9.08,0.64,0.00,7
+200,1.51609,15.01,0.00,2.51,73.05,0.05,8.83,0.53,0.00,7
+201,1.51508,15.15,0.00,2.25,73.50,0.00,8.34,0.63,0.00,7
+202,1.51653,11.95,0.00,1.19,75.18,2.70,8.93,0.00,0.00,7
+203,1.51514,14.85,0.00,2.42,73.72,0.00,8.39,0.56,0.00,7
+204,1.51658,14.80,0.00,1.99,73.11,0.00,8.28,1.71,0.00,7
+205,1.51617,14.95,0.00,2.27,73.30,0.00,8.71,0.67,0.00,7
+206,1.51732,14.95,0.00,1.80,72.99,0.00,8.61,1.55,0.00,7
+207,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,7
+208,1.51831,14.39,0.00,1.82,72.86,1.41,6.47,2.88,0.00,7
+209,1.51640,14.37,0.00,2.74,72.85,0.00,9.45,0.54,0.00,7
+210,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.00,7
+211,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.00,7
+212,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.00,7
+213,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.00,7
+214,1.51711,14.23,0.00,2.08,73.36,0.00,8.62,1.67,0.00,7
diff --git a/lib/mining/testdata/forensic_glass/glass.dsv b/lib/mining/testdata/forensic_glass/glass.dsv
new file mode 100644
index 00000000..10e9ba98
--- /dev/null
+++ b/lib/mining/testdata/forensic_glass/glass.dsv
@@ -0,0 +1,98 @@
+{
+ "Input" :"glass.data"
+, "Rejected" :"glass.rej"
+, "MaxRows" :-1
+, "ClassMetadataIndex" :10
+, "ClassIndex" :9
+, "DatasetMode" :"matrix"
+, "InputMetadata" :
+ [{
+ "Name" :"ID"
+ , "Separator" :","
+ , "Type" :"integer"
+ , "Skip" :true
+ },{
+ "Name" :"RI"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"NA20"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"MGO"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"AL203"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"SI02"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"K20"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"CAO"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"BAO"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"FE203"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"TYPE"
+ , "Type" :"real"
+ , "ValueSpace" :
+ [
+ "1"
+ , "2"
+ , "3"
+ , "4"
+ , "5"
+ , "6"
+ , "7"
+ ]
+ }]
+
+, "Output" :"glass.out"
+, "OutputMetadata" :
+ [{
+ "Name" :"RI"
+ , "Separator" :","
+ },{
+ "Name" :"NA20"
+ , "Separator" :","
+ },{
+ "Name" :"MGO"
+ , "Separator" :","
+ },{
+ "Name" :"AL203"
+ , "Separator" :","
+ },{
+ "Name" :"SI02"
+ , "Separator" :","
+ },{
+ "Name" :"K20"
+ , "Separator" :","
+ },{
+ "Name" :"CAO"
+ , "Separator" :","
+ },{
+ "Name" :"BAO"
+ , "Separator" :","
+ },{
+ "Name" :"FE203"
+ , "Separator" :","
+ },{
+ "Name" :"TYPE"
+ , "Type" :"integer"
+ }]
+}
diff --git a/lib/mining/testdata/forensic_glass/glass.names b/lib/mining/testdata/forensic_glass/glass.names
new file mode 100644
index 00000000..caff715c
--- /dev/null
+++ b/lib/mining/testdata/forensic_glass/glass.names
@@ -0,0 +1,90 @@
+1. Title: Glass Identification Database
+
+2. Sources:
+ (a) Creator: B. German
+ -- Central Research Establishment
+ Home Office Forensic Science Service
+ Aldermaston, Reading, Berkshire RG7 4PN
+ (b) Donor: Vina Spiehler, Ph.D., DABFT
+ Diagnostic Products Corporation
+ (213) 776-0180 (ext 3014)
+ (c) Date: September, 1987
+
+3. Past Usage:
+ -- Rule Induction in Forensic Science
+ -- Ian W. Evett and Ernest J. Spiehler
+ -- Central Research Establishment
+ Home Office Forensic Science Service
+ Aldermaston, Reading, Berkshire RG7 4PN
+ -- Unknown technical note number (sorry, not listed here)
+ -- General Results: nearest neighbor held its own with respect to the
+ rule-based system
+
+4. Relevant Information:
+ Vina conducted a comparison test of her rule-based system, BEAGLE, the
+ nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
+ a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
+ Suite 206; Marina Del Ray, CA 90292 (213) 827-7890 and FAX: -3189.
+ In determining whether the glass was a type of "float" glass or not,
+ the following results were obtained (# incorrect answers):
+
+ Type of Sample Beagle NN DA
+ Windows that were float processed (87) 10 12 21
+ Windows that were not: (76) 19 16 22
+
+ The study of classification of types of glass was motivated by
+ criminological investigation. At the scene of the crime, the glass left
+ can be used as evidence...if it is correctly identified!
+
+5. Number of Instances: 214
+
+6. Number of Attributes: 10 (including an Id#) plus the class attribute
+ -- all attributes are continuously valued
+
+7. Attribute Information:
+ 1. Id number: 1 to 214
+ 2. RI: refractive index
+ 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
+ are attributes 4-10)
+ 4. Mg: Magnesium
+ 5. Al: Aluminum
+ 6. Si: Silicon
+ 7. K: Potassium
+ 8. Ca: Calcium
+ 9. Ba: Barium
+ 10. Fe: Iron
+ 11. Type of glass: (class attribute)
+ -- 1 building_windows_float_processed
+ -- 2 building_windows_non_float_processed
+ -- 3 vehicle_windows_float_processed
+ -- 4 vehicle_windows_non_float_processed (none in this database)
+ -- 5 containers
+ -- 6 tableware
+ -- 7 headlamps
+
+8. Missing Attribute Values: None
+
+Summary Statistics:
+Attribute: Min Max Mean SD Correlation with class
+ 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
+ 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
+ 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
+ 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
+ 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
+ 7. K: 0 6.21 0.4971 0.6522 -0.0100
+ 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
+ 9. Ba: 0 3.15 0.1750 0.4972 0.5751
+10. Fe: 0 0.51 0.0570 0.0974 -0.1879
+
+9. Class Distribution: (out of 214 total instances)
+ -- 163 Window glass (building windows and vehicle windows)
+ -- 87 float processed
+ -- 70 building windows
+ -- 17 vehicle windows
+ -- 76 non-float processed
+ -- 76 building windows
+ -- 0 vehicle windows
+ -- 51 Non-window glass
+ -- 13 containers
+ -- 9 tableware
+ -- 29 headlamps
diff --git a/lib/mining/testdata/forensic_glass/glass.tag b/lib/mining/testdata/forensic_glass/glass.tag
new file mode 100644
index 00000000..8bf70515
--- /dev/null
+++ b/lib/mining/testdata/forensic_glass/glass.tag
@@ -0,0 +1,27 @@
+An original file donated by Vina Spiehler
+
+ID, N -- numeric identifier of the instance
+RI, N -- refractive index
+NA2O, N -- Sodium oxide
+MGO, N -- magnesium oxide
+AL2O3, N -- aluminum oxide
+SIO2, N -- silicon oxide
+K2O, N -- potassium oxide
+CAO, N -- calcium oxide
+BAO, N -- barium oxide
+FE2O3, N -- iron oxide
+TYPE, N -- An unknown, but must correspond to the types in the paper
+CAMG, N -- Unsure
+
+Types include:
+ 1. WF (Float Window)
+ 2. WNF (Non-float Window)
+ 3. C (Container)
+ 4. T (Tableware)
+ 5. H (Headlamp) 214 2568 14127 glass.dat
+ 19 92 518 glass.tag
+ 62 742 4775 glassx.dat
+ 51 610 3928 nonwindo.dat
+ 6 14 120 phones
+ 163 1955 12552 window.dat
+ 515 5981 36020 total
diff --git a/lib/mining/testdata/iris/iris.dsv b/lib/mining/testdata/iris/iris.dsv
new file mode 100644
index 00000000..9839b12b
--- /dev/null
+++ b/lib/mining/testdata/iris/iris.dsv
@@ -0,0 +1,35 @@
+{
+ "Input" :"iris.dat"
+, "Rejected" :"iris.rej"
+, "MaxRows" :-1
+, "ClassMetadataIndex" :4
+, "ClassIndex" :4
+, "DatasetMode" :"matrix"
+, "InputMetadata" :
+ [{
+ "Name" :"sepal-length"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"sepal-width"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"petal-length"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"petal-width"
+ , "Separator" :","
+ , "Type" :"real"
+ },{
+ "Name" :"class"
+ , "Type" :"string"
+ , "ValueSpace" :
+ [
+ "Iris-setosa"
+ , "Iris-versicolor"
+ , "Iris-virginica"
+ ]
+ }]
+}
diff --git a/lib/mining/testdata/phoneme/phoneme.dsv b/lib/mining/testdata/phoneme/phoneme.dsv
new file mode 100644
index 00000000..595576b5
--- /dev/null
+++ b/lib/mining/testdata/phoneme/phoneme.dsv
@@ -0,0 +1,59 @@
+{
+ "Input" :"phoneme.dat"
+, "Rejected" :"phoneme.rej"
+, "MaxRows" :-1
+, "ClassMetadataIndex" :5
+, "ClassIndex" :5
+, "DatasetMode" :"matrix"
+, "OutliersFile" :"phoneme.outliers"
+, "InputMetadata" :
+ [{
+ "Name" :"f1"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f2"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f3"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f4"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"f5"
+ , "Type" :"real"
+ , "Separator" :" "
+ },{
+ "Name" :"class"
+ , "Type" :"real"
+ , "ValueSpace" :
+ [
+ "1"
+ , "0"
+ ]
+ }]
+, "Output" :"phoneme.csv"
+, "OutputMetadata":
+ [{
+ "Name" :"f1"
+ , "Separator" :","
+ },{
+ "Name" :"f2"
+ , "Separator" :","
+ },{
+ "Name" :"f3"
+ , "Separator" :","
+ },{
+ "Name" :"f4"
+ , "Separator" :","
+ },{
+ "Name" :"f5"
+ , "Separator" :","
+ },{
+ "Name" :"class"
+ }]
+}
diff --git a/lib/mining/testdata/wvc2010lnsmote/wvc2010_features.lnsmote.dsv b/lib/mining/testdata/wvc2010lnsmote/wvc2010_features.lnsmote.dsv
new file mode 100644
index 00000000..3c88ca38
--- /dev/null
+++ b/lib/mining/testdata/wvc2010lnsmote/wvc2010_features.lnsmote.dsv
@@ -0,0 +1,129 @@
+{
+ "Input" : "wvc2010_features.lnsmote.dat"
+, "Rejected" : "wvc2010_features.lnsmote.rej"
+, "DatasetMode" : "matrix"
+, "MaxRows" : -1
+, "ClassIndex" : 27
+
+, "NTree" : 500
+, "PercentBoot" : 66
+, "StatsFile" : "randomforest.lnsmote.stats.dat"
+
+, "InputMetadata" : [{
+ "Name" : "anonim"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "comment_length"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "size_increment"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "size_ratio"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "upper_lower_ratio"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "upper_to_all_ratio"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "digit_ratio"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "non_alnum_ratio"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "char_diversity"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "char_distribution_insert"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "compress_rate"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "good_token"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "term_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "longest_word"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "longest_char_sequence"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_vulgar_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_vulgar_impact"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_pronoun_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_pronoun_impact"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_bias_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_bias_impact"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_sex_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_sex_impact"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_bad_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_bad_impact"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_all_frequency"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "words_all_impact"
+ , "Type" : "real"
+ , "Separator" : ","
+ },{
+ "Name" : "class"
+ , "Type" : "real"
+ , "ValueSpace" :
+ [
+ "1"
+ , "0"
+ ]
+ }]
+}
diff --git a/lib/mining/tree/binary/binary.go b/lib/mining/tree/binary/binary.go
new file mode 100644
index 00000000..57c459b0
--- /dev/null
+++ b/lib/mining/tree/binary/binary.go
@@ -0,0 +1,64 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package binary contain implementation of binary tree.
+package binary
+
+import (
+ "fmt"
+)
+
+//
+// Tree is a abstract data type for tree with only two branch: left and right.
+//
+type Tree struct {
+ // Root is pointer to root of tree.
+ Root *BTNode
+}
+
+//
+// NewTree will create new binary tree with empty root.
+//
+func NewTree() *Tree {
+ return &Tree{
+ Root: nil,
+ }
+}
+
+//
+// String will print all the branch and leaf in tree.
+//
+func (btree *Tree) String() (s string) {
+ var parent, node *BTNode
+
+ parent = btree.Root
+
+ s = fmt.Sprint(parent)
+
+ node = parent.Right
+
+ for parent != nil {
+ // Print right node down to the leaf.
+ for node.Right != nil {
+ s += fmt.Sprint(node)
+
+ parent = node
+ node = node.Right
+ }
+ s += fmt.Sprint(node)
+
+ // crawling to the stop one at a time ...
+ for parent != nil && node.Parent == parent {
+ if parent.Right == node {
+ node = parent.Left
+ break
+ } else if parent.Left == node {
+ node = parent
+ parent = parent.Parent
+ }
+ }
+ }
+
+ return s
+}
diff --git a/lib/mining/tree/binary/binary_test.go b/lib/mining/tree/binary/binary_test.go
new file mode 100644
index 00000000..4f61deb0
--- /dev/null
+++ b/lib/mining/tree/binary/binary_test.go
@@ -0,0 +1,47 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binary
+
+import (
+ "fmt"
+ "testing"
+)
+
// TestTree builds a full three-level tree and verifies that formatting
// the tree lists every node in node-right-left order, indented one tab
// per depth level (the indentation comes from BTNode.String).
func TestTree(t *testing.T) {
	exp := `1
	12
		24
			34
			33
		23
	11
		22
			32
		31
	21
`

	btree := NewTree()

	// Build the tree bottom-up; NewBTNode links each non-nil child's
	// Parent pointer back to the newly created node.
	root := NewBTNode(1,
		NewBTNode(11,
			NewBTNode(21, nil, nil),
			NewBTNode(22,
				NewBTNode(31, nil, nil),
				NewBTNode(32, nil, nil))),
		NewBTNode(12,
			NewBTNode(23, nil, nil),
			NewBTNode(24,
				NewBTNode(33, nil, nil),
				NewBTNode(34, nil, nil))))

	btree.Root = root

	// fmt.Sprint uses Tree's String method to render the whole tree.
	res := fmt.Sprint(btree)

	if res != exp {
		t.Fatal("error, expecting:\n", exp, "\n got:\n", res)
	}
}
diff --git a/lib/mining/tree/binary/btnode.go b/lib/mining/tree/binary/btnode.go
new file mode 100644
index 00000000..57524265
--- /dev/null
+++ b/lib/mining/tree/binary/btnode.go
@@ -0,0 +1,77 @@
+// Copyright 2015 Mhd Sulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binary
+
+import (
+ "fmt"
+ "reflect"
+)
+
// BTNode is a data type for a single node in a binary tree.
type BTNode struct {
	// Left branch of node.
	Left *BTNode
	// Right branch of node.
	Right *BTNode
	// Parent of node; nil for the root.
	Parent *BTNode
	// Value of node; may hold any type.
	Value interface{}
}

// NewBTNode create new node for binary tree with value v and children
// l and r. Either child may be nil; non-nil children get their Parent
// pointer set to the new node.
func NewBTNode(v interface{}, l *BTNode, r *BTNode) (p *BTNode) {
	p = &BTNode{
		Left:   l,
		Right:  r,
		Parent: nil,
		Value:  v,
	}
	if l != nil {
		l.Parent = p
	}
	if r != nil {
		r.Parent = p
	}

	return p
}

// SetLeft will set left branch of node to 'c'. A nil c clears the left
// branch; a non-nil c also gets its Parent pointer set to n. The nil
// guard keeps SetLeft consistent with NewBTNode, which already accepts
// nil children.
func (n *BTNode) SetLeft(c *BTNode) {
	n.Left = c
	if c != nil {
		c.Parent = n
	}
}

// SetRight will set right branch of node to 'c'. A nil c clears the
// right branch; a non-nil c also gets its Parent pointer set to n.
func (n *BTNode) SetRight(c *BTNode) {
	n.Right = c
	if c != nil {
		c.Parent = n
	}
}

// String will convert the node to string: one tab per ancestor (so the
// indentation reflects the node's depth) followed by the node's value
// and a newline.
func (n *BTNode) String() (s string) {
	// Walk up the Parent chain to count this node's depth.
	for p := n.Parent; p != nil; p = p.Parent {
		s += "\t"
	}

	// fmt formats a reflect.Value as the concrete value it holds.
	s += fmt.Sprintln(reflect.ValueOf(n.Value))

	return s
}