about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2019-01-29 05:09:28 +0700
committer Shulhan <ms@kilabit.info> 2019-01-29 05:09:28 +0700
commit 42c47e88bb17d9edbac608b48150fa244e40df22 (patch)
tree 9cc7fb834b06c7c0fac862331f9e8aaf46fcb918
parent 4fa1b3f0ba614703dc02bd781cc25372fb38d514 (diff)
download pakakeh.go-42c47e88bb17d9edbac608b48150fa244e40df22.tar.xz
lib/mining: fix linter warnings on using capitalized parameters
-rw-r--r-- lib/mining/classifier/cart/cart.go 70
-rw-r--r-- lib/mining/gain/gini/gini.go 95
-rw-r--r-- lib/mining/gain/gini/ginifloat.go 46
3 files changed, 105 insertions, 106 deletions
diff --git a/lib/mining/classifier/cart/cart.go b/lib/mining/classifier/cart/cart.go
index 5d0a8008..1745bb8f 100644
--- a/lib/mining/classifier/cart/cart.go
+++ b/lib/mining/classifier/cart/cart.go
@@ -61,7 +61,7 @@ type Runtime struct {
//
// New create new Runtime object.
//
-func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) (
+func New(claset tabula.ClasetInterface, splitMethod string, nRandomFeature int) (
*Runtime, error,
) {
runtime := &Runtime{
@@ -70,7 +70,7 @@ func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) (
Tree: binary.Tree{},
}
- e := runtime.Build(D)
+ e := runtime.Build(claset)
if e != nil {
return nil, e
}
@@ -81,7 +81,7 @@ func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) (
//
// Build will create a tree using CART algorithm.
//
-func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) {
+func (runtime *Runtime) Build(claset tabula.ClasetInterface) (e error) {
// Re-check input configuration.
switch runtime.SplitMethod {
case SplitMethodGini:
@@ -91,7 +91,7 @@ func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) {
runtime.SplitMethod = SplitMethodGini
}
- runtime.Tree.Root, e = runtime.splitTreeByGain(D)
+ runtime.Tree.Root, e = runtime.splitTreeByGain(claset)
return
}
@@ -102,27 +102,27 @@ func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) {
//
// Return node with the split information.
//
-func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
+func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
node *binary.BTNode,
e error,
) {
node = &binary.BTNode{}
- D.RecountMajorMinor()
+ claset.RecountMajorMinor()
// if dataset is empty return node labeled with majority classes in
// dataset.
- nrow := D.GetNRow()
+ nrow := claset.GetNRow()
if nrow <= 0 {
if debug.Value >= 2 {
fmt.Printf("[cart] empty dataset (%s) : %v\n",
- D.MajorityClass(), D)
+ claset.MajorityClass(), claset)
}
node.Value = NodeValue{
IsLeaf: true,
- Class: D.MajorityClass(),
+ Class: claset.MajorityClass(),
Size: 0,
}
return node, nil
@@ -130,11 +130,11 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
// if all dataset is in the same class, return node as leaf with class
// is set to that class.
- single, name := D.IsInSingleClass()
+ single, name := claset.IsInSingleClass()
if single {
if debug.Value >= 2 {
fmt.Printf("[cart] in single class (%s): %v\n", name,
- D.GetColumns())
+ claset.GetColumns())
}
node.Value = NodeValue{
@@ -146,11 +146,11 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
}
if debug.Value >= 2 {
- fmt.Println("[cart] D:", D)
+ fmt.Println("[cart] claset:", claset)
}
// calculate the Gini gain for each attribute.
- gains := runtime.computeGain(D)
+ gains := runtime.computeGain(claset)
// get attribute with maximum Gini gain.
MaxGainIdx := gini.FindMaxGain(&gains)
@@ -161,20 +161,20 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
if MaxGain.GetMaxGainValue() == 0 {
if debug.Value >= 2 {
fmt.Println("[cart] max gain 0 with target",
- D.GetClassAsStrings(),
- " and majority class is ", D.MajorityClass())
+ claset.GetClassAsStrings(),
+ " and majority class is ", claset.MajorityClass())
}
node.Value = NodeValue{
IsLeaf: true,
- Class: D.MajorityClass(),
+ Class: claset.MajorityClass(),
Size: 0,
}
return node, nil
}
// using the sorted index in MaxGain, sort all field in dataset
- tabula.SortColumnsByIndex(D, MaxGain.SortedIndex)
+ tabula.SortColumnsByIndex(claset, MaxGain.SortedIndex)
if debug.Value >= 2 {
fmt.Println("[cart] maxgain:", MaxGain)
@@ -203,7 +203,7 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
}
node.Value = NodeValue{
- SplitAttrName: D.GetColumn(MaxGainIdx).GetName(),
+ SplitAttrName: claset.GetColumn(MaxGainIdx).GetName(),
IsLeaf: false,
IsContinu: MaxGain.IsContinu,
Size: nrow,
@@ -211,7 +211,7 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
SplitV: splitV,
}
- dsL, dsR, e := tabula.SplitRowsByValue(D, MaxGainIdx, splitV)
+ dsL, dsR, e := tabula.SplitRowsByValue(claset, MaxGainIdx, splitV)
if e != nil {
return node, e
@@ -258,13 +258,13 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
// SelectRandomFeature if NRandomFeature is greater than zero, select and
// compute gain in n random features instead of in all features
-func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) {
+func (runtime *Runtime) SelectRandomFeature(claset tabula.ClasetInterface) {
if runtime.NRandomFeature <= 0 {
// all features selected
return
}
- ncols := D.GetNColumn()
+ ncols := claset.GetNColumn()
// count all features minus class
nfeature := ncols - 1
@@ -275,8 +275,8 @@ func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) {
}
// exclude class index and parent node index
- excludeIdx := []int{D.GetClassIndex()}
- cols := D.GetColumns()
+ excludeIdx := []int{claset.GetClassIndex()}
+ cols := claset.GetColumns()
for x, col := range *cols {
if (col.Flag & ColFlagParent) == ColFlagParent {
excludeIdx = append(excludeIdx, x)
@@ -293,34 +293,34 @@ func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) {
pickedIdx = append(pickedIdx, idx)
// Remove skip flag on selected column
- col := D.GetColumn(idx)
+ col := claset.GetColumn(idx)
col.Flag &^= ColFlagSkip
}
if debug.Value >= 1 {
fmt.Println("[cart] selected random features:", pickedIdx)
- fmt.Println("[cart] selected columns :", D.GetColumns())
+ fmt.Println("[cart] selected columns :", claset.GetColumns())
}
}
//
// computeGain calculate the gini index for each value in each attribute.
//
-func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
+func (runtime *Runtime) computeGain(claset tabula.ClasetInterface) (
gains []gini.Gini,
) {
if runtime.SplitMethod == SplitMethodGini {
// create gains value for all attribute minus target class.
- gains = make([]gini.Gini, D.GetNColumn())
+ gains = make([]gini.Gini, claset.GetNColumn())
}
- runtime.SelectRandomFeature(D)
+ runtime.SelectRandomFeature(claset)
- classVS := D.GetClassValueSpace()
- classIdx := D.GetClassIndex()
- classType := D.GetClassType()
+ classVS := claset.GetClassValueSpace()
+ classIdx := claset.GetClassIndex()
+ classType := claset.GetClassType()
- for x, col := range *D.GetColumns() {
+ for x, col := range *claset.GetColumns() {
// skip class attribute.
if x == classIdx {
continue
@@ -343,11 +343,11 @@ func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
attr := col.ToFloatSlice()
if classType == tabula.TString {
- target := D.GetClassAsStrings()
+ target := claset.GetClassAsStrings()
gains[x].ComputeContinu(&attr, &target,
&classVS)
} else {
- targetReal := D.GetClassAsReals()
+ targetReal := claset.GetClassAsReals()
classVSReal := libstrings.ToFloat64(classVS)
gains[x].ComputeContinuFloat(&attr,
@@ -362,7 +362,7 @@ func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
fmt.Println("[cart] attrV:", attrV)
}
- target := D.GetClassAsStrings()
+ target := claset.GetClassAsStrings()
gains[x].ComputeDiscrete(&attr, &attrV, &target,
&classVS)
}
diff --git a/lib/mining/gain/gini/gini.go b/lib/mining/gain/gini/gini.go
index 911ea130..e4f6c28f 100644
--- a/lib/mining/gain/gini/gini.go
+++ b/lib/mining/gain/gini/gini.go
@@ -55,14 +55,13 @@ type Gini struct {
}
//
-// ComputeDiscrete Given an attribute A with discreate value 'discval', and the
-// target attribute T which contain N classes in C, compute the information gain
-// of A.
+// ComputeDiscrete Given an attribute "src" with discrete value 'discval', and
+// the target attribute "target" which contain n classes, compute the
+// information gain of "src".
//
// The result is saved as gain value in MaxGainValue for each partition.
//
-func (gini *Gini) ComputeDiscrete(A *[]string, discval *[]string, T *[]string,
- C *[]string) {
+func (gini *Gini) ComputeDiscrete(src, discval, target, classes *[]string) {
gini.IsContinu = false
// create partition for possible combination of discrete values.
@@ -77,21 +76,21 @@ func (gini *Gini) ComputeDiscrete(A *[]string, discval *[]string, T *[]string,
gini.MinIndexValue = 1.0
// compute gini index for all samples
- gini.Value = gini.compute(T, C)
+ gini.Value = gini.compute(target, classes)
- gini.computeDiscreteGain(A, T, C)
+ gini.computeDiscreteGain(src, target, classes)
}
//
// computeDiscreteGain will compute Gini index and Gain for each partition.
//
-func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) {
+func (gini *Gini) computeDiscreteGain(src, target, classes *[]string) {
// number of samples
- nsample := float64(len(*A))
+ nsample := float64(len(*src))
if debug.Value >= 3 {
- fmt.Println("[gini] sample:", T)
- fmt.Printf("[gini] Gini(a=%s) = %f\n", (*A), gini.Value)
+ fmt.Println("[gini] sample:", target)
+ fmt.Printf("[gini] Gini(a=%s) = %f\n", (*src), gini.Value)
}
// compute gini index for each discrete values
@@ -107,7 +106,7 @@ func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) {
var subT []string
for _, el := range part {
- for t, a := range *A {
+ for t, a := range *src {
if a != el {
continue
}
@@ -115,12 +114,12 @@ func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) {
// count how many sample with this discrete value
ndisc++
// split the target by discrete value
- subT = append(subT, (*T)[t])
+ subT = append(subT, (*target)[t])
}
}
// compute gini index for subtarget
- giniIndex := gini.compute(&subT, C)
+ giniIndex := gini.compute(&subT, classes)
// compute probabilities of discrete value through all
// samples
@@ -177,21 +176,21 @@ func (gini *Gini) createDiscretePartition(discval []string) {
}
//
-// ComputeContinu Given an attribute A and the target attribute T which contain
-// N classes in C, compute the information gain of A.
+// ComputeContinu Given an attribute "src" and the target attribute "target"
+// which contain n classes, compute the information gain of "src".
//
// The result of Gini partitions value, Gini Index, and Gini Gain is saved in
// ContinuPart, Index, and Gain.
//
-func (gini *Gini) ComputeContinu(A *[]float64, T *[]string, C *[]string) {
+func (gini *Gini) ComputeContinu(src *[]float64, target, classes *[]string) {
gini.IsContinu = true
// make a copy of attribute and target.
- A2 := make([]float64, len(*A))
- copy(A2, *A)
+ A2 := make([]float64, len(*src))
+ copy(A2, *src)
- T2 := make([]string, len(*T))
- copy(T2, *T)
+ T2 := make([]string, len(*target))
+ copy(T2, *target)
gini.SortedIndex = numbers.Floats64IndirectSort(A2, true)
@@ -211,23 +210,23 @@ func (gini *Gini) ComputeContinu(A *[]float64, T *[]string, C *[]string) {
gini.MinIndexValue = 1.0
// compute gini index for all samples
- gini.Value = gini.compute(&T2, C)
+ gini.Value = gini.compute(&T2, classes)
- gini.computeContinuGain(&A2, &T2, C)
+ gini.computeContinuGain(&A2, &T2, classes)
}
//
// createContinuPartition for dividing class and computing Gini index.
//
-// This is assuming that the data `A` has been sorted in ascending order.
+// This is assuming that the data `src` has been sorted in ascending order.
//
-func (gini *Gini) createContinuPartition(A *[]float64) {
- l := len(*A)
+func (gini *Gini) createContinuPartition(src *[]float64) {
+ l := len(*src)
gini.ContinuPart = make([]float64, 0)
// loop from first index until last index - 1
for i := 0; i < l-1; i++ {
- sum := (*A)[i] + (*A)[i+1]
+ sum := (*src)[i] + (*src)[i+1]
med := sum / 2.0
// If median is zero, its mean both left and right value is
@@ -238,11 +237,11 @@ func (gini *Gini) createContinuPartition(A *[]float64) {
}
// Reject if median is contained in attribute's value.
- // We use equality because if both A[i] and A[i+1] value is
- // equal, the median is equal to both of them.
+ // We use equality because if both src[i] and src[i+1] value
+ // is equal, the median is equal to both of them.
exist := false
for j := 0; j <= i; j++ {
- if (*A)[j] == med {
+ if (*src)[j] == med {
exist = true
break
}
@@ -260,13 +259,13 @@ func (gini *Gini) createContinuPartition(A *[]float64) {
//
// 1 - sum (probability of each classes in T)
//
-func (gini *Gini) compute(T *[]string, C *[]string) float64 {
- n := float64(len(*T))
+func (gini *Gini) compute(target, classes *[]string) float64 {
+ n := float64(len(*target))
if n == 0 {
return 0
}
- classCount := libstrings.CountTokens(*T, *C, true)
+ classCount := libstrings.CountTokens(*target, *classes, true)
var sump2 float64
@@ -276,7 +275,7 @@ func (gini *Gini) compute(T *[]string, C *[]string) float64 {
if debug.Value >= 3 {
fmt.Printf("[gini] compute (%s): (%d/%f)^2 = %f\n",
- (*C)[x], v, n, p*p)
+ (*classes)[x], v, n, p*p)
}
}
@@ -296,14 +295,14 @@ func (gini *Gini) compute(T *[]string, C *[]string) float64 {
// - left is sub-sample from S that is less than part value.
// - right is sub-sample from S that is greater than part value.
//
-func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) {
+func (gini *Gini) computeContinuGain(src *[]float64, target, classes *[]string) {
var gleft, gright float64
var tleft, tright []string
- nsample := len(*A)
+ nsample := len(*src)
if debug.Value >= 2 {
- fmt.Println("[gini] sorted data:", A)
+ fmt.Println("[gini] sorted data:", src)
fmt.Println("[gini] Gini.Value:", gini.Value)
}
@@ -312,7 +311,7 @@ func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) {
// find the split of samples between partition based on
// partition value
partidx := nsample
- for x, attrVal := range *A {
+ for x, attrVal := range *src {
if attrVal > contVal {
partidx = x
break
@@ -325,17 +324,17 @@ func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) {
pright := float64(nright) / float64(nsample)
if partidx > 0 {
- tleft = (*T)[0:partidx]
- tright = (*T)[partidx:]
+ tleft = (*target)[0:partidx]
+ tright = (*target)[partidx:]
- gleft = gini.compute(&tleft, C)
- gright = gini.compute(&tright, C)
+ gleft = gini.compute(&tleft, classes)
+ gright = gini.compute(&tright, classes)
} else {
tleft = nil
- tright = (*T)[0:]
+ tright = (*target)[0:]
gleft = 0
- gright = gini.compute(&tright, C)
+ gright = gini.compute(&tright, classes)
}
// count class in partition
@@ -404,7 +403,7 @@ func (gini *Gini) GetMinIndexValue() float64 {
// FindMaxGain find the attribute and value that have the maximum gain.
// The returned value is index of attribute.
//
-func FindMaxGain(gains *[]Gini) (MaxGainIdx int) {
+func FindMaxGain(gains *[]Gini) (maxGainIdx int) {
var gainValue float64
var maxGainValue float64
@@ -415,7 +414,7 @@ func FindMaxGain(gains *[]Gini) (MaxGainIdx int) {
gainValue = (*gains)[i].GetMaxGainValue()
if gainValue > maxGainValue {
maxGainValue = gainValue
- MaxGainIdx = i
+ maxGainIdx = i
}
}
@@ -425,7 +424,7 @@ func FindMaxGain(gains *[]Gini) (MaxGainIdx int) {
//
// FindMinGiniIndex return the index of attribute that have the minimum Gini index.
//
-func FindMinGiniIndex(ginis *[]Gini) (MinIndexIdx int) {
+func FindMinGiniIndex(ginis *[]Gini) (minIndexIdx int) {
var indexV float64
minIndexV := 1.0
@@ -433,7 +432,7 @@ func FindMinGiniIndex(ginis *[]Gini) (MinIndexIdx int) {
indexV = (*ginis)[i].GetMinIndexValue()
if indexV > minIndexV {
minIndexV = indexV
- MinIndexIdx = i
+ minIndexIdx = i
}
}
diff --git a/lib/mining/gain/gini/ginifloat.go b/lib/mining/gain/gini/ginifloat.go
index b890b39d..c0e51824 100644
--- a/lib/mining/gain/gini/ginifloat.go
+++ b/lib/mining/gain/gini/ginifloat.go
@@ -25,20 +25,20 @@ import (
// (3) Create temporary space for gini index and gini gain.
// (4) Compute gini index for all target.
//
-func (gini *Gini) ComputeContinuFloat(A, T, C *[]float64) {
+func (gini *Gini) ComputeContinuFloat(src, target, classes *[]float64) {
gini.IsContinu = true
- gini.SortedIndex = numbers.Floats64IndirectSort(*A, true)
+ gini.SortedIndex = numbers.Floats64IndirectSort(*src, true)
if debug.Value >= 1 {
- fmt.Println("[gini] attr sorted :", A)
+ fmt.Println("[gini] attr sorted :", src)
}
// (1)
- numbers.Floats64SortByIndex(T, gini.SortedIndex)
+ numbers.Floats64SortByIndex(target, gini.SortedIndex)
// (2)
- gini.createContinuPartition(A)
+ gini.createContinuPartition(src)
// (3)
gini.Index = make([]float64, len(gini.ContinuPart))
@@ -46,25 +46,25 @@ func (gini *Gini) ComputeContinuFloat(A, T, C *[]float64) {
gini.MinIndexValue = 1.0
// (4)
- gini.Value = gini.computeFloat(T, C)
+ gini.Value = gini.computeFloat(target, classes)
- gini.computeContinuGainFloat(A, T, C)
+ gini.computeContinuGainFloat(src, target, classes)
}
//
-// computeFloat will compute Gini value for attribute T.
+// computeFloat will compute Gini value for attribute "target".
//
// Gini value is computed using formula,
//
-// 1 - sum (probability of each classes in T)
+// 1 - sum (probability of each classes in target)
//
-func (gini *Gini) computeFloat(T, C *[]float64) float64 {
- n := float64(len(*T))
+func (gini *Gini) computeFloat(target, classes *[]float64) float64 {
+ n := float64(len(*target))
if n == 0 {
return 0
}
- classCount := numbers.Floats64Counts(*T, *C)
+ classCount := numbers.Floats64Counts(*target, *classes)
var sump2 float64
@@ -74,7 +74,7 @@ func (gini *Gini) computeFloat(T, C *[]float64) float64 {
if debug.Value >= 3 {
fmt.Printf("[gini] compute (%f): (%d/%f)^2 = %f\n",
- (*C)[x], v, n, p*p)
+ (*classes)[x], v, n, p*p)
}
}
@@ -99,14 +99,14 @@ func (gini *Gini) computeFloat(T, C *[]float64) float64 {
// (0.1) Find the split of samples between partition based on partition value.
// (0.2) Count class in partition.
//
-func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) {
+func (gini *Gini) computeContinuGainFloat(src, target, classes *[]float64) {
var gainLeft, gainRight float64
var tleft, tright []float64
- nsample := len(*A)
+ nsample := len(*src)
if debug.Value >= 2 {
- fmt.Println("[gini] sorted data:", A)
+ fmt.Println("[gini] sorted data:", src)
fmt.Println("[gini] Gini.Value:", gini.Value)
}
@@ -115,7 +115,7 @@ func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) {
// (0.1)
partidx := nsample
- for x, attrVal := range *A {
+ for x, attrVal := range *src {
if attrVal > contVal {
partidx = x
break
@@ -128,17 +128,17 @@ func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) {
probRight := nright / float64(nsample)
if partidx > 0 {
- tleft = (*T)[0:partidx]
- tright = (*T)[partidx:]
+ tleft = (*target)[0:partidx]
+ tright = (*target)[partidx:]
- gainLeft = gini.computeFloat(&tleft, C)
- gainRight = gini.computeFloat(&tright, C)
+ gainLeft = gini.computeFloat(&tleft, classes)
+ gainRight = gini.computeFloat(&tright, classes)
} else {
tleft = nil
- tright = (*T)[0:]
+ tright = (*target)[0:]
gainLeft = 0
- gainRight = gini.computeFloat(&tright, C)
+ gainRight = gini.computeFloat(&tright, classes)
}
// (0.2)