diff options
| author | Shulhan <ms@kilabit.info> | 2019-01-29 05:09:28 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2019-01-29 05:09:28 +0700 |
| commit | 42c47e88bb17d9edbac608b48150fa244e40df22 (patch) | |
| tree | 9cc7fb834b06c7c0fac862331f9e8aaf46fcb918 | |
| parent | 4fa1b3f0ba614703dc02bd781cc25372fb38d514 (diff) | |
| download | pakakeh.go-42c47e88bb17d9edbac608b48150fa244e40df22.tar.xz | |
lib/mining: fix linter warnings on using capitalized parameters
| -rw-r--r-- | lib/mining/classifier/cart/cart.go | 70 | ||||
| -rw-r--r-- | lib/mining/gain/gini/gini.go | 95 | ||||
| -rw-r--r-- | lib/mining/gain/gini/ginifloat.go | 46 |
3 files changed, 105 insertions, 106 deletions
diff --git a/lib/mining/classifier/cart/cart.go b/lib/mining/classifier/cart/cart.go index 5d0a8008..1745bb8f 100644 --- a/lib/mining/classifier/cart/cart.go +++ b/lib/mining/classifier/cart/cart.go @@ -61,7 +61,7 @@ type Runtime struct { // // New create new Runtime object. // -func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) ( +func New(claset tabula.ClasetInterface, splitMethod string, nRandomFeature int) ( *Runtime, error, ) { runtime := &Runtime{ @@ -70,7 +70,7 @@ func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) ( Tree: binary.Tree{}, } - e := runtime.Build(D) + e := runtime.Build(claset) if e != nil { return nil, e } @@ -81,7 +81,7 @@ func New(D tabula.ClasetInterface, splitMethod string, nRandomFeature int) ( // // Build will create a tree using CART algorithm. // -func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) { +func (runtime *Runtime) Build(claset tabula.ClasetInterface) (e error) { // Re-check input configuration. switch runtime.SplitMethod { case SplitMethodGini: @@ -91,7 +91,7 @@ func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) { runtime.SplitMethod = SplitMethodGini } - runtime.Tree.Root, e = runtime.splitTreeByGain(D) + runtime.Tree.Root, e = runtime.splitTreeByGain(claset) return } @@ -102,27 +102,27 @@ func (runtime *Runtime) Build(D tabula.ClasetInterface) (e error) { // // Return node with the split information. // -func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( +func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) ( node *binary.BTNode, e error, ) { node = &binary.BTNode{} - D.RecountMajorMinor() + claset.RecountMajorMinor() // if dataset is empty return node labeled with majority classes in // dataset. - nrow := D.GetNRow() + nrow := claset.GetNRow() if nrow <= 0 { if debug.Value >= 2 { fmt.Printf("[cart] empty dataset (%s) : %v\n", - D.MajorityClass(), D) + claset.MajorityClass(), claset) } node.Value = NodeValue{ IsLeaf: true, - Class: D.MajorityClass(), + Class: claset.MajorityClass(), Size: 0, } return node, nil @@ -130,11 +130,11 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( // if all dataset is in the same class, return node as leaf with class // is set to that class. - single, name := D.IsInSingleClass() + single, name := claset.IsInSingleClass() if single { if debug.Value >= 2 { fmt.Printf("[cart] in single class (%s): %v\n", name, - D.GetColumns()) + claset.GetColumns()) } node.Value = NodeValue{ @@ -146,11 +146,11 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( } if debug.Value >= 2 { - fmt.Println("[cart] D:", D) + fmt.Println("[cart] claset:", claset) } // calculate the Gini gain for each attribute. - gains := runtime.computeGain(D) + gains := runtime.computeGain(claset) // get attribute with maximum Gini gain. MaxGainIdx := gini.FindMaxGain(&gains) @@ -161,20 +161,20 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( if MaxGain.GetMaxGainValue() == 0 { if debug.Value >= 2 { fmt.Println("[cart] max gain 0 with target", - D.GetClassAsStrings(), - " and majority class is ", D.MajorityClass()) + claset.GetClassAsStrings(), + " and majority class is ", claset.MajorityClass()) } node.Value = NodeValue{ IsLeaf: true, - Class: D.MajorityClass(), + Class: claset.MajorityClass(), Size: 0, } return node, nil } // using the sorted index in MaxGain, sort all field in dataset - tabula.SortColumnsByIndex(D, MaxGain.SortedIndex) + tabula.SortColumnsByIndex(claset, MaxGain.SortedIndex) if debug.Value >= 2 { fmt.Println("[cart] maxgain:", MaxGain) @@ -203,7 +203,7 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( } node.Value = NodeValue{ - SplitAttrName: D.GetColumn(MaxGainIdx).GetName(), + SplitAttrName: claset.GetColumn(MaxGainIdx).GetName(), IsLeaf: false, IsContinu: MaxGain.IsContinu, Size: nrow, @@ -211,7 +211,7 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( SplitV: splitV, } - dsL, dsR, e := tabula.SplitRowsByValue(D, MaxGainIdx, splitV) + dsL, dsR, e := tabula.SplitRowsByValue(claset, MaxGainIdx, splitV) if e != nil { return node, e @@ -258,13 +258,13 @@ func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) ( // SelectRandomFeature if NRandomFeature is greater than zero, select and // compute gain in n random features instead of in all features -func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) { +func (runtime *Runtime) SelectRandomFeature(claset tabula.ClasetInterface) { if runtime.NRandomFeature <= 0 { // all features selected return } - ncols := D.GetNColumn() + ncols := claset.GetNColumn() // count all features minus class nfeature := ncols - 1 @@ -275,8 +275,8 @@ func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) { } // exclude class index and parent node index - excludeIdx := []int{D.GetClassIndex()} - cols := D.GetColumns() + excludeIdx := []int{claset.GetClassIndex()} + cols := claset.GetColumns() for x, col := range *cols { if (col.Flag & ColFlagParent) == ColFlagParent { excludeIdx = append(excludeIdx, x) @@ -293,34 +293,34 @@ func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) { pickedIdx = append(pickedIdx, idx) // Remove skip flag on selected column - col := D.GetColumn(idx) + col := claset.GetColumn(idx) col.Flag &^= ColFlagSkip } if debug.Value >= 1 { fmt.Println("[cart] selected random features:", pickedIdx) - fmt.Println("[cart] selected columns :", D.GetColumns()) + fmt.Println("[cart] selected columns :", claset.GetColumns()) } } // // computeGain calculate the gini index for each value in each attribute. // -func (runtime *Runtime) computeGain(D tabula.ClasetInterface) ( +func (runtime *Runtime) computeGain(claset tabula.ClasetInterface) ( gains []gini.Gini, ) { if runtime.SplitMethod == SplitMethodGini { // create gains value for all attribute minus target class. - gains = make([]gini.Gini, D.GetNColumn()) + gains = make([]gini.Gini, claset.GetNColumn()) } - runtime.SelectRandomFeature(D) + runtime.SelectRandomFeature(claset) - classVS := D.GetClassValueSpace() - classIdx := D.GetClassIndex() - classType := D.GetClassType() + classVS := claset.GetClassValueSpace() + classIdx := claset.GetClassIndex() + classType := claset.GetClassType() - for x, col := range *D.GetColumns() { + for x, col := range *claset.GetColumns() { // skip class attribute. if x == classIdx { continue @@ -343,11 +343,11 @@ func (runtime *Runtime) computeGain(D tabula.ClasetInterface) ( attr := col.ToFloatSlice() if classType == tabula.TString { - target := D.GetClassAsStrings() + target := claset.GetClassAsStrings() gains[x].ComputeContinu(&attr, &target, &classVS) } else { - targetReal := D.GetClassAsReals() + targetReal := claset.GetClassAsReals() classVSReal := libstrings.ToFloat64(classVS) gains[x].ComputeContinuFloat(&attr, @@ -362,7 +362,7 @@ func (runtime *Runtime) computeGain(D tabula.ClasetInterface) ( fmt.Println("[cart] attrV:", attrV) } - target := D.GetClassAsStrings() + target := claset.GetClassAsStrings() gains[x].ComputeDiscrete(&attr, &attrV, &target, &classVS) } diff --git a/lib/mining/gain/gini/gini.go b/lib/mining/gain/gini/gini.go index 911ea130..e4f6c28f 100644 --- a/lib/mining/gain/gini/gini.go +++ b/lib/mining/gain/gini/gini.go @@ -55,14 +55,13 @@ type Gini struct { } // -// ComputeDiscrete Given an attribute A with discreate value 'discval', and the -// target attribute T which contain N classes in C, compute the information gain -// of A. +// ComputeDiscrete Given an attribute "src" with discrete value 'discval', and +// the target attribute "target" which contain n classes, compute the +// information gain of "src". // // The result is saved as gain value in MaxGainValue for each partition. // -func (gini *Gini) ComputeDiscrete(A *[]string, discval *[]string, T *[]string, - C *[]string) { +func (gini *Gini) ComputeDiscrete(src, discval, target, classes *[]string) { gini.IsContinu = false // create partition for possible combination of discrete values. @@ -77,21 +76,21 @@ func (gini *Gini) ComputeDiscrete(A *[]string, discval *[]string, T *[]string, gini.MinIndexValue = 1.0 // compute gini index for all samples - gini.Value = gini.compute(T, C) + gini.Value = gini.compute(target, classes) - gini.computeDiscreteGain(A, T, C) + gini.computeDiscreteGain(src, target, classes) } // // computeDiscreteGain will compute Gini index and Gain for each partition. // -func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) { +func (gini *Gini) computeDiscreteGain(src, target, classes *[]string) { // number of samples - nsample := float64(len(*A)) + nsample := float64(len(*src)) if debug.Value >= 3 { - fmt.Println("[gini] sample:", T) - fmt.Printf("[gini] Gini(a=%s) = %f\n", (*A), gini.Value) + fmt.Println("[gini] sample:", target) + fmt.Printf("[gini] Gini(a=%s) = %f\n", (*src), gini.Value) } // compute gini index for each discrete values @@ -107,7 +106,7 @@ func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) { var subT []string for _, el := range part { - for t, a := range *A { + for t, a := range *src { if a != el { continue } @@ -115,12 +114,12 @@ func (gini *Gini) computeDiscreteGain(A *[]string, T *[]string, C *[]string) { // count how many sample with this discrete value ndisc++ // split the target by discrete value - subT = append(subT, (*T)[t]) + subT = append(subT, (*target)[t]) } } // compute gini index for subtarget - giniIndex := gini.compute(&subT, C) + giniIndex := gini.compute(&subT, classes) // compute probabilities of discrete value through all // samples @@ -177,21 +176,21 @@ func (gini *Gini) createDiscretePartition(discval []string) { } // -// ComputeContinu Given an attribute A and the target attribute T which contain -// N classes in C, compute the information gain of A. +// ComputeContinu Given an attribute "src" and the target attribute "target" +// which contain n classes, compute the information gain of "src". // // The result of Gini partitions value, Gini Index, and Gini Gain is saved in // ContinuPart, Index, and Gain. // -func (gini *Gini) ComputeContinu(A *[]float64, T *[]string, C *[]string) { +func (gini *Gini) ComputeContinu(src *[]float64, target, classes *[]string) { gini.IsContinu = true // make a copy of attribute and target. - A2 := make([]float64, len(*A)) - copy(A2, *A) + A2 := make([]float64, len(*src)) + copy(A2, *src) - T2 := make([]string, len(*T)) - copy(T2, *T) + T2 := make([]string, len(*target)) + copy(T2, *target) gini.SortedIndex = numbers.Floats64IndirectSort(A2, true) @@ -211,23 +210,23 @@ func (gini *Gini) ComputeContinu(A *[]float64, T *[]string, C *[]string) { gini.MinIndexValue = 1.0 // compute gini index for all samples - gini.Value = gini.compute(&T2, C) + gini.Value = gini.compute(&T2, classes) - gini.computeContinuGain(&A2, &T2, C) + gini.computeContinuGain(&A2, &T2, classes) } // // createContinuPartition for dividing class and computing Gini index. // -// This is assuming that the data `A` has been sorted in ascending order. +// This is assuming that the data `src` has been sorted in ascending order. // -func (gini *Gini) createContinuPartition(A *[]float64) { - l := len(*A) +func (gini *Gini) createContinuPartition(src *[]float64) { + l := len(*src) gini.ContinuPart = make([]float64, 0) // loop from first index until last index - 1 for i := 0; i < l-1; i++ { - sum := (*A)[i] + (*A)[i+1] + sum := (*src)[i] + (*src)[i+1] med := sum / 2.0 // If median is zero, its mean both left and right value is @@ -238,11 +237,11 @@ func (gini *Gini) createContinuPartition(A *[]float64) { } // Reject if median is contained in attribute's value. - // We use equality because if both A[i] and A[i+1] value is - // equal, the median is equal to both of them. + // We use equality because if both src[i] and src[i+1] value + // is equal, the median is equal to both of them. exist := false for j := 0; j <= i; j++ { - if (*A)[j] == med { + if (*src)[j] == med { exist = true break } @@ -260,13 +259,13 @@ func (gini *Gini) createContinuPartition(A *[]float64) { // // 1 - sum (probability of each classes in T) // -func (gini *Gini) compute(T *[]string, C *[]string) float64 { - n := float64(len(*T)) +func (gini *Gini) compute(target, classes *[]string) float64 { + n := float64(len(*target)) if n == 0 { return 0 } - classCount := libstrings.CountTokens(*T, *C, true) + classCount := libstrings.CountTokens(*target, *classes, true) var sump2 float64 @@ -276,7 +275,7 @@ func (gini *Gini) compute(T *[]string, C *[]string) float64 { if debug.Value >= 3 { fmt.Printf("[gini] compute (%s): (%d/%f)^2 = %f\n", - (*C)[x], v, n, p*p) + (*classes)[x], v, n, p*p) } } @@ -296,14 +295,14 @@ func (gini *Gini) compute(T *[]string, C *[]string) float64 { // - left is sub-sample from S that is less than part value. // - right is sub-sample from S that is greater than part value. // -func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) { +func (gini *Gini) computeContinuGain(src *[]float64, target, classes *[]string) { var gleft, gright float64 var tleft, tright []string - nsample := len(*A) + nsample := len(*src) if debug.Value >= 2 { - fmt.Println("[gini] sorted data:", A) + fmt.Println("[gini] sorted data:", src) fmt.Println("[gini] Gini.Value:", gini.Value) } @@ -312,7 +311,7 @@ func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) { // find the split of samples between partition based on // partition value partidx := nsample - for x, attrVal := range *A { + for x, attrVal := range *src { if attrVal > contVal { partidx = x break @@ -325,17 +324,17 @@ func (gini *Gini) computeContinuGain(A *[]float64, T *[]string, C *[]string) { pright := float64(nright) / float64(nsample) if partidx > 0 { - tleft = (*T)[0:partidx] - tright = (*T)[partidx:] + tleft = (*target)[0:partidx] + tright = (*target)[partidx:] - gleft = gini.compute(&tleft, C) - gright = gini.compute(&tright, C) + gleft = gini.compute(&tleft, classes) + gright = gini.compute(&tright, classes) } else { tleft = nil - tright = (*T)[0:] + tright = (*target)[0:] gleft = 0 - gright = gini.compute(&tright, C) + gright = gini.compute(&tright, classes) } // count class in partition @@ -404,7 +403,7 @@ func (gini *Gini) GetMinIndexValue() float64 { // FindMaxGain find the attribute and value that have the maximum gain. // The returned value is index of attribute. // -func FindMaxGain(gains *[]Gini) (MaxGainIdx int) { +func FindMaxGain(gains *[]Gini) (maxGainIdx int) { var gainValue float64 var maxGainValue float64 @@ -415,7 +414,7 @@ func FindMaxGain(gains *[]Gini) (MaxGainIdx int) { gainValue = (*gains)[i].GetMaxGainValue() if gainValue > maxGainValue { maxGainValue = gainValue - MaxGainIdx = i + maxGainIdx = i } } @@ -425,7 +424,7 @@ func FindMaxGain(gains *[]Gini) (MaxGainIdx int) { // // FindMinGiniIndex return the index of attribute that have the minimum Gini index. // -func FindMinGiniIndex(ginis *[]Gini) (MinIndexIdx int) { +func FindMinGiniIndex(ginis *[]Gini) (minIndexIdx int) { var indexV float64 minIndexV := 1.0 @@ -433,7 +432,7 @@ func FindMinGiniIndex(ginis *[]Gini) (MinIndexIdx int) { indexV = (*ginis)[i].GetMinIndexValue() if indexV > minIndexV { minIndexV = indexV - MinIndexIdx = i + minIndexIdx = i } } diff --git a/lib/mining/gain/gini/ginifloat.go b/lib/mining/gain/gini/ginifloat.go index b890b39d..c0e51824 100644 --- a/lib/mining/gain/gini/ginifloat.go +++ b/lib/mining/gain/gini/ginifloat.go @@ -25,20 +25,20 @@ import ( // (3) Create temporary space for gini index and gini gain. // (4) Compute gini index for all target. // -func (gini *Gini) ComputeContinuFloat(A, T, C *[]float64) { +func (gini *Gini) ComputeContinuFloat(src, target, classes *[]float64) { gini.IsContinu = true - gini.SortedIndex = numbers.Floats64IndirectSort(*A, true) + gini.SortedIndex = numbers.Floats64IndirectSort(*src, true) if debug.Value >= 1 { - fmt.Println("[gini] attr sorted :", A) + fmt.Println("[gini] attr sorted :", src) } // (1) - numbers.Floats64SortByIndex(T, gini.SortedIndex) + numbers.Floats64SortByIndex(target, gini.SortedIndex) // (2) - gini.createContinuPartition(A) + gini.createContinuPartition(src) // (3) gini.Index = make([]float64, len(gini.ContinuPart)) @@ -46,25 +46,25 @@ func (gini *Gini) ComputeContinuFloat(A, T, C *[]float64) { gini.MinIndexValue = 1.0 // (4) - gini.Value = gini.computeFloat(T, C) + gini.Value = gini.computeFloat(target, classes) - gini.computeContinuGainFloat(A, T, C) + gini.computeContinuGainFloat(src, target, classes) } // -// computeFloat will compute Gini value for attribute T. +// computeFloat will compute Gini value for attribute "target". // // Gini value is computed using formula, // -// 1 - sum (probability of each classes in T) +// 1 - sum (probability of each classes in target) // -func (gini *Gini) computeFloat(T, C *[]float64) float64 { - n := float64(len(*T)) +func (gini *Gini) computeFloat(target, classes *[]float64) float64 { + n := float64(len(*target)) if n == 0 { return 0 } - classCount := numbers.Floats64Counts(*T, *C) + classCount := numbers.Floats64Counts(*target, *classes) var sump2 float64 @@ -74,7 +74,7 @@ func (gini *Gini) computeFloat(T, C *[]float64) float64 { if debug.Value >= 3 { fmt.Printf("[gini] compute (%f): (%d/%f)^2 = %f\n", - (*C)[x], v, n, p*p) + (*classes)[x], v, n, p*p) } } @@ -99,14 +99,14 @@ func (gini *Gini) computeFloat(T, C *[]float64) float64 { // (0.1) Find the split of samples between partition based on partition value. // (0.2) Count class in partition. // -func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) { +func (gini *Gini) computeContinuGainFloat(src, target, classes *[]float64) { var gainLeft, gainRight float64 var tleft, tright []float64 - nsample := len(*A) + nsample := len(*src) if debug.Value >= 2 { - fmt.Println("[gini] sorted data:", A) + fmt.Println("[gini] sorted data:", src) fmt.Println("[gini] Gini.Value:", gini.Value) } @@ -115,7 +115,7 @@ func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) { // (0.1) partidx := nsample - for x, attrVal := range *A { + for x, attrVal := range *src { if attrVal > contVal { partidx = x break @@ -128,17 +128,17 @@ func (gini *Gini) computeContinuGainFloat(A, T, C *[]float64) { probRight := nright / float64(nsample) if partidx > 0 { - tleft = (*T)[0:partidx] - tright = (*T)[partidx:] + tleft = (*target)[0:partidx] + tright = (*target)[partidx:] - gainLeft = gini.computeFloat(&tleft, C) - gainRight = gini.computeFloat(&tright, C) + gainLeft = gini.computeFloat(&tleft, classes) + gainRight = gini.computeFloat(&tright, classes) } else { tleft = nil - tright = (*T)[0:] + tright = (*target)[0:] gainLeft = 0 - gainRight = gini.computeFloat(&tright, C) + gainRight = gini.computeFloat(&tright, classes) } // (0.2) |
