aboutsummaryrefslogtreecommitdiff
path: root/lib/mining/classifier/cart
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2023-05-20 13:42:39 +0700
committerShulhan <ms@kilabit.info>2023-05-20 13:44:07 +0700
commit3eae1d3df5eeef14f9e8389895bb6b835ac2cf78 (patch)
treef35ca6c111bed2ecf85ada965accf22ce83d3e8c /lib/mining/classifier/cart
parentf43b8ead50575c6a279bef403af0204df98323c9 (diff)
downloadpakakeh.go-3eae1d3df5eeef14f9e8389895bb6b835ac2cf78.tar.xz
all: remove any usage of debug.Value in all packages
Using a global debug value for all packages turned out not to be a good idea.
Diffstat (limited to 'lib/mining/classifier/cart')
-rw-r--r--lib/mining/classifier/cart/cart.go54
1 files changed, 0 insertions, 54 deletions
diff --git a/lib/mining/classifier/cart/cart.go b/lib/mining/classifier/cart/cart.go
index 4ee79198..99eb5b5d 100644
--- a/lib/mining/classifier/cart/cart.go
+++ b/lib/mining/classifier/cart/cart.go
@@ -17,7 +17,6 @@ package cart
import (
"fmt"
- "github.com/shuLhan/share/lib/debug"
"github.com/shuLhan/share/lib/mining/gain/gini"
"github.com/shuLhan/share/lib/mining/tree/binary"
"github.com/shuLhan/share/lib/numbers"
@@ -105,11 +104,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
nrow := claset.GetNRow()
if nrow <= 0 {
- if debug.Value >= 2 {
- fmt.Printf("[cart] empty dataset (%s) : %v\n",
- claset.MajorityClass(), claset)
- }
-
node.Value = NodeValue{
IsLeaf: true,
Class: claset.MajorityClass(),
@@ -122,11 +116,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
// is set to that class.
single, name := claset.IsInSingleClass()
if single {
- if debug.Value >= 2 {
- fmt.Printf("[cart] in single class (%s): %v\n", name,
- claset.GetColumns())
- }
-
node.Value = NodeValue{
IsLeaf: true,
Class: name,
@@ -135,10 +124,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
return node, nil
}
- if debug.Value >= 2 {
- fmt.Println("[cart] claset:", claset)
- }
-
// calculate the Gini gain for each attribute.
gains := runtime.computeGain(claset)
@@ -149,12 +134,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
// if maxgain value is 0, use majority class as node and terminate
// the process
if MaxGain.GetMaxGainValue() == 0 {
- if debug.Value >= 2 {
- fmt.Println("[cart] max gain 0 with target",
- claset.GetClassAsStrings(),
- " and majority class is ", claset.MajorityClass())
- }
-
node.Value = NodeValue{
IsLeaf: true,
Class: claset.MajorityClass(),
@@ -166,10 +145,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
// using the sorted index in MaxGain, sort all field in dataset
tabula.SortColumnsByIndex(claset, MaxGain.SortedIndex)
- if debug.Value >= 2 {
- fmt.Println("[cart] maxgain:", MaxGain)
- }
-
// Now that we have attribute with max gain in MaxGainIdx, and their
// gain and partition value in Gains[MaxGainIdx] and
// GetMaxPartValue(), we split the dataset based on type of max-gain
@@ -187,11 +162,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
splitV = attrSubV[0]
}
- if debug.Value >= 2 {
- fmt.Println("[cart] maxgainindex:", MaxGainIdx)
- fmt.Println("[cart] split v:", splitV)
- }
-
node.Value = NodeValue{
SplitAttrName: claset.GetColumn(MaxGainIdx).GetName(),
IsLeaf: false,
@@ -286,11 +256,6 @@ func (runtime *Runtime) SelectRandomFeature(claset tabula.ClasetInterface) {
col := claset.GetColumn(idx)
col.Flag &^= ColFlagSkip
}
-
- if debug.Value >= 1 {
- fmt.Println("[cart] selected random features:", pickedIdx)
- fmt.Println("[cart] selected columns :", claset.GetColumns())
- }
}
// computeGain calculate the gini index for each value in each attribute.
@@ -345,19 +310,10 @@ func (runtime *Runtime) computeGain(claset tabula.ClasetInterface) (
attr := col.ToStringSlice()
attrV := col.ValueSpace
- if debug.Value >= 2 {
- fmt.Println("[cart] attr :", attr)
- fmt.Println("[cart] attrV:", attrV)
- }
-
target := claset.GetClassAsStrings()
gains[x].ComputeDiscrete(&attr, &attrV, &target,
&classVS)
}
-
- if debug.Value >= 2 {
- fmt.Println("[cart] gain :", gains[x])
- }
}
return gains
}
@@ -415,11 +371,6 @@ func (runtime *Runtime) CountOOBError(oob tabula.Claset) (
// save the original target to be compared later.
origTarget := oob.GetClassAsStrings()
- if debug.Value >= 2 {
- fmt.Println("[cart] OOB:", oob.Columns)
- fmt.Println("[cart] TREE:", &runtime.Tree)
- }
-
// reset the target.
oobtarget := oob.GetClassColumn()
oobtarget.ClearValues()
@@ -434,11 +385,6 @@ func (runtime *Runtime) CountOOBError(oob tabula.Claset) (
target := oobtarget.ToStringSlice()
- if debug.Value >= 2 {
- fmt.Println("[cart] original target:", origTarget)
- fmt.Println("[cart] classify target:", target)
- }
-
// count how many target value is miss-classified.
runtime.OOBErrVal, _, _ = libstrings.CountMissRate(origTarget, target)