aboutsummaryrefslogtreecommitdiff
path: root/lib/mining/classifier
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2023-05-20 13:42:39 +0700
committer Shulhan <ms@kilabit.info> 2023-05-20 13:44:07 +0700
commit 3eae1d3df5eeef14f9e8389895bb6b835ac2cf78 (patch)
tree f35ca6c111bed2ecf85ada965accf22ce83d3e8c /lib/mining/classifier
parent f43b8ead50575c6a279bef403af0204df98323c9 (diff)
download pakakeh.go-3eae1d3df5eeef14f9e8389895bb6b835ac2cf78.tar.xz
all: remove any usage of debug.Value in all packages
Using a single global debug value for all packages turns out not to be a good idea.
Diffstat (limited to 'lib/mining/classifier')
-rw-r--r-- lib/mining/classifier/cart/cart.go 54
-rw-r--r-- lib/mining/classifier/crf/crf.go 25
-rw-r--r-- lib/mining/classifier/rf/rf.go 18
-rw-r--r-- lib/mining/classifier/runtime.go 10
4 files changed, 0 insertions, 107 deletions
diff --git a/lib/mining/classifier/cart/cart.go b/lib/mining/classifier/cart/cart.go
index 4ee79198..99eb5b5d 100644
--- a/lib/mining/classifier/cart/cart.go
+++ b/lib/mining/classifier/cart/cart.go
@@ -17,7 +17,6 @@ package cart
import (
"fmt"
- "github.com/shuLhan/share/lib/debug"
"github.com/shuLhan/share/lib/mining/gain/gini"
"github.com/shuLhan/share/lib/mining/tree/binary"
"github.com/shuLhan/share/lib/numbers"
@@ -105,11 +104,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
nrow := claset.GetNRow()
if nrow <= 0 {
- if debug.Value >= 2 {
- fmt.Printf("[cart] empty dataset (%s) : %v\n",
- claset.MajorityClass(), claset)
- }
-
node.Value = NodeValue{
IsLeaf: true,
Class: claset.MajorityClass(),
@@ -122,11 +116,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
// is set to that class.
single, name := claset.IsInSingleClass()
if single {
- if debug.Value >= 2 {
- fmt.Printf("[cart] in single class (%s): %v\n", name,
- claset.GetColumns())
- }
-
node.Value = NodeValue{
IsLeaf: true,
Class: name,
@@ -135,10 +124,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
return node, nil
}
- if debug.Value >= 2 {
- fmt.Println("[cart] claset:", claset)
- }
-
// calculate the Gini gain for each attribute.
gains := runtime.computeGain(claset)
@@ -149,12 +134,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
// if maxgain value is 0, use majority class as node and terminate
// the process
if MaxGain.GetMaxGainValue() == 0 {
- if debug.Value >= 2 {
- fmt.Println("[cart] max gain 0 with target",
- claset.GetClassAsStrings(),
- " and majority class is ", claset.MajorityClass())
- }
-
node.Value = NodeValue{
IsLeaf: true,
Class: claset.MajorityClass(),
@@ -166,10 +145,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
// using the sorted index in MaxGain, sort all field in dataset
tabula.SortColumnsByIndex(claset, MaxGain.SortedIndex)
- if debug.Value >= 2 {
- fmt.Println("[cart] maxgain:", MaxGain)
- }
-
// Now that we have attribute with max gain in MaxGainIdx, and their
// gain dan partition value in Gains[MaxGainIdx] and
// GetMaxPartValue(), we split the dataset based on type of max-gain
@@ -187,11 +162,6 @@ func (runtime *Runtime) splitTreeByGain(claset tabula.ClasetInterface) (
splitV = attrSubV[0]
}
- if debug.Value >= 2 {
- fmt.Println("[cart] maxgainindex:", MaxGainIdx)
- fmt.Println("[cart] split v:", splitV)
- }
-
node.Value = NodeValue{
SplitAttrName: claset.GetColumn(MaxGainIdx).GetName(),
IsLeaf: false,
@@ -286,11 +256,6 @@ func (runtime *Runtime) SelectRandomFeature(claset tabula.ClasetInterface) {
col := claset.GetColumn(idx)
col.Flag &^= ColFlagSkip
}
-
- if debug.Value >= 1 {
- fmt.Println("[cart] selected random features:", pickedIdx)
- fmt.Println("[cart] selected columns :", claset.GetColumns())
- }
}
// computeGain calculate the gini index for each value in each attribute.
@@ -345,19 +310,10 @@ func (runtime *Runtime) computeGain(claset tabula.ClasetInterface) (
attr := col.ToStringSlice()
attrV := col.ValueSpace
- if debug.Value >= 2 {
- fmt.Println("[cart] attr :", attr)
- fmt.Println("[cart] attrV:", attrV)
- }
-
target := claset.GetClassAsStrings()
gains[x].ComputeDiscrete(&attr, &attrV, &target,
&classVS)
}
-
- if debug.Value >= 2 {
- fmt.Println("[cart] gain :", gains[x])
- }
}
return gains
}
@@ -415,11 +371,6 @@ func (runtime *Runtime) CountOOBError(oob tabula.Claset) (
// save the original target to be compared later.
origTarget := oob.GetClassAsStrings()
- if debug.Value >= 2 {
- fmt.Println("[cart] OOB:", oob.Columns)
- fmt.Println("[cart] TREE:", &runtime.Tree)
- }
-
// reset the target.
oobtarget := oob.GetClassColumn()
oobtarget.ClearValues()
@@ -434,11 +385,6 @@ func (runtime *Runtime) CountOOBError(oob tabula.Claset) (
target := oobtarget.ToStringSlice()
- if debug.Value >= 2 {
- fmt.Println("[cart] original target:", origTarget)
- fmt.Println("[cart] classify target:", target)
- }
-
// count how many target value is miss-classified.
runtime.OOBErrVal, _, _ = libstrings.CountMissRate(origTarget, target)
diff --git a/lib/mining/classifier/crf/crf.go b/lib/mining/classifier/crf/crf.go
index 3c6f25c8..1a40e1c0 100644
--- a/lib/mining/classifier/crf/crf.go
+++ b/lib/mining/classifier/crf/crf.go
@@ -15,7 +15,6 @@ import (
"math"
"sort"
- "github.com/shuLhan/share/lib/debug"
"github.com/shuLhan/share/lib/floats64"
"github.com/shuLhan/share/lib/mining/classifier"
"github.com/shuLhan/share/lib/mining/classifier/rf"
@@ -151,10 +150,6 @@ func (crf *Runtime) Build(samples tabula.ClasetInterface) (e error) {
fmt.Println(tag, "Config:", crf)
for x := 0; x < crf.NStage; x++ {
- if debug.Value >= 1 {
- fmt.Println(tag, "Stage #", x)
- }
-
forest, e := crf.createForest(samples)
if e != nil {
return e
@@ -206,10 +201,6 @@ func (crf *Runtime) createForest(samples tabula.ClasetInterface) (
// (2)
for t := 0; t < crf.NTree; t++ {
- if debug.Value >= 2 {
- fmt.Println(tag, "Tree #", t)
- }
-
// (2.1)
for {
cm, stat, e = forest.GrowTree(samples)
@@ -233,10 +224,6 @@ func (crf *Runtime) createForest(samples tabula.ClasetInterface) (
// (3)
crf.computeWeight(stat)
- if debug.Value >= 1 {
- fmt.Println(tag, "Weight:", stat.FMeasure)
- }
-
// (4)
crf.deleteTrueNegative(samples, cm)
@@ -261,10 +248,6 @@ func (crf *Runtime) finalizeStage(forest *rf.Runtime) (e error) {
crf.AddStat(stat)
crf.ComputeStatTotal(stat)
- if debug.Value >= 1 {
- crf.PrintStatTotal(nil)
- }
-
// (7)
crf.AddForest(forest)
@@ -309,10 +292,6 @@ func (crf *Runtime) deleteTrueNegative(samples tabula.ClasetInterface,
c++
}
}
-
- if debug.Value >= 1 {
- fmt.Println(tag, "# TN", len(tnids), "# deleted", c)
- }
}
// refillWithFP will copy the false-positive data in training set `tnset`
@@ -338,10 +317,6 @@ func (crf *Runtime) refillWithFP(samples, tnset tabula.ClasetInterface,
c++
}
}
-
- if debug.Value >= 1 {
- fmt.Println(tag, "# FP", len(fpids), "# refilled", c)
- }
}
// runTPSet will run true-positive set into trained stage, to get the
diff --git a/lib/mining/classifier/rf/rf.go b/lib/mining/classifier/rf/rf.go
index 38612b97..3eb08aec 100644
--- a/lib/mining/classifier/rf/rf.go
+++ b/lib/mining/classifier/rf/rf.go
@@ -15,7 +15,6 @@ import (
"fmt"
"math"
- "github.com/shuLhan/share/lib/debug"
"github.com/shuLhan/share/lib/floats64"
"github.com/shuLhan/share/lib/ints"
"github.com/shuLhan/share/lib/mining/classifier"
@@ -147,10 +146,6 @@ func (forest *Runtime) Build(samples tabula.ClasetInterface) (e error) {
// (1)
for t := 0; t < forest.NTree; t++ {
- if debug.Value >= 1 {
- fmt.Println(tag, "tree #", t)
- }
-
// (1.1)
for {
_, _, e = forest.GrowTree(samples)
@@ -191,11 +186,6 @@ func (forest *Runtime) GrowTree(samples tabula.ClasetInterface) (
bagset := bag.(tabula.ClasetInterface)
- if debug.Value >= 2 {
- bagset.RecountMajorMinor()
- fmt.Println(tag, "Bagging:", bagset)
- }
-
// (2)
cart, e := cart.New(bagset, cart.SplitMethodGini,
forest.NRandomFeature)
@@ -219,19 +209,11 @@ func (forest *Runtime) GrowTree(samples tabula.ClasetInterface) (
stat.End()
- if debug.Value >= 3 && forest.RunOOB {
- fmt.Println(tag, "Elapsed time (s):", stat.ElapsedTime)
- }
-
forest.AddStat(stat)
// (6)
if forest.RunOOB {
forest.ComputeStatFromCM(stat, cm)
-
- if debug.Value >= 2 {
- fmt.Println(tag, "OOB stat:", stat)
- }
}
forest.ComputeStatTotal(stat)
diff --git a/lib/mining/classifier/runtime.go b/lib/mining/classifier/runtime.go
index 963c54b6..2022e8c4 100644
--- a/lib/mining/classifier/runtime.go
+++ b/lib/mining/classifier/runtime.go
@@ -8,7 +8,6 @@ import (
"fmt"
"math"
- "github.com/shuLhan/share/lib/debug"
"github.com/shuLhan/share/lib/dsv"
"github.com/shuLhan/share/lib/floats64"
"github.com/shuLhan/share/lib/ints"
@@ -110,10 +109,6 @@ func (rt *Runtime) ComputeCM(sampleIds []int,
cm.ComputeStrings(vs, actuals, predicts)
cm.GroupIndexPredictionsStrings(sampleIds, actuals, predicts)
- if debug.Value >= 2 {
- fmt.Println(tag, cm)
- }
-
return cm
}
@@ -170,11 +165,6 @@ func (rt *Runtime) ComputeStatFromCM(stat *Stat, cm *CM) {
} else {
stat.Accuracy = float64(stat.TP+stat.TN) / t
}
-
- if debug.Value >= 1 {
- rt.PrintOobStat(stat, cm)
- rt.PrintStat(stat)
- }
}
// ComputeStatTotal compute total statistic.