aboutsummaryrefslogtreecommitdiff
path: root/lib/mining/classifier
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2024-01-24 02:24:36 +0700
committer Shulhan <ms@kilabit.info> 2024-01-24 02:24:58 +0700
commit e0bb07c340e5d0821840b4f09655dc9653dc3105 (patch)
tree f392d8d86b709293b694c1b54e5a5e499d365f59 /lib/mining/classifier
parent b8a84637a476a05097d98a87e5c6af59b0d3e413 (diff)
download pakakeh.go-e0bb07c340e5d0821840b4f09655dc9653dc3105.tar.xz
all: fix the warnings from linter revive
This renames all variables with the suffix "Ids" to use the suffix "ListID".
Diffstat (limited to 'lib/mining/classifier')
-rw-r--r-- lib/mining/classifier/cm.go 72
-rw-r--r-- lib/mining/classifier/cm_test.go 4
-rw-r--r-- lib/mining/classifier/crf/crf.go 8
-rw-r--r-- lib/mining/classifier/crf/crf_test.go 4
-rw-r--r-- lib/mining/classifier/rf/rf.go 18
-rw-r--r-- lib/mining/classifier/runtime.go 12
6 files changed, 59 insertions, 59 deletions
diff --git a/lib/mining/classifier/cm.go b/lib/mining/classifier/cm.go
index 786cf4e4..19d86349 100644
--- a/lib/mining/classifier/cm.go
+++ b/lib/mining/classifier/cm.go
@@ -16,14 +16,14 @@ type CM struct {
// rowNames contain name in each row.
rowNames []string
- // tpIds contain index of true-positive samples.
- tpIds []int
- // fpIds contain index of false-positive samples.
- fpIds []int
- // tnIds contain index of true-negative samples.
- tnIds []int
- // fnIds contain index of false-negative samples.
- fnIds []int
+ // tpListID contain index of true-positive samples.
+ tpListID []int
+ // fpListID contain index of false-positive samples.
+ fpListID []int
+ // tnListID contain index of true-negative samples.
+ tnListID []int
+ // fnListID contain index of false-negative samples.
+ fnListID []int
tabula.Dataset
@@ -201,7 +201,7 @@ func (cm *CM) computeClassError() {
// GroupIndexPredictions given index of samples, group the samples by their
// class of prediction. For example,
//
-// sampleIds: [0, 1, 2, 3, 4, 5]
+// sampleListID: [0, 1, 2, 3, 4, 5]
// actuals: [1, 1, 0, 0, 1, 0]
// predictions: [1, 0, 1, 0, 1, 1]
//
@@ -214,18 +214,18 @@ func (cm *CM) computeClassError() {
// false-negative indices: [1]
//
// This function assume that positive value as "1" and negative value as "0".
-func (cm *CM) GroupIndexPredictions(sampleIds []int,
+func (cm *CM) GroupIndexPredictions(sampleListID []int,
actuals, predictions []int64,
) {
// Reset indices.
- cm.tpIds = nil
- cm.fpIds = nil
- cm.tnIds = nil
- cm.fnIds = nil
+ cm.tpListID = nil
+ cm.fpListID = nil
+ cm.tnListID = nil
+ cm.fnListID = nil
// Make sure we are not out-of-range when looping, always pick the
// minimum length between the three parameters.
- min := len(sampleIds)
+ min := len(sampleListID)
if len(actuals) < min {
min = len(actuals)
}
@@ -236,15 +236,15 @@ func (cm *CM) GroupIndexPredictions(sampleIds []int,
for x := 0; x < min; x++ {
if actuals[x] == 1 {
if predictions[x] == 1 {
- cm.tpIds = append(cm.tpIds, sampleIds[x])
+ cm.tpListID = append(cm.tpListID, sampleListID[x])
} else {
- cm.fnIds = append(cm.fnIds, sampleIds[x])
+ cm.fnListID = append(cm.fnListID, sampleListID[x])
}
} else {
if predictions[x] == 1 {
- cm.fpIds = append(cm.fpIds, sampleIds[x])
+ cm.fpListID = append(cm.fpListID, sampleListID[x])
} else {
- cm.tnIds = append(cm.tnIds, sampleIds[x])
+ cm.tnListID = append(cm.tnListID, sampleListID[x])
}
}
}
@@ -252,22 +252,22 @@ func (cm *CM) GroupIndexPredictions(sampleIds []int,
// GroupIndexPredictionsStrings is an alternative to GroupIndexPredictions
// which work with string class.
-func (cm *CM) GroupIndexPredictionsStrings(sampleIds []int,
+func (cm *CM) GroupIndexPredictionsStrings(sampleListID []int,
actuals, predictions []string,
) {
- if len(sampleIds) == 0 {
+ if len(sampleListID) == 0 {
return
}
// Reset indices.
- cm.tpIds = nil
- cm.fpIds = nil
- cm.tnIds = nil
- cm.fnIds = nil
+ cm.tpListID = nil
+ cm.fpListID = nil
+ cm.tnListID = nil
+ cm.fnListID = nil
// Make sure we are not out-of-range when looping, always pick the
// minimum length between the three parameters.
- min := len(sampleIds)
+ min := len(sampleListID)
if len(actuals) < min {
min = len(actuals)
}
@@ -278,15 +278,15 @@ func (cm *CM) GroupIndexPredictionsStrings(sampleIds []int,
for x := 0; x < min; x++ {
if actuals[x] == "1" {
if predictions[x] == "1" {
- cm.tpIds = append(cm.tpIds, sampleIds[x])
+ cm.tpListID = append(cm.tpListID, sampleListID[x])
} else {
- cm.fnIds = append(cm.fnIds, sampleIds[x])
+ cm.fnListID = append(cm.fnListID, sampleListID[x])
}
} else {
if predictions[x] == "1" {
- cm.fpIds = append(cm.fpIds, sampleIds[x])
+ cm.fpListID = append(cm.fpListID, sampleListID[x])
} else {
- cm.tnIds = append(cm.tnIds, sampleIds[x])
+ cm.tnListID = append(cm.tnListID, sampleListID[x])
}
}
}
@@ -356,22 +356,22 @@ func (cm *CM) TN() int {
// TPIndices return indices of all true-positive samples.
func (cm *CM) TPIndices() []int {
- return cm.tpIds
+ return cm.tpListID
}
// FNIndices return indices of all false-negative samples.
func (cm *CM) FNIndices() []int {
- return cm.fnIds
+ return cm.fnListID
}
// FPIndices return indices of all false-positive samples.
func (cm *CM) FPIndices() []int {
- return cm.fpIds
+ return cm.fpListID
}
// TNIndices return indices of all true-negative samples.
func (cm *CM) TNIndices() []int {
- return cm.tnIds
+ return cm.tnListID
}
// String will return the output of confusion matrix in table like format.
@@ -395,8 +395,8 @@ func (cm *CM) String() (s string) {
s += "\n"
}
- s += fmt.Sprintf("TP-FP indices %d %d\n", len(cm.tpIds), len(cm.fpIds))
- s += fmt.Sprintf("FN-TN indices %d %d\n", len(cm.fnIds), len(cm.tnIds))
+ s += fmt.Sprintf("TP-FP indices %d %d\n", len(cm.tpListID), len(cm.fpListID))
+ s += fmt.Sprintf("FN-TN indices %d %d\n", len(cm.fnListID), len(cm.tnListID))
return
}
diff --git a/lib/mining/classifier/cm_test.go b/lib/mining/classifier/cm_test.go
index 30041388..9c2b023e 100644
--- a/lib/mining/classifier/cm_test.go
+++ b/lib/mining/classifier/cm_test.go
@@ -48,7 +48,7 @@ func TestComputeStrings(t *testing.T) {
}
func TestGroupIndexPredictions(t *testing.T) {
- testIds := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
+ testListID := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
actuals := []int64{1, 1, 1, 1, 0, 0, 0, 0, 0, 0}
predics := []int64{1, 1, 0, 1, 0, 0, 0, 0, 1, 0}
exp := [][]int{
@@ -60,7 +60,7 @@ func TestGroupIndexPredictions(t *testing.T) {
cm := &CM{}
- cm.GroupIndexPredictions(testIds, actuals, predics)
+ cm.GroupIndexPredictions(testListID, actuals, predics)
test.Assert(t, "", exp[0], cm.TPIndices())
test.Assert(t, "", exp[1], cm.FNIndices())
diff --git a/lib/mining/classifier/crf/crf.go b/lib/mining/classifier/crf/crf.go
index eba0dab7..f49e35d4 100644
--- a/lib/mining/classifier/crf/crf.go
+++ b/lib/mining/classifier/crf/crf.go
@@ -328,8 +328,8 @@ func (crf *Runtime) runTPSet(samples tabula.ClasetInterface) {
return
}
- tnIds := numbers.IntCreateSeq(0, crf.tnset.Len()-1)
- _, cm, _ := crf.ClassifySetByWeight(crf.tnset, tnIds)
+ tnListID := numbers.IntCreateSeq(0, crf.tnset.Len()-1)
+ _, cm, _ := crf.ClassifySetByWeight(crf.tnset, tnListID)
crf.refillWithFP(samples, crf.tnset, cm)
}
@@ -358,7 +358,7 @@ func (crf *Runtime) runTPSet(samples tabula.ClasetInterface) {
// (1.4) Save stage probabilities for positive class.
// (2) Compute confusion matrix.
func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
- sampleIds []int,
+ sampleListID []int,
) (
predicts []string, cm *classifier.CM, probs []float64,
) {
@@ -410,7 +410,7 @@ func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
// (2)
actuals := samples.GetClassAsStrings()
- cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)
+ cm = crf.ComputeCM(sampleListID, vs, actuals, predicts)
crf.ComputeStatFromCM(&stat, cm)
stat.End()
diff --git a/lib/mining/classifier/crf/crf_test.go b/lib/mining/classifier/crf/crf_test.go
index 402f6213..d84045f0 100644
--- a/lib/mining/classifier/crf/crf_test.go
+++ b/lib/mining/classifier/crf/crf_test.go
@@ -23,7 +23,7 @@ func runCRF(t *testing.T, sampleFile, statFile, perfFile string, nstage, ntree i
}
nbag := (samples.Len() * 63) / 100
- train, test, _, testIds := tabula.RandomPickRows(&samples, nbag, false)
+ train, test, _, testListID := tabula.RandomPickRows(&samples, nbag, false)
trainset := train.(tabula.ClasetInterface)
testset := test.(tabula.ClasetInterface)
@@ -45,7 +45,7 @@ func runCRF(t *testing.T, sampleFile, statFile, perfFile string, nstage, ntree i
testset.RecountMajorMinor()
fmt.Println("Testset:", testset)
- predicts, cm, probs := crfRuntime.ClassifySetByWeight(testset, testIds)
+ predicts, cm, probs := crfRuntime.ClassifySetByWeight(testset, testListID)
fmt.Println("Confusion matrix:", cm)
diff --git a/lib/mining/classifier/rf/rf.go b/lib/mining/classifier/rf/rf.go
index f725fff1..6a1ae6fd 100644
--- a/lib/mining/classifier/rf/rf.go
+++ b/lib/mining/classifier/rf/rf.go
@@ -228,9 +228,9 @@ func (forest *Runtime) GrowTree(samples tabula.ClasetInterface) (
// ClassifySet given a samples predict their class by running each sample in
// forest, and return their class prediction with confusion matrix.
-// `samples` is the sample that will be predicted, `sampleIds` is the index of
+// `samples` is the sample that will be predicted, `sampleListID` is the index of
// samples.
-// If `sampleIds` is not nil, then sample index will be checked in each tree,
+// If `sampleListID` is not nil, then sample index will be checked in each tree,
// if the sample is used for training, their vote is not counted.
//
// Algorithm,
@@ -242,17 +242,17 @@ func (forest *Runtime) GrowTree(samples tabula.ClasetInterface) (
// (1.3) compute and save the actual class probabilities.
// (2) Compute confusion matrix from predictions.
// (3) Compute stat from confusion matrix.
-// (4) Write the stat to file only if sampleIds is empty, which mean its run
+// (4) Write the stat to file only if sampleListID is empty, which mean its run
// not from OOB set.
func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
- sampleIds []int,
+ sampleListID []int,
) (
predicts []string, cm *classifier.CM, probs []float64,
) {
stat := classifier.Stat{}
stat.Start()
- if len(sampleIds) == 0 {
+ if len(sampleListID) == 0 {
fmt.Println(tag, "Classify set:", samples)
fmt.Println(tag, "Classify set sample (one row):",
samples.GetRow(0))
@@ -267,8 +267,8 @@ func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
rows := samples.GetRows()
for x, row := range *rows {
// (1.1)
- if len(sampleIds) > 0 {
- sampleIdx = sampleIds[x]
+ if len(sampleListID) > 0 {
+ sampleIdx = sampleListID[x]
}
votes := forest.Votes(row, sampleIdx)
@@ -286,13 +286,13 @@ func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
}
// (2)
- cm = forest.ComputeCM(sampleIds, vs, actuals, predicts)
+ cm = forest.ComputeCM(sampleListID, vs, actuals, predicts)
// (3)
forest.ComputeStatFromCM(&stat, cm)
stat.End()
- if len(sampleIds) == 0 {
+ if len(sampleListID) == 0 {
fmt.Println(tag, "CM:", cm)
fmt.Println(tag, "Classifying stat:", stat)
_ = stat.Write(forest.StatFile)
diff --git a/lib/mining/classifier/runtime.go b/lib/mining/classifier/runtime.go
index 09804f11..56296c7d 100644
--- a/lib/mining/classifier/runtime.go
+++ b/lib/mining/classifier/runtime.go
@@ -99,7 +99,7 @@ func (rt *Runtime) AddStat(stat *Stat) {
// ComputeCM will compute confusion matrix of sample using value space, actual
// and prediction values.
-func (rt *Runtime) ComputeCM(sampleIds []int,
+func (rt *Runtime) ComputeCM(sampleListID []int,
vs, actuals, predicts []string,
) (
cm *CM,
@@ -107,7 +107,7 @@ func (rt *Runtime) ComputeCM(sampleIds []int,
cm = &CM{}
cm.ComputeStrings(vs, actuals, predicts)
- cm.GroupIndexPredictionsStrings(sampleIds, actuals, predicts)
+ cm.GroupIndexPredictionsStrings(sampleListID, actuals, predicts)
return cm
}
@@ -309,12 +309,12 @@ func (rt *Runtime) Performance(samples tabula.ClasetInterface,
) {
// (1)
actuals := samples.GetClassAsStrings()
- sortedIds := numbers.IntCreateSeq(0, len(probs)-1)
- floats64.InplaceMergesort(probs, sortedIds, 0, len(probs), false)
+ sortedListID := numbers.IntCreateSeq(0, len(probs)-1)
+ floats64.InplaceMergesort(probs, sortedListID, 0, len(probs), false)
// (2)
- libstrings.SortByIndex(&actuals, sortedIds)
- libstrings.SortByIndex(&predicts, sortedIds)
+ libstrings.SortByIndex(&actuals, sortedListID)
+ libstrings.SortByIndex(&predicts, sortedListID)
// (3)
rt.computePerfByProbs(samples, actuals, probs)