aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-04-12 17:13:10 +0700
committerShulhan <ms@kilabit.info>2026-04-12 17:39:22 +0700
commit8604b86d6e18bfdc9d2f8123a19bc8ed4dbfddb2 (patch)
tree797984d5312b3c4fd280ee381bbc3fad2941af06 /lib
parent50178d3a571213161675752ac1f32dbc75cd448d (diff)
downloadpakakeh.go-8604b86d6e18bfdc9d2f8123a19bc8ed4dbfddb2.tar.xz
text/diff: add an examples for Bytes and BytesRatio
While at it, rename the source file to reflect the content, instead of diffinterface.go name it func.go.
Diffstat (limited to 'lib')
-rw-r--r--lib/text/diff/diff.go19
-rw-r--r--lib/text/diff/func.go (renamed from lib/text/diff/diffinterface.go)284
-rw-r--r--lib/text/diff/func_example_test.go88
3 files changed, 224 insertions, 167 deletions
diff --git a/lib/text/diff/diff.go b/lib/text/diff/diff.go
index 037ac79b..7a2844c9 100644
--- a/lib/text/diff/diff.go
+++ b/lib/text/diff/diff.go
@@ -281,3 +281,22 @@ func (diff Data) String() (s string) {
}
return sb.String()
}
+
+// findLine returns true if line is found in the beginning of text start at
+// line `startat`.
+// It also return line number of matching line.
+// If no match found, it will return false and `startat` value.
+func findLine(line Line, text []Line, startat int) (
+ found bool,
+ n int,
+) {
+ textlen := len(text)
+
+ for n = startat; n < textlen; n++ {
+ if bytes.Equal(line.Val, text[n].Val) {
+ return true, n
+ }
+ }
+
+ return false, startat
+}
diff --git a/lib/text/diff/diffinterface.go b/lib/text/diff/func.go
index be9d739e..6acc5178 100644
--- a/lib/text/diff/diffinterface.go
+++ b/lib/text/diff/func.go
@@ -4,8 +4,6 @@
package diff
import (
- "bytes"
-
inbytes "git.sr.ht/~shulhan/pakakeh.go/internal/bytes"
"git.sr.ht/~shulhan/pakakeh.go/lib/text"
)
@@ -27,156 +25,9 @@ const (
DefMatchRatio = 0.7
)
-// BytesRatio compare two slice of bytes and return ratio of matching bytes.
-// The ratio in in range of 0.0 to 1.0, where 1.0 if both are similar, and 0.0
-// if no matchs even found.
-// `minTokenLen` define the minimum length of token for searching in both of
-// slice.
-func BytesRatio(old, newline []byte, minTokenLen int) (ratio float32, m int, maxlen int) {
- x, y := 0, 0
-
- oldlen := len(old)
- newlen := len(newline)
- minlen := oldlen
- maxlen = newlen
- if newlen < oldlen {
- minlen = newlen
- maxlen = oldlen
- }
-
- if minTokenLen < 0 {
- minTokenLen = DefMatchLen
- }
-
- for {
- // Count matching bytes from beginning of slice.
- for x < minlen {
- if old[x] != newline[y] {
- break
- }
- m++
- x++
- y++
- }
-
- if x == minlen {
- // All bytes is matched but probably some trailing in
- // one of them.
- break
- }
-
- // Count matching bytes from end of slice
- xend := oldlen - 1
- yend := newlen - 1
-
- for xend >= x && yend >= y {
- if old[xend] != newline[yend] {
- break
- }
- m++
- xend--
- yend--
- }
-
- // One of the line have changes in the middle.
- if xend == x || yend == y {
- break
- }
-
- // Cut the matching bytes
- old = old[x : xend+1]
- newline = newline[y : yend+1]
- oldlen = len(old)
-
- // Get minimal token to search in the newline left over.
- minlen = min(oldlen, minTokenLen)
-
- // Search old token in newline, chunk by chunk.
- x = 0
- y = -1
- max := oldlen - minlen
- for ; x < max; x++ {
- token := old[x : x+minlen]
-
- y = inbytes.TokenFind(newline, token, 0)
- if y > 0 {
- break
- }
- }
-
- if y < 0 {
- // We did not found anything.
- break
- }
-
- // Cut the changes
- old = old[x:]
- newline = newline[y:]
- oldlen = len(old)
- newlen = len(newline)
-
- minlen = min(newlen, oldlen)
-
- x, y = 0, 0
- // start again from beginning...
- }
-
- ratio = float32(m) / float32(maxlen)
-
- return ratio, m, maxlen
-}
-
-// findLine return true if line is found in text beginning at line `startat`.
-// It also return line number of matching line.
-// If no match found, it will return false and `startat` value.
-func findLine(line Line, text []Line, startat int) (
- found bool,
- n int,
-) {
- textlen := len(text)
-
- for n = startat; n < textlen; n++ {
- if bytes.Equal(line.Val, text[n].Val) {
- return true, n
- }
- }
-
- return false, startat
-}
-
-// Bytes given two similar lines, find and return the differences (additions and
-// deletion) between them.
-//
-// Case 1: addition on new or deletion on old.
-//
-// old: 00000
-// new: 00000111
-//
-// or
-//
-// old: 00000111
-// new: 00000
-//
-// Case 2: addition on new line
-//
-// old: 000000
-// new: 0001000
-//
-// Case 3: deletion on old line (reverse of case 2)
-//
-// old: 0001000
-// new: 000000
-//
-// Case 4: change happened in the beginning
-//
-// old: 11000
-// new: 22000
-//
-// Case 5: both changed
-//
-// old: 0001000
-// new: 0002000
-func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
+// Bytes returns the character differences (additions and deletion) between
+// old and new bytes, start at specific position.
+func Bytes(old, new []byte, oldat, newat int) (adds, dels text.Chunks) {
oldlen := len(old)
newlen := len(new)
@@ -196,10 +47,10 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
if x == minlen {
if oldlen < newlen {
v := new[y:]
- adds = append(adds, text.Chunk{StartAt: atx + y, V: v})
+ adds = append(adds, text.Chunk{StartAt: oldat + y, V: v})
} else {
v := old[x:]
- dels = append(dels, text.Chunk{StartAt: atx + x, V: v})
+ dels = append(dels, text.Chunk{StartAt: oldat + x, V: v})
}
return adds, dels
}
@@ -219,14 +70,14 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
// Case 2: addition in new line.
if x == xend+1 {
v := new[y : yend+1]
- adds = append(adds, text.Chunk{StartAt: aty + y, V: v})
+ adds = append(adds, text.Chunk{StartAt: newat + y, V: v})
return adds, dels
}
// Case 3: deletion in old line.
if y == yend+1 {
v := old[x : xend+1]
- dels = append(dels, text.Chunk{StartAt: atx + x, V: v})
+ dels = append(dels, text.Chunk{StartAt: oldat + x, V: v})
return adds, dels
}
@@ -254,7 +105,7 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
// We did not find matching token of x in y, its mean the some chunk
// in x and y has been replaced.
if xaty < 0 && yatx < 0 {
- addsleft, delsleft := searchForward(atx, aty, &x, &y, &oldleft,
+ addsleft, delsleft := searchForward(oldat, newat, &x, &y, &oldleft,
&newleft)
if len(addsleft) > 0 {
@@ -268,7 +119,7 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
if len(oldleft) == 0 {
if len(newleft) > 0 {
adds = append(adds, text.Chunk{
- StartAt: atx + x,
+ StartAt: oldat + x,
V: newleft,
})
}
@@ -277,7 +128,7 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
if len(newleft) == 0 {
if len(oldleft) > 0 {
dels = append(dels, text.Chunk{
- StartAt: aty + y,
+ StartAt: newat + y,
V: oldleft,
})
}
@@ -290,17 +141,17 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
// be an addition.
if xaty >= 0 {
v := new[y : y+xaty]
- adds = append(adds, text.Chunk{StartAt: aty + y, V: v})
+ adds = append(adds, text.Chunk{StartAt: newat + y, V: v})
newleft = new[y+xaty : yend+1]
} else if yatx >= 0 {
// Case 3: We found y token at x: yatx. Previous byte before that must
// be a deletion.
v := old[x : x+yatx]
- dels = append(dels, text.Chunk{StartAt: atx + x, V: v})
+ dels = append(dels, text.Chunk{StartAt: oldat + x, V: v})
oldleft = old[x+yatx : xend+1]
}
- addsleft, delsleft := Bytes(oldleft, newleft, atx+x, aty+y)
+ addsleft, delsleft := Bytes(oldleft, newleft, oldat+x, newat+y)
if len(addsleft) > 0 {
adds = append(adds, addsleft...)
@@ -312,7 +163,106 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
return adds, dels
}
-func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) (
+// BytesRatio returns the ratio of matching bytes between old and new.
+// The ratio in in range of 0.0 to 1.0, where 1.0 when both are similar, and
+// 0.0 when no single character matchs found.
+// The minTokenLen define the minimum length of token for searching in both of
+// slice.
+func BytesRatio(old, newline []byte, minTokenLen int) (ratio float32, m int, maxlen int) {
+ x, y := 0, 0
+
+ oldlen := len(old)
+ newlen := len(newline)
+ minlen := oldlen
+ maxlen = newlen
+ if newlen < oldlen {
+ minlen = newlen
+ maxlen = oldlen
+ }
+
+ if minTokenLen < 0 {
+ minTokenLen = DefMatchLen
+ }
+
+ for {
+ // Count matching bytes from beginning of slice.
+ for x < minlen {
+ if old[x] != newline[y] {
+ break
+ }
+ m++
+ x++
+ y++
+ }
+
+ if x == minlen {
+ // All bytes is matched but probably some trailing in
+ // one of them.
+ break
+ }
+
+ // Count matching bytes from end of slice
+ xend := oldlen - 1
+ yend := newlen - 1
+
+ for xend >= x && yend >= y {
+ if old[xend] != newline[yend] {
+ break
+ }
+ m++
+ xend--
+ yend--
+ }
+
+ // One of the line have changes in the middle.
+ if xend == x || yend == y {
+ break
+ }
+
+ // Cut the matching bytes
+ old = old[x : xend+1]
+ newline = newline[y : yend+1]
+ oldlen = len(old)
+
+ // Get minimal token to search in the newline left over.
+ minlen = min(oldlen, minTokenLen)
+
+ // Search old token in newline, chunk by chunk.
+ x = 0
+ y = -1
+ max := oldlen - minlen
+ for ; x < max; x++ {
+ token := old[x : x+minlen]
+
+ y = inbytes.TokenFind(newline, token, 0)
+ if y > 0 {
+ break
+ }
+ }
+
+ if y < 0 {
+ // We did not found anything.
+ break
+ }
+
+ // Cut the changes
+ old = old[x:]
+ newline = newline[y:]
+ oldlen = len(old)
+ newlen = len(newline)
+
+ minlen = min(newlen, oldlen)
+
+ x, y = 0, 0
+ // start again from beginning...
+ }
+
+ ratio = float32(m) / float32(maxlen)
+
+ return ratio, m, maxlen
+}
+
+func searchForward(oldat, newat int, x, y *int, oldleft, newleft *[]byte) (
adds, dels text.Chunks,
) {
oldleftlen := len(*oldleft)
@@ -347,8 +297,8 @@ func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) (
if xaty < 0 && yatx < 0 {
// still no token found, means whole chunk has been replaced.
- dels = append(dels, text.Chunk{StartAt: atx + *x, V: *oldleft})
- adds = append(adds, text.Chunk{StartAt: aty + *y, V: *newleft})
+ dels = append(dels, text.Chunk{StartAt: oldat + *x, V: *oldleft})
+ adds = append(adds, text.Chunk{StartAt: newat + *y, V: *newleft})
*oldleft = []byte{}
*newleft = []byte{}
return adds, dels
@@ -356,12 +306,12 @@ func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) (
// Some chunk has been replaced.
v := (*oldleft)[:xx]
- dels = append(dels, text.Chunk{StartAt: atx + *x, V: v})
+ dels = append(dels, text.Chunk{StartAt: oldat + *x, V: v})
*oldleft = (*oldleft)[xx:]
*x += xx
v = (*newleft)[:yy]
- adds = append(adds, text.Chunk{StartAt: aty + *y, V: v})
+ adds = append(adds, text.Chunk{StartAt: newat + *y, V: v})
*newleft = (*newleft)[yy:]
*y += yy
diff --git a/lib/text/diff/func_example_test.go b/lib/text/diff/func_example_test.go
new file mode 100644
index 00000000..713e70fd
--- /dev/null
+++ b/lib/text/diff/func_example_test.go
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2026 Shulhan <ms@kilabit.info>
+
+package diff
+
+import (
+ "fmt"
+)
+
+func ExampleBytes() {
+ fmt.Println(`Additions 1:`)
+ adds, dels := Bytes([]byte(`00000`), []byte(`00000111`), 0, 0)
+ fmt.Printf(" adds: %+v\n", adds)
+ fmt.Printf(" dels: %+v\n", dels)
+
+ fmt.Println(`Additions 2:`)
+ adds, dels = Bytes([]byte(`000000`), []byte(`0001000`), 0, 0)
+ fmt.Printf(" adds: %+v\n", adds)
+ fmt.Printf(" dels: %+v\n", dels)
+
+ fmt.Println(`Deletions 1:`)
+ adds, dels = Bytes([]byte(`00000111`), []byte(`00000`), 0, 0)
+ fmt.Printf(" adds: %+v\n", adds)
+ fmt.Printf(" dels: %+v\n", dels)
+
+ fmt.Println(`Deletions 2:`)
+ adds, dels = Bytes([]byte(`0001000`), []byte(`000000`), 0, 0)
+ fmt.Printf(" adds: %+v\n", adds)
+ fmt.Printf(" dels: %+v\n", dels)
+
+ fmt.Println(`Both changes 1:`)
+ adds, dels = Bytes([]byte(`11000`), []byte(`22000`), 0, 0)
+ fmt.Printf(" adds: %+v\n", adds)
+ fmt.Printf(" dels: %+v\n", dels)
+
+ fmt.Println(`Both changes 2:`)
+ adds, dels = Bytes([]byte(`0001000`), []byte(`0002000`), 0, 0)
+ fmt.Printf(" adds: %+v\n", adds)
+ fmt.Printf(" dels: %+v\n", dels)
+
+ // Output:
+ // Additions 1:
+ // adds: [{StartAt:5,V:111}]
+ // dels: []
+ // Additions 2:
+ // adds: [{StartAt:3,V:1}]
+ // dels: []
+ // Deletions 1:
+ // adds: []
+ // dels: [{StartAt:5,V:111}]
+ // Deletions 2:
+ // adds: []
+ // dels: [{StartAt:3,V:1}]
+ // Both changes 1:
+ // adds: [{StartAt:0,V:22}]
+ // dels: [{StartAt:0,V:11}]
+ // Both changes 2:
+ // adds: [{StartAt:3,V:2}]
+ // dels: [{StartAt:3,V:1}]
+}
+
+func ExampleBytesRatio() {
+ ratio, m, maxlen := BytesRatio([]byte(`00000`), []byte(`00000111`), 1)
+ fmt.Printf("Additions 1: %f %d %d\n", ratio, m, maxlen)
+
+ ratio, m, maxlen = BytesRatio([]byte(`000000`), []byte(`0001000`), 1)
+ fmt.Printf("Additions 2: %f %d %d\n", ratio, m, maxlen)
+
+ ratio, m, maxlen = BytesRatio([]byte(`00000111`), []byte(`00000`), 1)
+ fmt.Printf("Deletions 1: %f %d %d\n", ratio, m, maxlen)
+
+ ratio, m, maxlen = BytesRatio([]byte(`0001000`), []byte(`000000`), 1)
+ fmt.Printf("Deletions 2: %f %d %d\n", ratio, m, maxlen)
+
+ ratio, m, maxlen = BytesRatio([]byte(`11000`), []byte(`22000`), 1)
+ fmt.Printf("Both changes 1: %f %d %d\n", ratio, m, maxlen)
+
+ ratio, m, maxlen = BytesRatio([]byte(`0001000`), []byte(`0002000`), 1)
+ fmt.Printf("Both changes 2: %f %d %d\n", ratio, m, maxlen)
+
+ // Output:
+ // Additions 1: 0.625000 5 8
+ // Additions 2: 0.857143 6 7
+ // Deletions 1: 0.625000 5 8
+ // Deletions 2: 0.857143 6 7
+ // Both changes 1: 0.600000 3 5
+ // Both changes 2: 0.857143 6 7
+}