diff options
| author | Shulhan <ms@kilabit.info> | 2026-04-12 17:13:10 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-04-12 17:39:22 +0700 |
| commit | 8604b86d6e18bfdc9d2f8123a19bc8ed4dbfddb2 (patch) | |
| tree | 797984d5312b3c4fd280ee381bbc3fad2941af06 /lib/text/diff | |
| parent | 50178d3a571213161675752ac1f32dbc75cd448d (diff) | |
| download | pakakeh.go-8604b86d6e18bfdc9d2f8123a19bc8ed4dbfddb2.tar.xz | |
text/diff: add an examples for Bytes and BytesRatio
While at it, rename the source file to reflect the content, instead of
diffinterface.go name it func.go.
Diffstat (limited to 'lib/text/diff')
| -rw-r--r-- | lib/text/diff/diff.go | 19 | ||||
| -rw-r--r-- | lib/text/diff/func.go (renamed from lib/text/diff/diffinterface.go) | 284 | ||||
| -rw-r--r-- | lib/text/diff/func_example_test.go | 88 |
3 files changed, 224 insertions, 167 deletions
diff --git a/lib/text/diff/diff.go b/lib/text/diff/diff.go index 037ac79b..7a2844c9 100644 --- a/lib/text/diff/diff.go +++ b/lib/text/diff/diff.go @@ -281,3 +281,22 @@ func (diff Data) String() (s string) { } return sb.String() } + +// findLine returns true if line is found in the beginning of text start at +// line `startat`. +// It also return line number of matching line. +// If no match found, it will return false and `startat` value. +func findLine(line Line, text []Line, startat int) ( + found bool, + n int, +) { + textlen := len(text) + + for n = startat; n < textlen; n++ { + if bytes.Equal(line.Val, text[n].Val) { + return true, n + } + } + + return false, startat +} diff --git a/lib/text/diff/diffinterface.go b/lib/text/diff/func.go index be9d739e..6acc5178 100644 --- a/lib/text/diff/diffinterface.go +++ b/lib/text/diff/func.go @@ -4,8 +4,6 @@ package diff import ( - "bytes" - inbytes "git.sr.ht/~shulhan/pakakeh.go/internal/bytes" "git.sr.ht/~shulhan/pakakeh.go/lib/text" ) @@ -27,156 +25,9 @@ const ( DefMatchRatio = 0.7 ) -// BytesRatio compare two slice of bytes and return ratio of matching bytes. -// The ratio in in range of 0.0 to 1.0, where 1.0 if both are similar, and 0.0 -// if no matchs even found. -// `minTokenLen` define the minimum length of token for searching in both of -// slice. -func BytesRatio(old, newline []byte, minTokenLen int) (ratio float32, m int, maxlen int) { - x, y := 0, 0 - - oldlen := len(old) - newlen := len(newline) - minlen := oldlen - maxlen = newlen - if newlen < oldlen { - minlen = newlen - maxlen = oldlen - } - - if minTokenLen < 0 { - minTokenLen = DefMatchLen - } - - for { - // Count matching bytes from beginning of slice. - for x < minlen { - if old[x] != newline[y] { - break - } - m++ - x++ - y++ - } - - if x == minlen { - // All bytes is matched but probably some trailing in - // one of them. - break - } - - // Count matching bytes from end of slice - xend := oldlen - 1 - yend := newlen - 1 - - for xend >= x && yend >= y { - if old[xend] != newline[yend] { - break - } - m++ - xend-- - yend-- - } - - // One of the line have changes in the middle. - if xend == x || yend == y { - break - } - - // Cut the matching bytes - old = old[x : xend+1] - newline = newline[y : yend+1] - oldlen = len(old) - - // Get minimal token to search in the newline left over. - minlen = min(oldlen, minTokenLen) - - // Search old token in newline, chunk by chunk. - x = 0 - y = -1 - max := oldlen - minlen - for ; x < max; x++ { - token := old[x : x+minlen] - - y = inbytes.TokenFind(newline, token, 0) - if y > 0 { - break - } - } - - if y < 0 { - // We did not found anything. - break - } - - // Cut the changes - old = old[x:] - newline = newline[y:] - oldlen = len(old) - newlen = len(newline) - - minlen = min(newlen, oldlen) - - x, y = 0, 0 - // start again from beginning... - } - - ratio = float32(m) / float32(maxlen) - - return ratio, m, maxlen -} - -// findLine return true if line is found in text beginning at line `startat`. -// It also return line number of matching line. -// If no match found, it will return false and `startat` value. -func findLine(line Line, text []Line, startat int) ( - found bool, - n int, -) { - textlen := len(text) - - for n = startat; n < textlen; n++ { - if bytes.Equal(line.Val, text[n].Val) { - return true, n - } - } - - return false, startat -} - -// Bytes given two similar lines, find and return the differences (additions and -// deletion) between them. -// -// Case 1: addition on new or deletion on old. -// -// old: 00000 -// new: 00000111 -// -// or -// -// old: 00000111 -// new: 00000 -// -// Case 2: addition on new line -// -// old: 000000 -// new: 0001000 -// -// Case 3: deletion on old line (reverse of case 2) -// -// old: 0001000 -// new: 000000 -// -// Case 4: change happened in the beginning -// -// old: 11000 -// new: 22000 -// -// Case 5: both changed -// -// old: 0001000 -// new: 0002000 -func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { +// Bytes returns the character differences (additions and deletion) between +// old and new bytes, start at specific position. +func Bytes(old, new []byte, oldat, newat int) (adds, dels text.Chunks) { oldlen := len(old) newlen := len(new) @@ -196,10 +47,10 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { if x == minlen { if oldlen < newlen { v := new[y:] - adds = append(adds, text.Chunk{StartAt: atx + y, V: v}) + adds = append(adds, text.Chunk{StartAt: oldat + y, V: v}) } else { v := old[x:] - dels = append(dels, text.Chunk{StartAt: atx + x, V: v}) + dels = append(dels, text.Chunk{StartAt: oldat + x, V: v}) } return adds, dels } @@ -219,14 +70,14 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { // Case 2: addition in new line. if x == xend+1 { v := new[y : yend+1] - adds = append(adds, text.Chunk{StartAt: aty + y, V: v}) + adds = append(adds, text.Chunk{StartAt: newat + y, V: v}) return adds, dels } // Case 3: deletion in old line. if y == yend+1 { v := old[x : xend+1] - dels = append(dels, text.Chunk{StartAt: atx + x, V: v}) + dels = append(dels, text.Chunk{StartAt: oldat + x, V: v}) return adds, dels } @@ -254,7 +105,7 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { // We did not find matching token of x in y, its mean the some chunk // in x and y has been replaced. if xaty < 0 && yatx < 0 { - addsleft, delsleft := searchForward(atx, aty, &x, &y, &oldleft, + addsleft, delsleft := searchForward(oldat, newat, &x, &y, &oldleft, &newleft) if len(addsleft) > 0 { @@ -268,7 +119,7 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { if len(oldleft) == 0 { if len(newleft) > 0 { adds = append(adds, text.Chunk{ - StartAt: atx + x, + StartAt: oldat + x, V: newleft, }) } @@ -277,7 +128,7 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { if len(newleft) == 0 { if len(oldleft) > 0 { dels = append(dels, text.Chunk{ - StartAt: aty + y, + StartAt: newat + y, V: oldleft, }) } @@ -290,17 +141,17 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { // be an addition. if xaty >= 0 { v := new[y : y+xaty] - adds = append(adds, text.Chunk{StartAt: aty + y, V: v}) + adds = append(adds, text.Chunk{StartAt: newat + y, V: v}) newleft = new[y+xaty : yend+1] } else if yatx >= 0 { // Case 3: We found y token at x: yatx. Previous byte before that must // be a deletion. v := old[x : x+yatx] - dels = append(dels, text.Chunk{StartAt: atx + x, V: v}) + dels = append(dels, text.Chunk{StartAt: oldat + x, V: v}) oldleft = old[x+yatx : xend+1] } - addsleft, delsleft := Bytes(oldleft, newleft, atx+x, aty+y) + addsleft, delsleft := Bytes(oldleft, newleft, oldat+x, newat+y) if len(addsleft) > 0 { adds = append(adds, addsleft...) @@ -312,7 +163,106 @@ func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { return adds, dels } -func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) ( +// BytesRatio returns the ratio of matching bytes between old and new. +// The ratio in in range of 0.0 to 1.0, where 1.0 when both are similar, and +// 0.0 when no single character matchs found. +// The minTokenLen define the minimum length of token for searching in both of +// slice. +func BytesRatio(old, newline []byte, minTokenLen int) (ratio float32, m int, maxlen int) { + x, y := 0, 0 + + oldlen := len(old) + newlen := len(newline) + minlen := oldlen + maxlen = newlen + if newlen < oldlen { + minlen = newlen + maxlen = oldlen + } + + if minTokenLen < 0 { + minTokenLen = DefMatchLen + } + + for { + // Count matching bytes from beginning of slice. + for x < minlen { + if old[x] != newline[y] { + break + } + m++ + x++ + y++ + } + + if x == minlen { + // All bytes is matched but probably some trailing in + // one of them. + break + } + + // Count matching bytes from end of slice + xend := oldlen - 1 + yend := newlen - 1 + + for xend >= x && yend >= y { + if old[xend] != newline[yend] { + break + } + m++ + xend-- + yend-- + } + + // One of the line have changes in the middle. + if xend == x || yend == y { + break + } + + // Cut the matching bytes + old = old[x : xend+1] + newline = newline[y : yend+1] + oldlen = len(old) + + // Get minimal token to search in the newline left over. + minlen = min(oldlen, minTokenLen) + + // Search old token in newline, chunk by chunk. + x = 0 + y = -1 + max := oldlen - minlen + for ; x < max; x++ { + token := old[x : x+minlen] + + y = inbytes.TokenFind(newline, token, 0) + if y > 0 { + break + } + } + + if y < 0 { + // We did not found anything. + break + } + + // Cut the changes + old = old[x:] + newline = newline[y:] + oldlen = len(old) + newlen = len(newline) + + minlen = min(newlen, oldlen) + + x, y = 0, 0 + // start again from beginning... + } + + ratio = float32(m) / float32(maxlen) + + return ratio, m, maxlen +} + +func searchForward(oldat, newat int, x, y *int, oldleft, newleft *[]byte) ( adds, dels text.Chunks, ) { oldleftlen := len(*oldleft) @@ -347,8 +297,8 @@ func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) ( if xaty < 0 && yatx < 0 { // still no token found, means whole chunk has been replaced. - dels = append(dels, text.Chunk{StartAt: atx + *x, V: *oldleft}) - adds = append(adds, text.Chunk{StartAt: aty + *y, V: *newleft}) + dels = append(dels, text.Chunk{StartAt: oldat + *x, V: *oldleft}) + adds = append(adds, text.Chunk{StartAt: newat + *y, V: *newleft}) *oldleft = []byte{} *newleft = []byte{} return adds, dels @@ -356,12 +306,12 @@ func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) ( // Some chunk has been replaced. v := (*oldleft)[:xx] - dels = append(dels, text.Chunk{StartAt: atx + *x, V: v}) + dels = append(dels, text.Chunk{StartAt: oldat + *x, V: v}) *oldleft = (*oldleft)[xx:] *x += xx v = (*newleft)[:yy] - adds = append(adds, text.Chunk{StartAt: aty + *y, V: v}) + adds = append(adds, text.Chunk{StartAt: newat + *y, V: v}) *newleft = (*newleft)[yy:] *y += yy diff --git a/lib/text/diff/func_example_test.go b/lib/text/diff/func_example_test.go new file mode 100644 index 00000000..713e70fd --- /dev/null +++ b/lib/text/diff/func_example_test.go @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2026 Shulhan <ms@kilabit.info> + +package diff + +import ( + "fmt" +) + +func ExampleBytes() { + fmt.Println(`Additions 1:`) + adds, dels := Bytes([]byte(`00000`), []byte(`00000111`), 0, 0) + fmt.Printf(" adds: %+v\n", adds) + fmt.Printf(" dels: %+v\n", dels) + + fmt.Println(`Additions 2:`) + adds, dels = Bytes([]byte(`000000`), []byte(`0001000`), 0, 0) + fmt.Printf(" adds: %+v\n", adds) + fmt.Printf(" dels: %+v\n", dels) + + fmt.Println(`Deletions 1:`) + adds, dels = Bytes([]byte(`00000111`), []byte(`00000`), 0, 0) + fmt.Printf(" adds: %+v\n", adds) + fmt.Printf(" dels: %+v\n", dels) + + fmt.Println(`Deletions 2:`) + adds, dels = Bytes([]byte(`0001000`), []byte(`000000`), 0, 0) + fmt.Printf(" adds: %+v\n", adds) + fmt.Printf(" dels: %+v\n", dels) + + fmt.Println(`Both changes 1:`) + adds, dels = Bytes([]byte(`11000`), []byte(`22000`), 0, 0) + fmt.Printf(" adds: %+v\n", adds) + fmt.Printf(" dels: %+v\n", dels) + + fmt.Println(`Both changes 2:`) + adds, dels = Bytes([]byte(`0001000`), []byte(`0002000`), 0, 0) + fmt.Printf(" adds: %+v\n", adds) + fmt.Printf(" dels: %+v\n", dels) + + // Output: + // Additions 1: + // adds: [{StartAt:5,V:111}] + // dels: [] + // Additions 2: + // adds: [{StartAt:3,V:1}] + // dels: [] + // Deletions 1: + // adds: [] + // dels: [{StartAt:5,V:111}] + // Deletions 2: + // adds: [] + // dels: [{StartAt:3,V:1}] + // Both changes 1: + // adds: [{StartAt:0,V:22}] + // dels: [{StartAt:0,V:11}] + // Both changes 2: + // adds: [{StartAt:3,V:2}] + // dels: [{StartAt:3,V:1}] +} + +func ExampleBytesRatio() { + ratio, m, maxlen := BytesRatio([]byte(`00000`), []byte(`00000111`), 1) + fmt.Printf("Additions 1: %f %d %d\n", ratio, m, maxlen) + + ratio, m, maxlen = BytesRatio([]byte(`000000`), []byte(`0001000`), 1) + fmt.Printf("Additions 2: %f %d %d\n", ratio, m, maxlen) + + ratio, m, maxlen = BytesRatio([]byte(`00000111`), []byte(`00000`), 1) + fmt.Printf("Deletions 1: %f %d %d\n", ratio, m, maxlen) + + ratio, m, maxlen = BytesRatio([]byte(`0001000`), []byte(`000000`), 1) + fmt.Printf("Deletions 2: %f %d %d\n", ratio, m, maxlen) + + ratio, m, maxlen = BytesRatio([]byte(`11000`), []byte(`22000`), 1) + fmt.Printf("Both changes 1: %f %d %d\n", ratio, m, maxlen) + + ratio, m, maxlen = BytesRatio([]byte(`0001000`), []byte(`0002000`), 1) + fmt.Printf("Both changes 2: %f %d %d\n", ratio, m, maxlen) + + // Output: + // Additions 1: 0.625000 5 8 + // Additions 2: 0.857143 6 7 + // Deletions 1: 0.625000 5 8 + // Deletions 2: 0.857143 6 7 + // Both changes 1: 0.600000 3 5 + // Both changes 2: 0.857143 6 7 +} |
