aboutsummaryrefslogtreecommitdiff
path: root/lib/text/diff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2021-07-30 21:02:09 +0700
committerShulhan <ms@kilabit.info>2021-07-30 21:02:09 +0700
commit942efa6c66cda97d6fe02e02c4479930a47f6b4c (patch)
treed8836a333661cce89fb45584b1a65c1079b10aae /lib/text/diff
parent88c32305eb9a702a53294e815068851fe2b5a687 (diff)
downloadpakakeh.go-942efa6c66cda97d6fe02e02c4479930a47f6b4c.tar.xz
text/diff: add functions to compare raw bytes as text and text.Lines
This change refactors some functions, notably: * Rename the Bytes function to IsEqual * Rename the Lines function to Bytes * Add function Text that compares two texts (raw bytes) * Add function Lines that compares two instances of text.Lines
Diffstat (limited to 'lib/text/diff')
-rw-r--r--lib/text/diff/diff.go169
-rw-r--r--lib/text/diff/diff_test.go14
-rw-r--r--lib/text/diff/diffinterface.go176
-rw-r--r--lib/text/diff/linechange.go8
4 files changed, 189 insertions, 178 deletions
diff --git a/lib/text/diff/diff.go b/lib/text/diff/diff.go
index e0599b05..6f0496b3 100644
--- a/lib/text/diff/diff.go
+++ b/lib/text/diff/diff.go
@@ -8,6 +8,7 @@
package diff
import (
+ "bytes"
"fmt"
"github.com/shuLhan/share/lib/text"
@@ -28,6 +29,174 @@ type Data struct {
}
//
+// Text searches for the differences between two texts.
+//
+func Text(before, after []byte, level int) (diffs Data) {
+ beforeLines := text.ParseLines(before)
+ afterLines := text.ParseLines(after)
+ return Lines(beforeLines, afterLines, level)
+}
+
+//
+// Lines searches for the differences between two Lines.
+//
+func Lines(oldlines, newlines text.Lines, level int) (diffs Data) {
+ oldlen := len(oldlines)
+ newlen := len(newlines)
+ x := 0
+ y := 0
+
+ for x < oldlen {
+ if y == newlen {
+ // New text has been fully examined. The remaining old
+ // lines are deletions at the end of the text.
+ diffs.PushDel(oldlines[x])
+ oldlines[x].V = nil
+ x++
+ continue
+ }
+
+ // Compare old line with new line.
+ if IsEqual(oldlines[x].V, newlines[y].V) {
+ oldlines[x].V = nil
+ newlines[y].V = nil
+ x++
+ y++
+ continue
+ }
+
+ // Check for whitespace changes
+ oldlinetrim := bytes.TrimSpace(oldlines[x].V)
+ newlinetrim := bytes.TrimSpace(newlines[y].V)
+ oldtrimlen := len(oldlinetrim)
+ newtrimlen := len(newlinetrim)
+
+ // Both are empty, probably one of them is changing
+ if oldtrimlen <= 0 && newtrimlen <= 0 {
+ diffs.PushChange(oldlines[x], newlines[y])
+ oldlines[x].V = nil
+ newlines[y].V = nil
+ x++
+ y++
+ continue
+ }
+
+ // Old is empty or contain only whitespaces.
+ if oldtrimlen <= 0 {
+ diffs.PushDel(oldlines[x])
+ oldlines[x].V = nil
+ x++
+ continue
+ }
+
+ // New is empty or contain only whitespaces.
+ if newtrimlen <= 0 {
+ diffs.PushAdd(newlines[y])
+ newlines[y].V = nil
+ y++
+ continue
+ }
+
+ ratio, _, _ := BytesRatio(oldlines[x].V, newlines[y].V,
+ DefMatchLen)
+
+ if ratio > DefMatchRatio {
+ // Ratio of similar bytes is higher than the minimum
+ // expectation, so it must be a change.
+ diffs.PushChange(oldlines[x], newlines[y])
+ oldlines[x].V = nil
+ newlines[y].V = nil
+ x++
+ y++
+ continue
+ }
+
+ // x is not equal with y, search down...
+ foundx, xaty := findLine(oldlines[x], newlines, y+1)
+
+ // Cross check the y with the rest of x...
+ foundy, yatx := findLine(newlines[y], oldlines, x+1)
+
+ // Neither line is found in the other text, which means the current line changed.
+ if !foundx && !foundy {
+ diffs.PushChange(oldlines[x], newlines[y])
+ oldlines[x].V = nil
+ newlines[y].V = nil
+ x++
+ y++
+ continue
+ }
+
+ // x still missing, means deletion in old text.
+ if !foundx && foundy {
+ for ; x < yatx && x < oldlen; x++ {
+ diffs.PushDel(oldlines[x])
+ oldlines[x].V = nil
+ }
+ continue
+ }
+
+ // We found x but y is missing, which means an addition in the new text.
+ if foundx && !foundy {
+ for ; y < xaty && y < newlen; y++ {
+ diffs.PushAdd(newlines[y])
+ newlines[y].V = nil
+ }
+ continue
+ }
+
+ if foundx && foundy {
+ // We found x and y. Check which one is the
+ // addition or deletion based on line range.
+ addlen := xaty - y
+ dellen := yatx - x
+
+ switch {
+ case addlen < dellen:
+ for ; y < xaty && y < newlen; y++ {
+ diffs.PushAdd(newlines[y])
+ newlines[y].V = nil
+ }
+
+ case addlen == dellen:
+ // Both changes occur between lines
+ for x < yatx && y < xaty {
+ diffs.PushChange(oldlines[x],
+ newlines[y])
+ oldlines[x].V = nil
+ newlines[y].V = nil
+ x++
+ y++
+ }
+ default:
+ for ; x < yatx && x < oldlen; x++ {
+ diffs.PushDel(oldlines[x])
+ oldlines[x].V = nil
+ }
+ }
+ continue
+ }
+ }
+
+ // Check if there is a left over from new text.
+ for ; y < newlen; y++ {
+ diffs.PushAdd(newlines[y])
+ newlines[y].V = nil
+ }
+
+ if level == LevelWords {
+ // Process each change to find the modified chunks.
+ for x, change := range diffs.Changes {
+ adds, dels := Bytes(change.Old.V, change.New.V, 0, 0)
+ diffs.Changes[x].Adds = adds
+ diffs.Changes[x].Dels = dels
+ }
+ }
+
+ return diffs
+}
+
+//
// PushAdd will add new line to diff set.
//
func (diffs *Data) PushAdd(new text.Line) {
diff --git a/lib/text/diff/diff_test.go b/lib/text/diff/diff_test.go
index a652b9dd..dc32de7e 100644
--- a/lib/text/diff/diff_test.go
+++ b/lib/text/diff/diff_test.go
@@ -288,15 +288,15 @@ func compareChunks(t *testing.T, adds, dels text.Chunks,
}
}
-func testDiffLines(t *testing.T, old, new text.Line,
+func testDiffBytes(t *testing.T, old, new text.Line,
expAdds, expDels []string,
) {
- adds, dels := Lines(old.V, new.V, 0, 0)
+ adds, dels := Bytes(old.V, new.V, 0, 0)
compareChunks(t, adds, dels, expAdds, expDels)
}
-func TestDiffLines(t *testing.T) {
+func TestBytes(t *testing.T) {
old := text.Line{N: 0, V: []byte("lorem ipsum dolmet")}
new := text.Line{N: 0, V: []byte("lorem all ipsum")}
@@ -307,22 +307,22 @@ func TestDiffLines(t *testing.T) {
[]string{" dolmet"},
}
- testDiffLines(t, old, new, expAdds[0], expDels[0])
+ testDiffBytes(t, old, new, expAdds[0], expDels[0])
old = text.Line{N: 0, V: []byte("lorem ipsum dolmet")}
new = text.Line{N: 0, V: []byte("lorem ipsum")}
- testDiffLines(t, old, new, []string{}, expDels[0])
+ testDiffBytes(t, old, new, []string{}, expDels[0])
old = text.Line{N: 0, V: []byte("lorem ipsum")}
new = text.Line{N: 0, V: []byte("lorem ipsum dolmet")}
- testDiffLines(t, old, new, expDels[0], []string{})
+ testDiffBytes(t, old, new, expDels[0], []string{})
old = text.Line{N: 0, V: []byte("{{Pharaoh Infobox |")}
new = text.Line{N: 0, V: []byte("{{Infobox pharaoh")}
- testDiffLines(t, old, new, []string{"pharaoh"},
+ testDiffBytes(t, old, new, []string{"pharaoh"},
[]string{"Pharaoh ", "|"})
}
diff --git a/lib/text/diff/diffinterface.go b/lib/text/diff/diffinterface.go
index 4e61948c..a876906f 100644
--- a/lib/text/diff/diffinterface.go
+++ b/lib/text/diff/diffinterface.go
@@ -6,7 +6,6 @@ package diff
import (
"bufio"
- "bytes"
"io"
"os"
@@ -64,10 +63,10 @@ func ReadLines(f string) (lines text.Lines, e error) {
}
//
-// Bytes compare two slice of bytes and return true if equal or false
+// IsEqual compares two slices of bytes and returns true if equal or false
// otherwise.
//
-func Bytes(oldb, newb []byte) (equal bool) {
+func IsEqual(oldb, newb []byte) (equal bool) {
oldblen := len(oldb)
newblen := len(newb)
@@ -214,9 +213,7 @@ func findLine(line text.Line, text text.Lines, startat int) (
textlen := len(text)
for n = startat; n < textlen; n++ {
- isEqual := Bytes(line.V, text[n].V)
-
- if isEqual {
+ if IsEqual(line.V, text[n].V) {
return true, n
}
}
@@ -227,176 +224,21 @@ func findLine(line text.Line, text text.Lines, startat int) (
//
// Files compare two files.
//
-func Files(oldf, newf string, difflevel int) (diffs Data, e error) {
+func Files(oldf, newf string, level int) (diffs Data, e error) {
oldlines, e := ReadLines(oldf)
if e != nil {
return
}
-
newlines, e := ReadLines(newf)
if e != nil {
return
}
-
- oldlen := len(oldlines)
- newlen := len(newlines)
- x := 0
- y := 0
-
- for x < oldlen {
- if y == newlen {
- // New text has been full examined. Leave out the old
- // text that means deletion at the end of text.
- diffs.PushDel(oldlines[x])
- oldlines[x].V = nil
- x++
- continue
- }
-
- // Compare old line with new line.
- isEqual := Bytes(oldlines[x].V, newlines[y].V)
-
- if isEqual {
- oldlines[x].V = nil
- newlines[y].V = nil
- x++
- y++
- continue
- }
-
- // Check for whitespace changes
- oldlinetrim := bytes.TrimSpace(oldlines[x].V)
- newlinetrim := bytes.TrimSpace(newlines[y].V)
- oldtrimlen := len(oldlinetrim)
- newtrimlen := len(newlinetrim)
-
- // Both are empty, probably one of them is changing
- if oldtrimlen <= 0 && newtrimlen <= 0 {
- diffs.PushChange(oldlines[x], newlines[y])
- oldlines[x].V = nil
- newlines[y].V = nil
- x++
- y++
- continue
- }
-
- // Old is empty or contain only whitespaces.
- if oldtrimlen <= 0 {
- diffs.PushDel(oldlines[x])
- oldlines[x].V = nil
- x++
- continue
- }
-
- // New is empty or contain only whitespaces.
- if newtrimlen <= 0 {
- diffs.PushAdd(newlines[y])
- newlines[y].V = nil
- y++
- continue
- }
-
- ratio, _, _ := BytesRatio(oldlines[x].V, newlines[y].V,
- DefMatchLen)
-
- if ratio > DefMatchRatio {
- // Ratio of similar bytes is higher than minimum
- // expectation. So, it must be changes
- diffs.PushChange(oldlines[x], newlines[y])
- oldlines[x].V = nil
- newlines[y].V = nil
- x++
- y++
- continue
- }
-
- // x is not equal with y, search down...
- foundx, xaty := findLine(oldlines[x], newlines, y+1)
-
- // Cross check the y with the rest of x...
- foundy, yatx := findLine(newlines[y], oldlines, x+1)
-
- // Both line is missing, its mean changes on current line
- if !foundx && !foundy {
- diffs.PushChange(oldlines[x], newlines[y])
- oldlines[x].V = nil
- newlines[y].V = nil
- x++
- y++
- continue
- }
-
- // x still missing, means deletion in old text.
- if !foundx && foundy {
- for ; x < yatx && x < oldlen; x++ {
- diffs.PushDel(oldlines[x])
- oldlines[x].V = nil
- }
- continue
- }
-
- // we found x but y is missing, its mean addition in new text.
- if foundx && !foundy {
- for ; y < xaty && y < newlen; y++ {
- diffs.PushAdd(newlines[y])
- newlines[y].V = nil
- }
- continue
- }
-
- if foundx && foundy {
- // We found x and y. Check which one is the
- // addition or deletion based on line range.
- addlen := xaty - y
- dellen := yatx - x
-
- switch {
- case addlen < dellen:
- for ; y < xaty && y < newlen; y++ {
- diffs.PushAdd(newlines[y])
- newlines[y].V = nil
- }
-
- case addlen == dellen:
- // Both changes occur between lines
- for x < yatx && y < xaty {
- diffs.PushChange(oldlines[x],
- newlines[y])
- oldlines[x].V = nil
- newlines[y].V = nil
- x++
- y++
- }
- default:
- for ; x < yatx && x < oldlen; x++ {
- diffs.PushDel(oldlines[x])
- oldlines[x].V = nil
- }
- }
- continue
- }
- }
-
- // Check if there is a left over from new text.
- for ; y < newlen; y++ {
- diffs.PushAdd(newlines[y])
- newlines[y].V = nil
- }
-
- if difflevel == LevelWords {
- // Process each changes to find modified chunkes.
- for x, change := range diffs.Changes {
- adds, dels := Lines(change.Old.V, change.New.V, 0, 0)
- diffs.Changes[x].Adds = adds
- diffs.Changes[x].Dels = dels
- }
- }
-
- return diffs, e
+ diffs = Lines(oldlines, newlines, level)
+ return diffs, nil
}
//
-// Lines given two similar lines, find and return the differences (additions and
+// Bytes given two similar lines, find and return the differences (additions and
// deletion) between them.
//
// Case 1: addition on new or deletion on old.
@@ -429,7 +271,7 @@ func Files(oldf, newf string, difflevel int) (diffs Data, e error) {
// old: 0001000
// new: 0002000
//
-func Lines(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
+func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
oldlen := len(old)
newlen := len(new)
@@ -563,7 +405,7 @@ func Lines(old, new []byte, atx, aty int) (adds, dels text.Chunks) {
oldleft = old[x+yatx : xend+1]
}
- addsleft, delsleft := Lines(oldleft, newleft, atx+x, aty+y)
+ addsleft, delsleft := Bytes(oldleft, newleft, atx+x, aty+y)
if len(addsleft) > 0 {
adds = append(adds, addsleft...)
diff --git a/lib/text/diff/linechange.go b/lib/text/diff/linechange.go
index c450e26c..a280d10b 100644
--- a/lib/text/diff/linechange.go
+++ b/lib/text/diff/linechange.go
@@ -32,9 +32,9 @@ func NewLineChange(old, new text.Line) *LineChange {
//
func (change LineChange) String() string {
return fmt.Sprintf("LineChange: {\n"+
- " Old : %v\n"+
- " New : %v\n"+
- " Adds : %v\n"+
- " Dels : %v\n"+
+ " Old : %s\n"+
+ " New : %s\n"+
+ " Adds : %s\n"+
+ " Dels : %s\n"+
"}\n", change.Old, change.New, change.Adds, change.Dels)
}