diff options
| author | Shulhan <ms@kilabit.info> | 2021-07-30 21:02:09 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2021-07-30 21:02:09 +0700 |
| commit | 942efa6c66cda97d6fe02e02c4479930a47f6b4c (patch) | |
| tree | d8836a333661cce89fb45584b1a65c1079b10aae /lib/text/diff | |
| parent | 88c32305eb9a702a53294e815068851fe2b5a687 (diff) | |
| download | pakakeh.go-942efa6c66cda97d6fe02e02c4479930a47f6b4c.tar.xz | |
text/diff: add functions to compare raw bytes as text and text.Lines
This change refactors some functions, notably:
* Rename Bytes function to IsEqual
* Rename Lines function to Bytes
* Add function Text that compares two texts (raw bytes)
* Add function Lines that compares two instances of text.Lines
Diffstat (limited to 'lib/text/diff')
| -rw-r--r-- | lib/text/diff/diff.go | 169 | ||||
| -rw-r--r-- | lib/text/diff/diff_test.go | 14 | ||||
| -rw-r--r-- | lib/text/diff/diffinterface.go | 176 | ||||
| -rw-r--r-- | lib/text/diff/linechange.go | 8 |
4 files changed, 189 insertions, 178 deletions
diff --git a/lib/text/diff/diff.go b/lib/text/diff/diff.go index e0599b05..6f0496b3 100644 --- a/lib/text/diff/diff.go +++ b/lib/text/diff/diff.go @@ -8,6 +8,7 @@ package diff import ( + "bytes" "fmt" "github.com/shuLhan/share/lib/text" @@ -28,6 +29,174 @@ type Data struct { } // +// Text search the difference between two texts. +// +func Text(before, after []byte, level int) (diffs Data) { + beforeLines := text.ParseLines(before) + afterLines := text.ParseLines(after) + return Lines(beforeLines, afterLines, level) +} + +// +// Lines search the difference between two Lines. +// +func Lines(oldlines, newlines text.Lines, level int) (diffs Data) { + oldlen := len(oldlines) + newlen := len(newlines) + x := 0 + y := 0 + + for x < oldlen { + if y == newlen { + // New text has been full examined. Leave out the old + // text that means deletion at the end of text. + diffs.PushDel(oldlines[x]) + oldlines[x].V = nil + x++ + continue + } + + // Compare old line with new line. + if IsEqual(oldlines[x].V, newlines[y].V) { + oldlines[x].V = nil + newlines[y].V = nil + x++ + y++ + continue + } + + // Check for whitespace changes + oldlinetrim := bytes.TrimSpace(oldlines[x].V) + newlinetrim := bytes.TrimSpace(newlines[y].V) + oldtrimlen := len(oldlinetrim) + newtrimlen := len(newlinetrim) + + // Both are empty, probably one of them is changing + if oldtrimlen <= 0 && newtrimlen <= 0 { + diffs.PushChange(oldlines[x], newlines[y]) + oldlines[x].V = nil + newlines[y].V = nil + x++ + y++ + continue + } + + // Old is empty or contain only whitespaces. + if oldtrimlen <= 0 { + diffs.PushDel(oldlines[x]) + oldlines[x].V = nil + x++ + continue + } + + // New is empty or contain only whitespaces. + if newtrimlen <= 0 { + diffs.PushAdd(newlines[y]) + newlines[y].V = nil + y++ + continue + } + + ratio, _, _ := BytesRatio(oldlines[x].V, newlines[y].V, + DefMatchLen) + + if ratio > DefMatchRatio { + // Ratio of similar bytes is higher than minimum + // expectation. 
So, it must be changes + diffs.PushChange(oldlines[x], newlines[y]) + oldlines[x].V = nil + newlines[y].V = nil + x++ + y++ + continue + } + + // x is not equal with y, search down... + foundx, xaty := findLine(oldlines[x], newlines, y+1) + + // Cross check the y with the rest of x... + foundy, yatx := findLine(newlines[y], oldlines, x+1) + + // Both line is missing, its mean changes on current line + if !foundx && !foundy { + diffs.PushChange(oldlines[x], newlines[y]) + oldlines[x].V = nil + newlines[y].V = nil + x++ + y++ + continue + } + + // x still missing, means deletion in old text. + if !foundx && foundy { + for ; x < yatx && x < oldlen; x++ { + diffs.PushDel(oldlines[x]) + oldlines[x].V = nil + } + continue + } + + // we found x but y is missing, its mean addition in new text. + if foundx && !foundy { + for ; y < xaty && y < newlen; y++ { + diffs.PushAdd(newlines[y]) + newlines[y].V = nil + } + continue + } + + if foundx && foundy { + // We found x and y. Check which one is the + // addition or deletion based on line range. + addlen := xaty - y + dellen := yatx - x + + switch { + case addlen < dellen: + for ; y < xaty && y < newlen; y++ { + diffs.PushAdd(newlines[y]) + newlines[y].V = nil + } + + case addlen == dellen: + // Both changes occur between lines + for x < yatx && y < xaty { + diffs.PushChange(oldlines[x], + newlines[y]) + oldlines[x].V = nil + newlines[y].V = nil + x++ + y++ + } + default: + for ; x < yatx && x < oldlen; x++ { + diffs.PushDel(oldlines[x]) + oldlines[x].V = nil + } + } + continue + } + } + + // Check if there is a left over from new text. + for ; y < newlen; y++ { + diffs.PushAdd(newlines[y]) + newlines[y].V = nil + } + + if level == LevelWords { + // Process each changes to find modified chunkes. + for x, change := range diffs.Changes { + adds, dels := Bytes(change.Old.V, change.New.V, 0, 0) + diffs.Changes[x].Adds = adds + diffs.Changes[x].Dels = dels + } + } + + return diffs +} + +// // PushAdd will add new line to diff set. 
// func (diffs *Data) PushAdd(new text.Line) { diff --git a/lib/text/diff/diff_test.go b/lib/text/diff/diff_test.go index a652b9dd..dc32de7e 100644 --- a/lib/text/diff/diff_test.go +++ b/lib/text/diff/diff_test.go @@ -288,15 +288,15 @@ func compareChunks(t *testing.T, adds, dels text.Chunks, } } -func testDiffLines(t *testing.T, old, new text.Line, +func testDiffBytes(t *testing.T, old, new text.Line, expAdds, expDels []string, ) { - adds, dels := Lines(old.V, new.V, 0, 0) + adds, dels := Bytes(old.V, new.V, 0, 0) compareChunks(t, adds, dels, expAdds, expDels) } -func TestDiffLines(t *testing.T) { +func TestBytes(t *testing.T) { old := text.Line{N: 0, V: []byte("lorem ipsum dolmet")} new := text.Line{N: 0, V: []byte("lorem all ipsum")} @@ -307,22 +307,22 @@ func TestDiffLines(t *testing.T) { []string{" dolmet"}, } - testDiffLines(t, old, new, expAdds[0], expDels[0]) + testDiffBytes(t, old, new, expAdds[0], expDels[0]) old = text.Line{N: 0, V: []byte("lorem ipsum dolmet")} new = text.Line{N: 0, V: []byte("lorem ipsum")} - testDiffLines(t, old, new, []string{}, expDels[0]) + testDiffBytes(t, old, new, []string{}, expDels[0]) old = text.Line{N: 0, V: []byte("lorem ipsum")} new = text.Line{N: 0, V: []byte("lorem ipsum dolmet")} - testDiffLines(t, old, new, expDels[0], []string{}) + testDiffBytes(t, old, new, expDels[0], []string{}) old = text.Line{N: 0, V: []byte("{{Pharaoh Infobox |")} new = text.Line{N: 0, V: []byte("{{Infobox pharaoh")} - testDiffLines(t, old, new, []string{"pharaoh"}, + testDiffBytes(t, old, new, []string{"pharaoh"}, []string{"Pharaoh ", "|"}) } diff --git a/lib/text/diff/diffinterface.go b/lib/text/diff/diffinterface.go index 4e61948c..a876906f 100644 --- a/lib/text/diff/diffinterface.go +++ b/lib/text/diff/diffinterface.go @@ -6,7 +6,6 @@ package diff import ( "bufio" - "bytes" "io" "os" @@ -64,10 +63,10 @@ func ReadLines(f string) (lines text.Lines, e error) { } // -// Bytes compare two slice of bytes and return true if equal or false +// 
IsEqual compare two slice of bytes and return true if equal or false // otherwise. // -func Bytes(oldb, newb []byte) (equal bool) { +func IsEqual(oldb, newb []byte) (equal bool) { oldblen := len(oldb) newblen := len(newb) @@ -214,9 +213,7 @@ func findLine(line text.Line, text text.Lines, startat int) ( textlen := len(text) for n = startat; n < textlen; n++ { - isEqual := Bytes(line.V, text[n].V) - - if isEqual { + if IsEqual(line.V, text[n].V) { return true, n } } @@ -227,176 +224,21 @@ func findLine(line text.Line, text text.Lines, startat int) ( // // Files compare two files. // -func Files(oldf, newf string, difflevel int) (diffs Data, e error) { +func Files(oldf, newf string, level int) (diffs Data, e error) { oldlines, e := ReadLines(oldf) if e != nil { return } - newlines, e := ReadLines(newf) if e != nil { return } - - oldlen := len(oldlines) - newlen := len(newlines) - x := 0 - y := 0 - - for x < oldlen { - if y == newlen { - // New text has been full examined. Leave out the old - // text that means deletion at the end of text. - diffs.PushDel(oldlines[x]) - oldlines[x].V = nil - x++ - continue - } - - // Compare old line with new line. - isEqual := Bytes(oldlines[x].V, newlines[y].V) - - if isEqual { - oldlines[x].V = nil - newlines[y].V = nil - x++ - y++ - continue - } - - // Check for whitespace changes - oldlinetrim := bytes.TrimSpace(oldlines[x].V) - newlinetrim := bytes.TrimSpace(newlines[y].V) - oldtrimlen := len(oldlinetrim) - newtrimlen := len(newlinetrim) - - // Both are empty, probably one of them is changing - if oldtrimlen <= 0 && newtrimlen <= 0 { - diffs.PushChange(oldlines[x], newlines[y]) - oldlines[x].V = nil - newlines[y].V = nil - x++ - y++ - continue - } - - // Old is empty or contain only whitespaces. - if oldtrimlen <= 0 { - diffs.PushDel(oldlines[x]) - oldlines[x].V = nil - x++ - continue - } - - // New is empty or contain only whitespaces. 
- if newtrimlen <= 0 { - diffs.PushAdd(newlines[y]) - newlines[y].V = nil - y++ - continue - } - - ratio, _, _ := BytesRatio(oldlines[x].V, newlines[y].V, - DefMatchLen) - - if ratio > DefMatchRatio { - // Ratio of similar bytes is higher than minimum - // expectation. So, it must be changes - diffs.PushChange(oldlines[x], newlines[y]) - oldlines[x].V = nil - newlines[y].V = nil - x++ - y++ - continue - } - - // x is not equal with y, search down... - foundx, xaty := findLine(oldlines[x], newlines, y+1) - - // Cross check the y with the rest of x... - foundy, yatx := findLine(newlines[y], oldlines, x+1) - - // Both line is missing, its mean changes on current line - if !foundx && !foundy { - diffs.PushChange(oldlines[x], newlines[y]) - oldlines[x].V = nil - newlines[y].V = nil - x++ - y++ - continue - } - - // x still missing, means deletion in old text. - if !foundx && foundy { - for ; x < yatx && x < oldlen; x++ { - diffs.PushDel(oldlines[x]) - oldlines[x].V = nil - } - continue - } - - // we found x but y is missing, its mean addition in new text. - if foundx && !foundy { - for ; y < xaty && y < newlen; y++ { - diffs.PushAdd(newlines[y]) - newlines[y].V = nil - } - continue - } - - if foundx && foundy { - // We found x and y. Check which one is the - // addition or deletion based on line range. - addlen := xaty - y - dellen := yatx - x - - switch { - case addlen < dellen: - for ; y < xaty && y < newlen; y++ { - diffs.PushAdd(newlines[y]) - newlines[y].V = nil - } - - case addlen == dellen: - // Both changes occur between lines - for x < yatx && y < xaty { - diffs.PushChange(oldlines[x], - newlines[y]) - oldlines[x].V = nil - newlines[y].V = nil - x++ - y++ - } - default: - for ; x < yatx && x < oldlen; x++ { - diffs.PushDel(oldlines[x]) - oldlines[x].V = nil - } - } - continue - } - } - - // Check if there is a left over from new text. 
- for ; y < newlen; y++ { - diffs.PushAdd(newlines[y]) - newlines[y].V = nil - } - - if difflevel == LevelWords { - // Process each changes to find modified chunkes. - for x, change := range diffs.Changes { - adds, dels := Lines(change.Old.V, change.New.V, 0, 0) - diffs.Changes[x].Adds = adds - diffs.Changes[x].Dels = dels - } - } - - return diffs, e + diffs = Lines(oldlines, newlines, level) + return diffs, nil } // -// Lines given two similar lines, find and return the differences (additions and +// Bytes given two similar lines, find and return the differences (additions and // deletion) between them. // // Case 1: addition on new or deletion on old. @@ -429,7 +271,7 @@ func Files(oldf, newf string, difflevel int) (diffs Data, e error) { // old: 0001000 // new: 0002000 // -func Lines(old, new []byte, atx, aty int) (adds, dels text.Chunks) { +func Bytes(old, new []byte, atx, aty int) (adds, dels text.Chunks) { oldlen := len(old) newlen := len(new) @@ -563,7 +405,7 @@ func Lines(old, new []byte, atx, aty int) (adds, dels text.Chunks) { oldleft = old[x+yatx : xend+1] } - addsleft, delsleft := Lines(oldleft, newleft, atx+x, aty+y) + addsleft, delsleft := Bytes(oldleft, newleft, atx+x, aty+y) if len(addsleft) > 0 { adds = append(adds, addsleft...) diff --git a/lib/text/diff/linechange.go b/lib/text/diff/linechange.go index c450e26c..a280d10b 100644 --- a/lib/text/diff/linechange.go +++ b/lib/text/diff/linechange.go @@ -32,9 +32,9 @@ func NewLineChange(old, new text.Line) *LineChange { // func (change LineChange) String() string { return fmt.Sprintf("LineChange: {\n"+ - " Old : %v\n"+ - " New : %v\n"+ - " Adds : %v\n"+ - " Dels : %v\n"+ + " Old : %s\n"+ + " New : %s\n"+ + " Adds : %s\n"+ + " Dels : %s\n"+ "}\n", change.Old, change.New, change.Adds, change.Dels) } |
