// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
// Package diff computes differences between text files or strings.
Ian Cottrell | 85edb9e | 2019-08-19 19:28:08 -0400 | [diff] [blame] | 6 | package diff |
| 7 | |
| 8 | import ( |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 9 | "fmt" |
Ian Cottrell | 85edb9e | 2019-08-19 19:28:08 -0400 | [diff] [blame] | 10 | "sort" |
Ian Cottrell | c9f9432 | 2019-10-03 14:44:07 -0400 | [diff] [blame] | 11 | "strings" |
Ian Cottrell | 85edb9e | 2019-08-19 19:28:08 -0400 | [diff] [blame] | 12 | ) |
| 13 | |
// An Edit describes the replacement of a portion of a text file:
// the bytes src[Start:End] are replaced by New.
type Edit struct {
	Start int    // byte offset at which the replaced region begins
	End   int    // byte offset at which the replaced region ends (exclusive)
	New   string // the replacement
}

// String formats the edit as {Start:N,End:N,New:"..."},
// with the replacement text quoted using %q.
func (e Edit) String() string {
	const layout = "{Start:%d,End:%d,New:%q}"
	return fmt.Sprintf(layout, e.Start, e.End, e.New)
}
| 23 | |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 24 | // Apply applies a sequence of edits to the src buffer and returns the |
| 25 | // result. Edits are applied in order of start offset; edits with the |
| 26 | // same start offset are applied in they order they were provided. |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 27 | // |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 28 | // Apply returns an error if any edit is out of bounds, |
| 29 | // or if any pair of edits is overlapping. |
| 30 | func Apply(src string, edits []Edit) (string, error) { |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 31 | edits, size, err := validate(src, edits) |
| 32 | if err != nil { |
| 33 | return "", err |
Ian Cottrell | 5bee6a6 | 2019-10-03 14:18:24 -0400 | [diff] [blame] | 34 | } |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 35 | |
| 36 | // Apply edits. |
| 37 | out := make([]byte, 0, size) |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 38 | lastEnd := 0 |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 39 | for _, edit := range edits { |
| 40 | if lastEnd < edit.Start { |
| 41 | out = append(out, src[lastEnd:edit.Start]...) |
| 42 | } |
| 43 | out = append(out, edit.New...) |
| 44 | lastEnd = edit.End |
Ian Cottrell | 5bee6a6 | 2019-10-03 14:18:24 -0400 | [diff] [blame] | 45 | } |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 46 | out = append(out, src[lastEnd:]...) |
| 47 | |
| 48 | if len(out) != size { |
| 49 | panic("wrong size") |
| 50 | } |
| 51 | |
| 52 | return string(out), nil |
Ian Cottrell | 5bee6a6 | 2019-10-03 14:18:24 -0400 | [diff] [blame] | 53 | } |
Ian Cottrell | c9f9432 | 2019-10-03 14:44:07 -0400 | [diff] [blame] | 54 | |
Alan Donovan | e5c9e63 | 2023-02-07 15:42:30 -0500 | [diff] [blame] | 55 | // ApplyBytes is like Apply, but it accepts a byte slice. |
| 56 | // The result is always a new array. |
| 57 | func ApplyBytes(src []byte, edits []Edit) ([]byte, error) { |
| 58 | res, err := Apply(string(src), edits) |
| 59 | return []byte(res), err |
| 60 | } |
| 61 | |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 62 | // validate checks that edits are consistent with src, |
| 63 | // and returns the size of the patched output. |
| 64 | // It may return a different slice. |
| 65 | func validate(src string, edits []Edit) ([]Edit, int, error) { |
| 66 | if !sort.IsSorted(editsSort(edits)) { |
| 67 | edits = append([]Edit(nil), edits...) |
Alan Donovan | 29429f5 | 2022-10-11 13:52:53 -0400 | [diff] [blame] | 68 | SortEdits(edits) |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 69 | } |
| 70 | |
| 71 | // Check validity of edits and compute final size. |
| 72 | size := len(src) |
| 73 | lastEnd := 0 |
| 74 | for _, edit := range edits { |
| 75 | if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) { |
| 76 | return nil, 0, fmt.Errorf("diff has out-of-bounds edits") |
| 77 | } |
| 78 | if edit.Start < lastEnd { |
| 79 | return nil, 0, fmt.Errorf("diff has overlapping edits") |
| 80 | } |
| 81 | size += len(edit.New) + edit.Start - edit.End |
| 82 | lastEnd = edit.End |
| 83 | } |
| 84 | |
| 85 | return edits, size, nil |
| 86 | } |
| 87 | |
Alan Donovan | 29429f5 | 2022-10-11 13:52:53 -0400 | [diff] [blame] | 88 | // SortEdits orders a slice of Edits by (start, end) offset. |
| 89 | // This ordering puts insertions (end = start) before deletions |
| 90 | // (end > start) at the same point, but uses a stable sort to preserve |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 91 | // the order of multiple insertions at the same point. |
| 92 | // (Apply detects multiple deletions at the same point as an error.) |
Alan Donovan | 29429f5 | 2022-10-11 13:52:53 -0400 | [diff] [blame] | 93 | func SortEdits(edits []Edit) { |
| 94 | sort.Stable(editsSort(edits)) |
Alan Donovan | 60ddcca | 2022-10-05 18:58:00 -0400 | [diff] [blame] | 95 | } |
| 96 | |
| 97 | type editsSort []Edit |
| 98 | |
| 99 | func (a editsSort) Len() int { return len(a) } |
| 100 | func (a editsSort) Less(i, j int) bool { |
| 101 | if cmp := a[i].Start - a[j].Start; cmp != 0 { |
| 102 | return cmp < 0 |
| 103 | } |
| 104 | return a[i].End < a[j].End |
| 105 | } |
| 106 | func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
| 107 | |
Alan Donovan | f2c4579 | 2022-10-06 16:11:42 -0400 | [diff] [blame] | 108 | // lineEdits expands and merges a sequence of edits so that each |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 109 | // resulting edit replaces one or more complete lines. |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 110 | // See ApplyEdits for preconditions. |
| 111 | func lineEdits(src string, edits []Edit) ([]Edit, error) { |
| 112 | edits, _, err := validate(src, edits) |
| 113 | if err != nil { |
| 114 | return nil, err |
| 115 | } |
Ian Cottrell | 7025dca | 2019-10-03 14:56:48 -0400 | [diff] [blame] | 116 | |
Alan Donovan | 1943c1e | 2023-05-30 21:10:18 -0400 | [diff] [blame] | 117 | // Do all deletions begin and end at the start of a line, |
| 118 | // and all insertions end with a newline? |
Alan Donovan | 0b4461b | 2023-05-30 22:54:01 -0400 | [diff] [blame] | 119 | // (This is merely a fast path.) |
Ian Cottrell | 7025dca | 2019-10-03 14:56:48 -0400 | [diff] [blame] | 120 | for _, edit := range edits { |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 121 | if edit.Start >= len(src) || // insertion at EOF |
| 122 | edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start |
Alan Donovan | 1943c1e | 2023-05-30 21:10:18 -0400 | [diff] [blame] | 123 | edit.End > 0 && src[edit.End-1] != '\n' || // not at line start |
| 124 | edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert |
Alan Donovan | 0b4461b | 2023-05-30 22:54:01 -0400 | [diff] [blame] | 125 | goto expand // slow path |
Ian Cottrell | 7025dca | 2019-10-03 14:56:48 -0400 | [diff] [blame] | 126 | } |
| 127 | } |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 128 | return edits, nil // aligned |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 129 | |
| 130 | expand: |
Alan Donovan | 0b4461b | 2023-05-30 22:54:01 -0400 | [diff] [blame] | 131 | if len(edits) == 0 { |
| 132 | return edits, nil // no edits (unreachable due to fast path) |
| 133 | } |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 134 | expanded := make([]Edit, 0, len(edits)) // a guess |
| 135 | prev := edits[0] |
| 136 | // TODO(adonovan): opt: start from the first misaligned edit. |
| 137 | // TODO(adonovan): opt: avoid quadratic cost of string += string. |
| 138 | for _, edit := range edits[1:] { |
| 139 | between := src[prev.End:edit.Start] |
| 140 | if !strings.Contains(between, "\n") { |
| 141 | // overlapping lines: combine with previous edit. |
| 142 | prev.New += between + edit.New |
| 143 | prev.End = edit.End |
| 144 | } else { |
| 145 | // non-overlapping lines: flush previous edit. |
| 146 | expanded = append(expanded, expandEdit(prev, src)) |
| 147 | prev = edit |
| 148 | } |
| 149 | } |
Alan Donovan | a410e98 | 2022-10-07 08:26:53 -0400 | [diff] [blame] | 150 | return append(expanded, expandEdit(prev, src)), nil // flush final edit |
Ian Cottrell | 7025dca | 2019-10-03 14:56:48 -0400 | [diff] [blame] | 151 | } |
| 152 | |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 153 | // expandEdit returns edit expanded to complete whole lines. |
| 154 | func expandEdit(edit Edit, src string) Edit { |
| 155 | // Expand start left to start of line. |
cui fliter | 9161e3a | 2023-07-14 14:55:17 +0800 | [diff] [blame^] | 156 | // (delta is the zero-based column number of start.) |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 157 | start := edit.Start |
| 158 | if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 { |
| 159 | edit.Start -= delta |
| 160 | edit.New = src[start-delta:start] + edit.New |
Ian Cottrell | 7025dca | 2019-10-03 14:56:48 -0400 | [diff] [blame] | 161 | } |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 162 | |
| 163 | // Expand end right to end of line. |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 164 | end := edit.End |
Alan Donovan | 0b4461b | 2023-05-30 22:54:01 -0400 | [diff] [blame] | 165 | if end > 0 && src[end-1] != '\n' || |
| 166 | edit.New != "" && edit.New[len(edit.New)-1] != '\n' { |
| 167 | if nl := strings.IndexByte(src[end:], '\n'); nl < 0 { |
| 168 | edit.End = len(src) // extend to EOF |
| 169 | } else { |
| 170 | edit.End = end + nl + 1 // extend beyond \n |
| 171 | } |
Ian Cottrell | 7025dca | 2019-10-03 14:56:48 -0400 | [diff] [blame] | 172 | } |
Peter Weinbergr | 1faecd3 | 2022-12-22 13:48:53 -0500 | [diff] [blame] | 173 | edit.New += src[end:edit.End] |
Alan Donovan | d96b238 | 2022-09-30 21:58:21 -0400 | [diff] [blame] | 174 | |
| 175 | return edit |
Ian Cottrell | c9f9432 | 2019-10-03 14:44:07 -0400 | [diff] [blame] | 176 | } |