blob: a13547b7a7e358ad1a314d33874dc97a7a0169d9 [file] [log] [blame]
Ian Cottrell85edb9e2019-08-19 19:28:08 -04001// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Alan Donovanf2c45792022-10-06 16:11:42 -04005// Package diff computes differences between text files or strings.
Ian Cottrell85edb9e2019-08-19 19:28:08 -04006package diff
7
8import (
Alan Donovan60ddcca2022-10-05 18:58:00 -04009 "fmt"
Ian Cottrell85edb9e2019-08-19 19:28:08 -040010 "sort"
Ian Cottrellc9f94322019-10-03 14:44:07 -040011 "strings"
Ian Cottrell85edb9e2019-08-19 19:28:08 -040012)
13
// An Edit describes the replacement of a portion of a text file:
// the bytes in the half-open range [Start, End) are replaced by New.
type Edit struct {
	Start int    // byte offset at which the replaced region begins
	End   int    // byte offset just past the replaced region
	New   string // the replacement text
}

// String returns a compact, single-line rendering of the edit,
// with the replacement text quoted.
func (e Edit) String() string {
	const layout = "{Start:%d,End:%d,New:%q}"
	return fmt.Sprintf(layout, e.Start, e.End, e.New)
}
23
Alan Donovan60ddcca2022-10-05 18:58:00 -040024// Apply applies a sequence of edits to the src buffer and returns the
25// result. Edits are applied in order of start offset; edits with the
26// same start offset are applied in they order they were provided.
Alan Donovand96b2382022-09-30 21:58:21 -040027//
Alan Donovan60ddcca2022-10-05 18:58:00 -040028// Apply returns an error if any edit is out of bounds,
29// or if any pair of edits is overlapping.
30func Apply(src string, edits []Edit) (string, error) {
Alan Donovana410e982022-10-07 08:26:53 -040031 edits, size, err := validate(src, edits)
32 if err != nil {
33 return "", err
Ian Cottrell5bee6a62019-10-03 14:18:24 -040034 }
Alan Donovan60ddcca2022-10-05 18:58:00 -040035
36 // Apply edits.
37 out := make([]byte, 0, size)
Alan Donovana410e982022-10-07 08:26:53 -040038 lastEnd := 0
Alan Donovan60ddcca2022-10-05 18:58:00 -040039 for _, edit := range edits {
40 if lastEnd < edit.Start {
41 out = append(out, src[lastEnd:edit.Start]...)
42 }
43 out = append(out, edit.New...)
44 lastEnd = edit.End
Ian Cottrell5bee6a62019-10-03 14:18:24 -040045 }
Alan Donovan60ddcca2022-10-05 18:58:00 -040046 out = append(out, src[lastEnd:]...)
47
48 if len(out) != size {
49 panic("wrong size")
50 }
51
52 return string(out), nil
Ian Cottrell5bee6a62019-10-03 14:18:24 -040053}
Ian Cottrellc9f94322019-10-03 14:44:07 -040054
Alan Donovane5c9e632023-02-07 15:42:30 -050055// ApplyBytes is like Apply, but it accepts a byte slice.
56// The result is always a new array.
57func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
58 res, err := Apply(string(src), edits)
59 return []byte(res), err
60}
61
Alan Donovana410e982022-10-07 08:26:53 -040062// validate checks that edits are consistent with src,
63// and returns the size of the patched output.
64// It may return a different slice.
65func validate(src string, edits []Edit) ([]Edit, int, error) {
66 if !sort.IsSorted(editsSort(edits)) {
67 edits = append([]Edit(nil), edits...)
Alan Donovan29429f52022-10-11 13:52:53 -040068 SortEdits(edits)
Alan Donovana410e982022-10-07 08:26:53 -040069 }
70
71 // Check validity of edits and compute final size.
72 size := len(src)
73 lastEnd := 0
74 for _, edit := range edits {
75 if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
76 return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
77 }
78 if edit.Start < lastEnd {
79 return nil, 0, fmt.Errorf("diff has overlapping edits")
80 }
81 size += len(edit.New) + edit.Start - edit.End
82 lastEnd = edit.End
83 }
84
85 return edits, size, nil
86}
87
Alan Donovan29429f52022-10-11 13:52:53 -040088// SortEdits orders a slice of Edits by (start, end) offset.
89// This ordering puts insertions (end = start) before deletions
90// (end > start) at the same point, but uses a stable sort to preserve
Alan Donovan60ddcca2022-10-05 18:58:00 -040091// the order of multiple insertions at the same point.
92// (Apply detects multiple deletions at the same point as an error.)
Alan Donovan29429f52022-10-11 13:52:53 -040093func SortEdits(edits []Edit) {
94 sort.Stable(editsSort(edits))
Alan Donovan60ddcca2022-10-05 18:58:00 -040095}
96
97type editsSort []Edit
98
99func (a editsSort) Len() int { return len(a) }
100func (a editsSort) Less(i, j int) bool {
101 if cmp := a[i].Start - a[j].Start; cmp != 0 {
102 return cmp < 0
103 }
104 return a[i].End < a[j].End
105}
106func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
107
Alan Donovanf2c45792022-10-06 16:11:42 -0400108// lineEdits expands and merges a sequence of edits so that each
Alan Donovand96b2382022-09-30 21:58:21 -0400109// resulting edit replaces one or more complete lines.
Alan Donovana410e982022-10-07 08:26:53 -0400110// See ApplyEdits for preconditions.
111func lineEdits(src string, edits []Edit) ([]Edit, error) {
112 edits, _, err := validate(src, edits)
113 if err != nil {
114 return nil, err
115 }
Ian Cottrell7025dca2019-10-03 14:56:48 -0400116
Alan Donovan1943c1e2023-05-30 21:10:18 -0400117 // Do all deletions begin and end at the start of a line,
118 // and all insertions end with a newline?
Alan Donovan0b4461b2023-05-30 22:54:01 -0400119 // (This is merely a fast path.)
Ian Cottrell7025dca2019-10-03 14:56:48 -0400120 for _, edit := range edits {
Alan Donovand96b2382022-09-30 21:58:21 -0400121 if edit.Start >= len(src) || // insertion at EOF
122 edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start
Alan Donovan1943c1e2023-05-30 21:10:18 -0400123 edit.End > 0 && src[edit.End-1] != '\n' || // not at line start
124 edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert
Alan Donovan0b4461b2023-05-30 22:54:01 -0400125 goto expand // slow path
Ian Cottrell7025dca2019-10-03 14:56:48 -0400126 }
127 }
Alan Donovana410e982022-10-07 08:26:53 -0400128 return edits, nil // aligned
Alan Donovand96b2382022-09-30 21:58:21 -0400129
130expand:
Alan Donovan0b4461b2023-05-30 22:54:01 -0400131 if len(edits) == 0 {
132 return edits, nil // no edits (unreachable due to fast path)
133 }
Alan Donovand96b2382022-09-30 21:58:21 -0400134 expanded := make([]Edit, 0, len(edits)) // a guess
135 prev := edits[0]
136 // TODO(adonovan): opt: start from the first misaligned edit.
137 // TODO(adonovan): opt: avoid quadratic cost of string += string.
138 for _, edit := range edits[1:] {
139 between := src[prev.End:edit.Start]
140 if !strings.Contains(between, "\n") {
141 // overlapping lines: combine with previous edit.
142 prev.New += between + edit.New
143 prev.End = edit.End
144 } else {
145 // non-overlapping lines: flush previous edit.
146 expanded = append(expanded, expandEdit(prev, src))
147 prev = edit
148 }
149 }
Alan Donovana410e982022-10-07 08:26:53 -0400150 return append(expanded, expandEdit(prev, src)), nil // flush final edit
Ian Cottrell7025dca2019-10-03 14:56:48 -0400151}
152
Alan Donovand96b2382022-09-30 21:58:21 -0400153// expandEdit returns edit expanded to complete whole lines.
154func expandEdit(edit Edit, src string) Edit {
155 // Expand start left to start of line.
cui fliter9161e3a2023-07-14 14:55:17 +0800156 // (delta is the zero-based column number of start.)
Alan Donovand96b2382022-09-30 21:58:21 -0400157 start := edit.Start
158 if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
159 edit.Start -= delta
160 edit.New = src[start-delta:start] + edit.New
Ian Cottrell7025dca2019-10-03 14:56:48 -0400161 }
Alan Donovand96b2382022-09-30 21:58:21 -0400162
163 // Expand end right to end of line.
Alan Donovand96b2382022-09-30 21:58:21 -0400164 end := edit.End
Alan Donovan0b4461b2023-05-30 22:54:01 -0400165 if end > 0 && src[end-1] != '\n' ||
166 edit.New != "" && edit.New[len(edit.New)-1] != '\n' {
167 if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
168 edit.End = len(src) // extend to EOF
169 } else {
170 edit.End = end + nl + 1 // extend beyond \n
171 }
Ian Cottrell7025dca2019-10-03 14:56:48 -0400172 }
Peter Weinbergr1faecd32022-12-22 13:48:53 -0500173 edit.New += src[end:edit.End]
Alan Donovand96b2382022-09-30 21:58:21 -0400174
175 return edit
Ian Cottrellc9f94322019-10-03 14:44:07 -0400176}