internal/diff: abolish errors
Computing the difference between two strings is logically an
infallible operation. This change makes the code reflect that. The
actual failures were unreachable given consistent inputs, but that was
hard to see from the complexity of the logic surrounding span.Span.
(The problem only occurs when converting offsets beyond the end of the
file to Spans, but the code preserves the integrity of offsets.)
gopls' "old" hooks.ComputeEdits impl (based on sergi/go-diff) now
reports a bug and returns a single diff for the entire file if it
panics.
Also, first steps towards simpler API and a
reusable diff package in x/tools:
- add TODO for new API. In particular, the diff package shouldn't care
about filenames, spans, and URIs. These are gopls concerns.
- diff.String is the main diff function.
- diff.Unified prints edits in unified form;
all its internals are now hidden.
- the ComputeEdits func type is moved to gopls (source.DiffFunction)
- remove all non-critical uses of myers.ComputeEdits. The only
remaining one is in gopls' defaults, but perhaps that gets
overridden by the default GoDiff setting in hooks, to BothDiffs
(sergi + pjw impls), so maybe it's now actually unused in practice?
Change-Id: I6ceb5c670897abbf285b243530a7372dfa41edf6
Reviewed-on: https://go-review.googlesource.com/c/tools/+/436778
Run-TryBot: Alan Donovan <adonovan@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
gopls-CI: kokoro <noreply+kokoro@google.com>
diff --git a/internal/diff/diff.go b/internal/diff/diff.go
index 8fd6824..b00ffbc 100644
--- a/internal/diff/diff.go
+++ b/internal/diff/diff.go
@@ -12,6 +12,19 @@
"golang.org/x/tools/internal/span"
)
+// TODO(adonovan): simplify this package to just:
+//
+// package diff
+// type Edit struct { Start, End int; New string }
+// func Strings(old, new string) []Edit
+// func Unified(oldLabel, newLabel string, old string, edits []Edit) string
+// func Apply(old string, edits []Edit) (string, error)
+//
+// and move everything else into gopls, including the concepts of filenames and spans.
+// Observe that TextEdit.URI is irrelevant to Unified.
+// - delete LineEdits? (used only by Unified and test)
+// - delete Lines (unused except by its test)
+
// TextEdit represents a change to a section of a document.
// The text within the specified span should be replaced by the supplied new text.
type TextEdit struct {
@@ -19,10 +32,6 @@
NewText string
}
-// ComputeEdits is the type for a function that produces a set of edits that
-// convert from the before content to the after content.
-type ComputeEdits func(uri span.URI, before, after string) ([]TextEdit, error)
-
// SortTextEdits attempts to order all edits by their starting points.
// The sort is stable so that edits with the same starting point will not
// be reordered.
@@ -37,6 +46,9 @@
// content.
// It may panic or produce garbage if the edits are not valid for the provided
// before content.
+// TODO(adonovan): this function must not panic! Make it either cope
+// or report an error. We should not trust that (e.g.) patches supplied
+// as RPC inputs to gopls are consistent.
func ApplyEdits(before string, edits []TextEdit) string {
// Preconditions:
// - all of the edits apply to before
diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go
index b199298..11930de 100644
--- a/internal/diff/diff_test.go
+++ b/internal/diff/diff_test.go
@@ -33,10 +33,7 @@
func TestNEdits(t *testing.T) {
for i, tc := range difftest.TestCases {
sp := fmt.Sprintf("file://%s.%d", tc.Name, i)
- edits, err := diff.NComputeEdits(span.URI(sp), tc.In, tc.Out)
- if err != nil {
- t.Fatal(err)
- }
+ edits := diff.Strings(span.URI(sp), tc.In, tc.Out)
got := diff.ApplyEdits(tc.In, edits)
if got != tc.Out {
t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out)
@@ -53,10 +50,7 @@
fname := fmt.Sprintf("file://%x", i)
a := randstr("abω", 16)
b := randstr("abωc", 16)
- edits, err := diff.NComputeEdits(span.URI(fname), a, b)
- if err != nil {
- t.Fatalf("%q,%q %v", a, b, err)
- }
+ edits := diff.Strings(span.URI(fname), a, b)
got := diff.ApplyEdits(a, edits)
if got != b {
t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a)
@@ -84,10 +78,7 @@
y = y[:len(y)-1]
}
a, b := strings.SplitAfter(x, "\n"), strings.SplitAfter(y, "\n")
- edits, err := diff.NComputeLineEdits(span.URI(fname), a, b)
- if err != nil {
- t.Fatalf("%q,%q %v", a, b, err)
- }
+ edits := diff.Lines(span.URI(fname), a, b)
got := diff.ApplyEdits(x, edits)
if got != y {
t.Fatalf("%d: got\n%q, wanted\n%q, starting with %q", i, got, y, a)
@@ -113,12 +104,12 @@
func TestUnified(t *testing.T) {
for _, tc := range difftest.TestCases {
t.Run(tc.Name, func(t *testing.T) {
- unified := fmt.Sprint(diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits))
+ unified := diff.Unified(difftest.FileA, difftest.FileB, tc.In, tc.Edits)
if unified != tc.Unified {
t.Errorf("Unified(Edits): got diff:\n%v\nexpected:\n%v", unified, tc.Unified)
}
if tc.LineEdits != nil {
- unified := fmt.Sprint(diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.LineEdits))
+ unified := diff.Unified(difftest.FileA, difftest.FileB, tc.In, tc.LineEdits)
if unified != tc.Unified {
t.Errorf("Unified(LineEdits): got diff:\n%v\nexpected:\n%v", unified, tc.Unified)
}
@@ -131,10 +122,7 @@
a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/internal/span\"\n)\n"
b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/internal/span\"\n)\n"
- diffs, err := diff.NComputeEdits(span.URI("file://one"), a, b)
- if err != nil {
- t.Error(err)
- }
+ diffs := diff.Strings(span.URI("file://one"), a, b)
got := diff.ApplyEdits(a, diffs)
if got != b {
i := 0
@@ -148,10 +136,7 @@
func TestRegressionOld002(t *testing.T) {
a := "n\"\n)\n"
b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
- diffs, err := diff.NComputeEdits(span.URI("file://two"), a, b)
- if err != nil {
- t.Error(err)
- }
+ diffs := diff.Strings(span.URI("file://two"), a, b)
got := diff.ApplyEdits(a, diffs)
if got != b {
i := 0
diff --git a/internal/diff/difftest/difftest.go b/internal/diff/difftest/difftest.go
index e42e0d7..c9808a5 100644
--- a/internal/diff/difftest/difftest.go
+++ b/internal/diff/difftest/difftest.go
@@ -8,7 +8,6 @@
package difftest
import (
- "fmt"
"testing"
"golang.org/x/tools/internal/diff"
@@ -246,15 +245,12 @@
}
}
-func DiffTest(t *testing.T, compute diff.ComputeEdits) {
+func DiffTest(t *testing.T, compute func(uri span.URI, before, after string) []diff.TextEdit) {
for _, test := range TestCases {
t.Run(test.Name, func(t *testing.T) {
- edits, err := compute(span.URIFromPath("/"+test.Name), test.In, test.Out)
- if err != nil {
- t.Fatal(err)
- }
+ edits := compute(span.URIFromPath("/"+test.Name), test.In, test.Out)
got := diff.ApplyEdits(test.In, edits)
- unified := fmt.Sprint(diff.ToUnified(FileA, FileB, test.In, edits))
+ unified := diff.Unified(FileA, FileB, test.In, edits)
if got != test.Out {
t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v", got, unified, test.Out)
}
diff --git a/internal/diff/myers/diff.go b/internal/diff/myers/diff.go
index 07e4028..2188c0d 100644
--- a/internal/diff/myers/diff.go
+++ b/internal/diff/myers/diff.go
@@ -16,7 +16,7 @@
// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
-func ComputeEdits(uri span.URI, before, after string) ([]diff.TextEdit, error) {
+func ComputeEdits(uri span.URI, before, after string) []diff.TextEdit {
ops := operations(splitLines(before), splitLines(after))
edits := make([]diff.TextEdit, 0, len(ops))
for _, op := range ops {
@@ -32,7 +32,7 @@
}
}
}
- return edits, nil
+ return edits
}
type operation struct {
diff --git a/internal/diff/ndiff.go b/internal/diff/ndiff.go
index e537b72..e369f46 100644
--- a/internal/diff/ndiff.go
+++ b/internal/diff/ndiff.go
@@ -5,6 +5,8 @@
package diff
import (
+ "go/token"
+ "log"
"strings"
"unicode/utf8"
@@ -16,51 +18,66 @@
// the value is just a guess
const maxDiffs = 30
-// NComputeEdits computes TextEdits for strings
-// (both it and the diff in the myers package have type ComputeEdits, which
+// Strings computes the differences between two strings.
+// (Both it and the diff in the myers package have type ComputeEdits, which
// is why the arguments are strings, not []bytes.)
-func NComputeEdits(uri span.URI, before, after string) ([]TextEdit, error) {
+// TODO(adonovan): opt: consider switching everything to []bytes, if
+// that's the more common type in practice. Or provide both flavors?
+func Strings(uri span.URI, before, after string) []TextEdit {
if before == after {
// very frequently true
- return nil, nil
+ return nil
}
// the diffs returned by the lcs package use indexes into whatever slice
- // was passed in. TextEdits need a span.Span which is computed with
+ // was passed in. TextEdits need a span.Span which is computed with
// byte offsets, so rune or line offsets need to be converted.
+ // TODO(adonovan): opt: eliminate all the unnecessary allocations.
if needrunes(before) || needrunes(after) {
diffs, _ := lcs.Compute([]rune(before), []rune(after), maxDiffs/2)
diffs = runeOffsets(diffs, []rune(before))
- ans, err := convertDiffs(uri, diffs, []byte(before))
- return ans, err
+ return convertDiffs(uri, diffs, []byte(before))
} else {
diffs, _ := lcs.Compute([]byte(before), []byte(after), maxDiffs/2)
- ans, err := convertDiffs(uri, diffs, []byte(before))
- return ans, err
+ return convertDiffs(uri, diffs, []byte(before))
}
}
-// NComputeLineEdits computes TextEdits for []strings
-func NComputeLineEdits(uri span.URI, before, after []string) ([]TextEdit, error) {
+// Lines computes the differences between two list of lines.
+// TODO(adonovan): unused except by its test. Do we actually need it?
+func Lines(uri span.URI, before, after []string) []TextEdit {
diffs, _ := lcs.Compute(before, after, maxDiffs/2)
diffs = lineOffsets(diffs, before)
- ans, err := convertDiffs(uri, diffs, []byte(strJoin(before)))
+ return convertDiffs(uri, diffs, []byte(strJoin(before)))
// the code is not coping with possible missing \ns at the ends
- return ans, err
}
// convert diffs with byte offsets into diffs with line and column
-func convertDiffs(uri span.URI, diffs []lcs.Diff, src []byte) ([]TextEdit, error) {
+func convertDiffs(uri span.URI, diffs []lcs.Diff, src []byte) []TextEdit {
ans := make([]TextEdit, len(diffs))
- tf := span.NewTokenFile(uri.Filename(), src)
- for i, d := range diffs {
- s := newSpan(uri, d.Start, d.End)
- s, err := s.WithPosition(tf)
+
+ // Reuse the machinery of go/token to convert (content, offset) to (line, column).
+ tf := token.NewFileSet().AddFile("", -1, len(src))
+ tf.SetLinesForContent(src)
+
+ offsetToPoint := func(offset int) span.Point {
+ // Re-use span.ToPosition's EOF workaround.
+ // It is infallible if the diffs are consistent with src.
+ line, col, err := span.ToPosition(tf, offset)
if err != nil {
- return nil, err
+ log.Fatalf("invalid offset: %v", err)
}
- ans[i] = TextEdit{s, d.Text}
+ return span.NewPoint(line, col, offset)
}
- return ans, nil
+
+ for i, d := range diffs {
+ start := offsetToPoint(d.Start)
+ end := start
+ if d.End != d.Start {
+ end = offsetToPoint(d.End)
+ }
+ ans[i] = TextEdit{span.New(uri, start, end), d.Text}
+ }
+ return ans
}
// convert diffs with rune offsets into diffs with byte offsets
@@ -114,10 +131,6 @@
return b.String()
}
-func newSpan(uri span.URI, left, right int) span.Span {
- return span.New(uri, span.NewPoint(0, 0, left), span.NewPoint(0, 0, right))
-}
-
// need runes is true if the string needs to be converted to []rune
// for random access
func needrunes(s string) bool {
diff --git a/internal/diff/unified.go b/internal/diff/unified.go
index 3ea0697..13ef677 100644
--- a/internal/diff/unified.go
+++ b/internal/diff/unified.go
@@ -9,28 +9,34 @@
"strings"
)
-// Unified represents a set of edits as a unified diff.
-type Unified struct {
+// Unified applies the edits to oldContent and presents a unified diff.
+// The two labels are the names of the old and new files.
+func Unified(oldLabel, newLabel string, oldContent string, edits []TextEdit) string {
+ return toUnified(oldLabel, newLabel, oldContent, edits).String()
+}
+
+// unified represents a set of edits as a unified diff.
+type unified struct {
// From is the name of the original file.
From string
// To is the name of the modified file.
To string
// Hunks is the set of edit hunks needed to transform the file content.
- Hunks []*Hunk
+ Hunks []*hunk
}
// Hunk represents a contiguous set of line edits to apply.
-type Hunk struct {
+type hunk struct {
// The line in the original source where the hunk starts.
FromLine int
// The line in the original source where the hunk finishes.
ToLine int
// The set of line based edits to apply.
- Lines []Line
+ Lines []line
}
// Line represents a single line operation to apply as part of a Hunk.
-type Line struct {
+type line struct {
// Kind is the type of line this represents, deletion, insertion or copy.
Kind OpKind
// Content is the content of this line.
@@ -40,6 +46,7 @@
}
// OpKind is used to denote the type of operation a line represents.
+// TODO(adonovan): hide this once the myers package no longer references it.
type OpKind int
const (
@@ -73,10 +80,10 @@
gap = edge * 2
)
-// ToUnified takes a file contents and a sequence of edits, and calculates
+// toUnified takes a file contents and a sequence of edits, and calculates
// a unified diff that represents those edits.
-func ToUnified(fromName, toName string, content string, edits []TextEdit) Unified {
- u := Unified{
+func toUnified(fromName, toName string, content string, edits []TextEdit) unified {
+ u := unified{
From: fromName,
To: toName,
}
@@ -88,7 +95,7 @@
edits = lineEdits(content, edits)
}
lines := splitLines(content)
- var h *Hunk
+ var h *hunk
last := 0
toLine := 0
for _, edit := range edits {
@@ -108,7 +115,7 @@
u.Hunks = append(u.Hunks, h)
}
toLine += start - last
- h = &Hunk{
+ h = &hunk{
FromLine: start + 1,
ToLine: toLine + 1,
}
@@ -119,12 +126,12 @@
}
last = start
for i := start; i < end; i++ {
- h.Lines = append(h.Lines, Line{Kind: Delete, Content: lines[i]})
+ h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]})
last++
}
if edit.NewText != "" {
- for _, line := range splitLines(edit.NewText) {
- h.Lines = append(h.Lines, Line{Kind: Insert, Content: line})
+ for _, content := range splitLines(edit.NewText) {
+ h.Lines = append(h.Lines, line{Kind: Insert, Content: content})
toLine++
}
}
@@ -145,7 +152,7 @@
return lines
}
-func addEqualLines(h *Hunk, lines []string, start, end int) int {
+func addEqualLines(h *hunk, lines []string, start, end int) int {
delta := 0
for i := start; i < end; i++ {
if i < 0 {
@@ -154,25 +161,15 @@
if i >= len(lines) {
return delta
}
- h.Lines = append(h.Lines, Line{Kind: Equal, Content: lines[i]})
+ h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]})
delta++
}
return delta
}
-// Format write a textual representation of u to f (see the String method).
-//
-// TODO(rfindley): investigate (and possibly remove) this method. It's not
-// clear why Unified implements fmt.Formatter, since the formatting rune is not
-// used. Probably it is sufficient to only implement Stringer, but this method
-// was left here defensively.
-func (u Unified) Format(f fmt.State, r rune) {
- fmt.Fprintf(f, "%s", u.String())
-}
-
// String converts a unified diff to the standard textual form for that diff.
// The output of this function can be passed to tools like patch.
-func (u Unified) String() string {
+func (u unified) String() string {
if len(u.Hunks) == 0 {
return ""
}