blob: 050b08ded4604e3c8bb244d66c2e08b59652f6f0 [file] [log] [blame]
Robert Findleyb15dac22022-08-30 14:40:12 -04001// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package diff
6
7import (
Alan Donovanf2c45792022-10-06 16:11:42 -04008 "bytes"
Robert Findleyb15dac22022-08-30 14:40:12 -04009 "unicode/utf8"
10
11 "golang.org/x/tools/internal/diff/lcs"
Robert Findleyb15dac22022-08-30 14:40:12 -040012)
13
Alan Donovanf2c45792022-10-06 16:11:42 -040014// Strings computes the differences between two strings.
15// The resulting edits respect rune boundaries.
16func Strings(before, after string) []Edit {
17 if before == after {
18 return nil // common case
19 }
20
21 if stringIsASCII(before) && stringIsASCII(after) {
Alan Donovan6df6eee2022-10-30 17:24:53 -040022 // TODO(adonovan): opt: specialize diffASCII for strings.
Alan Donovanf2c45792022-10-06 16:11:42 -040023 return diffASCII([]byte(before), []byte(after))
24 }
25 return diffRunes([]rune(before), []rune(after))
26}
27
28// Bytes computes the differences between two byte slices.
29// The resulting edits respect rune boundaries.
30func Bytes(before, after []byte) []Edit {
31 if bytes.Equal(before, after) {
32 return nil // common case
33 }
34
35 if bytesIsASCII(before) && bytesIsASCII(after) {
36 return diffASCII(before, after)
37 }
38 return diffRunes(runes(before), runes(after))
39}
40
41func diffASCII(before, after []byte) []Edit {
Alan Donovan6df6eee2022-10-30 17:24:53 -040042 diffs := lcs.DiffBytes(before, after)
Alan Donovanf2c45792022-10-06 16:11:42 -040043
44 // Convert from LCS diffs.
45 res := make([]Edit, len(diffs))
46 for i, d := range diffs {
Alan Donovan6df6eee2022-10-30 17:24:53 -040047 res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])}
Alan Donovanf2c45792022-10-06 16:11:42 -040048 }
49 return res
50}
51
52func diffRunes(before, after []rune) []Edit {
Alan Donovan6df6eee2022-10-30 17:24:53 -040053 diffs := lcs.DiffRunes(before, after)
Alan Donovanf2c45792022-10-06 16:11:42 -040054
55 // The diffs returned by the lcs package use indexes
56 // into whatever slice was passed in.
57 // Convert rune offsets to byte offsets.
58 res := make([]Edit, len(diffs))
59 lastEnd := 0
60 utf8Len := 0
61 for i, d := range diffs {
62 utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits
63 start := utf8Len
64 utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit
Alan Donovan6df6eee2022-10-30 17:24:53 -040065 res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])}
Alan Donovanf2c45792022-10-06 16:11:42 -040066 lastEnd = d.End
67 }
68 return res
69}
70
Alan Donovanf2c45792022-10-06 16:11:42 -040071// runes is like []rune(string(bytes)) without the duplicate allocation.
72func runes(bytes []byte) []rune {
73 n := utf8.RuneCount(bytes)
74 runes := make([]rune, n)
75 for i := 0; i < n; i++ {
76 r, sz := utf8.DecodeRune(bytes)
77 bytes = bytes[sz:]
78 runes[i] = r
Robert Findleyb15dac22022-08-30 14:40:12 -040079 }
Alan Donovanf2c45792022-10-06 16:11:42 -040080 return runes
Robert Findleyb15dac22022-08-30 14:40:12 -040081}
82
Alan Donovanf2c45792022-10-06 16:11:42 -040083// runesLen returns the length in bytes of the UTF-8 encoding of runes.
84func runesLen(runes []rune) (len int) {
85 for _, r := range runes {
86 len += utf8.RuneLen(r)
Alan Donovan98560772022-09-30 14:18:37 -040087 }
Alan Donovanf2c45792022-10-06 16:11:42 -040088 return len
Robert Findleyb15dac22022-08-30 14:40:12 -040089}
90
cui fliterf90d8ad2022-10-10 20:58:54 +080091// stringIsASCII reports whether s contains only ASCII.
Alan Donovanf2c45792022-10-06 16:11:42 -040092// TODO(adonovan): combine when x/tools allows generics.
93func stringIsASCII(s string) bool {
94 for i := 0; i < len(s); i++ {
95 if s[i] >= utf8.RuneSelf {
96 return false
97 }
98 }
99 return true
100}
101
102func bytesIsASCII(s []byte) bool {
Robert Findleyb15dac22022-08-30 14:40:12 -0400103 for i := 0; i < len(s); i++ {
104 if s[i] >= utf8.RuneSelf {
Alan Donovand96b2382022-09-30 21:58:21 -0400105 return false
Robert Findleyb15dac22022-08-30 14:40:12 -0400106 }
107 }
Alan Donovand96b2382022-09-30 21:58:21 -0400108 return true
Robert Findleyb15dac22022-08-30 14:40:12 -0400109}