blob: 08ce2a8085bf2ba9baff3bbfb15e50c444ccf6e9 [file] [log] [blame] [edit]
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modernize
import (
"fmt"
"go/ast"
"go/constant"
"go/token"
"go/types"
"iter"
"strconv"
"golang.org/x/tools/go/analysis"
"golang.org/x/tools/go/analysis/passes/inspect"
"golang.org/x/tools/go/ast/edge"
"golang.org/x/tools/go/ast/inspector"
"golang.org/x/tools/go/types/typeutil"
"golang.org/x/tools/internal/analysis/analyzerutil"
typeindexanalyzer "golang.org/x/tools/internal/analysis/typeindex"
"golang.org/x/tools/internal/astutil"
"golang.org/x/tools/internal/goplsexport"
"golang.org/x/tools/internal/refactor"
"golang.org/x/tools/internal/typesinternal"
"golang.org/x/tools/internal/typesinternal/typeindex"
"golang.org/x/tools/internal/versions"
)
var stringscutAnalyzer = &analysis.Analyzer{
Name: "stringscut",
Doc: analyzerutil.MustExtractDoc(doc, "stringscut"),
Requires: []*analysis.Analyzer{
inspect.Analyzer,
typeindexanalyzer.Analyzer,
},
Run: stringscut,
URL: "https://pkg.go.dev/golang.org/x/tools/go/analysis/passes/modernize#stringscut",
}
func init() {
// Export to gopls until this is a published modernizer.
goplsexport.StringsCutModernizer = stringscutAnalyzer
}
// stringscut offers a fix to replace an occurrence of strings.Index{,Byte} with
// strings.{Cut,Contains}, and similar fixes for functions in the bytes package.
// Consider some candidate for replacement i := strings.Index(s, substr).
// The following must hold for a replacement to occur:
//
// 1. All instances of i and s must be in one of these forms.
//
// Binary expressions must be inequalities equivalent to
// "Index failed" (e.g. i < 0) or "Index succeeded" (i >= 0),
// or identities such as these (and their negations):
//
// 0 > i (flips left and right)
// i <= -1, -1 >= i (replace strict inequality by non-strict)
// i == -1, -1 == i (Index() guarantees i < 0 => i == -1)
//
// Slice expressions:
// a: s[:i], s[0:i]
// b: s[i+len(substr):], s[len(substr) + i:], s[i + const], s[k + i] (where k = len(substr))
//
// 2. There can be no uses of s, substr, or i where they are
// potentially modified (i.e. in assignments, or function calls with unknown side
// effects).
//
// Then, the replacement involves the following substitutions:
//
// 1. Replace "i := strings.Index(s, substr)" with "before, after, ok := strings.Cut(s, substr)"
//
// 2. Replace instances of binary expressions (a) with !ok and binary expressions (b) with ok.
//
// 3. Replace slice expressions (a) with "before" and slice expressions (b) with after.
//
// 4. The assignments to before, after, and ok may use the blank identifier "_" if they are unused.
//
// For example:
//
// i := strings.Index(s, substr)
// if i >= 0 {
// use(s[:i], s[i+len(substr):])
// }
//
// Would become:
//
// before, after, ok := strings.Cut(s, substr)
// if ok {
// use(before, after)
// }
//
// If the condition involving `i` is equivalent to i >= 0, then we replace it with
// `if ok“.
// If the condition is negated (e.g. equivalent to `i < 0`), we use `if !ok` instead.
// If the slices of `s` match `s[:i]` or `s[i+len(substr):]` or their variants listed above,
// then we replace them with before and after.
//
// When the index `i` is used only to check for the presence of the substring or byte slice,
// the suggested fix uses Contains() instead of Cut.
//
// For example:
//
// i := strings.Index(s, substr)
// if i >= 0 {
// return
// }
//
// Would become:
//
// found := strings.Contains(s, substr)
// if found {
// return
// }
func stringscut(pass *analysis.Pass) (any, error) {
var (
index = pass.ResultOf[typeindexanalyzer.Analyzer].(*typeindex.Index)
info = pass.TypesInfo
stringsIndex = index.Object("strings", "Index")
stringsIndexByte = index.Object("strings", "IndexByte")
bytesIndex = index.Object("bytes", "Index")
bytesIndexByte = index.Object("bytes", "IndexByte")
)
for _, obj := range []types.Object{
stringsIndex,
stringsIndexByte,
bytesIndex,
bytesIndexByte,
} {
// (obj may be nil)
nextcall:
for curCall := range index.Calls(obj) {
// Check file version.
if !analyzerutil.FileUsesGoVersion(pass, astutil.EnclosingFile(curCall), versions.Go1_18) {
continue // strings.Index not available in this file
}
indexCall := curCall.Node().(*ast.CallExpr) // the call to strings.Index, etc.
obj := typeutil.Callee(info, indexCall)
if obj == nil {
continue
}
var iIdent *ast.Ident // defining identifier of i var
switch ek, idx := curCall.ParentEdge(); ek {
case edge.ValueSpec_Values:
// Have: var i = strings.Index(...)
curName := curCall.Parent().ChildAt(edge.ValueSpec_Names, idx)
iIdent = curName.Node().(*ast.Ident)
case edge.AssignStmt_Rhs:
// Have: i := strings.Index(...)
// (Must be i's definition.)
curLhs := curCall.Parent().ChildAt(edge.AssignStmt_Lhs, idx)
iIdent, _ = curLhs.Node().(*ast.Ident) // may be nil
}
if iIdent == nil {
continue
}
// Inv: iIdent is i's definition. The following would be skipped: 'var i int; i = strings.Index(...)'
// Get uses of i.
iObj := info.ObjectOf(iIdent)
if iObj == nil {
continue
}
var (
s = indexCall.Args[0]
substr = indexCall.Args[1]
)
// Check that there are no statements that alter the value of s
// or substr after the call to Index().
if !indexArgValid(info, index, s, indexCall.Pos()) ||
!indexArgValid(info, index, substr, indexCall.Pos()) {
continue nextcall
}
// Next, examine all uses of i. If the only uses are of the
// forms mentioned above (e.g. i < 0, i >= 0, s[:i] and s[i +
// len(substr)]), then we can replace the call to Index()
// with a call to Cut() and use the returned ok, before,
// and after variables accordingly.
negative, nonnegative, beforeSlice, afterSlice := checkIdxUses(pass.TypesInfo, index.Uses(iObj), s, substr)
// Either there are no uses of before, after, or ok, or some use
// of i does not match our criteria - don't suggest a fix.
if negative == nil && nonnegative == nil && beforeSlice == nil && afterSlice == nil {
continue
}
// If the only uses are ok and !ok, don't suggest a Cut() fix - these should be using Contains()
isContains := (len(negative) > 0 || len(nonnegative) > 0) && len(beforeSlice) == 0 && len(afterSlice) == 0
scope := iObj.Parent()
var (
// TODO(adonovan): avoid FreshName when not needed; see errorsastype.
okVarName = refactor.FreshName(scope, iIdent.Pos(), "ok")
beforeVarName = refactor.FreshName(scope, iIdent.Pos(), "before")
afterVarName = refactor.FreshName(scope, iIdent.Pos(), "after")
foundVarName = refactor.FreshName(scope, iIdent.Pos(), "found") // for Contains()
)
// If there will be no uses of ok, before, or after, use the
// blank identifier instead.
if len(negative) == 0 && len(nonnegative) == 0 {
okVarName = "_"
}
if len(beforeSlice) == 0 {
beforeVarName = "_"
}
if len(afterSlice) == 0 {
afterVarName = "_"
}
var edits []analysis.TextEdit
replace := func(exprs []ast.Expr, new string) {
for _, expr := range exprs {
edits = append(edits, analysis.TextEdit{
Pos: expr.Pos(),
End: expr.End(),
NewText: []byte(new),
})
}
}
// Get the ident for the call to strings.Index, which could just be
// "Index" if the strings package is dot imported.
indexCallId := typesinternal.UsedIdent(info, indexCall.Fun)
replacedFunc := "Cut"
if isContains {
replacedFunc = "Contains"
replace(negative, "!"+foundVarName) // idx < 0 -> !found
replace(nonnegative, foundVarName) // idx > -1 -> found
// Replace the assignment with found, and replace the call to
// Index or IndexByte with a call to Contains.
// i := strings.Index (...)
// ----- --------
// found := strings.Contains(...)
edits = append(edits, analysis.TextEdit{
Pos: iIdent.Pos(),
End: iIdent.End(),
NewText: []byte(foundVarName),
}, analysis.TextEdit{
Pos: indexCallId.Pos(),
End: indexCallId.End(),
NewText: []byte("Contains"),
})
} else {
replace(negative, "!"+okVarName) // idx < 0 -> !ok
replace(nonnegative, okVarName) // idx > -1 -> ok
replace(beforeSlice, beforeVarName) // s[:idx] -> before
replace(afterSlice, afterVarName) // s[idx+k:] -> after
// Replace the assignment with before, after, ok, and replace
// the call to Index or IndexByte with a call to Cut.
// i := strings.Index(...)
// ----------------- -----
// before, after, ok := strings.Cut (...)
edits = append(edits, analysis.TextEdit{
Pos: iIdent.Pos(),
End: iIdent.End(),
NewText: fmt.Appendf(nil, "%s, %s, %s", beforeVarName, afterVarName, okVarName),
}, analysis.TextEdit{
Pos: indexCallId.Pos(),
End: indexCallId.End(),
NewText: []byte("Cut"),
})
}
// Calls to IndexByte have a byte as their second arg, which
// must be converted to a string or []byte to be a valid arg for Cut/Contains.
if obj.Name() == "IndexByte" {
switch obj.Pkg().Name() {
case "strings":
searchByteVal := info.Types[substr].Value
if searchByteVal == nil {
// substr is a variable, e.g. substr := byte('b')
// use string(substr)
edits = append(edits, []analysis.TextEdit{
{
Pos: substr.Pos(),
NewText: []byte("string("),
},
{
Pos: substr.End(),
NewText: []byte(")"),
},
}...)
} else {
// substr is a byte constant
val, _ := constant.Int64Val(searchByteVal) // inv: must be a valid byte
// strings.Cut/Contains requires a string, so convert byte literal to string literal; e.g. 'a' -> "a", 55 -> "7"
edits = append(edits, analysis.TextEdit{
Pos: substr.Pos(),
End: substr.End(),
NewText: strconv.AppendQuote(nil, string(byte(val))),
})
}
case "bytes":
// bytes.Cut/Contains requires a []byte, so wrap substr in a []byte{}
edits = append(edits, []analysis.TextEdit{
{
Pos: substr.Pos(),
NewText: []byte("[]byte{"),
},
{
Pos: substr.End(),
NewText: []byte("}"),
},
}...)
}
}
pass.Report(analysis.Diagnostic{
Pos: indexCall.Fun.Pos(),
End: indexCall.Fun.End(),
Message: fmt.Sprintf("%s.%s can be simplified using %s.%s",
obj.Pkg().Name(), obj.Name(), obj.Pkg().Name(), replacedFunc),
Category: "stringscut",
SuggestedFixes: []analysis.SuggestedFix{{
Message: fmt.Sprintf("Simplify %s.%s call using %s.%s", obj.Pkg().Name(), obj.Name(), obj.Pkg().Name(), replacedFunc),
TextEdits: edits,
}},
})
}
}
return nil, nil
}
// indexArgValid reports whether expr is a valid strings.Index(_, _) arg
// for the transformation. An arg is valid iff it is:
// - constant;
// - a local variable with no modifying uses after the Index() call; or
// - []byte(x) where x is also valid by this definition.
// All other expressions are assumed not referentially transparent,
// so we cannot be sure that all uses are safe to replace.
func indexArgValid(info *types.Info, index *typeindex.Index, expr ast.Expr, afterPos token.Pos) bool {
tv := info.Types[expr]
if tv.Value != nil {
return true // constant
}
switch expr := expr.(type) {
case *ast.CallExpr:
return types.Identical(tv.Type, byteSliceType) &&
indexArgValid(info, index, expr.Args[0], afterPos) // check s in []byte(s)
case *ast.Ident:
sObj := info.Uses[expr]
sUses := index.Uses(sObj)
return !hasModifyingUses(info, sUses, afterPos)
default:
// For now, skip instances where s or substr are not
// identifers, basic lits, or call expressions of the form
// []byte(s).
// TODO(mkalil): Handle s and substr being expressions like ptr.field[i].
// From adonovan: We'd need to analyze s and substr to see
// whether they are referentially transparent, and if not,
// analyze all code between declaration and use and see if
// there are statements or expressions with potential side
// effects.
return false
}
}
// checkIdxUses inspects the uses of i to make sure they match certain criteria that
// allows us to suggest a modernization. If all uses of i, s and substr match
// one of the following four valid formats, it returns a list of occurrences for
// each format. If any of the uses do not match one of the formats, return nil
// for all values, since we should not offer a replacement.
// 1. negative - a condition equivalent to i < 0
// 2. nonnegative - a condition equivalent to i >= 0
// 3. beforeSlice - a slice of `s` that matches either s[:i], s[0:i]
// 4. afterSlice - a slice of `s` that matches one of: s[i+len(substr):], s[len(substr) + i:], s[i + const], s[k + i] (where k = len(substr))
func checkIdxUses(info *types.Info, uses iter.Seq[inspector.Cursor], s, substr ast.Expr) (negative, nonnegative, beforeSlice, afterSlice []ast.Expr) {
use := func(cur inspector.Cursor) bool {
ek, _ := cur.ParentEdge()
n := cur.Parent().Node()
switch ek {
case edge.BinaryExpr_X, edge.BinaryExpr_Y:
check := n.(*ast.BinaryExpr)
switch checkIdxComparison(info, check) {
case -1:
negative = append(negative, check)
return true
case 1:
nonnegative = append(nonnegative, check)
return true
}
// Check is not equivalent to that i < 0 or i >= 0.
// Might be part of an outer slice expression like s[i + k]
// which requires a different check.
// Check that the thing being sliced is s and that the slice
// doesn't have a max index.
if slice, ok := cur.Parent().Parent().Node().(*ast.SliceExpr); ok &&
sameObject(info, s, slice.X) &&
slice.Max == nil {
if isBeforeSlice(info, ek, slice) {
beforeSlice = append(beforeSlice, slice)
return true
} else if isAfterSlice(info, ek, slice, substr) {
afterSlice = append(afterSlice, slice)
return true
}
}
case edge.SliceExpr_Low, edge.SliceExpr_High:
slice := n.(*ast.SliceExpr)
// Check that the thing being sliced is s and that the slice doesn't
// have a max index.
if sameObject(info, s, slice.X) && slice.Max == nil {
if isBeforeSlice(info, ek, slice) {
beforeSlice = append(beforeSlice, slice)
return true
} else if isAfterSlice(info, ek, slice, substr) {
afterSlice = append(afterSlice, slice)
return true
}
}
}
return false
}
for curIdent := range uses {
if !use(curIdent) {
return nil, nil, nil, nil
}
}
return negative, nonnegative, beforeSlice, afterSlice
}
// hasModifyingUses reports whether any of the uses involve potential
// modifications. Uses involving assignments before the "afterPos" won't be
// considered.
func hasModifyingUses(info *types.Info, uses iter.Seq[inspector.Cursor], afterPos token.Pos) bool {
for curUse := range uses {
ek, _ := curUse.ParentEdge()
if ek == edge.AssignStmt_Lhs {
if curUse.Node().Pos() <= afterPos {
continue
}
assign := curUse.Parent().Node().(*ast.AssignStmt)
if sameObject(info, assign.Lhs[0], curUse.Node().(*ast.Ident)) {
// Modifying use because we are reassigning the value of the object.
return true
}
} else if ek == edge.UnaryExpr_X &&
curUse.Parent().Node().(*ast.UnaryExpr).Op == token.AND {
// Modifying use because we might be passing the object by reference (an explicit &).
// We can ignore the case where we have a method call on the expression (which
// has an implicit &) because we know the type of s and substr are strings
// which cannot have methods on them.
return true
}
}
return false
}
// checkIdxComparison reports whether the check is equivalent to i < 0 or its negation, or neither.
// For equivalent to i >= 0, we only accept this exact BinaryExpr since
// expressions like i > 0 or i >= 1 make a stronger statement about the value of i.
// We avoid suggesting a fix in this case since it may result in an invalid
// transformation (See golang/go#76687).
// Since strings.Index returns exactly -1 if the substring is not found, we
// don't need to handle expressions like i <= -3.
// We return 0 if the expression does not match any of these options.
// We assume that a check passed to checkIdxComparison has i as one of its operands.
func checkIdxComparison(info *types.Info, check *ast.BinaryExpr) int {
// Ensure that the constant (if any) is on the right.
x, op, y := check.X, check.Op, check.Y
if info.Types[x].Value != nil {
x, op, y = y, flip(op), x
}
yIsInt := func(k int64) bool {
return isIntLiteral(info, y, k)
}
if op == token.LSS && yIsInt(0) || // i < 0
op == token.EQL && yIsInt(-1) || // i == -1
op == token.LEQ && yIsInt(-1) { // i <= -1
return -1 // check <=> i is negative
}
if op == token.GEQ && yIsInt(0) || // i >= 0
op == token.NEQ && yIsInt(-1) || // i != -1
op == token.GTR && yIsInt(-1) { // i > -1
return +1 // check <=> i is non-negative
}
return 0 // unknown
}
// flip changes the comparison token as if the operands were flipped.
// It is defined only for == and the four inequalities.
func flip(op token.Token) token.Token {
switch op {
case token.EQL:
return token.EQL // (same)
case token.GEQ:
return token.LEQ
case token.GTR:
return token.LSS
case token.LEQ:
return token.GEQ
case token.LSS:
return token.GTR
}
return op
}
// isBeforeSlice reports whether the SliceExpr is of the form s[:i] or s[0:i].
func isBeforeSlice(info *types.Info, ek edge.Kind, slice *ast.SliceExpr) bool {
return ek == edge.SliceExpr_High && (slice.Low == nil || isZeroIntConst(info, slice.Low))
}
// isAfterSlice reports whether the SliceExpr is of the form s[i+len(substr):],
// or s[i + k:] where k is a const is equal to len(substr).
func isAfterSlice(info *types.Info, ek edge.Kind, slice *ast.SliceExpr, substr ast.Expr) bool {
lowExpr, ok := slice.Low.(*ast.BinaryExpr)
if !ok || slice.High != nil {
return false
}
// Returns true if the expression is a call to len(substr).
isLenCall := func(expr ast.Expr) bool {
call, ok := expr.(*ast.CallExpr)
if !ok || len(call.Args) != 1 {
return false
}
return sameObject(info, substr, call.Args[0]) && typeutil.Callee(info, call) == builtinLen
}
// Handle len([]byte(substr))
if is[*ast.CallExpr](substr) {
call := substr.(*ast.CallExpr)
tv := info.Types[call.Fun]
if tv.IsType() && types.Identical(tv.Type, byteSliceType) {
// Only one arg in []byte conversion.
substr = call.Args[0]
}
}
substrLen := -1
substrVal := info.Types[substr].Value
if substrVal != nil {
switch substrVal.Kind() {
case constant.String:
substrLen = len(constant.StringVal(substrVal))
case constant.Int:
// constant.Value is a byte literal, e.g. bytes.IndexByte(_, 'a')
// or a numeric byte literal, e.g. bytes.IndexByte(_, 65)
substrLen = 1
}
}
switch ek {
case edge.BinaryExpr_X:
kVal := info.Types[lowExpr.Y].Value
if kVal == nil {
// i + len(substr)
return lowExpr.Op == token.ADD && isLenCall(lowExpr.Y)
} else {
// i + k
kInt, ok := constant.Int64Val(kVal)
return ok && substrLen == int(kInt)
}
case edge.BinaryExpr_Y:
kVal := info.Types[lowExpr.X].Value
if kVal == nil {
// len(substr) + i
return lowExpr.Op == token.ADD && isLenCall(lowExpr.X)
} else {
// k + i
kInt, ok := constant.Int64Val(kVal)
return ok && substrLen == int(kInt)
}
}
return false
}
// sameObject reports whether we know that the expressions resolve to the same object.
func sameObject(info *types.Info, expr1, expr2 ast.Expr) bool {
if ident1, ok := expr1.(*ast.Ident); ok {
if ident2, ok := expr2.(*ast.Ident); ok {
uses1, ok1 := info.Uses[ident1]
uses2, ok2 := info.Uses[ident2]
return ok1 && ok2 && uses1 == uses2
}
}
return false
}