blob: cbbb1ac863de7bcf25e90f6009b9212bbe37347a [file] [log] [blame]
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package symbols
import (
"bytes"
"errors"
"fmt"
"go/ast"
"go/parser"
"go/printer"
"go/token"
"io/fs"
"os"
"path"
"path/filepath"
"reflect"
"strings"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"golang.org/x/mod/modfile"
"golang.org/x/vulndb/internal/derrors"
)
// Patched returns symbols of module patched in commit identified
// by commitHash. r is the git repository containing the module.
//
// Patched returns a map from package import paths to symbols
// patched in the package. Test packages and symbols are omitted.
// The order of symbols within each slice is not specified.
//
// The commit must have exactly one parent. An error is returned
// both for commits with more than one parent and for commits with
// no parent, since in either case there is no single previous
// version of the code to compare against.
//
// Patched checks out commits in the worktree of r while it runs.
// The worktree is reset by resetWorktree before Patched returns.
func Patched(module, commitHash string, r *repository) (_ map[string][]string, err error) {
	defer derrors.Wrap(&err, "Patched(%s, %s, %s)", module, r.url, commitHash)

	repo := r.repo
	w, err := repo.Worktree()
	if err != nil {
		return nil, err
	}
	// The checkouts below move the worktree around; reset it on exit.
	defer resetWorktree(r.repo, w)

	hash := plumbing.NewHash(commitHash)
	commit, err := findCommit(repo, w, hash)
	if err != nil {
		return nil, err
	}
	// Distinguish the two unsupported shapes so that a commit without
	// parents is not misreported as having too many of them.
	switch n := commit.NumParents(); {
	case n == 0:
		return nil, errors.New("commit has no parent")
	case n > 1:
		return nil, fmt.Errorf("more than 1 parent: %d", n)
	}
	parent, err := commit.Parent(0)
	if err != nil {
		return nil, err
	}

	// Index the symbols as of the patch commit...
	if err := w.Checkout(&git.CheckoutOptions{Hash: hash, Force: true}); err != nil {
		return nil, err
	}
	newSymbols, err := moduleSymbols(r.root, module)
	if err != nil {
		return nil, err
	}

	// ...and as of its parent, i.e., right before the patch.
	if err := w.Checkout(&git.CheckoutOptions{Hash: parent.Hash, Force: true}); err != nil {
		return nil, err
	}
	oldSymbols, err := moduleSymbols(r.root, module)
	if err != nil {
		return nil, err
	}

	patched, err := patchedSymbols(oldSymbols, newSymbols)
	if err != nil {
		return nil, err
	}

	// Group the patched symbols by the import path of their package.
	pkgSyms := make(map[string][]string)
	for _, sym := range patched {
		pkgSyms[sym.pkg] = append(pkgSyms[sym.pkg], sym.symbol)
	}
	return pkgSyms, nil
}
// resetWorktree fetches into r using default fetch options and then
// hard-resets the worktree w. It is intended to bring the worktree
// back to MAIN/MASTER@HEAD.
//
// Both steps are best effort: their errors are deliberately discarded.
//
// NOTE(review): no target commit is passed to Reset, so presumably the
// reset targets whatever HEAD currently points to. Confirm that this
// returns to the default branch after a checkout by hash.
func resetWorktree(r *git.Repository, w *git.Worktree) {
	fetchOpts := &git.FetchOptions{}
	_ = r.Fetch(fetchOpts) // best effort, error intentionally ignored

	resetOpts := &git.ResetOptions{Mode: git.HardReset}
	_ = w.Reset(resetOpts) // best effort, error intentionally ignored
}
// findCommit attempts to find the commit with the given hash in repo,
// whose worktree is w.
//
// If the commit cannot be looked up right away, findCommit fetches all
// refs from the remote, lists the refs of remote "origin", and checks
// them out one by one, retrying the lookup after each successful
// checkout. Refs that cannot be checked out are skipped. Once the
// commit is found, it is returned and the worktree is left at the ref
// that was checked out last.
//
// An error is returned if fetching, resolving the remote, or listing
// its refs fails, or if the commit is not found after trying all refs.
// Underlying errors are wrapped and can be inspected with errors.Is
// and errors.As.
func findCommit(repo *git.Repository, w *git.Worktree, hash plumbing.Hash) (*object.Commit, error) {
	if commit, err := repo.CommitObject(hash); err == nil {
		return commit, nil
	}

	err := repo.Fetch(&git.FetchOptions{
		RefSpecs: []config.RefSpec{"refs/*:refs/*", "HEAD:refs/heads/HEAD"},
	})
	// Fetch signals "nothing new to download" with the sentinel
	// NoErrAlreadyUpToDate. That is not a failure, so keep going.
	if err != nil && !errors.Is(err, git.NoErrAlreadyUpToDate) {
		return nil, fmt.Errorf("commit not on current branch, failed to fetch remote branches: %w", err)
	}

	remote, err := repo.Remote("origin")
	if err != nil {
		return nil, fmt.Errorf("commit not on current branch, failed to find remote origin: %w", err)
	}
	refList, err := remote.List(&git.ListOptions{})
	if err != nil {
		return nil, fmt.Errorf("commit not on current branch, failed to list remote branches: %w", err)
	}

	for _, ref := range refList {
		if err := w.Checkout(&git.CheckoutOptions{Branch: ref.Name(), Force: true}); err != nil {
			// Could not check out this ref; try the next one.
			continue
		}
		if commit, err := repo.CommitObject(hash); err == nil {
			return commit, nil
		}
	}
	return nil, fmt.Errorf("failed to find the commit %v on %d remote branches", hash, len(refList))
}
// patchedSymbols returns the keys of oldSymbols that are considered
// patched. A key is patched when either
//
//  1. newSymbols has no entry for it, or
//  2. the printed source of the old and the new function differ.
//
// The keys are collected while ranging over a map, so their order is
// not specified. An error is returned if the source of a function
// cannot be printed.
func patchedSymbols(oldSymbols, newSymbols map[symKey]*ast.FuncDecl) ([]symKey, error) {
	var patched []symKey
	for key, oldFn := range oldSymbols {
		if newFn, found := newSymbols[key]; found {
			before, err := source(oldFn)
			if err != nil {
				return nil, err
			}
			after, err := source(newFn)
			if err != nil {
				return nil, err
			}
			if before == after {
				// Present in both versions with the same text.
				continue
			}
		}
		// Either the symbol cannot be located in the new version of
		// the code or its source changed: designate it as patched.
		patched = append(patched, key)
	}
	return patched, nil
}
// source returns the source code of f as text, with leading and
// trailing white space removed. f is printed against a fresh,
// empty file set.
func source(f *ast.FuncDecl) (string, error) {
	var buf bytes.Buffer
	err := printer.Fprint(&buf, token.NewFileSet(), f)
	if err != nil {
		return "", fmt.Errorf("getting source of %s failed: %w", astSymbolName(f), err)
	}
	text := buf.String()
	return strings.TrimSpace(text), nil
}
// moduleSymbols builds an index of all function and method
// declarations of module, which lives in the repo at repoRoot.
// Test symbols are omitted.
//
// If the module is not defined in the repo, an empty index
// is returned. A file that fails to parse aborts the indexing
// and its error is returned.
func moduleSymbols(repoRoot, module string) (map[symKey]*ast.FuncDecl, error) {
	modRoot, goFiles, err := moduleRootAndFiles(repoRoot, module)
	if err != nil {
		return nil, err
	}

	index := make(map[symKey]*ast.FuncDecl)
	fset := token.NewFileSet()
	for _, goFile := range goFiles {
		parsed, err := parser.ParseFile(fset, goFile, nil, 0)
		if err != nil {
			return nil, err
		}
		// Package path and file name are the same for every
		// declaration of the file.
		pkg := packageImportPath(module, modRoot, goFile)
		base := filepath.Base(goFile)
		for _, decl := range parsed.Decls {
			fn, isFunc := decl.(*ast.FuncDecl)
			if !isFunc {
				continue
			}
			key := symKey{pkg: pkg, file: base, symbol: astSymbolName(fn)}
			index[key] = fn
		}
	}

	// Drop the file component from keys that do not need it. This
	// keeps a function recognizable when its name is unique and a
	// patch moves it to a different file (due to, say, refactoring).
	return cleanFileInfo(index), nil
}
// cleanFileInfo returns a copy of syms in which the file field is
// cleared in every key whose (pkg, symbol) pair occurs only once.
// Keys that share their (pkg, symbol) pair with another key keep
// the file field, as it is what tells them apart. syms itself is
// not modified.
func cleanFileInfo(syms map[symKey]*ast.FuncDecl) map[symKey]*ast.FuncDecl {
	// withoutFile returns k with the file field cleared.
	withoutFile := func(k symKey) symKey {
		return symKey{pkg: k.pkg, symbol: k.symbol}
	}

	// declCount tracks how many declarations each symbol
	// has within its package.
	declCount := make(map[symKey]int)
	for key := range syms {
		declCount[withoutFile(key)]++
	}

	cleaned := make(map[symKey]*ast.FuncDecl)
	for key, fn := range syms {
		if bare := withoutFile(key); declCount[bare] <= 1 {
			// The name is unique, the file info is not needed.
			cleaned[bare] = fn
			continue
		}
		// Several functions with the same name: keep the file info.
		cleaned[key] = fn
	}
	return cleaned
}
// symKey identifies a Go function or method in a repo.
// It is used as the key of symbol indices.
type symKey struct {
	// pkg is the import path of the package
	// in which the symbol is defined.
	pkg string
	// file is the base name of the file in which the symbol
	// is defined. It is kept only when several same-named
	// symbols exist in one package (e.g., hidden under
	// different build tags) and is empty otherwise.
	file string
	// symbol is the name of the symbol: "Name" for
	// a function and "Recv.Name" for a method.
	symbol string
}
// moduleRootAndFiles locates module within the repo at repoRoot
// and returns the root directory of the module together with all
// .go files that belong to it. Omitted are test files (*_test.go),
// files in "testdata" directories, and files that belong to a
// module nested inside module.
//
// If there are no Go files or the module does not exist in the
// repo, an empty file slice is returned; in the latter case the
// returned root is empty as well. Each returned file path has
// repoRoot as its prefix.
func moduleRootAndFiles(repoRoot, module string) (string, []string, error) {
	roots, err := moduleRoots(repoRoot)
	if err != nil {
		return "", nil, err
	}
	root, found := roots[module]
	if !found {
		return "", nil, nil
	}

	// ownedByModule reports whether the longest module root
	// containing p is root, i.e., whether p belongs to module
	// itself and not to one of its sub-modules.
	ownedByModule := func(p string) bool {
		innermost := ""
		for _, candidate := range roots {
			if len(candidate) > len(innermost) && subdir(p, candidate) {
				innermost = candidate
			}
		}
		return innermost == root
	}

	var goFiles []string
	visit := func(p string, info fs.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case info.IsDir():
			if filepath.Base(p) == "testdata" {
				// Skip test harness Go files.
				return filepath.SkipDir
			}
			return nil
		case filepath.Ext(p) != ".go":
			// We are only interested in Go files.
			return nil
		case strings.HasSuffix(p, "_test.go"):
			// Skip test Go files.
			return nil
		case !ownedByModule(p):
			// Skip files that belong to sub-modules.
			return nil
		}
		goFiles = append(goFiles, p)
		return nil
	}
	if err := filepath.Walk(root, visit); err != nil {
		return "", nil, err
	}
	return root, goFiles, nil
}
// subdir reports whether target is base itself or is located
// somewhere under base. It assumes that both target and base are
// either absolute paths or relative paths with the same offset.
//
// If target cannot be made relative to base, subdir returns false.
func subdir(target, base string) bool {
	rel, err := filepath.Rel(base, target)
	if err != nil {
		return false
	}
	// rel is a cleaned path, so it leaves base only if its first
	// element is "..". Checking for ".." anywhere in rel would also
	// reject ordinary names that merely contain two dots, such as
	// "a..b" or "..hidden".
	return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
}
// moduleRoots walks repoRoot and returns every directory that is
// the root of a Go module, indexed by the name of that module.
// Directories named "testdata" are not descended into. Each
// returned path has repoRoot as prefix.
//
// If several directories declare the same module name, only one
// of them is kept in the result.
func moduleRoots(repoRoot string) (map[string]string, error) {
	roots := make(map[string]string)
	visit := func(dir string, info fs.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if !info.IsDir() {
			// Only directories can be module roots.
			return nil
		}
		if filepath.Base(dir) == "testdata" {
			return filepath.SkipDir
		}
		name, err := moduleName(dir)
		if err != nil {
			return err
		}
		if name != "" {
			roots[name] = dir
		}
		return nil
	}
	if err := filepath.Walk(repoRoot, visit); err != nil {
		return nil, err
	}
	return roots, nil
}
// moduleName returns the name of the module if path is
// the root of a Go module. Otherwise, returns empty string.
//
// A directory counts as the root of a Go module if it contains
// a go.mod file with a module directive. If go.mod is missing
// or has no module directive (for instance, because it is empty),
// moduleName returns empty string and no error. Failures to read
// or parse an existing go.mod are returned as errors.
func moduleName(path string) (string, error) {
	gomodPath := filepath.Join(path, "go.mod")
	data, err := os.ReadFile(gomodPath)
	if err != nil {
		// go.mod does not exist, so this is not an error.
		// It is just that the current path is not the root
		// of a Go module.
		if errors.Is(err, os.ErrNotExist) {
			return "", nil
		}
		return "", err
	}
	// Pass the real location of the file so that parse
	// errors identify which go.mod of the repo is broken.
	gomod, err := modfile.ParseLax(gomodPath, data, nil)
	if err != nil {
		return "", err
	}
	if gomod.Module == nil {
		// No module directive: there is no name to report, and
		// dereferencing Module below would panic.
		return "", nil
	}
	return gomod.Module.Mod.Path, nil
}
// packageImportPath computes the full import path of the package
// located at pkgPath on local disk, given the path of its module
// and the root of that module on local disk. For instance,
//
//	packageImportPath("golang.org/module", "/module/root",
//		"/module/root/internal/foo/foo.go") =
//		"golang.org/module/internal/foo"
//
// Returns empty string in case of any errors or if pkgPath is
// not located under moduleRoot.
//
// moduleRoot and pkgPath have to be either both absolute or both
// relative paths. The last element of pkgPath is always interpreted
// as a file, hence directory paths should end with a file separator.
func packageImportPath(module, moduleRoot, pkgPath string) string {
	if !subdir(pkgPath, moduleRoot) {
		return ""
	}
	rel, err := filepath.Rel(moduleRoot, filepath.Dir(pkgPath))
	switch {
	case err != nil:
		return ""
	case rel == ".":
		// The package sits directly in moduleRoot.
		return module
	}
	// Import paths are slash-separated on every platform.
	return path.Join(module, filepath.ToSlash(rel))
}
// astSymbolName returns the name of f as a symbol in
// a vulnerability database.
func astSymbolName(f *ast.FuncDecl) string {
name := f.Name.Name
if f.Recv == nil || len(f.Recv.List) == 0 {
return name
}
field := f.Recv.List[0]
if len(field.Names) == 0 {
return "" // sanity
}
// unpackIdent assumes e is of the form id or id[...]
// and then returns id. Otherwise, returns "".
unpackIdent := func(e ast.Expr) string {
switch xv := e.(type) {
case *ast.Ident:
return xv.Name
case *ast.IndexExpr:
if si, ok := xv.X.(*ast.Ident); ok {
return si.Name
}
}
return ""
}
// supported receiver type names are id, *id, id[...], and *id[...].
t := ""
switch xv := field.Type.(type) {
case *ast.StarExpr:
t = unpackIdent(xv.X)
case *ast.Ident, *ast.IndexExpr:
t = unpackIdent(xv)
case *ast.IndexListExpr:
t = unpackIdent(xv.X)
default:
panic(fmt.Sprintf("astSymbolName: unexpected receiver type: %v\n", reflect.TypeOf(field.Type)))
}
return t + "." + name
}