Alan Donovan | 7e1bfe8 | 2023-07-16 13:14:42 -0400 | [diff] [blame] | 1 | // Copyright 2023 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package unitchecker_test |
| 6 | |
| 7 | // This file illustrates separate analysis with an example. |
| 8 | |
| 9 | import ( |
| 10 | "bytes" |
| 11 | "encoding/json" |
| 12 | "fmt" |
| 13 | "go/token" |
| 14 | "go/types" |
| 15 | "io" |
| 16 | "log" |
| 17 | "os" |
| 18 | "os/exec" |
| 19 | "path/filepath" |
| 20 | "strings" |
| 21 | "sync/atomic" |
| 22 | |
| 23 | "golang.org/x/tools/go/analysis/passes/printf" |
| 24 | "golang.org/x/tools/go/analysis/unitchecker" |
| 25 | "golang.org/x/tools/go/gcexportdata" |
| 26 | "golang.org/x/tools/go/packages" |
| 27 | "golang.org/x/tools/txtar" |
| 28 | ) |
| 29 | |
| 30 | // ExampleSeparateAnalysis demonstrates the principle of separate |
| 31 | // analysis, the distribution of units of type-checking and analysis |
| 32 | // work across several processes, using serialized summaries to |
| 33 | // communicate between them. |
| 34 | // |
| 35 | // It uses two different kinds of task, "manager" and "worker": |
| 36 | // |
| 37 | // - The manager computes the graph of package dependencies, and makes |
| 38 | // a request to the worker for each package. It does not parse, |
| 39 | // type-check, or analyze Go code. It is analogous "go vet". |
| 40 | // |
| 41 | // - The worker, which contains the Analyzers, reads each request, |
| 42 | // loads, parses, and type-checks the files of one package, |
| 43 | // applies all necessary analyzers to the package, then writes |
| 44 | // its results to a file. It is a unitchecker-based driver, |
| 45 | // analogous to the program specified by go vet -vettool= flag. |
| 46 | // |
| 47 | // In practice these would be separate executables, but for simplicity |
| 48 | // of this example they are provided by one executable in two |
| 49 | // different modes: the Example function is the manager, and the same |
| 50 | // executable invoked with ENTRYPOINT=worker is the worker. |
| 51 | // (See TestIntegration for how this happens.) |
| 52 | func ExampleSeparateAnalysis() { |
| 53 | // src is an archive containing a module with a printf mistake. |
| 54 | const src = ` |
| 55 | -- go.mod -- |
| 56 | module separate |
| 57 | go 1.18 |
| 58 | |
| 59 | -- main/main.go -- |
| 60 | package main |
| 61 | |
| 62 | import "separate/lib" |
| 63 | |
| 64 | func main() { |
| 65 | lib.MyPrintf("%s", 123) |
| 66 | } |
| 67 | |
| 68 | -- lib/lib.go -- |
| 69 | package lib |
| 70 | |
| 71 | import "fmt" |
| 72 | |
| 73 | func MyPrintf(format string, args ...any) { |
| 74 | fmt.Printf(format, args...) |
| 75 | } |
| 76 | ` |
| 77 | |
| 78 | // Expand archive into tmp tree. |
| 79 | tmpdir, err := os.MkdirTemp("", "SeparateAnalysis") |
| 80 | if err != nil { |
| 81 | log.Fatal(err) |
| 82 | } |
| 83 | if err := extractTxtar(txtar.Parse([]byte(src)), tmpdir); err != nil { |
| 84 | log.Fatal(err) |
| 85 | } |
| 86 | |
| 87 | // Load metadata for the main package and all its dependencies. |
| 88 | cfg := &packages.Config{ |
| 89 | Mode: packages.NeedName | packages.NeedFiles | packages.NeedCompiledGoFiles | packages.NeedImports | packages.NeedModule, |
| 90 | Dir: tmpdir, |
| 91 | Env: append(os.Environ(), |
| 92 | "GOPROXY=off", // disable network |
| 93 | "GOWORK=off", // an ambient GOWORK value would break package loading |
| 94 | ), |
| 95 | } |
| 96 | pkgs, err := packages.Load(cfg, "separate/main") |
| 97 | if err != nil { |
| 98 | log.Fatal(err) |
| 99 | } |
| 100 | // Stop if any package had a metadata error. |
| 101 | if packages.PrintErrors(pkgs) > 0 { |
| 102 | os.Exit(1) |
| 103 | } |
| 104 | |
| 105 | // Now we have loaded the import graph, |
| 106 | // let's begin the proper work of the manager. |
| 107 | |
| 108 | // Gather root packages. They will get all analyzers, |
| 109 | // whereas dependencies get only the subset that |
| 110 | // produce facts or are required by them. |
| 111 | roots := make(map[*packages.Package]bool) |
| 112 | for _, pkg := range pkgs { |
| 113 | roots[pkg] = true |
| 114 | } |
| 115 | |
| 116 | // nextID generates sequence numbers for each unit of work. |
| 117 | // We use it to create names of temporary files. |
| 118 | var nextID atomic.Int32 |
| 119 | |
| 120 | // Visit all packages in postorder: dependencies first. |
| 121 | // TODO(adonovan): opt: use parallel postorder. |
| 122 | packages.Visit(pkgs, nil, func(pkg *packages.Package) { |
| 123 | if pkg.PkgPath == "unsafe" { |
| 124 | return |
| 125 | } |
| 126 | |
| 127 | // Choose a unique prefix for temporary files |
| 128 | // (.cfg .types .facts) produced by this package. |
| 129 | // We stow it in an otherwise unused field of |
| 130 | // Package so it can be accessed by our importers. |
| 131 | prefix := fmt.Sprintf("%s/%d", tmpdir, nextID.Add(1)) |
| 132 | pkg.ExportFile = prefix |
| 133 | |
| 134 | // Construct the request to the worker. |
| 135 | var ( |
| 136 | importMap = make(map[string]string) |
| 137 | packageFile = make(map[string]string) |
| 138 | packageVetx = make(map[string]string) |
| 139 | ) |
| 140 | for importPath, dep := range pkg.Imports { |
| 141 | importMap[importPath] = dep.PkgPath |
| 142 | if depPrefix := dep.ExportFile; depPrefix != "" { // skip "unsafe" |
| 143 | packageFile[dep.PkgPath] = depPrefix + ".types" |
| 144 | packageVetx[dep.PkgPath] = depPrefix + ".facts" |
| 145 | } |
| 146 | } |
| 147 | cfg := unitchecker.Config{ |
| 148 | ID: pkg.ID, |
| 149 | ImportPath: pkg.PkgPath, |
| 150 | GoFiles: pkg.CompiledGoFiles, |
| 151 | NonGoFiles: pkg.OtherFiles, |
| 152 | IgnoredFiles: pkg.IgnoredFiles, |
| 153 | ImportMap: importMap, |
| 154 | PackageFile: packageFile, |
| 155 | PackageVetx: packageVetx, |
| 156 | VetxOnly: !roots[pkg], |
| 157 | VetxOutput: prefix + ".facts", |
| 158 | } |
| 159 | if pkg.Module != nil { |
| 160 | if v := pkg.Module.GoVersion; v != "" { |
| 161 | cfg.GoVersion = "go" + v |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | // Write the JSON configuration message to a file. |
| 166 | cfgData, err := json.Marshal(cfg) |
| 167 | if err != nil { |
| 168 | log.Fatal(err) |
| 169 | } |
| 170 | cfgFile := prefix + ".cfg" |
| 171 | if err := os.WriteFile(cfgFile, cfgData, 0666); err != nil { |
| 172 | log.Fatal(err) |
| 173 | } |
| 174 | |
| 175 | // Send the request to the worker. |
| 176 | cmd := exec.Command(os.Args[0], "-json", cfgFile) |
| 177 | cmd.Stderr = os.Stderr |
| 178 | cmd.Stdout = new(bytes.Buffer) |
| 179 | cmd.Env = append(os.Environ(), "ENTRYPOINT=worker") |
| 180 | if err := cmd.Run(); err != nil { |
| 181 | log.Fatal(err) |
| 182 | } |
| 183 | |
| 184 | // Parse JSON output and print plainly. |
| 185 | dec := json.NewDecoder(cmd.Stdout.(io.Reader)) |
| 186 | for { |
| 187 | type jsonDiagnostic struct { |
| 188 | Posn string `json:"posn"` |
| 189 | Message string `json:"message"` |
| 190 | } |
| 191 | // 'results' maps Package.Path -> Analyzer.Name -> diagnostics |
| 192 | var results map[string]map[string][]jsonDiagnostic |
| 193 | if err := dec.Decode(&results); err != nil { |
| 194 | if err == io.EOF { |
| 195 | break |
| 196 | } |
| 197 | log.Fatal(err) |
| 198 | } |
| 199 | for _, result := range results { |
| 200 | for analyzer, diags := range result { |
| 201 | for _, diag := range diags { |
| 202 | rel := strings.ReplaceAll(diag.Posn, tmpdir, "") |
| 203 | rel = filepath.ToSlash(rel) |
| 204 | fmt.Printf("%s: [%s] %s\n", |
| 205 | rel, analyzer, diag.Message) |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | } |
| 210 | }) |
| 211 | |
| 212 | // Observe that the example produces a fact-based diagnostic |
| 213 | // from separate analysis of "main", "lib", and "fmt": |
| 214 | |
| 215 | // Output: |
| 216 | // /main/main.go:6:2: [printf] separate/lib.MyPrintf format %s has arg 123 of wrong type int |
| 217 | } |
| 218 | |
| 219 | // -- worker process -- |
| 220 | |
| 221 | // worker is the main entry point for a unitchecker-based driver |
| 222 | // with only a single analyzer, for illustration. |
| 223 | func worker() { |
| 224 | // Currently the unitchecker API doesn't allow clients to |
| 225 | // control exactly how and where fact and type information |
| 226 | // is produced and consumed. |
| 227 | // |
| 228 | // So, for example, it assumes that type information has |
| 229 | // already been produced by the compiler, which is true when |
| 230 | // running under "go vet", but isn't necessary. It may be more |
| 231 | // convenient and efficient for a distributed analysis system |
| 232 | // if the worker generates both of them, which is the approach |
| 233 | // taken in this example; they could even be saved as two |
| 234 | // sections of a single file. |
| 235 | // |
| 236 | // Consequently, this test currently needs special access to |
| 237 | // private hooks in unitchecker to control how and where facts |
| 238 | // and types are produced and consumed. In due course this |
| 239 | // will become a respectable public API. In the meantime, it |
| 240 | // should at least serve as a demonstration of how one could |
| 241 | // fork unitchecker to achieve separate analysis without go vet. |
| 242 | unitchecker.SetTypeImportExport(makeTypesImporter, exportTypes) |
| 243 | |
| 244 | unitchecker.Main(printf.Analyzer) |
| 245 | } |
| 246 | |
| 247 | func makeTypesImporter(cfg *unitchecker.Config, fset *token.FileSet) types.Importer { |
| 248 | imports := make(map[string]*types.Package) |
| 249 | return importerFunc(func(importPath string) (*types.Package, error) { |
| 250 | // Resolve import path to package path (vendoring, etc) |
| 251 | path, ok := cfg.ImportMap[importPath] |
| 252 | if !ok { |
| 253 | return nil, fmt.Errorf("can't resolve import %q", path) |
| 254 | } |
| 255 | if path == "unsafe" { |
| 256 | return types.Unsafe, nil |
| 257 | } |
| 258 | |
| 259 | // Find, read, and decode file containing type information. |
| 260 | file, ok := cfg.PackageFile[path] |
| 261 | if !ok { |
| 262 | return nil, fmt.Errorf("no package file for %q", path) |
| 263 | } |
| 264 | f, err := os.Open(file) |
| 265 | if err != nil { |
| 266 | return nil, err |
| 267 | } |
| 268 | defer f.Close() // ignore error |
| 269 | return gcexportdata.Read(f, fset, imports, path) |
| 270 | }) |
| 271 | } |
| 272 | |
| 273 | func exportTypes(cfg *unitchecker.Config, fset *token.FileSet, pkg *types.Package) error { |
| 274 | var out bytes.Buffer |
| 275 | if err := gcexportdata.Write(&out, fset, pkg); err != nil { |
| 276 | return err |
| 277 | } |
| 278 | typesFile := strings.TrimSuffix(cfg.VetxOutput, ".facts") + ".types" |
| 279 | return os.WriteFile(typesFile, out.Bytes(), 0666) |
| 280 | } |
| 281 | |
| 282 | // -- helpers -- |
| 283 | |
// importerFunc adapts an ordinary function to the types.Importer
// interface.
type importerFunc func(path string) (*types.Package, error)

// importerFunc must satisfy types.Importer.
var _ types.Importer = importerFunc(nil)

// Import calls fn with the given import path and returns its result.
func (fn importerFunc) Import(importPath string) (*types.Package, error) {
	return fn(importPath)
}
| 287 | |
| 288 | // extractTxtar writes each archive file to the corresponding location beneath dir. |
| 289 | // |
| 290 | // TODO(adonovan): move this to txtar package, we need it all the time (#61386). |
| 291 | func extractTxtar(ar *txtar.Archive, dir string) error { |
| 292 | for _, file := range ar.Files { |
| 293 | name := filepath.Join(dir, file.Name) |
| 294 | if err := os.MkdirAll(filepath.Dir(name), 0777); err != nil { |
| 295 | return err |
| 296 | } |
| 297 | if err := os.WriteFile(name, file.Data, 0666); err != nil { |
| 298 | return err |
| 299 | } |
| 300 | } |
| 301 | return nil |
| 302 | } |