// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
| 5 | package cache |
| 6 | |
| 7 | import ( |
| 8 | "context" |
| 9 | "os" |
| 10 | "sync" |
| 11 | "time" |
| 12 | |
Rob Findley | c823488 | 2023-11-17 16:14:35 -0500 | [diff] [blame] | 13 | "golang.org/x/tools/gopls/internal/file" |
Alan Donovan | 6d109d1 | 2024-01-23 15:35:40 -0500 | [diff] [blame] | 14 | "golang.org/x/tools/gopls/internal/protocol" |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 15 | "golang.org/x/tools/internal/event" |
| 16 | "golang.org/x/tools/internal/event/tag" |
| 17 | "golang.org/x/tools/internal/robustio" |
| 18 | ) |
| 19 | |
| 20 | // A memoizedFS is a file source that memoizes reads, to reduce IO. |
| 21 | type memoizedFS struct { |
| 22 | mu sync.Mutex |
| 23 | |
| 24 | // filesByID maps existing file inodes to the result of a read. |
| 25 | // (The read may have failed, e.g. due to EACCES or a delete between stat+read.) |
| 26 | // Each slice is a non-empty list of aliases: different URIs. |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 27 | filesByID map[robustio.FileID][]*diskFile |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 28 | } |
| 29 | |
Rob Findley | b85cfba | 2023-12-15 18:12:52 -0500 | [diff] [blame] | 30 | func newMemoizedFS() *memoizedFS { |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 31 | return &memoizedFS{filesByID: make(map[robustio.FileID][]*diskFile)} |
Rob Findley | b85cfba | 2023-12-15 18:12:52 -0500 | [diff] [blame] | 32 | } |
| 33 | |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 34 | // A diskFile is a file in the filesystem, or a failure to read one. |
Alan Donovan | 6823da4 | 2024-01-23 15:29:30 -0500 | [diff] [blame] | 35 | // It implements the file.Source interface. |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 36 | type diskFile struct { |
Alan Donovan | ccacebc | 2023-11-17 16:06:37 -0500 | [diff] [blame] | 37 | uri protocol.DocumentURI |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 38 | modTime time.Time |
| 39 | content []byte |
Rob Findley | c823488 | 2023-11-17 16:14:35 -0500 | [diff] [blame] | 40 | hash file.Hash |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 41 | err error |
| 42 | } |
| 43 | |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 44 | func (h *diskFile) URI() protocol.DocumentURI { return h.uri } |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 45 | |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 46 | func (h *diskFile) Identity() file.Identity { |
Rob Findley | c823488 | 2023-11-17 16:14:35 -0500 | [diff] [blame] | 47 | return file.Identity{ |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 48 | URI: h.uri, |
| 49 | Hash: h.hash, |
| 50 | } |
| 51 | } |
| 52 | |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 53 | func (h *diskFile) SameContentsOnDisk() bool { return true } |
| 54 | func (h *diskFile) Version() int32 { return 0 } |
| 55 | func (h *diskFile) Content() ([]byte, error) { return h.content, h.err } |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 56 | |
Alan Donovan | 36ed0b1 | 2023-03-13 14:20:23 -0400 | [diff] [blame] | 57 | // ReadFile stats and (maybe) reads the file, updates the cache, and returns it. |
Rob Findley | c823488 | 2023-11-17 16:14:35 -0500 | [diff] [blame] | 58 | func (fs *memoizedFS) ReadFile(ctx context.Context, uri protocol.DocumentURI) (file.Handle, error) { |
Alan Donovan | 1cab127 | 2023-11-17 16:16:59 -0500 | [diff] [blame] | 59 | id, mtime, err := robustio.GetFileID(uri.Path()) |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 60 | if err != nil { |
| 61 | // file does not exist |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 62 | return &diskFile{ |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 63 | err: err, |
| 64 | uri: uri, |
| 65 | }, nil |
| 66 | } |
| 67 | |
| 68 | // We check if the file has changed by comparing modification times. Notably, |
| 69 | // this is an imperfect heuristic as various systems have low resolution |
| 70 | // mtimes (as much as 1s on WSL or s390x builders), so we only cache |
| 71 | // filehandles if mtime is old enough to be reliable, meaning that we don't |
| 72 | // expect a subsequent write to have the same mtime. |
| 73 | // |
| 74 | // The coarsest mtime precision we've seen in practice is 1s, so consider |
| 75 | // mtime to be unreliable if it is less than 2s old. Capture this before |
| 76 | // doing anything else. |
| 77 | recentlyModified := time.Since(mtime) < 2*time.Second |
| 78 | |
| 79 | fs.mu.Lock() |
| 80 | fhs, ok := fs.filesByID[id] |
| 81 | if ok && fhs[0].modTime.Equal(mtime) { |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 82 | var fh *diskFile |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 83 | // We have already seen this file and it has not changed. |
| 84 | for _, h := range fhs { |
| 85 | if h.uri == uri { |
| 86 | fh = h |
| 87 | break |
| 88 | } |
| 89 | } |
| 90 | // No file handle for this exact URI. Create an alias, but share content. |
| 91 | if fh == nil { |
| 92 | newFH := *fhs[0] |
| 93 | newFH.uri = uri |
| 94 | fh = &newFH |
| 95 | fhs = append(fhs, fh) |
| 96 | fs.filesByID[id] = fhs |
| 97 | } |
| 98 | fs.mu.Unlock() |
| 99 | return fh, nil |
| 100 | } |
| 101 | fs.mu.Unlock() |
| 102 | |
| 103 | // Unknown file, or file has changed. Read (or re-read) it. |
| 104 | fh, err := readFile(ctx, uri, mtime) // ~25us |
| 105 | if err != nil { |
| 106 | return nil, err // e.g. cancelled (not: read failed) |
| 107 | } |
| 108 | |
| 109 | fs.mu.Lock() |
| 110 | if !recentlyModified { |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 111 | fs.filesByID[id] = []*diskFile{fh} |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 112 | } else { |
| 113 | delete(fs.filesByID, id) |
| 114 | } |
| 115 | fs.mu.Unlock() |
| 116 | return fh, nil |
| 117 | } |
| 118 | |
Rob Findley | 1c5ccad | 2023-04-18 13:48:38 -0400 | [diff] [blame] | 119 | // fileStats returns information about the set of files stored in fs. It is |
| 120 | // intended for debugging only. |
| 121 | func (fs *memoizedFS) fileStats() (files, largest, errs int) { |
| 122 | fs.mu.Lock() |
| 123 | defer fs.mu.Unlock() |
| 124 | |
| 125 | files = len(fs.filesByID) |
| 126 | largest = 0 |
| 127 | errs = 0 |
| 128 | |
| 129 | for _, files := range fs.filesByID { |
| 130 | rep := files[0] |
| 131 | if len(rep.content) > largest { |
| 132 | largest = len(rep.content) |
| 133 | } |
| 134 | if rep.err != nil { |
| 135 | errs++ |
| 136 | } |
| 137 | } |
| 138 | return files, largest, errs |
| 139 | } |
| 140 | |
// ioLimit is a counting semaphore that bounds the number of file reads in
// flight per process: readFile sends to it before reading and receives from
// it afterwards, so at most 128 reads run in parallel.
var ioLimit = make(chan struct{}, 128)
| 143 | |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 144 | func readFile(ctx context.Context, uri protocol.DocumentURI, mtime time.Time) (*diskFile, error) { |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 145 | select { |
| 146 | case ioLimit <- struct{}{}: |
| 147 | case <-ctx.Done(): |
| 148 | return nil, ctx.Err() |
| 149 | } |
| 150 | defer func() { <-ioLimit }() |
| 151 | |
Alan Donovan | 1cab127 | 2023-11-17 16:16:59 -0500 | [diff] [blame] | 152 | ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Path())) |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 153 | _ = ctx |
| 154 | defer done() |
| 155 | |
| 156 | // It is possible that a race causes us to read a file with different file |
| 157 | // ID, or whose mtime differs from the given mtime. However, in these cases |
| 158 | // we expect the client to notify of a subsequent file change, and the file |
| 159 | // content should be eventually consistent. |
Alan Donovan | 1cab127 | 2023-11-17 16:16:59 -0500 | [diff] [blame] | 160 | content, err := os.ReadFile(uri.Path()) // ~20us |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 161 | if err != nil { |
| 162 | content = nil // just in case |
| 163 | } |
Alan Donovan | bd547e5 | 2024-01-22 16:57:51 -0500 | [diff] [blame] | 164 | return &diskFile{ |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 165 | modTime: mtime, |
| 166 | uri: uri, |
| 167 | content: content, |
Rob Findley | c823488 | 2023-11-17 16:14:35 -0500 | [diff] [blame] | 168 | hash: file.HashOf(content), |
Robert Findley | ae242ec | 2023-01-19 16:41:08 -0500 | [diff] [blame] | 169 | err: err, |
| 170 | }, nil |
| 171 | } |